In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from src.dqn.DQNAgent import DQNAgent
from src.dqn.ReplayBuffer import ReplayBuffer
from src.ProbBidClearing import ProbBidClearing

In [2]:
# Read the raw CAISO day-ahead (DAP) and real-time (RTP) price tables.
date_format = "%m/%d/%Y %I:%M:%S %p"
DAP = pd.read_csv("./data/CAISO_DAP.csv")
RTP = pd.read_csv("./data/CAISO_RTP.csv")

# Parse the "Date" strings into timestamps and keep one row per
# (timestamp, location) pair so the pivots below see unique keys.
DAP = DAP.assign(
    Date=pd.to_datetime(DAP["Date"], format=date_format)
).drop_duplicates(subset=["Date", "zone"])
RTP = RTP.assign(
    Date=pd.to_datetime(RTP["Date"], format=date_format)
).drop_duplicates(subset=["Date", "hub"])

# Reshape long -> wide (one price column per zone / hub) and call the
# timestamp index "ts" so both tables share the same join key.
DAP_pivoted = DAP.pivot(index="Date", columns="zone", values="price").rename_axis(
    index="ts"
)
RTP_pivoted = RTP.pivot(index="Date", columns="hub", values="price").rename_axis(
    index="ts"
)

# Outer-join on the "ts" index so timestamps present in only one table are
# kept, forward-fill the resulting gaps, then expose "ts" as a regular column.
CAISO_PRICES = (
    DAP_pivoted.merge(RTP_pivoted, on=["ts"], how="outer")
    .ffill()
    .reset_index()
)

In [3]:
# Build one (ts, dap, rtp) dataset per day-ahead zone / real-time hub pairing.
def _dap_rtp_pair(dap_col, rtp_col):
    """Select ts plus the two given CAISO_PRICES columns, renamed to dap / rtp."""
    return CAISO_PRICES[["ts", dap_col, rtp_col]].rename(
        columns={dap_col: "dap", rtp_col: "rtp"}
    )


PGAE_NP15 = _dap_rtp_pair("PGAE", "TH_NP15")
PGAE_ZP26 = _dap_rtp_pair("PGAE", "TH_ZP26")
SCE_SP15 = _dap_rtp_pair("SCE", "TH_SP15")
SDGE_SP15 = _dap_rtp_pair("SDGE", "TH_SP15")

In [4]:
# Training hyperparameters, consumed by the agent / buffer / training cells.

# -- optimisation --
lr = 0.001     # handed to DQNAgent as `lr`
gamma = 0.99   # handed to agent.train; presumably the discount factor -- confirm

# -- replay buffer --
maxlength = 1000    # handed to ReplayBuffer as `maxlength`
initialsize = 500   # handed to agent.train; presumably the buffer warm-up size -- confirm
batchsize = 64      # handed to agent.train

# -- schedule / exploration --
episodes = 300   # handed to agent.train
tau = 100        # handed to agent.train; presumably the target-sync interval -- TODO confirm
epsilon = 0.2    # NOTE(review): not passed to DQNAgent or agent.train in the cells visible here

In [5]:
# Parameters for the probabilistic bid-clearing model.
conservative_mean = -15  # bid below RTP
risky_mean = 20          # bid above RTP
std = 10                 # standard deviation

# Build the clearer, then keep a reference to its `norm_prob_clear` attribute
# under the name the agent cell passes as `prob_clear`.
clearer = ProbBidClearing(std, risky_mean, conservative_mean)
prob_clear_function = clearer.norm_prob_clear

In [6]:
# DQN agent, configured with the "honest" attitude and the PGAE / TH_NP15
# price dataset, using the clearing function selected earlier.
agent = DQNAgent(
    lr=lr,
    prob_clear=prob_clear_function,
    attitude="honest",
    data=PGAE_NP15,
)

# Replay buffer, sized by the `maxlength` hyperparameter.
buffer = ReplayBuffer(maxlength=maxlength)

In [None]:
# Train the agent. Arguments stay positional, in the original order, because
# the parameter names of DQNAgent.train are not visible in this notebook.
agent.train(
    buffer,
    gamma,
    initialsize,
    batchsize,
    tau,
    episodes,
)

0.0
0.0
-12.628555
-17.679977
0.0
0.0
-10.102844000000001
0.0
0.0
27.782821000000002
0.0
37.885665
0.0
0.0
0.0
0.0
0.0
0.0
0.0
30.308532000000007
0.0
0.0
-17.679977
-7.577133000000002
27.782821000000002
30.308532000000007
0.0
0.0
0.0
-22.731399
-0.0
27.782821000000002
0.0
0.0
-20.205688000000002
0.0
0.0
0.0
0.0
-15.154266000000003
0.0
37.885665
0.0
30.308532000000007
-20.205688000000002
27.782821000000002
27.782821000000002
0.0
0.0
0.0
-22.731399
0.0
27.782821000000002
0.0
0.0
-15.154266000000003
0.0
0.0
0.0
0.0
0.0
0.0
0.0
-17.679977
0.0
-20.205688000000002
27.782821000000002
0.0
-22.731399
32.834243
-17.679977
0.0
-15.154266000000003
32.834243
0.0
32.834243
-2.5257110000000003
-15.154266000000003
0.0
32.834243
0.0
0.0
0.0
0.0
47.98850900000001
32.834243
0.0
-22.731399
0.0
-20.205688000000002
35.359954
0.0
0.0
0.0
0.0
-12.628555
0.0
-12.628555
-0.0
32.834243
0.0
-17.679977
-15.154266000000003
0.0
0.0
30.308532000000007
0.0
0.0
0.0
0.0
0.0
0.0
42.937087000000005
-22.731399
-15.15426600

  states = torch.FloatTensor(states)


0.0
0.0
0.0
0.0
27.782821000000002
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
35.359954
32.834243
0.0
0.0
0.0
0.0
0.0
0.0
32.834243
0.0
30.308532000000007
0.0
0.0
40.411376000000004
0.0
0.0
0.0
45.462798
0.0
0.0
27.782821000000002
45.462798
0.0
0.0
0.0
0.0
0.0
0.0
0.0
30.308532000000007
0.0
0.0
0.0
0.0
27.782821000000002
0.0
0.0
0.0
0.0
0.0
0.0
0.0
35.359954
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
32.834243
0.0
35.359954
0.0
32.834243
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
37.885665
27.782821000000002
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
37.885665
0.0
0.0
0.0
0.0
0.0
0.0
30.308532000000007
0.0
32.834243
0.0
35.359954
27.782821000000002
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
45.462798
0.0
0.0
0.0
0.0
0.0
0.0
0.0
27.782821000000002
27.782821000000002
0.0
0.0
30.308532000000007
0.0
0.0
0.0
30.308532000000007
35.359954
0.0
0.0
0.0
0.0
35.359954
0.0
0.0
32.834243
0.0
0.0
0.0
0.0
0.0
40.411376000000004
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
