In [1]:
import random

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim

from src.dqn.DQNAgent import DQNAgent
from src.dqn.ReplayBuffer import ReplayBuffer
from src.ProbBidClearing import ProbBidClearing

In [2]:
# Read the raw day-ahead (DAP) and real-time (RTP) price tables from CSV.
DAP = pd.read_csv("./data/CAISO_DAP.csv")
RTP = pd.read_csv("./data/CAISO_RTP.csv")

# Parse the "Date" strings into datetimes, then keep the first row for each
# (timestamp, location) pair so the pivots below see unique keys.
RTP = RTP.assign(
    Date=lambda raw: pd.to_datetime(raw["Date"], format="%m/%d/%Y %I:%M:%S %p")
).drop_duplicates(subset=["Date", "hub"])
DAP = DAP.assign(
    Date=lambda raw: pd.to_datetime(raw["Date"], format="%m/%d/%Y %I:%M:%S %p")
).drop_duplicates(subset=["Date", "zone"])

# Reshape long -> wide: one row per timestamp, one column per zone / hub.
# The index is renamed from "Date" to "ts" so both tables share a join key.
DAP_pivoted = DAP.pivot(index="Date", columns="zone", values="price").rename_axis(
    index="ts"
)
RTP_pivoted = RTP.pivot(index="Date", columns="hub", values="price").rename_axis(
    index="ts"
)

# Outer-join on the timestamp index so no timestamp from either table is lost,
# forward-fill the gaps the join leaves behind, and expose "ts" as a column.
CAISO_PRICES = (
    DAP_pivoted.merge(RTP_pivoted, on=["ts"], how="outer")
    .ffill()
    .reset_index()
)

In [3]:
def _zone_hub_prices(prices, zone, hub):
    """Return a new frame pairing one day-ahead zone with one real-time hub.

    Args:
        prices: DataFrame with a "ts" column plus one price column per
            zone / hub name.
        zone: Name of the column to expose as "dap".
        hub: Name of the column to expose as "rtp".

    Returns:
        A new DataFrame with the columns "ts", "dap" and "rtp", in that order.

    Raises:
        KeyError: If "ts", ``zone`` or ``hub`` is not a column of ``prices``.
    """
    return prices[["ts", zone, hub]].rename(columns={zone: "dap", hub: "rtp"})


# form datasets: each frame pairs a day-ahead zone with a real-time hub
PGAE_NP15 = _zone_hub_prices(CAISO_PRICES, "PGAE", "TH_NP15")
PGAE_ZP26 = _zone_hub_prices(CAISO_PRICES, "PGAE", "TH_ZP26")
SCE_SP15 = _zone_hub_prices(CAISO_PRICES, "SCE", "TH_SP15")
SDGE_SP15 = _zone_hub_prices(CAISO_PRICES, "SDGE", "TH_SP15")

In [4]:
# Hyperparameters
lr = 1e-3          # learning rate, passed to DQNAgent(lr=...)
batchsize = 64     # passed to agent.train
maxlength = 1000   # passed to ReplayBuffer(maxlength=...)
episodes = 10      # passed to agent.train
initialsize = 500  # passed to agent.train; presumably the buffer warm-up size -- TODO confirm
tau = 100          # passed to agent.train; presumably the target-network update period -- TODO confirm
epsilon = 0.2      # NOTE(review): defined here but not passed to the agent in the visible cells
gamma = 0.99       # passed to agent.train; presumably the discount factor -- TODO confirm

# Reproducibility: training is stochastic, so seed every RNG the notebook has
# access to before the agent and replay buffer are constructed. Re-run this
# cell before re-creating the agent to obtain the same run again.
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

In [5]:
# Parameters for the probabilistic bid-clearing model.
conservative_mean = -15  # mean offset for bids placed below RTP
risky_mean = 20          # mean offset for bids placed above RTP
std = 10                 # standard deviation

# Build the clearer and keep a handle to its `norm_prob_clear` method; the
# agent receives this callable rather than the clearer object itself.
clearer = ProbBidClearing(std, risky_mean, conservative_mean)
prob_clear_function = clearer.norm_prob_clear

In [7]:
# Construct the DQN agent on the PGAE / TH_NP15 price frame, using the
# clearing-probability callable built earlier and the "honest" attitude.
# NOTE: a `delay_steps=50` argument was previously tried here and is left disabled.
agent = DQNAgent(
    lr=lr,
    prob_clear=prob_clear_function,
    attitude="honest",
    data=PGAE_NP15,
)

# Construct the replay buffer with the configured `maxlength`.
buffer = ReplayBuffer(maxlength=maxlength)

In [None]:
# Train the agent using the replay buffer and the hyperparameters defined in
# the earlier cells. All arguments are positional, so their order must match
# the signature of DQNAgent.train (defined in src.dqn.DQNAgent, not shown here).
agent.train(buffer, gamma, initialsize, batchsize, tau, episodes)

  states = torch.FloatTensor(states)


Episode 0, Average Return: 21662.181343335084
