In [169]:
import pandas as pd
import numpy as np

from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split, cross_validate, KFold, LeaveOneOut
from sklearn.preprocessing import StandardScaler, normalize, MinMaxScaler, PowerTransformer

from scipy.stats.mstats import winsorize

import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter

from datetime import datetime

import seaborn as sns

# Data processing

## Get data

In [170]:
df_raw = pd.read_csv('exploratory.csv')

print(df_raw.dtypes)

ID_PATH_FLOWMONTH           int64
ISO                        object
PEAKTYPE                   object
HEDGETYPE                  object
SOURCEID                    int64
SINKID                      int64
MW_NET                    float64
MW_BUY                    float64
MW_SELL                   float64
DC_MCP_WAVG               float64
DC_AUCTION_TYPE_FIRST      object
DC_AUCTION_MONTH_FIRST     object
FW_DC_FIRST                 int64
FLOWMONTH                  object
AUCTION_ID                 object
AUCTION_MONTH              object
AUCTION_TYPE               object
FW                          int64
MCP                       float64
CONG                      float64
FUTURE_AUCTION_ID          object
FUTURE_AUCTION_MONTH       object
FUTURE_AUCTION_TYPE        object
FUTURE_FW                 float64
FUTURE_MCP                float64
dtype: object


In [171]:
df_raw.head()

Unnamed: 0,ID_PATH_FLOWMONTH,ISO,PEAKTYPE,HEDGETYPE,SOURCEID,SINKID,MW_NET,MW_BUY,MW_SELL,DC_MCP_WAVG,...,AUCTION_MONTH,AUCTION_TYPE,FW,MCP,CONG,FUTURE_AUCTION_ID,FUTURE_AUCTION_MONTH,FUTURE_AUCTION_TYPE,FUTURE_FW,FUTURE_MCP
0,-9223337983664651781,PJMISO,WEPEAK,Obligation,51252,659771,40.0,40.0,0.0,-16.14,...,2023-02-01,Mar,1,-0.07,0.2016,PJMISO:Apr:1:2023-03-01,2023-03-01,Apr,0.0,1.05
1,-9223234284026271016,PJMISO,OFF7X8,Obligation,1369012492,1348264767,16.4,16.4,0.0,21.965167,...,2023-05-01,Jun,2,23.79,46.4896,PJMISO:Aug:1:2023-07-01,2023-07-01,Aug,0.0,19.41
2,-9223234284026271016,PJMISO,OFF7X8,Obligation,1369012492,1348264767,16.4,16.4,0.0,21.965167,...,2023-05-01,Jun,2,23.79,46.4896,PJMISO:Jul:1:2023-06-01,2023-06-01,Jul,1.0,20.41
3,-9223234284026271016,PJMISO,OFF7X8,Obligation,1369012492,1348264767,16.4,16.4,0.0,21.965167,...,2023-06-01,Jul,1,20.41,46.4896,PJMISO:Aug:1:2023-07-01,2023-07-01,Aug,0.0,19.41
4,-9223116829435080393,PJMISO,OFF7X8,Obligation,32417779,33092303,30.0,30.0,0.0,-26.95,...,2023-08-01,Sep,3,-5.88,-0.4024,PJMISO:Dec:1:2023-11-01,2023-11-01,Dec,0.0,-4.56


In [172]:
df_raw.describe()

Unnamed: 0,ID_PATH_FLOWMONTH,SOURCEID,SINKID,MW_NET,MW_BUY,MW_SELL,DC_MCP_WAVG,FW_DC_FIRST,FW,MCP,CONG,FUTURE_FW,FUTURE_MCP
count,2023091.0,2023091.0,2023091.0,2023091.0,2023091.0,2023091.0,2023091.0,2023091.0,2023091.0,2023091.0,2023091.0,2019640.0,2019640.0
mean,3.260349e+16,438486500.0,447223500.0,10.54111,14.28585,3.744738,26.16037,8.869535,5.589818,32.99981,16.19917,2.311615,29.40816
std,5.322809e+18,692626500.0,691540500.0,18.37164,16.40692,8.016405,321.3635,2.451281,2.603479,339.6788,564.0391,2.196493,323.2869
min,-9.223338e+18,48824.0,48824.0,-182.8,0.0,0.0,-5760.43,2.0,1.0,-6740.72,-27043.18,0.0,-10979.64
25%,-4.577779e+18,21601790.0,31065430.0,0.0,4.2,0.0,-25.19202,7.0,4.0,-24.41,-30.0184,0.0,-25.05
50%,8.05898e+16,38368010.0,44460860.0,8.0,9.1,0.0,-3.11,9.0,6.0,1.72,0.2592,2.0,2.11
75%,4.634187e+18,1084391000.0,1084391000.0,16.5,18.8,5.1,16.11641,11.0,8.0,35.15,37.5776,4.0,37.31
max,9.223273e+18,2156114000.0,2156114000.0,471.8,471.8,182.8,13378.85,12.0,11.0,13690.67,26366.86,10.0,17803.15


## Placeholder for subsetting

In [173]:
df_sample = df_raw.filter(items=[
    'ID_PATH_FLOWMONTH',
    'ISO',
    'PEAKTYPE',
    'HEDGETYPE',
    'SOURCEID',
    'SINKID',
    'MW_NET',
    'MW_BUY',
    'MW_SELL',
    'DC_MCP_WAVG',
    'DC_AUCTION_TYPE_FIRST',
    'DC_AUCTION_MONTH_FIRST',
    'FW_DC_FIRST',
    'FLOWMONTH',
    # 'AUCTION_ID',
    'AUCTION_MONTH',
    'AUCTION_TYPE',
    'FW',
    'MCP',
    'CONG',
    # 'FUTURE_AUCTION_ID',
    'FUTURE_AUCTION_MONTH',
    'FUTURE_AUCTION_TYPE',
    'FUTURE_FW',
    'FUTURE_MCP'
]).copy()

df_sample.head()

Unnamed: 0,ID_PATH_FLOWMONTH,ISO,PEAKTYPE,HEDGETYPE,SOURCEID,SINKID,MW_NET,MW_BUY,MW_SELL,DC_MCP_WAVG,...,FLOWMONTH,AUCTION_MONTH,AUCTION_TYPE,FW,MCP,CONG,FUTURE_AUCTION_MONTH,FUTURE_AUCTION_TYPE,FUTURE_FW,FUTURE_MCP
0,-9223337983664651781,PJMISO,WEPEAK,Obligation,51252,659771,40.0,40.0,0.0,-16.14,...,2023-04-01,2023-02-01,Mar,1,-0.07,0.2016,2023-03-01,Apr,0.0,1.05
1,-9223234284026271016,PJMISO,OFF7X8,Obligation,1369012492,1348264767,16.4,16.4,0.0,21.965167,...,2023-08-01,2023-05-01,Jun,2,23.79,46.4896,2023-07-01,Aug,0.0,19.41
2,-9223234284026271016,PJMISO,OFF7X8,Obligation,1369012492,1348264767,16.4,16.4,0.0,21.965167,...,2023-08-01,2023-05-01,Jun,2,23.79,46.4896,2023-06-01,Jul,1.0,20.41
3,-9223234284026271016,PJMISO,OFF7X8,Obligation,1369012492,1348264767,16.4,16.4,0.0,21.965167,...,2023-08-01,2023-06-01,Jul,1,20.41,46.4896,2023-07-01,Aug,0.0,19.41
4,-9223116829435080393,PJMISO,OFF7X8,Obligation,32417779,33092303,30.0,30.0,0.0,-26.95,...,2023-12-01,2023-08-01,Sep,3,-5.88,-0.4024,2023-11-01,Dec,0.0,-4.56


# Apply bid strategy

In [174]:
df_bids = df_sample.copy()

df_bids['AUCTION_MONTH'] = pd.to_datetime(df_bids['AUCTION_MONTH'], format='%Y-%m-%d')
df_bids['FUTURE_AUCTION_MONTH'] = pd.to_datetime(df_bids['FUTURE_AUCTION_MONTH'], format='%Y-%m-%d')

df_bids['BID'] = df_bids['DC_MCP_WAVG']*1.2 ## bid 20% higher than DC's clearing price

In [175]:
sorted_auction_months = sorted(df_bids['AUCTION_MONTH'].unique())
print(sorted_auction_months)

[Timestamp('2023-02-01 00:00:00'), Timestamp('2023-03-01 00:00:00'), Timestamp('2023-05-01 00:00:00'), Timestamp('2023-06-01 00:00:00'), Timestamp('2023-07-01 00:00:00'), Timestamp('2023-08-01 00:00:00'), Timestamp('2023-09-01 00:00:00'), Timestamp('2023-10-01 00:00:00'), Timestamp('2023-11-01 00:00:00'), Timestamp('2023-12-01 00:00:00'), Timestamp('2024-01-01 00:00:00'), Timestamp('2024-02-01 00:00:00'), Timestamp('2024-03-01 00:00:00')]


# Get portfolio outcomes

In [176]:
## df_bids is really a big set of potential trades

## To look at our portfolio we will do the following iteratively for each AUCTION_MONTH:
## 1) Consider potential trades:
##      a) Consider path-flowmonths that are not in our holdings
##      b) Subset to FW >= 3
## 2) See what clears, add it to *Holdings*
## 3) Get costs and revenue
## 4) Move on to the next AUCTION_MONTH & repeat

In [177]:
auction_month = sorted_auction_months[6]
print(auction_month)

2023-09-01 00:00:00


In [182]:
df_holdings = pd.DataFrame(columns=df_bids.columns)

for auction_month in sorted_auction_months:

    ## 1) Consider potential trades
    ## 1a) Get broad set of potential trades that we are not already holding
    df_potential_trades = df_bids[
        (df_bids['AUCTION_MONTH'] == auction_month) & 
        (~df_bids['ID_PATH_FLOWMONTH'].isin(df_holdings['ID_PATH_FLOWMONTH']))
    ]

    # df_potential_trades[~df_potential_trades['FUTURE_FW'].isna()] ## don't drop these, just have to hold (unless we remap these nodes)

    ## 1b) Only consider trades with enough opportunities to sell
    df_potential_trades = df_potential_trades[df_potential_trades['FW']>=3]
    if df_potential_trades.empty: continue ## Need this because, for example, standing in March, there are no FWs satisfying FW > 3 (planning year is June-June)
    
    ## 2) See what clears and add it to holdings
    ## Check what clears
    df_potential_trades['CLEARED_ENTRY'] = (df_potential_trades['BID'] > df_potential_trades['MCP']).astype(int) ## this logic assumes only BUYS
    ## Only keep what clears
    df_potential_trades = df_potential_trades[df_potential_trades['CLEARED_ENTRY'] == 1]
    
    ## 3) Get costs and revenue
    ## 3a) Entering trades
    df_potential_trades['MCP_ENTRY'] = df_potential_trades['MCP']
    
    ## 3b) Exiting trades
    ## 3b.i) Calculate offers for each potential exit
    df_potential_trades['OFFER_EXIT'] = df_potential_trades['MCP_ENTRY'] * 1.2
    df_potential_trades['CLEARED_EXIT'] = (df_potential_trades['OFFER_EXIT'] < df_potential_trades['FUTURE_MCP']).astype(int) ## this logic assumes only selling a previous BUY
    
    ## 3b.ii) Get the first month where we would exit a given path
    ## Sort by CLEARED_EXIT and then FUTURE_AUCTION_MONTH. Gives us the first thing to clear or the first record
    df_sorted = df_potential_trades.sort_values(by=['ID_PATH_FLOWMONTH', 'AUCTION_MONTH', 'CLEARED_EXIT', 'FUTURE_AUCTION_MONTH'], ascending=[True, True, False, True])
    df_first_rows = df_sorted.groupby(['ID_PATH_FLOWMONTH', 'AUCTION_MONTH']).first().reset_index()
    ## Handling cases where everything is empty <-- double check this or when it occurs
    df_first_rows = df_first_rows.dropna(how='all')
    if df_first_rows.empty: 
        print(f"Month with empty df_first_rows: {auction_month}")
        continue

    ## Add the kept trades to holdings
    df_holdings = pd.concat([df_holdings, df_first_rows]).reset_index(drop=True)



  df_holdings = pd.concat([df_holdings, df_first_rows]).reset_index(drop=True)


In [183]:
df_holdings.head()

Unnamed: 0,ID_PATH_FLOWMONTH,ISO,PEAKTYPE,HEDGETYPE,SOURCEID,SINKID,MW_NET,MW_BUY,MW_SELL,DC_MCP_WAVG,...,CONG,FUTURE_AUCTION_MONTH,FUTURE_AUCTION_TYPE,FUTURE_FW,FUTURE_MCP,BID,CLEARED_ENTRY,MCP_ENTRY,OFFER_EXIT,CLEARED_EXIT
0,-9221875954374263028,PJMISO,WEPEAK,Obligation,51241,1356162213,8.0,8.0,0.0,-13.399222,...,-11.5904,2023-08-01,Sep,7.0,-21.42,-16.079067,1.0,-22.94,-27.528,1.0
1,-9221774299757105218,PJMISO,ONPEAK,Obligation,135389799,338269,30.0,30.0,0.0,448.187725,...,4752.9024,2023-07-01,Aug,9.0,846.35,537.82527,1.0,449.64,539.568,1.0
2,-9221237361624466890,PJMISO,ONPEAK,Obligation,50403,2155502045,20.0,20.0,0.0,148.274847,...,34.6896,2023-10-01,Nov,6.0,113.13,177.929816,1.0,84.48,101.376,1.0
3,-9220810376192278441,PJMISO,ONPEAK,Obligation,40243783,32418595,69.3,69.3,0.0,12.029896,...,14.0992,2023-06-01,Jul,8.0,2.74,14.435875,1.0,13.44,16.128,0.0
4,-9220427141204159986,PJMISO,ONPEAK,Obligation,32417633,32417825,31.9,31.9,0.0,191.613014,...,40.3728,2023-06-01,Jul,7.0,169.62,229.935617,1.0,180.91,217.092,0.0


In [185]:
df_holdings.groupby('AUCTION_MONTH').count()

Unnamed: 0_level_0,ID_PATH_FLOWMONTH,ISO,PEAKTYPE,HEDGETYPE,SOURCEID,SINKID,MW_NET,MW_BUY,MW_SELL,DC_MCP_WAVG,...,CONG,FUTURE_AUCTION_MONTH,FUTURE_AUCTION_TYPE,FUTURE_FW,FUTURE_MCP,BID,CLEARED_ENTRY,MCP_ENTRY,OFFER_EXIT,CLEARED_EXIT
AUCTION_MONTH,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-05-01,21517,21517,21517,21517,21517,21517,21517,21517,21517,21517,...,21517,21085,21085,21085,21085,21517,21517,21517,21517,21517
2023-06-01,6849,6849,6849,6849,6849,6849,6849,6849,6849,6849,...,6849,6849,6849,6849,6849,6849,6849,6849,6849,6849
2023-07-01,4669,4669,4669,4669,4669,4669,4669,4669,4669,4669,...,4669,4669,4669,4669,4669,4669,4669,4669,4669,4669
2023-08-01,3095,3095,3095,3095,3095,3095,3095,3095,3095,3095,...,3095,3095,3095,3095,3095,3095,3095,3095,3095,3095
2023-09-01,2479,2479,2479,2479,2479,2479,2479,2479,2479,2479,...,2479,2396,2396,2396,2396,2479,2479,2479,2479,2479
2023-10-01,2992,2992,2992,2992,2992,2992,2992,2992,2992,2992,...,2992,2992,2992,2992,2992,2992,2992,2992,2992,2992
2023-11-01,2214,2214,2214,2214,2214,2214,2214,2214,2214,2214,...,2214,2214,2214,2214,2214,2214,2214,2214,2214,2214
2023-12-01,1524,1524,1524,1524,1524,1524,1524,1524,1524,1524,...,1524,1523,1523,1523,1523,1524,1524,1524,1524,1524
2024-01-01,674,674,674,674,674,674,674,674,674,674,...,674,674,674,674,674,674,674,674,674,674
