In [76]:
import pandas as pd
import numpy as np

from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split, cross_validate, KFold, LeaveOneOut
from sklearn.preprocessing import StandardScaler, normalize, MinMaxScaler, PowerTransformer

from scipy.stats.mstats import winsorize

import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter

from datetime import datetime

import seaborn as sns

# Data processing

## Get data

In [77]:
df_raw = pd.read_csv('exploratory2_flow2023.csv')

print(df_raw.dtypes)

ID_PATH_FLOWMONTH           int64
ISO                        object
PEAKTYPE                   object
HEDGETYPE                  object
SOURCEID                    int64
SINKID                      int64
CLUSTERID_LOW_SRC          object
CLUSTERID_LOW_SNK          object
SOURCE_ZONE                object
SINK_ZONE                  object
MW_NET                    float64
MW_BUY                    float64
MW_SELL                   float64
DC_MCP_WAVG               float64
DC_AUCTION_TYPE_FIRST      object
DC_AUCTION_MONTH_FIRST     object
FW_DC_FIRST                 int64
FLOWMONTH                  object
AUCTION_ID                 object
AUCTION_MONTH              object
AUCTION_TYPE               object
FW                          int64
MCP                       float64
CONG                      float64
FUTURE_AUCTION_ID          object
FUTURE_AUCTION_MONTH       object
FUTURE_AUCTION_TYPE        object
FUTURE_FW                 float64
FUTURE_MCP                float64
MAXMCP_LAST18 

In [78]:
df_raw.head()

Unnamed: 0,ID_PATH_FLOWMONTH,ISO,PEAKTYPE,HEDGETYPE,SOURCEID,SINKID,CLUSTERID_LOW_SRC,CLUSTERID_LOW_SNK,SOURCE_ZONE,SINK_ZONE,...,FUTURE_FW,FUTURE_MCP,MAXMCP_LAST18,MINPOSMCP_LAST18,WINRATE_BUY_LASTMARK,WINRATE_SELL_LASTMARK,MEDIANCONG,MEDIANCONG_INSEASON,CONG_LOW_T4,CONG_TOP_T4
0,-9223337983664651781,PJMISO,WEPEAK,Obligation,51252,659771,51252,PPL:92,PPL,PPL,...,0.0,1.05,7.95,0.83,0.68,0.32,-3.888,-10.1472,-279.1856,8.4324
1,-9223309453048397859,PJMISO,OFF7X8,Obligation,33092311,32417599,33092311,COMED:84,,COMED,...,1.0,13.99,30.150909,6.529787,0.72,0.28,24.2192,61.06645,-237.1028,164.8903
2,-9223309453048397859,PJMISO,OFF7X8,Obligation,33092311,32417599,33092311,COMED:84,,COMED,...,5.0,23.88,30.150909,6.529787,0.72,0.28,24.2192,61.06645,-237.1028,164.8903
3,-9223309453048397859,PJMISO,OFF7X8,Obligation,33092311,32417599,33092311,COMED:84,,COMED,...,3.0,16.79,30.150909,6.529787,0.72,0.28,24.2192,61.06645,-237.1028,164.8903
4,-9223309453048397859,PJMISO,OFF7X8,Obligation,33092311,32417599,33092311,COMED:84,,COMED,...,4.0,8.7,30.150909,6.529787,0.72,0.28,24.2192,61.06645,-237.1028,164.8903


In [79]:
df_raw.describe()

Unnamed: 0,ID_PATH_FLOWMONTH,SOURCEID,SINKID,MW_NET,MW_BUY,MW_SELL,DC_MCP_WAVG,FW_DC_FIRST,FW,MCP,...,FUTURE_FW,FUTURE_MCP,MAXMCP_LAST18,MINPOSMCP_LAST18,WINRATE_BUY_LASTMARK,WINRATE_SELL_LASTMARK,MEDIANCONG,MEDIANCONG_INSEASON,CONG_LOW_T4,CONG_TOP_T4
count,1691864.0,1691864.0,1691864.0,1691864.0,1691864.0,1691864.0,1691864.0,1691864.0,1691864.0,1691864.0,...,1689919.0,1689919.0,1689303.0,1498042.0,1691864.0,1691864.0,1691864.0,1688166.0,1646891.0,1651599.0
mean,2.222451e+16,431621700.0,409268700.0,7.336574,11.62013,4.28356,37.67711,8.623383,5.419218,64.58546,...,2.219655,63.15042,389.2853,57.77314,0.4220933,0.5779067,27.00273,32.99345,-441.5415,550.5386
std,5.321426e+18,677075000.0,670414900.0,17.61361,15.08256,8.715571,559.2906,2.596139,2.629227,759.6594,...,2.161406,674.0969,1078.804,203.2121,0.3315265,0.3315265,305.9581,399.3108,1065.19,1204.124
min,-9.223338e+18,49284.0,49093.0,-196.0,0.0,0.0,-13571.06,2.0,1.0,-15884.62,...,0.0,-15884.62,-3749.9,0.00340426,0.0,0.0,-5895.472,-8713.094,-57548.93,0.004
25%,-4.565703e+18,20648200.0,21601800.0,0.0,1.5,0.0,-49.24225,7.0,3.0,-45.83,...,0.0,-40.69,27.505,3.033191,0.08,0.28,-13.0,-22.0752,-440.3349,69.665
50%,3.140783e+16,40243890.0,40243870.0,8.0,8.0,0.0,-5.2,9.0,5.0,2.89,...,2.0,4.96,110.15,10.98,0.4,0.6,1.796,2.1876,-162.9952,195.7094
75%,4.609211e+18,1084391000.0,135389800.0,12.9,15.0,8.0,35.82793,11.0,7.0,72.89,...,3.0,77.16,336.64,36.47375,0.72,0.92,27.784,40.1608,-57.3468,530.5012
max,9.223079e+18,2156114000.0,2156114000.0,530.5,530.5,225.2,11494.82,12.0,11.0,20984.04,...,10.0,20984.04,92197.26,5497.15,1.0,1.0,8703.502,16284.71,-0.002,53462.36


## Placeholder for subsetting

In [80]:
df_sample = df_raw[
    (df_raw['MW_NET'] != 0) & ## Drop paths that they bought up and totally exited (like an annual flip)
    (df_raw['DC_MCP_WAVG'] != 0) &
    (df_raw['HEDGETYPE'] == 'Obligation') &
    (df_raw['SINK_ZONE'] != 'DPL') &
    (df_raw['SOURCE_ZONE'] != 'DPL')
].copy()

df_sample = df_sample.filter(items=[
    'ID_PATH_FLOWMONTH',
    'ISO',
    'PEAKTYPE',
    'HEDGETYPE',
    'SOURCEID',
    'SINKID',
    'CLUSTERID_LOW_SRC',
    'CLUSTERID_LOW_SNK',
    'SOURCE_ZONE',
    'SINK_ZONE',
    'MW_NET',
    'MW_BUY',
    'MW_SELL',
    'DC_MCP_WAVG',
    'DC_AUCTION_TYPE_FIRST',
    'DC_AUCTION_MONTH_FIRST',
    'FW_DC_FIRST',
    'FLOWMONTH',
    # 'AUCTION_ID',
    'AUCTION_MONTH',
    'AUCTION_TYPE',
    'FW',
    'MCP',
    'CONG',
    # 'FUTURE_AUCTION_ID',
    'FUTURE_AUCTION_MONTH',
    'FUTURE_AUCTION_TYPE',
    'FUTURE_FW',
    'FUTURE_MCP',
    # 'MAXMCP_LAST18',
    # 'MINPOSMCP_LAST18',
    'WINRATE_BUY_LASTMARK',
    'WINRATE_SELL_LASTMARK',
    # 'MEDIANCONG',
    # 'MEDIANCONG_INSEASON',
    # 'CONG_LOW_T4',
    # 'CONG_TOP_T4'
])

df_sample.head()

Unnamed: 0,ID_PATH_FLOWMONTH,ISO,PEAKTYPE,HEDGETYPE,SOURCEID,SINKID,CLUSTERID_LOW_SRC,CLUSTERID_LOW_SNK,SOURCE_ZONE,SINK_ZONE,...,AUCTION_TYPE,FW,MCP,CONG,FUTURE_AUCTION_MONTH,FUTURE_AUCTION_TYPE,FUTURE_FW,FUTURE_MCP,WINRATE_BUY_LASTMARK,WINRATE_SELL_LASTMARK
0,-9223337983664651781,PJMISO,WEPEAK,Obligation,51252,659771,51252,PPL:92,PPL,PPL,...,Mar,1,-0.07,0.2016,2023-03-01,Apr,0.0,1.05,0.68,0.32
1,-9223309453048397859,PJMISO,OFF7X8,Obligation,33092311,32417599,33092311,COMED:84,,COMED,...,Nov,6,5.0,23.7072,2023-03-01,Apr,1.0,13.99,0.72,0.28
2,-9223309453048397859,PJMISO,OFF7X8,Obligation,33092311,32417599,33092311,COMED:84,,COMED,...,Nov,6,5.0,23.7072,2022-11-01,Dec,5.0,23.88,0.72,0.28
3,-9223309453048397859,PJMISO,OFF7X8,Obligation,33092311,32417599,33092311,COMED:84,,COMED,...,Nov,6,5.0,23.7072,2023-01-01,Feb,3.0,16.79,0.72,0.28
4,-9223309453048397859,PJMISO,OFF7X8,Obligation,33092311,32417599,33092311,COMED:84,,COMED,...,Nov,6,5.0,23.7072,2022-12-01,Jan,4.0,8.7,0.72,0.28


# Apply bid strategy

In [81]:
df_bids = df_sample.copy()
df_bids['AUCTION_MONTH'] = pd.to_datetime(df_bids['AUCTION_MONTH'], format='%Y-%m-%d')
df_bids['FUTURE_AUCTION_MONTH'] = pd.to_datetime(df_bids['FUTURE_AUCTION_MONTH'], format='%Y-%m-%d')
df_bids['FLOWMONTH'] = pd.to_datetime(df_bids['FLOWMONTH'], format='%Y-%m-%d')

## Determine whether to flip path (can't sell if we don't own, so flip those)
df_bids['FLIPPED'] = np.where(df_bids['MW_BUY'] >= df_bids['MW_SELL'], 0, 1) ## Flip where they are on the sell side

## Flip variables we need to
########################################################################################
df_bids['SOURCEID2'] = np.where(df_bids['FLIPPED']==0, df_bids['SOURCEID'], df_bids['SINKID'])
df_bids['SINKID2'] = np.where(df_bids['FLIPPED']==0, df_bids['SINKID'], df_bids['SOURCEID'])
df_bids['SOURCEID'] = df_bids['SOURCEID2']
df_bids['SINKID'] = df_bids['SINKID2']
df_bids = df_bids.drop(columns=['SOURCEID2', 'SINKID2'])

df_bids['SOURCE_ZONE2'] = np.where(df_bids['FLIPPED']==0, df_bids['SOURCE_ZONE'], df_bids['SINK_ZONE'])
df_bids['SINK_ZONE2'] = np.where(df_bids['FLIPPED']==0, df_bids['SINK_ZONE'], df_bids['SOURCE_ZONE'])
df_bids['SOURCE_ZONE'] = df_bids['SOURCE_ZONE2']
df_bids['SINK_ZONE'] = df_bids['SINK_ZONE2']
df_bids = df_bids.drop(columns=['SOURCE_ZONE2', 'SINK_ZONE2'])

df_bids['CLUSTERID_LOW_SRC2'] = np.where(df_bids['FLIPPED']==0, df_bids['CLUSTERID_LOW_SRC'], df_bids['CLUSTERID_LOW_SNK'])
df_bids['CLUSTERID_LOW_SNK2'] = np.where(df_bids['FLIPPED']==0, df_bids['CLUSTERID_LOW_SNK'], df_bids['CLUSTERID_LOW_SRC'])
df_bids['CLUSTERID_LOW_SRC'] = df_bids['CLUSTERID_LOW_SRC2']
df_bids['CLUSTERID_LOW_SNK'] = df_bids['CLUSTERID_LOW_SNK2']
df_bids = df_bids.drop(columns=['CLUSTERID_LOW_SRC2', 'CLUSTERID_LOW_SNK2'])

## Flip prices / congestion
########################################################################################
df_bids['DC_MCP_WAVG_FOR_BID'] = np.where(df_bids['FLIPPED']==0, df_bids['DC_MCP_WAVG'], (-1*df_bids['DC_MCP_WAVG']))
df_bids['MCP'] = np.where(df_bids['FLIPPED']==0, df_bids['MCP'], (-1*df_bids['MCP']))
df_bids['CONG'] = np.where(df_bids['FLIPPED']==0, df_bids['CONG'], (-1*df_bids['CONG']))
df_bids['FUTURE_MCP'] = np.where(df_bids['FLIPPED']==0, df_bids['FUTURE_MCP'], (-1*df_bids['FUTURE_MCP']))
# df_bids['MAXMCP_LAST18'] = np.where(df_bids['FLIPPED']==0, df_bids['MAXMCP_LAST18'], (-1*df_bids['MAXMCP_LAST18']))
# df_bids['MINMCP_LAST18'] = np.where(df_bids['FLIPPED']==0, df_bids['MINMCP_LAST18'], (-1*df_bids['MINMCP_LAST18']))
df_bids['WINRATE_BUY_LASTMARK'] = np.where(df_bids['FLIPPED']==0, df_bids['WINRATE_BUY_LASTMARK'], (1-df_bids['WINRATE_BUY_LASTMARK']))
df_bids['WINRATE_SELL_LASTMARK'] = np.where(df_bids['FLIPPED']==0, df_bids['WINRATE_SELL_LASTMARK'], (1-df_bids['WINRATE_SELL_LASTMARK']))
# df_bids['MEDIANCONG'] = np.where(df_bids['FLIPPED']==0, df_bids['MEDIANCONG'], (-1*df_bids['MEDIANCONG']))
# df_bids['MEDIANCONG_INSEASON'] = np.where(df_bids['FLIPPED']==0, df_bids['MEDIANCONG_INSEASON'], (-1*df_bids['MEDIANCONG_INSEASON']))
# df_bids['CONG_LOW_T4'] = np.where(df_bids['FLIPPED']==0, df_bids['CONG_LOW_T4'], (-1*df_bids['CONG_LOW_T4']))
# df_bids['CONG_TOP_T4'] = np.where(df_bids['FLIPPED']==0, df_bids['CONG_TOP_T4'], (-1*df_bids['CONG_TOP_T4']))


########################################################################################
## Want to bid at a "premium" to DC: means bidding higher on long and bidding lower on short
## recall that we flipped these
conditions = [
    (df_bids['MW_BUY'] >= df_bids['MW_SELL']) & (df_bids['DC_MCP_WAVG'] > 0), ## DC Buy, PF MCP --> go long
    (df_bids['MW_BUY'] >= df_bids['MW_SELL']) & (df_bids['DC_MCP_WAVG'] < 0), ## DC Buy, CF MCP --> go short
    (df_bids['MW_BUY'] < df_bids['MW_SELL']) & (df_bids['DC_MCP_WAVG'] > 0), ## DC Sell, PF MCP --> go short
    (df_bids['MW_BUY'] < df_bids['MW_SELL']) & (df_bids['DC_MCP_WAVG'] < 0) ## DC Sell, CF MCP --> go long
]

choices = [
    (df_bids['DC_MCP_WAVG_FOR_BID'] * 1.2), ## Long: bid 20% higher than DC's clearing price
    (df_bids['DC_MCP_WAVG_FOR_BID'] * 0.8), ## Short: bid 20% lower than DC's clearing price
    (df_bids['DC_MCP_WAVG_FOR_BID'] * 0.8), ## Short: bid 20% lower than DC's clearing price
    (df_bids['DC_MCP_WAVG_FOR_BID'] * 1.2), ## Long: bid 20% higher than DC's clearing price
]
df_bids = df_bids.drop(columns=['DC_MCP_WAVG_FOR_BID'])

df_bids['BID_ENTRY'] = np.select(conditions, choices, default=np.nan)
df_bids = df_bids[~df_bids['BID_ENTRY'].isna()]

# df_bids['ENTRY_TYPE'] = np.where(df_bids['MW_BUY'] >= df_bids['MW_SELL'], 'BUY', 'SELL')
df_bids['ENTRY_TYPE'] = 'BUY' ## have to enter as a buy, can't sell what we don't have
df_bids['FLOW_TYPE'] = np.where(df_bids['MCP'] >= 0, 'PF', 'CF')

## Get an indicator for whether entry cleared
df_bids['CLEARED_ENTRY'] = np.where(
    (df_bids['ENTRY_TYPE'] == 'BUY') & (df_bids['BID_ENTRY'] > df_bids['MCP']) | 
    (df_bids['ENTRY_TYPE'] == 'SELL') & (df_bids['BID_ENTRY'] < df_bids['MCP']),
    1,
    0
).astype(int)

df_bids.head()

Unnamed: 0,ID_PATH_FLOWMONTH,ISO,PEAKTYPE,HEDGETYPE,SOURCEID,SINKID,CLUSTERID_LOW_SRC,CLUSTERID_LOW_SNK,SOURCE_ZONE,SINK_ZONE,...,FUTURE_AUCTION_TYPE,FUTURE_FW,FUTURE_MCP,WINRATE_BUY_LASTMARK,WINRATE_SELL_LASTMARK,FLIPPED,BID_ENTRY,ENTRY_TYPE,FLOW_TYPE,CLEARED_ENTRY
0,-9223337983664651781,PJMISO,WEPEAK,Obligation,51252,659771,51252,PPL:92,PPL,PPL,...,Apr,0.0,1.05,0.68,0.32,0,-12.912,BUY,CF,0
1,-9223309453048397859,PJMISO,OFF7X8,Obligation,33092311,32417599,33092311,COMED:84,,COMED,...,Apr,1.0,13.99,0.72,0.28,0,-7.84,BUY,PF,0
2,-9223309453048397859,PJMISO,OFF7X8,Obligation,33092311,32417599,33092311,COMED:84,,COMED,...,Dec,5.0,23.88,0.72,0.28,0,-7.84,BUY,PF,0
3,-9223309453048397859,PJMISO,OFF7X8,Obligation,33092311,32417599,33092311,COMED:84,,COMED,...,Feb,3.0,16.79,0.72,0.28,0,-7.84,BUY,PF,0
4,-9223309453048397859,PJMISO,OFF7X8,Obligation,33092311,32417599,33092311,COMED:84,,COMED,...,Jan,4.0,8.7,0.72,0.28,0,-7.84,BUY,PF,0


In [82]:
df_bids['ENTRY_TYPE'].unique()

array(['BUY'], dtype=object)

In [83]:
## We can also tabulate exits now. If our bid clear, the OFFER_EXIT is based on MCP (which is entry cost)

# df_potential_trades['OFFER_EXIT'] = df_potential_trades['MCP_ENTRY'] * 1.2
# df_potential_trades['CLEARED_EXIT'] = (df_potential_trades['OFFER_EXIT'] < df_potential_trades['FUTURE_MCP']).astype(int) ## this logic assumes only selling a previous BUY

## Want to try to exit at a greater premium if we are more forward from flow
## If forward >= 2 --> 30% premium, FW1 --> 20%, FW0 --> exit at cost
df_bids['OFFER_PREMIUM'] = np.select(
    [df_bids['FUTURE_FW'] >= 2,
     df_bids['FUTURE_FW'] == 1,
     df_bids['FUTURE_FW'] == 0],
     [30, 20, 0]
) 

## Want to bid at a "premium" to DC: means bidding higher on long and bidding lower on short
conditions_exit = [
    (df_bids['ENTRY_TYPE'] == 'BUY') & (df_bids['FLOW_TYPE'] == 'PF'), ## Long
    (df_bids['ENTRY_TYPE'] == 'BUY') & (df_bids['FLOW_TYPE'] == 'CF'), ## Short
    (df_bids['ENTRY_TYPE'] == 'SELL') & (df_bids['FLOW_TYPE'] == 'PF'), ## Short
    (df_bids['ENTRY_TYPE'] == 'SELL') & (df_bids['FLOW_TYPE'] == 'CF') ## Long
]

choices_exit = [
    (df_bids['MCP'] * (1 + df_bids['OFFER_PREMIUM']/100)), ## Long: sell for more than we bought
    (df_bids['MCP'] * (1 - df_bids['OFFER_PREMIUM']/100)), ## Short: buy back for less than we bought
    (df_bids['MCP'] * (1 - df_bids['OFFER_PREMIUM']/100)), ## Short: buy back for less than we bought (want to buy it back for less than we sold it)
    (df_bids['MCP'] * (1 + df_bids['OFFER_PREMIUM']/100)), ## Long: sell for more than we bought (want someone to pay us more to take it back)
]

## Get an indicator for whether entry cleared
df_bids['OFFER_EXIT'] = np.select(conditions_exit, choices_exit, default=np.nan)
df_bids['EXIT_TYPE'] = np.where(
    df_bids['ENTRY_TYPE'] == 'BUY', 'SELL', 'BUY'
)
## Get an indicator for whether entry cleared
df_bids['CLEARED_EXIT'] = np.where(
    (df_bids['EXIT_TYPE'] == 'BUY') & (df_bids['OFFER_EXIT'] > df_bids['FUTURE_MCP']) | 
    (df_bids['EXIT_TYPE'] == 'SELL') & (df_bids['OFFER_EXIT'] < df_bids['FUTURE_MCP']),
    1,
    0
).astype(int)

df_bids.head()

Unnamed: 0,ID_PATH_FLOWMONTH,ISO,PEAKTYPE,HEDGETYPE,SOURCEID,SINKID,CLUSTERID_LOW_SRC,CLUSTERID_LOW_SNK,SOURCE_ZONE,SINK_ZONE,...,WINRATE_SELL_LASTMARK,FLIPPED,BID_ENTRY,ENTRY_TYPE,FLOW_TYPE,CLEARED_ENTRY,OFFER_PREMIUM,OFFER_EXIT,EXIT_TYPE,CLEARED_EXIT
0,-9223337983664651781,PJMISO,WEPEAK,Obligation,51252,659771,51252,PPL:92,PPL,PPL,...,0.32,0,-12.912,BUY,CF,0,0,-0.07,SELL,1
1,-9223309453048397859,PJMISO,OFF7X8,Obligation,33092311,32417599,33092311,COMED:84,,COMED,...,0.28,0,-7.84,BUY,PF,0,20,6.0,SELL,1
2,-9223309453048397859,PJMISO,OFF7X8,Obligation,33092311,32417599,33092311,COMED:84,,COMED,...,0.28,0,-7.84,BUY,PF,0,30,6.5,SELL,1
3,-9223309453048397859,PJMISO,OFF7X8,Obligation,33092311,32417599,33092311,COMED:84,,COMED,...,0.28,0,-7.84,BUY,PF,0,30,6.5,SELL,1
4,-9223309453048397859,PJMISO,OFF7X8,Obligation,33092311,32417599,33092311,COMED:84,,COMED,...,0.28,0,-7.84,BUY,PF,0,30,6.5,SELL,1


In [84]:
df_bids['WINRATE'] = np.where(
    df_bids['ENTRY_TYPE'] == 'BUY',
    df_bids['WINRATE_BUY_LASTMARK'],
    df_bids['WINRATE_SELL_LASTMARK']
)

df_bids['RANK_WINRATE'] = df_bids.groupby(['AUCTION_MONTH','FLOWMONTH'])['WINRATE'].rank(method="first", ascending=False).astype(int)

In [85]:
## Want to know whether Long / Short a given cluster
## Long / Short is determined by whether entered as a BUY or a SELL

df_bids['LONGSHORT_SRC'] = np.where(
    df_bids['ENTRY_TYPE'] == 'BUY',
    -1, ## short the source if you buy
    +1 ## long the source if you sell
)

df_bids['LONGSHORT_SNK'] = np.where(
    df_bids['ENTRY_TYPE'] == 'BUY',
    +1, ## long the sink if you buy
    -1 ## short the sink if you sell
)

df_bids['LONGCLUSTER'] = np.where(
    df_bids['LONGSHORT_SNK'] == 1,
    df_bids['CLUSTERID_LOW_SNK'],
    df_bids['CLUSTERID_LOW_SRC']   
)

df_bids['SHORTCLUSTER'] = np.where(
    df_bids['LONGSHORT_SNK'] == -1,
    df_bids['CLUSTERID_LOW_SNK'],
    df_bids['CLUSTERID_LOW_SRC']   
)

df_bids.head()

Unnamed: 0,ID_PATH_FLOWMONTH,ISO,PEAKTYPE,HEDGETYPE,SOURCEID,SINKID,CLUSTERID_LOW_SRC,CLUSTERID_LOW_SNK,SOURCE_ZONE,SINK_ZONE,...,OFFER_PREMIUM,OFFER_EXIT,EXIT_TYPE,CLEARED_EXIT,WINRATE,RANK_WINRATE,LONGSHORT_SRC,LONGSHORT_SNK,LONGCLUSTER,SHORTCLUSTER
0,-9223337983664651781,PJMISO,WEPEAK,Obligation,51252,659771,51252,PPL:92,PPL,PPL,...,0,-0.07,SELL,1,0.68,2499,-1,1,PPL:92,51252
1,-9223309453048397859,PJMISO,OFF7X8,Obligation,33092311,32417599,33092311,COMED:84,,COMED,...,20,6.0,SELL,1,0.72,11440,-1,1,COMED:84,33092311
2,-9223309453048397859,PJMISO,OFF7X8,Obligation,33092311,32417599,33092311,COMED:84,,COMED,...,30,6.5,SELL,1,0.72,11441,-1,1,COMED:84,33092311
3,-9223309453048397859,PJMISO,OFF7X8,Obligation,33092311,32417599,33092311,COMED:84,,COMED,...,30,6.5,SELL,1,0.72,11442,-1,1,COMED:84,33092311
4,-9223309453048397859,PJMISO,OFF7X8,Obligation,33092311,32417599,33092311,COMED:84,,COMED,...,30,6.5,SELL,1,0.72,11443,-1,1,COMED:84,33092311


# Get portfolio outcomes

## Get trades and holdings

In [86]:
## df_bids is really a big set of potential trades

## To look at our portfolio we will do the following iteratively for each AUCTION_MONTH:
## 1) Consider potential trades:
##      a) Consider path-flowmonths that are not in our holdings
##      b) Subset to FW >= 3
## 2) See what clears, add it to *Holdings*
## 3) Get costs and revenue
## 4) Move on to the next AUCTION_MONTH & repeat

In [87]:
df_bids.head()

Unnamed: 0,ID_PATH_FLOWMONTH,ISO,PEAKTYPE,HEDGETYPE,SOURCEID,SINKID,CLUSTERID_LOW_SRC,CLUSTERID_LOW_SNK,SOURCE_ZONE,SINK_ZONE,...,OFFER_PREMIUM,OFFER_EXIT,EXIT_TYPE,CLEARED_EXIT,WINRATE,RANK_WINRATE,LONGSHORT_SRC,LONGSHORT_SNK,LONGCLUSTER,SHORTCLUSTER
0,-9223337983664651781,PJMISO,WEPEAK,Obligation,51252,659771,51252,PPL:92,PPL,PPL,...,0,-0.07,SELL,1,0.68,2499,-1,1,PPL:92,51252
1,-9223309453048397859,PJMISO,OFF7X8,Obligation,33092311,32417599,33092311,COMED:84,,COMED,...,20,6.0,SELL,1,0.72,11440,-1,1,COMED:84,33092311
2,-9223309453048397859,PJMISO,OFF7X8,Obligation,33092311,32417599,33092311,COMED:84,,COMED,...,30,6.5,SELL,1,0.72,11441,-1,1,COMED:84,33092311
3,-9223309453048397859,PJMISO,OFF7X8,Obligation,33092311,32417599,33092311,COMED:84,,COMED,...,30,6.5,SELL,1,0.72,11442,-1,1,COMED:84,33092311
4,-9223309453048397859,PJMISO,OFF7X8,Obligation,33092311,32417599,33092311,COMED:84,,COMED,...,30,6.5,SELL,1,0.72,11443,-1,1,COMED:84,33092311


In [88]:
## A function to itertively keep trades based on the following diversification criteria:
##      Remove trades we already have 
##      Only allow 1 Long and 1 Short trade per cluster-flowmonth

def select_trades_longshort(df):

    ## Sort the trade set by our ranking
    df = df.sort_values(by=['RANK_WINRATE', 'AUCTION_MONTH', 'FLOWMONTH'])

    ## Initialize an empty df to store the selected trades
    selected_trades = pd.DataFrame(columns=df.columns)

    ## Iterate through unique 'FLOWMONTH' groups
    for (auction, flow) in df.groupby(['AUCTION_MONTH', 'FLOWMONTH']).groups.keys():
        ## Get all trades for the current group
        group_trades = df[(df['AUCTION_MONTH'] == auction) & (df['FLOWMONTH'] == flow)].copy()

        ## While there are still potential trades in the group
        while not group_trades.empty:
            ## Select the first trade (highest ranked) from the sorted group
            selected_trade = group_trades.head(1).reset_index()
            selected_trades = pd.concat([selected_trades, selected_trade], ignore_index=True)

            ## Remove all trades that share the same long/short cluster
            group_trades = group_trades[
                ## Drop trades based on the cluster criteria
                (group_trades['LONGCLUSTER'] != selected_trade['LONGCLUSTER'].values[0]) &
                (group_trades['SHORTCLUSTER'] != selected_trade['SHORTCLUSTER'].values[0]) &
                ## Drop trades that share a node we are already trading in that flow month
                (group_trades['SOURCEID'] != selected_trade['SOURCEID'].values[0]) &
                (group_trades['SINKID'] != selected_trade['SOURCEID'].values[0]) &
                (group_trades['SOURCEID'] != selected_trade['SINKID'].values[0]) &
                (group_trades['SINKID'] != selected_trade['SINKID'].values[0])
            ]

    return selected_trades

In [104]:
## A function to itertively keep trades based on the following diversification criteria:
##      Remove trades we already have 
##      Only allow 1 Long and 1 Short trade per cluster-flowmonth

## Modifications:
##      Check if we have that cluster in holdings
##      Allow us to re-invest in clusters we have exited
##      Allow up to 5 long / short positions per cluster

## df is the df_potential_trades
## df_holdings is to constrain what we are bringing in
## n is the number of long / short appearances for cluster

def select_trades_longshort2(df, df_hold, n):

    ## Sort the trade set by our ranking
    df = df.sort_values(by=['RANK_WINRATE', 'AUCTION_MONTH', 'FLOWMONTH'])

    ## Initialize an empty df to store the selected trades
    selected_trades = pd.DataFrame(columns=df.columns)

    ## Iterate through unique 'FLOWMONTH' groups
    for (auction, flow) in df.groupby(['AUCTION_MONTH', 'FLOWMONTH']).groups.keys():
        ## Get all trades for the current group
        group_trades = df[(df['AUCTION_MONTH'] == auction) & (df['FLOWMONTH'] == flow)].copy()

        ## current holdings: Want contracts that we haven't exited that we purchased prior to this auction
        df_currenthold = df_hold[(df_hold['CLEARED_EXIT'] == 0) & (pd.to_datetime(df_hold['AUCTION_MONTH']) < pd.to_datetime(auction))]
        ## current long / short exposure (trade wise) to each cluster
        df_longcluster = df_currenthold[df_currenthold['FLOWMONTH']==flow].groupby('LONGCLUSTER').size().reset_index(name='count')
        df_shortcluster = df_currenthold[df_currenthold['FLOWMONTH']==flow].groupby('SHORTCLUSTER').size().reset_index(name='count')

        ## While there are still potential trades in the group
        while not group_trades.empty:
            
            ## Drop trades where we already have max exposure in the cluster
            group_trades = group_trades[
                ~(group_trades['LONGCLUSTER'].isin(df_longcluster[df_longcluster['count']>=n]['LONGCLUSTER'])) &
                ~(group_trades['SHORTCLUSTER'].isin(df_shortcluster[df_shortcluster['count']>=n]['SHORTCLUSTER']))
            ]

            ## Select the first trade (highest ranked) from the sorted group
            selected_trade = group_trades.head(1).reset_index()
            selected_trades = pd.concat([selected_trades, selected_trade], ignore_index=True)

            ## Add the selected trade's long and short cluster
            ## Long side
            this_longcluster = selected_trade['LONGCLUSTER'][0] ## get the cluster id
            if this_longcluster in df_longcluster['LONGCLUSTER'].values:
                # df_longcluster.loc[df_longcluster['LONGCLUSTER'] == this_longcluster, 'count'] += 1
                df_longcluster['count'] = np.where(df_longcluster['LONGCLUSTER'] == this_longcluster,
                                                   df_longcluster['count']+1,
                                                   df_longcluster['count'])
            else:
                df_longcluster = pd.concat([df_longcluster, pd.DataFrame({'LONGCLUSTER': [this_longcluster], 'count': [1]})], ignore_index=True)
            ## Short side
            this_shortcluster = selected_trade['SHORTCLUSTER'][0] ## get the cluster id
            if this_shortcluster in df_shortcluster['SHORTCLUSTER'].values:
                # df_shortcluster.loc[df_shortcluster['SHORTCLUSTER'] == shortcluster_value, 'count'] += 1
                df_shortcluster['count'] = np.where(df_shortcluster['SHORTCLUSTER'] == this_shortcluster,
                                                    df_shortcluster['count']+1,
                                                    df_shortcluster['count'])
            else:
                df_shortcluster = pd.concat([df_shortcluster, pd.DataFrame({'SHORTCLUSTER': [this_shortcluster], 'count': [1]})], ignore_index=True)

            ## Remove all trades that share the same long/short cluster
            group_trades = group_trades[
                ## Drop trades based on the cluster criteria
                ~(group_trades['LONGCLUSTER'].isin(df_longcluster[df_longcluster['count']>=n]['LONGCLUSTER'])) &
                ~(group_trades['SHORTCLUSTER'].isin(df_shortcluster[df_shortcluster['count']>=n]['SHORTCLUSTER'])) &
                ## Drop trades that share a node we are already trading in that flow month
                (group_trades['SOURCEID'] != selected_trade['SOURCEID'].values[0]) &
                (group_trades['SINKID'] != selected_trade['SOURCEID'].values[0]) &
                (group_trades['SOURCEID'] != selected_trade['SINKID'].values[0]) &
                (group_trades['SINKID'] != selected_trade['SINKID'].values[0])
            ]

    return selected_trades

In [105]:
sorted_auction_months = sorted(df_bids['AUCTION_MONTH'].unique())
df_holdings = pd.DataFrame(columns=df_bids.columns)

for auction_month in sorted_auction_months:

    ## 1) Consider potential trades
    ## 1a) Get broad set of potential trades that we are not already holding
    df_potential_trades = df_bids[
        (df_bids['AUCTION_MONTH'] == auction_month) & 
        (~df_bids['ID_PATH_FLOWMONTH'].isin(df_holdings['ID_PATH_FLOWMONTH']))
    ]

    # df_potential_trades[~df_potential_trades['FUTURE_FW'].isna()] ## don't drop these, just have to hold (unless we remap these nodes)

    ## 1b) Only consider trades with enough opportunities to sell
    df_potential_trades = df_potential_trades[df_potential_trades['FW']>=3]
    if df_potential_trades.empty: continue ## Need this because, for example, standing in March, there are no FWs satisfying FW > 3 (planning year is June-June)

    ## 1c) This is where we select trades / diversify
    ## 1c.i) This function keeps 1 long (and 1 short) trade per cluster
    # df_selected_trades = select_trades_longshort(df_potential_trades)
    df_selected_trades = select_trades_longshort2(df_potential_trades, df_holdings, n=5)

    
    ## 2) See what clears and add it to holdings
    ## Only keep what clears
    df_selected_trades = df_selected_trades[df_selected_trades['CLEARED_ENTRY'] == 1]
    # df_selected_trades = df_potential_trades[df_potential_trades['CLEARED_ENTRY'] == 1]
    
    ## 3) Get where we exit trades
    ## Sort by CLEARED_EXIT and then FUTURE_AUCTION_MONTH. Gives us the first thing to clear or the first record
    df_sorted = df_selected_trades.sort_values(by=['ID_PATH_FLOWMONTH', 'AUCTION_MONTH', 'CLEARED_EXIT', 'FUTURE_AUCTION_MONTH'], ascending=[True, True, False, True])
    df_first_rows = df_sorted.groupby(['ID_PATH_FLOWMONTH', 'AUCTION_MONTH']).first().reset_index()
    ## Handling cases where everything is empty <-- (this shouldn't happen)
    df_first_rows = df_first_rows.dropna(how='all')
    if df_first_rows.empty: 
        print(f"Month with empty df_first_rows: {auction_month}")
        continue

    ## Add the kept trades to holdings
    df_holdings = pd.concat([df_holdings, df_first_rows]).reset_index(drop=True)



  selected_trades = pd.concat([selected_trades, selected_trade], ignore_index=True)
  df_holdings = pd.concat([df_holdings, df_first_rows]).reset_index(drop=True)
  selected_trades = pd.concat([selected_trades, selected_trade], ignore_index=True)
  selected_trades = pd.concat([selected_trades, selected_trade], ignore_index=True)
  selected_trades = pd.concat([selected_trades, selected_trade], ignore_index=True)
  selected_trades = pd.concat([selected_trades, selected_trade], ignore_index=True)
  selected_trades = pd.concat([selected_trades, selected_trade], ignore_index=True)
  selected_trades = pd.concat([selected_trades, selected_trade], ignore_index=True)
  selected_trades = pd.concat([selected_trades, selected_trade], ignore_index=True)
  selected_trades = pd.concat([selected_trades, selected_trade], ignore_index=True)
  selected_trades = pd.concat([selected_trades, selected_trade], ignore_index=True)
  selected_trades = pd.concat([selected_trades, selected_trade], ignore_index=Tru

In [106]:
## Get cost and revenue
df_holdings.head()

Unnamed: 0,ID_PATH_FLOWMONTH,ISO,PEAKTYPE,HEDGETYPE,SOURCEID,SINKID,CLUSTERID_LOW_SRC,CLUSTERID_LOW_SNK,SOURCE_ZONE,SINK_ZONE,...,OFFER_EXIT,EXIT_TYPE,CLEARED_EXIT,WINRATE,RANK_WINRATE,LONGSHORT_SRC,LONGSHORT_SNK,LONGCLUSTER,SHORTCLUSTER,index
0,-9221460865949686737,PJMISO,ONPEAK,Obligation,32417783,48934179,COMED:34,48934179,COMED,COMED,...,101.803,SELL,1,0.0,9571,-1,1,48934179,COMED:34,176.0
1,-9203997878608118911,PJMISO,OFFPEAK,Obligation,51231,71856699,51231,PENELEC:22,PENELEC,PENELEC,...,-81.249,SELL,0,0.0,9586,-1,1,PENELEC:22,51231,1749.0
2,-9188118450618556672,PJMISO,ONPEAK,Obligation,33092315,32417727,33092315,32417727,COMED,COMED,...,-606.488,SELL,1,1.0,12,-1,1,32417727,33092315,3111.0
3,-9187535238290547254,PJMISO,OFFPEAK,Obligation,35010337,36181313,35010337,DOM:62,,DOM,...,-329.14,SELL,1,1.0,15,-1,1,DOM:62,35010337,3221.0
4,-9181889461913598730,PJMISO,OFFPEAK,Obligation,1552845545,32418569,1552845545,AEP:75,OVEC,AEP,...,-62.552,SELL,1,0.0,7484,-1,1,AEP:75,1552845545,3617.0


In [107]:
df_holdings['FLOWMONTH'].unique()

<DatetimeArray>
['2023-03-01 00:00:00', '2023-05-01 00:00:00', '2023-01-01 00:00:00',
 '2023-02-01 00:00:00', '2023-04-01 00:00:00', '2023-12-01 00:00:00',
 '2023-10-01 00:00:00', '2023-09-01 00:00:00', '2023-11-01 00:00:00']
Length: 9, dtype: datetime64[ns]

## Calculate revenue

In [108]:
df_revenue = df_holdings.filter(items=[
    'ID_PATH_FLOWMONTH',
    'ISO',
    'PEAKTYPE',
    'HEDGETYPE',
    'SOURCEID',
    'SINKID',
    'CLUSTERID_LOW_SRC',
    'CLUSTERID_LOW_SNK',
    'SOURCE_ZONE',
    'SINK_ZONE',
    'FLOWMONTH',
    'AUCTION_MONTH',
    'AUCTION_TYPE',
    'FW',
    'MCP',
    'CONG',
    'FUTURE_AUCTION_MONTH',
    'FUTURE_AUCTION_TYPE',
    'FUTURE_FW',
    'FUTURE_MCP',
    'BID_ENTRY',
    'ENTRY_TYPE',
    'FLOW_TYPE',
    'CLEARED_ENTRY',
    'OFFER_EXIT',
    'EXIT_TYPE',
    'CLEARED_EXIT'
]).copy()

df_revenue.head()

Unnamed: 0,ID_PATH_FLOWMONTH,ISO,PEAKTYPE,HEDGETYPE,SOURCEID,SINKID,CLUSTERID_LOW_SRC,CLUSTERID_LOW_SNK,SOURCE_ZONE,SINK_ZONE,...,FUTURE_AUCTION_TYPE,FUTURE_FW,FUTURE_MCP,BID_ENTRY,ENTRY_TYPE,FLOW_TYPE,CLEARED_ENTRY,OFFER_EXIT,EXIT_TYPE,CLEARED_EXIT
0,-9221460865949686737,PJMISO,ONPEAK,Obligation,32417783,48934179,COMED:34,48934179,COMED,COMED,...,Aug,7.0,102.31,158.503956,BUY,PF,1,101.803,SELL,1
1,-9203997878608118911,PJMISO,OFFPEAK,Obligation,51231,71856699,51231,PENELEC:22,PENELEC,PENELEC,...,Aug,7.0,-245.8,-21.205715,BUY,CF,1,-81.249,SELL,0
2,-9188118450618556672,PJMISO,ONPEAK,Obligation,33092315,32417727,33092315,32417727,COMED,COMED,...,Apr,1.0,-150.49,-223.702793,BUY,CF,1,-606.488,SELL,1
3,-9187535238290547254,PJMISO,OFFPEAK,Obligation,35010337,36181313,35010337,DOM:62,,DOM,...,Aug,5.0,-263.52,152.496994,BUY,CF,1,-329.14,SELL,1
4,-9181889461913598730,PJMISO,OFFPEAK,Obligation,1552845545,32418569,1552845545,AEP:75,OVEC,AEP,...,Aug,5.0,-42.85,-30.326648,BUY,CF,1,-62.552,SELL,1


In [109]:
## Need to assert this
df_revenue[df_revenue['CLEARED_ENTRY']==0].head()

Unnamed: 0,ID_PATH_FLOWMONTH,ISO,PEAKTYPE,HEDGETYPE,SOURCEID,SINKID,CLUSTERID_LOW_SRC,CLUSTERID_LOW_SNK,SOURCE_ZONE,SINK_ZONE,...,FUTURE_AUCTION_TYPE,FUTURE_FW,FUTURE_MCP,BID_ENTRY,ENTRY_TYPE,FLOW_TYPE,CLEARED_ENTRY,OFFER_EXIT,EXIT_TYPE,CLEARED_EXIT


In [110]:
df_revenue['COST'] = df_revenue['MCP']

df_revenue['REVENUE_MONTH'] = np.where(
    df_revenue['CLEARED_EXIT']==1,
    df_revenue['FUTURE_AUCTION_MONTH'],
    df_revenue['FLOWMONTH']
)

df_revenue['REVENUE'] = np.where(
    df_revenue['CLEARED_EXIT']==1,
    df_revenue['FUTURE_MCP'],
    df_revenue['CONG']
)

df_revenue['PROFIT'] = np.where(
    df_revenue['ENTRY_TYPE'] == 'BUY',
    df_revenue['REVENUE'] - df_revenue['COST'],
    df_revenue['COST'] - df_revenue['REVENUE']
)

In [111]:
df_revenue.head()

Unnamed: 0,ID_PATH_FLOWMONTH,ISO,PEAKTYPE,HEDGETYPE,SOURCEID,SINKID,CLUSTERID_LOW_SRC,CLUSTERID_LOW_SNK,SOURCE_ZONE,SINK_ZONE,...,ENTRY_TYPE,FLOW_TYPE,CLEARED_ENTRY,OFFER_EXIT,EXIT_TYPE,CLEARED_EXIT,COST,REVENUE_MONTH,REVENUE,PROFIT
0,-9221460865949686737,PJMISO,ONPEAK,Obligation,32417783,48934179,COMED:34,48934179,COMED,COMED,...,BUY,PF,1,101.803,SELL,1,78.31,2022-07-01,102.31,24.0
1,-9203997878608118911,PJMISO,OFFPEAK,Obligation,51231,71856699,51231,PENELEC:22,PENELEC,PENELEC,...,BUY,CF,1,-81.249,SELL,0,-116.07,2023-03-01,-388.1115,-272.0415
2,-9188118450618556672,PJMISO,ONPEAK,Obligation,33092315,32417727,33092315,32417727,COMED,COMED,...,BUY,CF,1,-606.488,SELL,1,-758.11,2023-03-01,-150.49,607.62
3,-9187535238290547254,PJMISO,OFFPEAK,Obligation,35010337,36181313,35010337,DOM:62,,DOM,...,BUY,CF,1,-329.14,SELL,1,-470.2,2022-07-01,-263.52,206.68
4,-9181889461913598730,PJMISO,OFFPEAK,Obligation,1552845545,32418569,1552845545,AEP:75,OVEC,AEP,...,BUY,CF,1,-62.552,SELL,1,-89.36,2022-07-01,-42.85,46.51


# Profit and Loss

## PnL by flow month

In [112]:
df_pnl_flowmonth = df_revenue.groupby('FLOWMONTH').agg(
    PROFIT = ('PROFIT', 'sum'),
    TRADE_COUNT = ('ID_PATH_FLOWMONTH', 'count')
).reset_index()

df_pnl_flowmonth = df_pnl_flowmonth.sort_values(by=['FLOWMONTH'])
df_pnl_flowmonth['PROFIT_CUMSUM'] = df_pnl_flowmonth['PROFIT'].cumsum()

df_pnl_flowmonth['PROFIT_SCALED'] = df_pnl_flowmonth['PROFIT'] / df_pnl_flowmonth['TRADE_COUNT']
df_pnl_flowmonth['PROFITSCALED_CUMSUM'] = df_pnl_flowmonth['PROFIT_SCALED'].cumsum()

df_pnl_flowmonth.head(12)

Unnamed: 0,FLOWMONTH,PROFIT,TRADE_COUNT,PROFIT_CUMSUM,PROFIT_SCALED,PROFITSCALED_CUMSUM
0,2023-01-01,298215.4268,965,298215.4268,309.03153,309.03153
1,2023-02-01,228192.2732,1134,526407.7,201.227754,510.259285
2,2023-03-01,61362.7106,1145,587770.4106,53.591887,563.851171
3,2023-04-01,105958.9028,1284,693729.3134,82.52251,646.373681
4,2023-05-01,161944.3392,1502,855673.6526,107.819134,754.192815
5,2023-09-01,-273.7728,177,855399.8798,-1.546739,752.646076
6,2023-10-01,-1433.6036,363,853966.2762,-3.949321,748.696755
7,2023-11-01,-3779.1637,522,850187.1125,-7.239777,741.456978
8,2023-12-01,39630.5876,687,889817.7001,57.686445,799.143423


In [113]:
df_pnl_flowmonth = df_revenue.groupby(['FLOWMONTH']).agg(
    PROFIT_TOTAL = ('PROFIT', 'sum'),
    TRADE_COUNT = ('ID_PATH_FLOWMONTH', 'count'),
    PROFIT_PF = ('PROFIT', lambda x: x[df_revenue['FLOW_TYPE'] == 'PF'].sum()),
    TRADE_COUNT_PF = ('ID_PATH_FLOWMONTH', lambda x: x[df_revenue['FLOW_TYPE'] == 'PF'].count()),
    PROFIT_CF = ('PROFIT', lambda x: x[df_revenue['FLOW_TYPE'] == 'CF'].sum()),
    TRADE_COUNT_CF = ('ID_PATH_FLOWMONTH', lambda x: x[df_revenue['FLOW_TYPE'] == 'CF'].count())
).reset_index()

df_pnl_flowmonth = df_pnl_flowmonth.sort_values(by=['FLOWMONTH'])
df_pnl_flowmonth['PROFIT_TOTAL_CUMSUM'] = df_pnl_flowmonth['PROFIT_TOTAL'].cumsum()
df_pnl_flowmonth['PROFIT_PF_CUMSUM'] = df_pnl_flowmonth['PROFIT_PF'].cumsum()
df_pnl_flowmonth['PROFIT_CF_CUMSUM'] = df_pnl_flowmonth['PROFIT_CF'].cumsum()

## Return per MW
df_pnl_flowmonth['PROFIT_PER_MW'] = df_pnl_flowmonth['PROFIT_TOTAL'] / df_pnl_flowmonth['TRADE_COUNT']
df_pnl_flowmonth['PF_PROFIT_PER_MW'] = df_pnl_flowmonth['PROFIT_PF'] / df_pnl_flowmonth['TRADE_COUNT_PF']
df_pnl_flowmonth['CF_PROFIT_PER_MW'] = df_pnl_flowmonth['PROFIT_CF'] / df_pnl_flowmonth['TRADE_COUNT_CF']


## Try to format?
df_pnl_flowmonth['PROFIT_TOTAL'] = df_pnl_flowmonth['PROFIT_TOTAL'].apply(lambda x: "${:,.2f}".format(x))
df_pnl_flowmonth['PROFIT_TOTAL_CUMSUM'] = df_pnl_flowmonth['PROFIT_TOTAL_CUMSUM'].apply(lambda x: "${:,.2f}".format(x))
df_pnl_flowmonth['PROFIT_PF'] = df_pnl_flowmonth['PROFIT_PF'].apply(lambda x: "${:,.2f}".format(x))
df_pnl_flowmonth['PROFIT_PF_CUMSUM'] = df_pnl_flowmonth['PROFIT_PF_CUMSUM'].apply(lambda x: "${:,.2f}".format(x))
df_pnl_flowmonth['PROFIT_CF'] = df_pnl_flowmonth['PROFIT_CF'].apply(lambda x: "${:,.2f}".format(x))
df_pnl_flowmonth['PROFIT_CF_CUMSUM'] = df_pnl_flowmonth['PROFIT_CF_CUMSUM'].apply(lambda x: "${:,.2f}".format(x))

df_pnl_flowmonth['PROFIT_PER_MW'] = df_pnl_flowmonth['PROFIT_PER_MW'].apply(lambda x: "$/MW {:,.2f}".format(x))
df_pnl_flowmonth['PF_PROFIT_PER_MW'] = df_pnl_flowmonth['PF_PROFIT_PER_MW'].apply(lambda x: "$/MW {:,.2f}".format(x))
df_pnl_flowmonth['CF_PROFIT_PER_MW'] = df_pnl_flowmonth['CF_PROFIT_PER_MW'].apply(lambda x: "$/MW {:,.2f}".format(x))

df_pnl_flowmonth = df_pnl_flowmonth[
    ['FLOWMONTH', 
     'PROFIT_TOTAL', 'PROFIT_TOTAL_CUMSUM',
     'PROFIT_PER_MW',
     'PROFIT_PF', 'PROFIT_PF_CUMSUM', 'PF_PROFIT_PER_MW',
     'PROFIT_CF', 'PROFIT_CF_CUMSUM', 'CF_PROFIT_PER_MW',
     'TRADE_COUNT', 
     'TRADE_COUNT_PF', 'TRADE_COUNT_CF']
]

df_pnl_flowmonth.head(12)

Unnamed: 0,FLOWMONTH,PROFIT_TOTAL,PROFIT_TOTAL_CUMSUM,PROFIT_PER_MW,PROFIT_PF,PROFIT_PF_CUMSUM,PF_PROFIT_PER_MW,PROFIT_CF,PROFIT_CF_CUMSUM,CF_PROFIT_PER_MW,TRADE_COUNT,TRADE_COUNT_PF,TRADE_COUNT_CF
0,2023-01-01,"$298,215.43","$298,215.43",$/MW 309.03,"$38,627.54","$38,627.54",$/MW 351.16,"$259,587.88","$259,587.88",$/MW 303.61,965,110,855
1,2023-02-01,"$228,192.27","$526,407.70",$/MW 201.23,"$-14,312.32","$24,315.23",$/MW -104.47,"$242,504.59","$502,092.47",$/MW 243.23,1134,137,997
2,2023-03-01,"$61,362.71","$587,770.41",$/MW 53.59,"$-69,363.26","$-45,048.03",$/MW -281.96,"$130,725.97","$632,818.44",$/MW 145.41,1145,246,899
3,2023-04-01,"$105,958.90","$693,729.31",$/MW 82.52,"$-39,821.09","$-84,869.12",$/MW -168.02,"$145,779.99","$778,598.43",$/MW 139.24,1284,237,1047
4,2023-05-01,"$161,944.34","$855,673.65",$/MW 107.82,"$3,298.91","$-81,570.21",$/MW 12.17,"$158,645.43","$937,243.86",$/MW 128.88,1502,271,1231
5,2023-09-01,$-273.77,"$855,399.88",$/MW -1.55,"$-4,099.74","$-85,669.95",$/MW -64.06,"$3,825.97","$941,069.83",$/MW 33.86,177,64,113
6,2023-10-01,"$-1,433.60","$853,966.28",$/MW -3.95,"$-9,817.84","$-95,487.79",$/MW -105.57,"$8,384.24","$949,454.06",$/MW 31.05,363,93,270
7,2023-11-01,"$-3,779.16","$850,187.11",$/MW -7.24,"$-4,402.72","$-99,890.50",$/MW -30.57,$623.55,"$950,077.62",$/MW 1.65,522,144,378
8,2023-12-01,"$39,630.59","$889,817.70",$/MW 57.69,"$12,963.97","$-86,926.53",$/MW 91.30,"$26,666.61","$976,744.23",$/MW 48.93,687,142,545


## PnL by zone

In [30]:
## sometimes zone is missing and that throws off the calculation
print(df_revenue['SOURCE_ZONE'].unique())
print(df_revenue['SINK_ZONE'].unique())

['COMED' 'PENELEC' None 'OVEC' 'PSEG' 'PECO' 'DOM' 'PEPCO' 'DEOK' 'PPL'
 'METED' 'DAY' 'AEP' 'JCPL' 'ATSI' 'APS' 'AECO' 'BGE' 'EKPC' 'DUQ' 'RECO']
['COMED' 'PENELEC' 'DOM' 'AEP' 'PSEG' 'PEPCO' 'BGE' 'PPL' 'DAY' 'APS'
 'METED' 'JCPL' 'ATSI' 'AECO' 'DEOK' None 'EKPC' 'PECO' 'DUQ' 'RECO'
 'OVEC']


In [32]:
# df_pnl_zone = df_revenue.copy().groupby('SINK_ZONE').agg(
#     PROFIT = ('PROFIT', 'sum'),
#     TRADE_COUNT = ('ID_PATH_FLOWMONTH', 'count')
# ).reset_index()

df_pnl_zone = df_revenue.copy().groupby('SOURCE_ZONE').agg(
    PROFIT = ('PROFIT', 'sum'),
    TRADE_COUNT = ('ID_PATH_FLOWMONTH', 'count')
).reset_index()

print(f"Are there duplicate IDs?: {df_revenue.duplicated(subset=['ID_PATH_FLOWMONTH']).sum()}")
print(f"Are there missing profits?: {df_revenue['PROFIT'].isnull().sum()}")

df_pnl_zone['PROFIT_TOTAL'] = df_pnl_zone['PROFIT'].sum()

df_pnl_zone['PROFIT_SCALED'] = df_pnl_zone['PROFIT'] / df_pnl_zone['TRADE_COUNT']

df_pnl_zone.head(50)

Are there duplicate IDs?: 0
Are there missing profits?: 0


Unnamed: 0,SOURCE_ZONE,PROFIT,TRADE_COUNT,PROFIT_TOTAL,PROFIT_SCALED
0,AECO,3201.5774,160,769613.1167,20.009859
1,AEP,173781.12,880,769613.1167,197.478545
2,APS,38744.8856,474,769613.1167,81.740265
3,ATSI,52316.2324,442,769613.1167,118.362517
4,BGE,16701.6941,122,769613.1167,136.899132
5,COMED,76613.9066,936,769613.1167,81.852464
6,DAY,5648.9042,129,769613.1167,43.789955
7,DEOK,26438.2656,208,769613.1167,127.107046
8,DOM,59881.787,711,769613.1167,84.221923
9,DUQ,-3782.5895,55,769613.1167,-68.774355


## PnL in a cash flow sense

In [33]:
df_revenue.head()

Unnamed: 0,ID_PATH_FLOWMONTH,ISO,PEAKTYPE,HEDGETYPE,SOURCEID,SINKID,CLUSTERID_LOW_SRC,CLUSTERID_LOW_SNK,SOURCE_ZONE,SINK_ZONE,...,ENTRY_TYPE,FLOW_TYPE,CLEARED_ENTRY,OFFER_EXIT,EXIT_TYPE,CLEARED_EXIT,COST,REVENUE_MONTH,REVENUE,PROFIT
0,-9221460865949686737,PJMISO,ONPEAK,Obligation,32417783,48934179,COMED:34,48934179,COMED,COMED,...,BUY,PF,1,101.803,SELL,1,78.31,2022-07-01,102.31,24.0
1,-9203997878608118911,PJMISO,OFFPEAK,Obligation,51231,71856699,51231,PENELEC:22,PENELEC,PENELEC,...,BUY,CF,1,-81.249,SELL,0,-116.07,2023-03-01,-388.1115,-272.0415
2,-9188118450618556672,PJMISO,ONPEAK,Obligation,33092315,32417727,33092315,32417727,COMED,COMED,...,BUY,CF,1,-606.488,SELL,1,-758.11,2023-03-01,-150.49,607.62
3,-9187535238290547254,PJMISO,OFFPEAK,Obligation,35010337,36181313,35010337,DOM:62,,DOM,...,BUY,CF,1,-329.14,SELL,1,-470.2,2022-07-01,-263.52,206.68
4,-9181889461913598730,PJMISO,OFFPEAK,Obligation,1552845545,32418569,1552845545,AEP:75,OVEC,AEP,...,BUY,CF,1,-62.552,SELL,1,-89.36,2022-07-01,-42.85,46.51


In [34]:
df_entry = df_revenue.filter(items=[
    'ID_PATH_FLOWMONTH',
    'ISO',
    'PEAKTYPE',
    'HEDGETYPE',
    'SOURCEID',
    'SINKID',
    'FLOWMONTH',
    'ENTRY_TYPE',
    'AUCTION_MONTH',
    'COST'
]).copy()
df_entry.rename(columns={'AUCTION_MONTH': 'MONTH_CF'}, inplace=True)
df_entry.rename(columns={'COST': 'CASHFLOW'}, inplace=True)
## If it was a long FW position, cost is negative (out-flow)
df_entry['CASHFLOW'] = np.where(
    df_entry['ENTRY_TYPE'] == 'SELL',
    df_entry['CASHFLOW'],
    df_entry['CASHFLOW']*(-1)
)

df_exit = df_revenue.filter(items=[
    'ID_PATH_FLOWMONTH',
    'ISO',
    'PEAKTYPE',
    'HEDGETYPE',
    'SOURCEID',
    'SINKID',
    'FLOWMONTH',
    'ENTRY_TYPE',
    'REVENUE_MONTH',
    'REVENUE'
]).copy()
df_exit.rename(columns={'REVENUE_MONTH': 'MONTH_CF'}, inplace=True)
df_exit.rename(columns={'REVENUE': 'CASHFLOW'}, inplace=True)

## If it was a long FW position, revenue is positive (in-flow)
df_exit['CASHFLOW'] = np.where(
    df_exit['ENTRY_TYPE'] == 'BUY',
    df_exit['CASHFLOW'],
    df_exit['CASHFLOW']*(-1)
)

df_pnl_cashflow = pd.concat([df_entry, df_exit]).reset_index(drop=True)
df_pnl_cashflow.head()

Unnamed: 0,ID_PATH_FLOWMONTH,ISO,PEAKTYPE,HEDGETYPE,SOURCEID,SINKID,FLOWMONTH,ENTRY_TYPE,MONTH_CF,CASHFLOW
0,-9221460865949686737,PJMISO,ONPEAK,Obligation,32417783,48934179,2023-03-01,BUY,2022-05-01,-78.31
1,-9203997878608118911,PJMISO,OFFPEAK,Obligation,51231,71856699,2023-03-01,BUY,2022-05-01,116.07
2,-9188118450618556672,PJMISO,ONPEAK,Obligation,33092315,32417727,2023-05-01,BUY,2022-05-01,758.11
3,-9187535238290547254,PJMISO,OFFPEAK,Obligation,35010337,36181313,2023-01-01,BUY,2022-05-01,470.2
4,-9181889461913598730,PJMISO,OFFPEAK,Obligation,1552845545,32418569,2023-01-01,BUY,2022-05-01,89.36


In [35]:
df_pnl_cashflow = df_pnl_cashflow.groupby('MONTH_CF').agg(
    PROFIT = ('CASHFLOW', 'sum'),
    TRADE_COUNT = ('ID_PATH_FLOWMONTH', 'count')
).reset_index()

df_pnl_cashflow = df_pnl_cashflow.sort_values(by=['MONTH_CF'])
df_pnl_cashflow['PROFIT_CUMSUM'] = df_pnl_cashflow['PROFIT'].cumsum()

df_pnl_cashflow['PROFIT_SCALED'] = df_pnl_cashflow['PROFIT'] / df_pnl_cashflow['TRADE_COUNT']
df_pnl_cashflow['PROFITSCALED_CUMSUM'] = df_pnl_cashflow['PROFIT_SCALED'].cumsum()

df_pnl_cashflow.head(20)

Unnamed: 0,MONTH_CF,PROFIT,TRADE_COUNT,PROFIT_CUMSUM,PROFIT_SCALED,PROFITSCALED_CUMSUM
0,2022-05-01,108712.14,736,108712.14,147.706712,147.706712
1,2022-06-01,197215.3,825,305927.44,239.048848,386.75556
2,2022-07-01,145653.39,1144,451580.83,127.319397,514.074957
3,2022-08-01,188280.94,796,639861.77,236.533844,750.608802
4,2022-09-01,154997.95,741,794859.72,209.174022,959.782823
5,2022-10-01,85722.1,536,880581.82,159.929291,1119.712114
6,2022-11-01,17257.18,1310,897839.0,13.17342,1132.885534
7,2022-12-01,23508.62,307,921347.62,76.575309,1209.460843
8,2023-01-01,26869.846,615,948217.466,43.690807,1253.15165
9,2023-02-01,-20044.0152,584,928173.4508,-34.321944,1218.829706


In [37]:
print(f"Sum of profit column from flow-month PnL: {df_pnl_flowmonth['PROFIT'].sum()}")
print(f"Sum of profit column from cash-flow-month PnL: {df_pnl_cashflow['PROFIT'].sum()}")

Sum of profit column from flow-month PnL: 798685.4471999999
Sum of profit column from cash-flow-month PnL: 798685.4471999998


# Get portfolios to look at

In [38]:
df_holdings.head()

Unnamed: 0,ID_PATH_FLOWMONTH,ISO,PEAKTYPE,HEDGETYPE,SOURCEID,SINKID,CLUSTERID_LOW_SRC,CLUSTERID_LOW_SNK,SOURCE_ZONE,SINK_ZONE,...,OFFER_EXIT,EXIT_TYPE,CLEARED_EXIT,WINRATE,RANK_WINRATE,LONGSHORT_SRC,LONGSHORT_SNK,LONGCLUSTER,SHORTCLUSTER,index
0,-9221460865949686737,PJMISO,ONPEAK,Obligation,32417783,48934179,COMED:34,48934179,COMED,COMED,...,101.803,SELL,1,0.0,9571,-1,1,48934179,COMED:34,176.0
1,-9203997878608118911,PJMISO,OFFPEAK,Obligation,51231,71856699,51231,PENELEC:22,PENELEC,PENELEC,...,-81.249,SELL,0,0.0,9586,-1,1,PENELEC:22,51231,1749.0
2,-9188118450618556672,PJMISO,ONPEAK,Obligation,33092315,32417727,33092315,32417727,COMED,COMED,...,-606.488,SELL,1,1.0,12,-1,1,32417727,33092315,3111.0
3,-9187535238290547254,PJMISO,OFFPEAK,Obligation,35010337,36181313,35010337,DOM:62,,DOM,...,-329.14,SELL,1,1.0,15,-1,1,DOM:62,35010337,3221.0
4,-9181889461913598730,PJMISO,OFFPEAK,Obligation,1552845545,32418569,1552845545,AEP:75,OVEC,AEP,...,-62.552,SELL,1,0.0,7484,-1,1,AEP:75,1552845545,3617.0


In [39]:
flowmonths = sorted(df_holdings['FLOWMONTH'].unique())
print(flowmonths)

## Worst: Nov 2023
## Best: Jan 2023
print(flowmonths[0])
print(flowmonths[7])

[Timestamp('2023-01-01 00:00:00'), Timestamp('2023-02-01 00:00:00'), Timestamp('2023-03-01 00:00:00'), Timestamp('2023-04-01 00:00:00'), Timestamp('2023-05-01 00:00:00'), Timestamp('2023-09-01 00:00:00'), Timestamp('2023-10-01 00:00:00'), Timestamp('2023-11-01 00:00:00'), Timestamp('2023-12-01 00:00:00')]
2023-01-01 00:00:00
2023-11-01 00:00:00


In [42]:
thismonth = flowmonths[7]
print(thismonth)

df_takealook = df_holdings[df_holdings['FLOWMONTH']==thismonth].copy()
df_takealook['PROFIT'] = np.where(
    df_takealook['ENTRY_TYPE']=='BUY',
    (df_takealook['CONG']-df_takealook['MCP']) * df_takealook['CLEARED_ENTRY'],
    (df_takealook['MCP']-df_takealook['CONG'])  * df_takealook['CLEARED_ENTRY']
)
print(df_takealook['PROFIT'].sum())

df_takealook = df_takealook.filter(items=[
    'FLOWMONTH',
    'AUCTION_MONTH',
    'PEAKTYPE',
    'HEDGETYPE',
    'SOURCEID',
    'SINKID',
    'ENTRY_TYPE',
    'BID_ENTRY',
    # 'MWS_ROUNDED2'
])

print(df_takealook.dtypes)

## YE auction_dates are one month ahead of VB auction_months
df_takealook['AUCTION_MONTH'] = df_takealook['AUCTION_MONTH'] + pd.DateOffset(months=1)
df_takealook.rename(columns={'AUCTION_MONTH': 'AUCTION_DATE'}, inplace=True)

df_takealook.rename(columns={'FLOWMONTH': 'CONTRACT_STARTDATE'}, inplace=True)
df_takealook.rename(columns={'ENTRY_TYPE': 'TRADE_TYPE'}, inplace=True)
df_takealook.rename(columns={'BID_ENTRY': 'BID'}, inplace=True)
# df_takealook.rename(columns={'MWS_ROUNDED2': 'PATHSIZE'}, inplace=True)
df_takealook['CONTRACT_TYPE'] = "M"

# df_takealook.to_csv('~/Downloads/dc_jan23.csv', index=False)

df_takealook.head()

2023-11-01 00:00:00
-10656.824399999994
FLOWMONTH        datetime64[ns]
AUCTION_MONTH    datetime64[ns]
PEAKTYPE                 object
HEDGETYPE                object
SOURCEID                 object
SINKID                   object
ENTRY_TYPE               object
BID_ENTRY               float64
dtype: object


Unnamed: 0,CONTRACT_STARTDATE,AUCTION_DATE,PEAKTYPE,HEDGETYPE,SOURCEID,SINKID,TRADE_TYPE,BID,CONTRACT_TYPE
5287,2023-11-01,2023-06-01,OFF7X8,Obligation,34497125,61482313,BUY,132.402257,M
5289,2023-11-01,2023-06-01,OFF7X8,Obligation,1292916186,2156109456,BUY,15.887283,M
5298,2023-11-01,2023-06-01,ONPEAK,Obligation,50803,31928439,BUY,-139.374413,M
5301,2023-11-01,2023-06-01,ONPEAK,Obligation,40243783,40243775,BUY,-2.480617,M
5305,2023-11-01,2023-06-01,OFF7X8,Obligation,47012281,51300,BUY,-0.052678,M


# whatever