In [204]:
import pandas as pd
import numpy as np

from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split, cross_validate, KFold, LeaveOneOut
from sklearn.preprocessing import StandardScaler, normalize, MinMaxScaler, PowerTransformer

from scipy.stats.mstats import winsorize

import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter

from datetime import datetime

import seaborn as sns

# Data processing

## Get data

In [205]:
df_raw = pd.read_csv('exploratory2.csv')

print(df_raw.dtypes)

ID_PATH_FLOWMONTH           int64
ISO                        object
PEAKTYPE                   object
HEDGETYPE                  object
SOURCEID                    int64
SINKID                      int64
CLUSTERID_LOW_SRC          object
CLUSTERID_LOW_SNK          object
SOURCE_ZONE                object
SINK_ZONE                  object
MW_NET                    float64
MW_BUY                    float64
MW_SELL                   float64
DC_MCP_WAVG               float64
DC_AUCTION_TYPE_FIRST      object
DC_AUCTION_MONTH_FIRST     object
FW_DC_FIRST                 int64
FLOWMONTH                  object
AUCTION_ID                 object
AUCTION_MONTH              object
AUCTION_TYPE               object
FW                          int64
MCP                       float64
CONG                      float64
FUTURE_AUCTION_ID          object
FUTURE_AUCTION_MONTH       object
FUTURE_AUCTION_TYPE        object
FUTURE_FW                 float64
FUTURE_MCP                float64
MAXMCP_LAST18 

In [206]:
df_raw.head()

Unnamed: 0,ID_PATH_FLOWMONTH,ISO,PEAKTYPE,HEDGETYPE,SOURCEID,SINKID,CLUSTERID_LOW_SRC,CLUSTERID_LOW_SNK,SOURCE_ZONE,SINK_ZONE,...,FUTURE_FW,FUTURE_MCP,MAXMCP_LAST18,MINPOSMCP_LAST18,WINRATE_BUY_LASTMARK,WINRATE_SELL_LASTMARK,MEDIANCONG,MEDIANCONG_INSEASON,CONG_LOW_T4,CONG_TOP_T4
0,-9223337983664651781,PJMISO,WEPEAK,Obligation,51252,659771,51252,PPL:92,PPL,PPL,...,0.0,1.05,7.95,0.83,0.68,0.32,-3.888,-10.1472,-279.1856,8.4324
1,-9223234284026271016,PJMISO,OFF7X8,Obligation,1369012492,1348264767,ATSI:86,AEP:274,ATSI,AEP,...,0.0,19.41,162.527273,12.7,0.0,1.0,16.452,8.2636,-17.4509,147.895175
2,-9223234284026271016,PJMISO,OFF7X8,Obligation,1369012492,1348264767,ATSI:86,AEP:274,ATSI,AEP,...,1.0,20.41,162.527273,12.7,0.0,1.0,16.452,8.2636,-17.4509,147.895175
3,-9223234284026271016,PJMISO,OFF7X8,Obligation,1369012492,1348264767,ATSI:86,AEP:274,ATSI,AEP,...,0.0,19.41,162.527273,12.7,0.44,0.56,18.22,8.2636,-17.4509,147.895175
4,-9223116829435080393,PJMISO,OFF7X8,Obligation,32417779,33092303,COMED:76,COMED:268,COMED,COMED,...,0.0,-4.56,5.98,0.277021,0.96,0.04,-3.89,-7.2012,-52.92265,5.036275


In [207]:
df_raw.describe()

Unnamed: 0,ID_PATH_FLOWMONTH,SOURCEID,SINKID,MW_NET,MW_BUY,MW_SELL,DC_MCP_WAVG,FW_DC_FIRST,FW,MCP,...,FUTURE_FW,FUTURE_MCP,MAXMCP_LAST18,MINPOSMCP_LAST18,WINRATE_BUY_LASTMARK,WINRATE_SELL_LASTMARK,MEDIANCONG,MEDIANCONG_INSEASON,CONG_LOW_T4,CONG_TOP_T4
count,2023091.0,2023091.0,2023091.0,2023091.0,2023091.0,2023091.0,2023091.0,2023091.0,2023091.0,2023091.0,...,2019640.0,2019640.0,2018881.0,1780339.0,2023091.0,2023091.0,2023091.0,2014954.0,1969123.0,1985510.0
mean,3.260349e+16,438486500.0,447223500.0,10.54111,14.28585,3.744738,26.16037,8.869535,5.589818,32.99981,...,2.311615,29.40816,269.4552,46.05098,0.4197102,0.5802898,20.43855,24.46035,-386.7724,416.8736
std,5.322809e+18,692626500.0,691540500.0,18.37164,16.40692,8.016405,321.3635,2.451281,2.603479,339.6788,...,2.196493,323.2869,792.1377,178.2004,0.2970548,0.2970548,273.2802,331.9475,1075.307,944.5375
min,-9.223338e+18,48824.0,48824.0,-182.8,0.0,0.0,-5760.43,2.0,1.0,-6740.72,...,0.0,-10979.64,-3804.93,0.00341333,0.0,0.0,-6821.336,-8599.746,-53812.3,0.0136
25%,-4.577779e+18,21601790.0,31065430.0,0.0,4.2,0.0,-25.19202,7.0,4.0,-24.41,...,0.0,-25.05,17.66,2.17,0.16,0.333333,-14.2296,-21.602,-393.7804,52.6196
50%,8.05898e+16,38368010.0,44460860.0,8.0,9.1,0.0,-3.11,9.0,6.0,1.72,...,2.0,2.11,74.51,7.78,0.44,0.56,0.952,0.96,-147.2692,150.7448
75%,4.634187e+18,1084391000.0,1084391000.0,16.5,18.8,5.1,16.11641,11.0,8.0,35.15,...,4.0,37.31,237.11,26.9,0.666667,0.84,20.6688,28.26,-47.55593,400.3368
max,9.223273e+18,2156114000.0,2156114000.0,471.8,471.8,182.8,13378.85,12.0,11.0,13690.67,...,10.0,17803.15,23610.44,5497.15,1.0,1.0,9951.554,16284.71,-0.008,23747.1


## Placeholder for subsetting

In [208]:
df_sample = df_raw[
    (df_raw['MW_NET'] != 0) & ## Drop paths that they bought up and totally exited (like an annual flip)
    (df_raw['DC_MCP_WAVG'] != 0) &
    (df_raw['HEDGETYPE'] == 'Obligation') &
    (df_raw['SINK_ZONE'] != 'DPL') &
    (df_raw['SOURCE_ZONE'] != 'DPL')
].copy()

df_sample = df_sample.filter(items=[
    'ID_PATH_FLOWMONTH',
    'ISO',
    'PEAKTYPE',
    'HEDGETYPE',
    'SOURCEID',
    'SINKID',
    'CLUSTERID_LOW_SRC',
    'CLUSTERID_LOW_SNK',
    'SOURCE_ZONE',
    'SINK_ZONE',
    'MW_NET',
    'MW_BUY',
    'MW_SELL',
    'DC_MCP_WAVG',
    'DC_AUCTION_TYPE_FIRST',
    'DC_AUCTION_MONTH_FIRST',
    'FW_DC_FIRST',
    'FLOWMONTH',
    # 'AUCTION_ID',
    'AUCTION_MONTH',
    'AUCTION_TYPE',
    'FW',
    'MCP',
    'CONG',
    # 'FUTURE_AUCTION_ID',
    'FUTURE_AUCTION_MONTH',
    'FUTURE_AUCTION_TYPE',
    'FUTURE_FW',
    'FUTURE_MCP',
    # 'MAXMCP_LAST18',
    # 'MINPOSMCP_LAST18',
    'WINRATE_BUY_LASTMARK',
    'WINRATE_SELL_LASTMARK',
    # 'MEDIANCONG',
    # 'MEDIANCONG_INSEASON',
    # 'CONG_LOW_T4',
    # 'CONG_TOP_T4'
])

df_sample.head()

Unnamed: 0,ID_PATH_FLOWMONTH,ISO,PEAKTYPE,HEDGETYPE,SOURCEID,SINKID,CLUSTERID_LOW_SRC,CLUSTERID_LOW_SNK,SOURCE_ZONE,SINK_ZONE,...,AUCTION_TYPE,FW,MCP,CONG,FUTURE_AUCTION_MONTH,FUTURE_AUCTION_TYPE,FUTURE_FW,FUTURE_MCP,WINRATE_BUY_LASTMARK,WINRATE_SELL_LASTMARK
0,-9223337983664651781,PJMISO,WEPEAK,Obligation,51252,659771,51252,PPL:92,PPL,PPL,...,Mar,1,-0.07,0.2016,2023-03-01,Apr,0.0,1.05,0.68,0.32
1,-9223234284026271016,PJMISO,OFF7X8,Obligation,1369012492,1348264767,ATSI:86,AEP:274,ATSI,AEP,...,Jun,2,23.79,46.4896,2023-07-01,Aug,0.0,19.41,0.0,1.0
2,-9223234284026271016,PJMISO,OFF7X8,Obligation,1369012492,1348264767,ATSI:86,AEP:274,ATSI,AEP,...,Jun,2,23.79,46.4896,2023-06-01,Jul,1.0,20.41,0.0,1.0
3,-9223234284026271016,PJMISO,OFF7X8,Obligation,1369012492,1348264767,ATSI:86,AEP:274,ATSI,AEP,...,Jul,1,20.41,46.4896,2023-07-01,Aug,0.0,19.41,0.44,0.56
4,-9223116829435080393,PJMISO,OFF7X8,Obligation,32417779,33092303,COMED:76,COMED:268,COMED,COMED,...,Sep,3,-5.88,-0.4024,2023-11-01,Dec,0.0,-4.56,0.96,0.04


# Apply bid strategy

In [209]:
df_bids = df_sample.copy()
df_bids['AUCTION_MONTH'] = pd.to_datetime(df_bids['AUCTION_MONTH'], format='%Y-%m-%d')
df_bids['FUTURE_AUCTION_MONTH'] = pd.to_datetime(df_bids['FUTURE_AUCTION_MONTH'], format='%Y-%m-%d')
df_bids['FLOWMONTH'] = pd.to_datetime(df_bids['FLOWMONTH'], format='%Y-%m-%d')

## Want to bid at a "premium" to DC: means bidding higher on long and bidding lower on short
conditions = [
    (df_bids['MW_BUY'] >= df_bids['MW_SELL']) & (df_bids['DC_MCP_WAVG'] > 0), ## DC Buy, PF MCP --> go long
    (df_bids['MW_BUY'] >= df_bids['MW_SELL']) & (df_bids['DC_MCP_WAVG'] < 0), ## DC Buy, CF MCP --> go short
    (df_bids['MW_BUY'] < df_bids['MW_SELL']) & (df_bids['DC_MCP_WAVG'] > 0), ## DC Sell, PF MCP --> go short
    (df_bids['MW_BUY'] < df_bids['MW_SELL']) & (df_bids['DC_MCP_WAVG'] < 0) ## DC Sell, CF MCP --> go long
]

choices = [
    (df_bids['DC_MCP_WAVG'] * 1.2), ## Long: bid 20% higher than DC's clearing price
    (df_bids['DC_MCP_WAVG'] * 0.8), ## Short: bid 20% lower than DC's clearing price
    (df_bids['DC_MCP_WAVG'] * 0.8), ## Short: bid 20% lower than DC's clearing price
    (df_bids['DC_MCP_WAVG'] * 1.2), ## Long: bid 20% higher than DC's clearing price
]

df_bids['BID_ENTRY'] = np.select(conditions, choices, default=np.nan)
df_bids = df_bids[~df_bids['BID_ENTRY'].isna()]

df_bids['ENTRY_TYPE'] = np.where(df_bids['MW_BUY'] >= df_bids['MW_SELL'], 'BUY', 'SELL')
df_bids['FLOW_TYPE'] = np.where(df_bids['MCP'] >= 0, 'PF', 'CF')

## Get an indicator for whether entry cleared
df_bids['CLEARED_ENTRY'] = np.where(
    (df_bids['ENTRY_TYPE'] == 'BUY') & (df_bids['BID_ENTRY'] > df_bids['MCP']) | 
    (df_bids['ENTRY_TYPE'] == 'SELL') & (df_bids['BID_ENTRY'] < df_bids['MCP']),
    1,
    0
).astype(int)

df_bids.head()

Unnamed: 0,ID_PATH_FLOWMONTH,ISO,PEAKTYPE,HEDGETYPE,SOURCEID,SINKID,CLUSTERID_LOW_SRC,CLUSTERID_LOW_SNK,SOURCE_ZONE,SINK_ZONE,...,FUTURE_AUCTION_MONTH,FUTURE_AUCTION_TYPE,FUTURE_FW,FUTURE_MCP,WINRATE_BUY_LASTMARK,WINRATE_SELL_LASTMARK,BID_ENTRY,ENTRY_TYPE,FLOW_TYPE,CLEARED_ENTRY
0,-9223337983664651781,PJMISO,WEPEAK,Obligation,51252,659771,51252,PPL:92,PPL,PPL,...,2023-03-01,Apr,0.0,1.05,0.68,0.32,-12.912,BUY,CF,0
1,-9223234284026271016,PJMISO,OFF7X8,Obligation,1369012492,1348264767,ATSI:86,AEP:274,ATSI,AEP,...,2023-07-01,Aug,0.0,19.41,0.0,1.0,26.358201,BUY,PF,1
2,-9223234284026271016,PJMISO,OFF7X8,Obligation,1369012492,1348264767,ATSI:86,AEP:274,ATSI,AEP,...,2023-06-01,Jul,1.0,20.41,0.0,1.0,26.358201,BUY,PF,1
3,-9223234284026271016,PJMISO,OFF7X8,Obligation,1369012492,1348264767,ATSI:86,AEP:274,ATSI,AEP,...,2023-07-01,Aug,0.0,19.41,0.44,0.56,26.358201,BUY,PF,1
4,-9223116829435080393,PJMISO,OFF7X8,Obligation,32417779,33092303,COMED:76,COMED:268,COMED,COMED,...,2023-11-01,Dec,0.0,-4.56,0.96,0.04,-21.56,BUY,CF,0


In [210]:
## We can also tabulate exits now. If our bid clear, the OFFER_EXIT is based on MCP (which is entry cost)

# df_potential_trades['OFFER_EXIT'] = df_potential_trades['MCP_ENTRY'] * 1.2
# df_potential_trades['CLEARED_EXIT'] = (df_potential_trades['OFFER_EXIT'] < df_potential_trades['FUTURE_MCP']).astype(int) ## this logic assumes only selling a previous BUY

## Want to try to exit at a greater premium if we are more forward from flow
## If forward >= 2 --> 30% premium, FW1 --> 20%, FW0 --> exit at cost
df_bids['OFFER_PREMIUM'] = np.select(
    [df_bids['FUTURE_FW'] >= 2,
     df_bids['FUTURE_FW'] == 1,
     df_bids['FUTURE_FW'] == 0],
     [30, 20, 0]
) 

## Want to bid at a "premium" to DC: means bidding higher on long and bidding lower on short
conditions_exit = [
    (df_bids['ENTRY_TYPE'] == 'BUY') & (df_bids['FLOW_TYPE'] == 'PF'), ## Long
    (df_bids['ENTRY_TYPE'] == 'BUY') & (df_bids['FLOW_TYPE'] == 'CF'), ## Short
    (df_bids['ENTRY_TYPE'] == 'SELL') & (df_bids['FLOW_TYPE'] == 'PF'), ## Short
    (df_bids['ENTRY_TYPE'] == 'SELL') & (df_bids['FLOW_TYPE'] == 'CF') ## Long
]

choices_exit = [
    (df_bids['MCP'] * (1 + df_bids['OFFER_PREMIUM']/100)), ## Long: sell for more than we bought
    (df_bids['MCP'] * (1 - df_bids['OFFER_PREMIUM']/100)), ## Short: buy back for less than we bought
    (df_bids['MCP'] * (1 - df_bids['OFFER_PREMIUM']/100)), ## Short: buy back for less than we bought (want to buy it back for less than we sold it)
    (df_bids['MCP'] * (1 + df_bids['OFFER_PREMIUM']/100)), ## Long: sell for more than we bought (want someone to pay us more to take it back)
]

## Get an indicator for whether entry cleared
df_bids['OFFER_EXIT'] = np.select(conditions_exit, choices_exit, default=np.nan)
df_bids['EXIT_TYPE'] = np.where(
    df_bids['ENTRY_TYPE'] == 'BUY', 'SELL', 'BUY'
)
## Get an indicator for whether entry cleared
df_bids['CLEARED_EXIT'] = np.where(
    (df_bids['EXIT_TYPE'] == 'BUY') & (df_bids['OFFER_EXIT'] > df_bids['FUTURE_MCP']) | 
    (df_bids['EXIT_TYPE'] == 'SELL') & (df_bids['OFFER_EXIT'] < df_bids['FUTURE_MCP']),
    1,
    0
).astype(int)

df_bids.head()

Unnamed: 0,ID_PATH_FLOWMONTH,ISO,PEAKTYPE,HEDGETYPE,SOURCEID,SINKID,CLUSTERID_LOW_SRC,CLUSTERID_LOW_SNK,SOURCE_ZONE,SINK_ZONE,...,WINRATE_BUY_LASTMARK,WINRATE_SELL_LASTMARK,BID_ENTRY,ENTRY_TYPE,FLOW_TYPE,CLEARED_ENTRY,OFFER_PREMIUM,OFFER_EXIT,EXIT_TYPE,CLEARED_EXIT
0,-9223337983664651781,PJMISO,WEPEAK,Obligation,51252,659771,51252,PPL:92,PPL,PPL,...,0.68,0.32,-12.912,BUY,CF,0,0,-0.07,SELL,1
1,-9223234284026271016,PJMISO,OFF7X8,Obligation,1369012492,1348264767,ATSI:86,AEP:274,ATSI,AEP,...,0.0,1.0,26.358201,BUY,PF,1,0,23.79,SELL,0
2,-9223234284026271016,PJMISO,OFF7X8,Obligation,1369012492,1348264767,ATSI:86,AEP:274,ATSI,AEP,...,0.0,1.0,26.358201,BUY,PF,1,20,28.548,SELL,0
3,-9223234284026271016,PJMISO,OFF7X8,Obligation,1369012492,1348264767,ATSI:86,AEP:274,ATSI,AEP,...,0.44,0.56,26.358201,BUY,PF,1,0,20.41,SELL,0
4,-9223116829435080393,PJMISO,OFF7X8,Obligation,32417779,33092303,COMED:76,COMED:268,COMED,COMED,...,0.96,0.04,-21.56,BUY,CF,0,0,-5.88,SELL,1


In [211]:
df_bids['WINRATE'] = np.where(
    df_bids['ENTRY_TYPE'] == 'BUY',
    df_bids['WINRATE_BUY_LASTMARK'],
    df_bids['WINRATE_SELL_LASTMARK']
)

df_bids['RANK_WINRATE'] = df_bids.groupby(['AUCTION_MONTH','FLOWMONTH'])['WINRATE'].rank(method="first", ascending=False).astype(int)

In [212]:
## Want to know whether Long / Short a given cluster
## Long / Short is determined by whether entered as a BUY or a SELL

df_bids['LONGSHORT_SRC'] = np.where(
    df_bids['ENTRY_TYPE'] == 'BUY',
    -1, ## short the source if you buy
    +1 ## long the source if you sell
)

df_bids['LONGSHORT_SNK'] = np.where(
    df_bids['ENTRY_TYPE'] == 'BUY',
    +1, ## long the sink if you buy
    -1 ## short the sink if you sell
)

df_bids['LONGCLUSTER'] = np.where(
    df_bids['LONGSHORT_SNK'] == 1,
    df_bids['CLUSTERID_LOW_SNK'],
    df_bids['CLUSTERID_LOW_SRC']   
)

df_bids['SHORTCLUSTER'] = np.where(
    df_bids['LONGSHORT_SNK'] == -1,
    df_bids['CLUSTERID_LOW_SNK'],
    df_bids['CLUSTERID_LOW_SRC']   
)

df_bids.head()

Unnamed: 0,ID_PATH_FLOWMONTH,ISO,PEAKTYPE,HEDGETYPE,SOURCEID,SINKID,CLUSTERID_LOW_SRC,CLUSTERID_LOW_SNK,SOURCE_ZONE,SINK_ZONE,...,OFFER_PREMIUM,OFFER_EXIT,EXIT_TYPE,CLEARED_EXIT,WINRATE,RANK_WINRATE,LONGSHORT_SRC,LONGSHORT_SNK,LONGCLUSTER,SHORTCLUSTER
0,-9223337983664651781,PJMISO,WEPEAK,Obligation,51252,659771,51252,PPL:92,PPL,PPL,...,0,-0.07,SELL,1,0.68,495,-1,1,PPL:92,51252
1,-9223234284026271016,PJMISO,OFF7X8,Obligation,1369012492,1348264767,ATSI:86,AEP:274,ATSI,AEP,...,0,23.79,SELL,0,0.0,2183,-1,1,AEP:274,ATSI:86
2,-9223234284026271016,PJMISO,OFF7X8,Obligation,1369012492,1348264767,ATSI:86,AEP:274,ATSI,AEP,...,20,28.548,SELL,0,0.0,2184,-1,1,AEP:274,ATSI:86
3,-9223234284026271016,PJMISO,OFF7X8,Obligation,1369012492,1348264767,ATSI:86,AEP:274,ATSI,AEP,...,0,20.41,SELL,0,0.44,2861,-1,1,AEP:274,ATSI:86
4,-9223116829435080393,PJMISO,OFF7X8,Obligation,32417779,33092303,COMED:76,COMED:268,COMED,COMED,...,0,-5.88,SELL,1,0.96,359,-1,1,COMED:268,COMED:76


# Get portfolio outcomes

## Get trades and holdings

In [213]:
## df_bids is really a big set of potential trades

## To look at our portfolio we will do the following iteratively for each AUCTION_MONTH:
## 1) Consider potential trades:
##      a) Consider path-flowmonths that are not in our holdings
##      b) Subset to FW >= 3
## 2) See what clears, add it to *Holdings*
## 3) Get costs and revenue
## 4) Move on to the next AUCTION_MONTH & repeat

In [214]:
df_bids.head()

Unnamed: 0,ID_PATH_FLOWMONTH,ISO,PEAKTYPE,HEDGETYPE,SOURCEID,SINKID,CLUSTERID_LOW_SRC,CLUSTERID_LOW_SNK,SOURCE_ZONE,SINK_ZONE,...,OFFER_PREMIUM,OFFER_EXIT,EXIT_TYPE,CLEARED_EXIT,WINRATE,RANK_WINRATE,LONGSHORT_SRC,LONGSHORT_SNK,LONGCLUSTER,SHORTCLUSTER
0,-9223337983664651781,PJMISO,WEPEAK,Obligation,51252,659771,51252,PPL:92,PPL,PPL,...,0,-0.07,SELL,1,0.68,495,-1,1,PPL:92,51252
1,-9223234284026271016,PJMISO,OFF7X8,Obligation,1369012492,1348264767,ATSI:86,AEP:274,ATSI,AEP,...,0,23.79,SELL,0,0.0,2183,-1,1,AEP:274,ATSI:86
2,-9223234284026271016,PJMISO,OFF7X8,Obligation,1369012492,1348264767,ATSI:86,AEP:274,ATSI,AEP,...,20,28.548,SELL,0,0.0,2184,-1,1,AEP:274,ATSI:86
3,-9223234284026271016,PJMISO,OFF7X8,Obligation,1369012492,1348264767,ATSI:86,AEP:274,ATSI,AEP,...,0,20.41,SELL,0,0.44,2861,-1,1,AEP:274,ATSI:86
4,-9223116829435080393,PJMISO,OFF7X8,Obligation,32417779,33092303,COMED:76,COMED:268,COMED,COMED,...,0,-5.88,SELL,1,0.96,359,-1,1,COMED:268,COMED:76


In [215]:
## A function to itertively keep trades based on the following diversification criteria:
##      Remove trades we already have 
##      Only allow 1 Long and 1 Short trade per cluster-flowmonth

def select_trades_longshort(df):

    ## Sort the trade set by our ranking
    df = df.sort_values(by=['RANK_WINRATE', 'AUCTION_MONTH', 'FLOWMONTH'])

    ## Initialize an empty df to store the selected trades
    selected_trades = pd.DataFrame(columns=df.columns)

    ## Iterate through unique 'FLOWMONTH' groups
    for (auction, flow) in df.groupby(['AUCTION_MONTH', 'FLOWMONTH']).groups.keys():
        ## Get all trades for the current group
        group_trades = df[(df['AUCTION_MONTH'] == auction) & (df['FLOWMONTH'] == flow)].copy()

        ## While there are still potential trades in the group
        while not group_trades.empty:
            ## Select the first trade (highest ranked) from the sorted group
            selected_trade = group_trades.head(1).reset_index()
            selected_trades = pd.concat([selected_trades, selected_trade], ignore_index=True)

            ## Don't need this if we just drop similar trades immediately
            # ## Add the selected trade's long cluster and short cluster to those dfs
            # longcluster = np.where(
            #     selected_trade['LONGSHORT_SNK'] == 1,
            #     selected_trade['CLUSTERID_LOW_SNK'],
            #     selected_trade['CLUSTERID_LOW_SRC']
            # )
            # df_temp_longclusters = pd.concat([df_temp_longclusters, longcluster], ignore_index=True)

            # shortcluster = np.where(
            #     selected_trade['LONGSHORT_SNK'] == -1,
            #     selected_trade['CLUSTERID_LOW_SNK'],
            #     selected_trade['CLUSTERID_LOW_SRC']
            # )
            # df_temp_shortclusters = pd.concat([df_temp_shortclusters, shortcluster], ignore_index=True)

            ## Remove all trades that share the same long/short cluster
            group_trades = group_trades[
                ## Drop trades based on the cluster criteria
                (group_trades['LONGCLUSTER'] != selected_trade['LONGCLUSTER'].values[0]) &
                (group_trades['SHORTCLUSTER'] != selected_trade['SHORTCLUSTER'].values[0]) &
                ## Drop trades that share a node we are already trading in that flow month
                (group_trades['SOURCEID'] != selected_trade['SOURCEID'].values[0]) &
                (group_trades['SINKID'] != selected_trade['SOURCEID'].values[0]) &
                (group_trades['SOURCEID'] != selected_trade['SINKID'].values[0]) &
                (group_trades['SINKID'] != selected_trade['SINKID'].values[0])
            ]

    return selected_trades

In [216]:
sorted_auction_months = sorted(df_bids['AUCTION_MONTH'].unique())
df_holdings = pd.DataFrame(columns=df_bids.columns)

for auction_month in sorted_auction_months:

    ## 1) Consider potential trades
    ## 1a) Get broad set of potential trades that we are not already holding
    df_potential_trades = df_bids[
        (df_bids['AUCTION_MONTH'] == auction_month) & 
        (~df_bids['ID_PATH_FLOWMONTH'].isin(df_holdings['ID_PATH_FLOWMONTH']))
    ]

    # df_potential_trades[~df_potential_trades['FUTURE_FW'].isna()] ## don't drop these, just have to hold (unless we remap these nodes)

    ## 1b) Only consider trades with enough opportunities to sell
    df_potential_trades = df_potential_trades[df_potential_trades['FW']>=3]
    if df_potential_trades.empty: continue ## Need this because, for example, standing in March, there are no FWs satisfying FW > 3 (planning year is June-June)

    ## 1c) This is where we select trades / diversify
    ## 1c.i) This function keeps 1 long (and 1 short) trade per cluster
    df_selected_trades = select_trades_longshort(df_potential_trades)

    
    ## 2) See what clears and add it to holdings
    ## Only keep what clears
    df_selected_trades = df_selected_trades[df_selected_trades['CLEARED_ENTRY'] == 1]
    # df_selected_trades = df_potential_trades[df_potential_trades['CLEARED_ENTRY'] == 1]
    
    ## 3) Get where we exit trades
    ## Sort by CLEARED_EXIT and then FUTURE_AUCTION_MONTH. Gives us the first thing to clear or the first record
    df_sorted = df_selected_trades.sort_values(by=['ID_PATH_FLOWMONTH', 'AUCTION_MONTH', 'CLEARED_EXIT', 'FUTURE_AUCTION_MONTH'], ascending=[True, True, False, True])
    df_first_rows = df_sorted.groupby(['ID_PATH_FLOWMONTH', 'AUCTION_MONTH']).first().reset_index()
    ## Handling cases where everything is empty <-- (this shouldn't happen)
    df_first_rows = df_first_rows.dropna(how='all')
    if df_first_rows.empty: 
        print(f"Month with empty df_first_rows: {auction_month}")
        continue

    ## Add the kept trades to holdings
    df_holdings = pd.concat([df_holdings, df_first_rows]).reset_index(drop=True)



  selected_trades = pd.concat([selected_trades, selected_trade], ignore_index=True)
  df_holdings = pd.concat([df_holdings, df_first_rows]).reset_index(drop=True)
  selected_trades = pd.concat([selected_trades, selected_trade], ignore_index=True)
  selected_trades = pd.concat([selected_trades, selected_trade], ignore_index=True)
  selected_trades = pd.concat([selected_trades, selected_trade], ignore_index=True)
  selected_trades = pd.concat([selected_trades, selected_trade], ignore_index=True)
  selected_trades = pd.concat([selected_trades, selected_trade], ignore_index=True)
  selected_trades = pd.concat([selected_trades, selected_trade], ignore_index=True)
  selected_trades = pd.concat([selected_trades, selected_trade], ignore_index=True)
  selected_trades = pd.concat([selected_trades, selected_trade], ignore_index=True)


In [217]:
## Get cost and revenue
df_holdings.head()

Unnamed: 0,ID_PATH_FLOWMONTH,ISO,PEAKTYPE,HEDGETYPE,SOURCEID,SINKID,CLUSTERID_LOW_SRC,CLUSTERID_LOW_SNK,SOURCE_ZONE,SINK_ZONE,...,OFFER_EXIT,EXIT_TYPE,CLEARED_EXIT,WINRATE,RANK_WINRATE,LONGSHORT_SRC,LONGSHORT_SNK,LONGCLUSTER,SHORTCLUSTER,index
0,-9222514428509594973,PJMISO,ONPEAK,Obligation,50754,1552843818,METED:39,METED:39,METED,METED,...,-810.498,BUY,0,1.0,1,1,-1,METED:39,METED:39,80.0
1,-9220810376192278441,PJMISO,ONPEAK,Obligation,40243783,32418595,AEP:331,AEP:81,AEP,AEP,...,17.472,SELL,0,0.0,9437,-1,1,AEP:81,AEP:331,358.0
2,-9220066552863087576,PJMISO,WEPEAK,Obligation,27677106,19623499,APS:26,19623499,APS,APS,...,-84.292,BUY,1,1.0,1,1,-1,APS:26,19623499,460.0
3,-9219936064532111128,PJMISO,OFF7X8,Obligation,50641,1048039,PPL:52,PPL:21,PPL,PPL,...,26.628,BUY,1,1.0,1,1,-1,PPL:52,PPL:21,524.0
4,-9212397093433755705,PJMISO,OFF7X8,Obligation,51231,50769,51231,PENELEC:47,PENELEC,PENELEC,...,-35.61,BUY,1,1.0,11,1,-1,51231,PENELEC:47,1359.0


In [218]:
df_holdings['FLOWMONTH'].unique()

<DatetimeArray>
['2023-12-01 00:00:00', '2024-03-01 00:00:00', '2024-02-01 00:00:00',
 '2023-10-01 00:00:00', '2024-04-01 00:00:00', '2024-05-01 00:00:00',
 '2023-09-01 00:00:00', '2023-11-01 00:00:00', '2024-01-01 00:00:00']
Length: 9, dtype: datetime64[ns]

## Calculate revenue

In [219]:
df_revenue = df_holdings.filter(items=[
    'ID_PATH_FLOWMONTH',
    'ISO',
    'PEAKTYPE',
    'HEDGETYPE',
    'SOURCEID',
    'SINKID',
    'CLUSTERID_LOW_SRC',
    'CLUSTERID_LOW_SNK',
    'SOURCE_ZONE',
    'SINK_ZONE',
    'FLOWMONTH',
    'AUCTION_MONTH',
    'AUCTION_TYPE',
    'FW',
    'MCP',
    'CONG',
    'FUTURE_AUCTION_MONTH',
    'FUTURE_AUCTION_TYPE',
    'FUTURE_FW',
    'FUTURE_MCP',
    'BID_ENTRY',
    'ENTRY_TYPE',
    'FLOW_TYPE',
    'CLEARED_ENTRY',
    'OFFER_EXIT',
    'EXIT_TYPE',
    'CLEARED_EXIT'
]).copy()

df_revenue.head()

Unnamed: 0,ID_PATH_FLOWMONTH,ISO,PEAKTYPE,HEDGETYPE,SOURCEID,SINKID,CLUSTERID_LOW_SRC,CLUSTERID_LOW_SNK,SOURCE_ZONE,SINK_ZONE,...,FUTURE_AUCTION_TYPE,FUTURE_FW,FUTURE_MCP,BID_ENTRY,ENTRY_TYPE,FLOW_TYPE,CLEARED_ENTRY,OFFER_EXIT,EXIT_TYPE,CLEARED_EXIT
0,-9222514428509594973,PJMISO,ONPEAK,Obligation,50754,1552843818,METED:39,METED:39,METED,METED,...,Aug,4.0,-760.07,-870.494062,SELL,CF,1,-810.498,BUY,0
1,-9220810376192278441,PJMISO,ONPEAK,Obligation,40243783,32418595,AEP:331,AEP:81,AEP,AEP,...,Aug,7.0,3.36,14.435875,BUY,PF,1,17.472,SELL,0
2,-9220066552863087576,PJMISO,WEPEAK,Obligation,27677106,19623499,APS:26,19623499,APS,APS,...,Aug,6.0,-87.02,-113.545756,SELL,CF,1,-84.292,BUY,1
3,-9219936064532111128,PJMISO,OFF7X8,Obligation,50641,1048039,PPL:52,PPL:21,PPL,PPL,...,Aug,2.0,1.81,-43.531898,SELL,PF,1,26.628,BUY,1
4,-9212397093433755705,PJMISO,OFF7X8,Obligation,51231,50769,51231,PENELEC:47,PENELEC,PENELEC,...,Apr,0.0,-218.94,-122.617058,SELL,CF,1,-35.61,BUY,1


In [220]:
## Need to assert this
df_revenue[df_revenue['CLEARED_ENTRY']==0].head()

Unnamed: 0,ID_PATH_FLOWMONTH,ISO,PEAKTYPE,HEDGETYPE,SOURCEID,SINKID,CLUSTERID_LOW_SRC,CLUSTERID_LOW_SNK,SOURCE_ZONE,SINK_ZONE,...,FUTURE_AUCTION_TYPE,FUTURE_FW,FUTURE_MCP,BID_ENTRY,ENTRY_TYPE,FLOW_TYPE,CLEARED_ENTRY,OFFER_EXIT,EXIT_TYPE,CLEARED_EXIT


In [221]:
df_revenue['COST'] = df_revenue['MCP']

df_revenue['REVENUE_MONTH'] = np.where(
    df_revenue['CLEARED_EXIT']==1,
    df_revenue['FUTURE_AUCTION_MONTH'],
    df_revenue['FLOWMONTH']
)

df_revenue['REVENUE'] = np.where(
    df_revenue['CLEARED_EXIT']==1,
    df_revenue['FUTURE_MCP'],
    df_revenue['CONG']
)

df_revenue['PROFIT'] = np.where(
    df_revenue['ENTRY_TYPE'] == 'BUY',
    df_revenue['REVENUE'] - df_revenue['COST'],
    df_revenue['COST'] - df_revenue['REVENUE']
)

In [222]:
df_revenue.head()

Unnamed: 0,ID_PATH_FLOWMONTH,ISO,PEAKTYPE,HEDGETYPE,SOURCEID,SINKID,CLUSTERID_LOW_SRC,CLUSTERID_LOW_SNK,SOURCE_ZONE,SINK_ZONE,...,ENTRY_TYPE,FLOW_TYPE,CLEARED_ENTRY,OFFER_EXIT,EXIT_TYPE,CLEARED_EXIT,COST,REVENUE_MONTH,REVENUE,PROFIT
0,-9222514428509594973,PJMISO,ONPEAK,Obligation,50754,1552843818,METED:39,METED:39,METED,METED,...,SELL,CF,1,-810.498,BUY,0,-623.46,2023-12-01,-344.0512,-279.4088
1,-9220810376192278441,PJMISO,ONPEAK,Obligation,40243783,32418595,AEP:331,AEP:81,AEP,AEP,...,BUY,PF,1,17.472,SELL,0,13.44,2024-03-01,14.0992,0.6592
2,-9220066552863087576,PJMISO,WEPEAK,Obligation,27677106,19623499,APS:26,19623499,APS,APS,...,SELL,CF,1,-84.292,BUY,1,-64.84,2023-07-01,-87.02,22.18
3,-9219936064532111128,PJMISO,OFF7X8,Obligation,50641,1048039,PPL:52,PPL:21,PPL,PPL,...,SELL,PF,1,26.628,BUY,1,38.04,2023-07-01,1.81,36.23
4,-9212397093433755705,PJMISO,OFF7X8,Obligation,51231,50769,51231,PENELEC:47,PENELEC,PENELEC,...,SELL,CF,1,-35.61,BUY,1,-35.61,2024-03-01,-218.94,183.33


# Profit and Loss

## PnL by flow month

In [223]:
df_pnl_flowmonth = df_revenue.groupby('FLOWMONTH').agg(
    PROFIT = ('PROFIT', 'sum'),
    TRADE_COUNT = ('ID_PATH_FLOWMONTH', 'count')
).reset_index()

df_pnl_flowmonth = df_pnl_flowmonth.sort_values(by=['FLOWMONTH'])
df_pnl_flowmonth['PROFIT_CUMSUM'] = df_pnl_flowmonth['PROFIT'].cumsum()

df_pnl_flowmonth['PROFIT_SCALED'] = df_pnl_flowmonth['PROFIT'] / df_pnl_flowmonth['TRADE_COUNT']
df_pnl_flowmonth['PROFITSCALED_CUMSUM'] = df_pnl_flowmonth['PROFIT_SCALED'].cumsum()

df_pnl_flowmonth.head(12)

Unnamed: 0,FLOWMONTH,PROFIT,TRADE_COUNT,PROFIT_CUMSUM,PROFIT_SCALED,PROFITSCALED_CUMSUM
0,2023-09-01,-3114.4768,148,-3114.4768,-21.043762,-21.043762
1,2023-10-01,457.7308,280,-2656.746,1.634753,-19.409009
2,2023-11-01,-10921.1203,429,-13577.8663,-25.457157,-44.866166
3,2023-12-01,15226.3956,563,1648.5293,27.045108,-17.821059
4,2024-01-01,66356.1248,696,68004.6541,95.33926,77.518201
5,2024-02-01,49426.2096,812,117430.8637,60.869716,138.387917
6,2024-03-01,29606.0645,962,147036.9282,30.775535,169.163452
7,2024-04-01,29057.8308,1144,176094.759,25.400202,194.563654
8,2024-05-01,56503.3604,1300,232598.1194,43.464123,238.027777


In [224]:
df_pnl_flowmonth = df_revenue.groupby(['FLOWMONTH']).agg(
    PROFIT_TOTAL = ('PROFIT', 'sum'),
    TRADE_COUNT = ('ID_PATH_FLOWMONTH', 'count'),
    PROFIT_PF = ('PROFIT', lambda x: x[df_revenue['FLOW_TYPE'] == 'PF'].sum()),
    TRADE_COUNT_PF = ('ID_PATH_FLOWMONTH', lambda x: x[df_revenue['FLOW_TYPE'] == 'PF'].count()),
    PROFIT_CF = ('PROFIT', lambda x: x[df_revenue['FLOW_TYPE'] == 'CF'].sum()),
    TRADE_COUNT_CF = ('ID_PATH_FLOWMONTH', lambda x: x[df_revenue['FLOW_TYPE'] == 'CF'].count())
).reset_index()

df_pnl_flowmonth = df_pnl_flowmonth.sort_values(by=['FLOWMONTH'])
df_pnl_flowmonth['PROFIT_TOTAL_CUMSUM'] = df_pnl_flowmonth['PROFIT_TOTAL'].cumsum()
df_pnl_flowmonth['PROFIT_PF_CUMSUM'] = df_pnl_flowmonth['PROFIT_PF'].cumsum()
df_pnl_flowmonth['PROFIT_CF_CUMSUM'] = df_pnl_flowmonth['PROFIT_CF'].cumsum()

## Return per MW
df_pnl_flowmonth['PROFIT_PER_MW'] = df_pnl_flowmonth['PROFIT_TOTAL'] / df_pnl_flowmonth['TRADE_COUNT']
df_pnl_flowmonth['PF_PROFIT_PER_MW'] = df_pnl_flowmonth['PROFIT_PF'] / df_pnl_flowmonth['TRADE_COUNT_PF']
df_pnl_flowmonth['CF_PROFIT_PER_MW'] = df_pnl_flowmonth['PROFIT_CF'] / df_pnl_flowmonth['TRADE_COUNT_CF']


## Try to format?
df_pnl_flowmonth['PROFIT_TOTAL'] = df_pnl_flowmonth['PROFIT_TOTAL'].apply(lambda x: "${:,.2f}".format(x))
df_pnl_flowmonth['PROFIT_TOTAL_CUMSUM'] = df_pnl_flowmonth['PROFIT_TOTAL_CUMSUM'].apply(lambda x: "${:,.2f}".format(x))
df_pnl_flowmonth['PROFIT_PF'] = df_pnl_flowmonth['PROFIT_PF'].apply(lambda x: "${:,.2f}".format(x))
df_pnl_flowmonth['PROFIT_PF_CUMSUM'] = df_pnl_flowmonth['PROFIT_PF_CUMSUM'].apply(lambda x: "${:,.2f}".format(x))
df_pnl_flowmonth['PROFIT_CF'] = df_pnl_flowmonth['PROFIT_CF'].apply(lambda x: "${:,.2f}".format(x))
df_pnl_flowmonth['PROFIT_CF_CUMSUM'] = df_pnl_flowmonth['PROFIT_CF_CUMSUM'].apply(lambda x: "${:,.2f}".format(x))

df_pnl_flowmonth['PROFIT_PER_MW'] = df_pnl_flowmonth['PROFIT_PER_MW'].apply(lambda x: "$/MW {:,.2f}".format(x))
df_pnl_flowmonth['PF_PROFIT_PER_MW'] = df_pnl_flowmonth['PF_PROFIT_PER_MW'].apply(lambda x: "$/MW {:,.2f}".format(x))
df_pnl_flowmonth['CF_PROFIT_PER_MW'] = df_pnl_flowmonth['CF_PROFIT_PER_MW'].apply(lambda x: "$/MW {:,.2f}".format(x))

df_pnl_flowmonth = df_pnl_flowmonth[
    ['FLOWMONTH', 
     'PROFIT_TOTAL', 'PROFIT_TOTAL_CUMSUM',
     'PROFIT_PER_MW',
     'PROFIT_PF', 'PROFIT_PF_CUMSUM', 'PF_PROFIT_PER_MW',
     'PROFIT_CF', 'PROFIT_CF_CUMSUM', 'CF_PROFIT_PER_MW',
     'TRADE_COUNT', 
     'TRADE_COUNT_PF', 'TRADE_COUNT_CF']
]

df_pnl_flowmonth.head(12)

Unnamed: 0,FLOWMONTH,PROFIT_TOTAL,PROFIT_TOTAL_CUMSUM,PROFIT_PER_MW,PROFIT_PF,PROFIT_PF_CUMSUM,PF_PROFIT_PER_MW,PROFIT_CF,PROFIT_CF_CUMSUM,CF_PROFIT_PER_MW,TRADE_COUNT,TRADE_COUNT_PF,TRADE_COUNT_CF
0,2023-09-01,"$-3,114.48","$-3,114.48",$/MW -21.04,"$-1,328.65","$-1,328.65",$/MW -17.72,"$-1,785.83","$-1,785.83",$/MW -24.46,148,75,73
1,2023-10-01,$457.73,"$-2,656.75",$/MW 1.63,"$3,070.36","$1,741.71",$/MW 25.80,"$-2,612.63","$-4,398.46",$/MW -16.23,280,119,161
2,2023-11-01,"$-10,921.12","$-13,577.87",$/MW -25.46,"$-7,156.67","$-5,414.96",$/MW -41.13,"$-3,764.45","$-8,162.91",$/MW -14.76,429,174,255
3,2023-12-01,"$15,226.40","$1,648.53",$/MW 27.05,"$-4,230.81","$-9,645.77",$/MW -21.81,"$19,457.20","$11,294.29",$/MW 52.73,563,194,369
4,2024-01-01,"$66,356.12","$68,004.65",$/MW 95.34,"$58,752.75","$49,106.98",$/MW 211.34,"$7,603.38","$18,897.67",$/MW 18.19,696,278,418
5,2024-02-01,"$49,426.21","$117,430.86",$/MW 60.87,"$24,244.66","$73,351.65",$/MW 81.63,"$25,181.55","$44,079.22",$/MW 48.90,812,297,515
6,2024-03-01,"$29,606.06","$147,036.93",$/MW 30.78,"$11,783.49","$85,135.13",$/MW 34.06,"$17,822.58","$61,901.80",$/MW 28.93,962,346,616
7,2024-04-01,"$29,057.83","$176,094.76",$/MW 25.40,"$13,006.81","$98,141.94",$/MW 29.83,"$16,051.02","$77,952.82",$/MW 22.67,1144,436,708
8,2024-05-01,"$56,503.36","$232,598.12",$/MW 43.46,"$44,745.20","$142,887.14",$/MW 93.03,"$11,758.16","$89,710.98",$/MW 14.36,1300,481,819


## PnL by zone

In [153]:
## sometimes zone is missing and that throws off the calculation
print(df_revenue['SOURCE_ZONE'].unique())
print(df_revenue['SINK_ZONE'].unique())

['METED' 'AEP' 'APS' 'PPL' 'PENELEC' 'EKPC' 'DPL' 'COMED' 'DOM' 'DEOK'
 'BGE' None 'DAY' 'ATSI' 'PECO' 'PEPCO' 'PSEG' 'OVEC' 'DUQ' 'JCPL' 'AECO'
 'RECO']
['METED' 'AEP' 'APS' 'PPL' 'PENELEC' 'DPL' 'COMED' 'DOM' 'DEOK' 'BGE'
 'AECO' None 'PEPCO' 'ATSI' 'PSEG' 'JCPL' 'PECO' 'DAY' 'RECO' 'EKPC' 'DUQ'
 'OVEC']


In [227]:
df_pnl_zone = df_revenue.copy().groupby('SINK_ZONE').agg(
    PROFIT = ('PROFIT', 'sum'),
    TRADE_COUNT = ('ID_PATH_FLOWMONTH', 'count')
).reset_index()

# df_pnl_zone = df_revenue.copy().groupby('SOURCE_ZONE').agg(
#     PROFIT = ('PROFIT', 'sum'),
#     TRADE_COUNT = ('ID_PATH_FLOWMONTH', 'count')
# ).reset_index()

print(f"Are there duplicate IDs?: {df_revenue.duplicated(subset=['ID_PATH_FLOWMONTH']).sum()}")
print(f"Are there missing profits?: {df_revenue['PROFIT'].isnull().sum()}")

df_pnl_zone['PROFIT_TOTAL'] = df_pnl_zone['PROFIT'].sum()

df_pnl_zone['PROFIT_SCALED'] = df_pnl_zone['PROFIT'] / df_pnl_zone['TRADE_COUNT']

df_pnl_zone.head(50)

Are there duplicate IDs?: 0
Are there missing profits?: 0


Unnamed: 0,SINK_ZONE,PROFIT,TRADE_COUNT,PROFIT_TOTAL,PROFIT_SCALED
0,AECO,1365.8533,184,221333.8795,7.423116
1,AEP,60021.134,891,221333.8795,67.363787
2,APS,30066.3968,339,221333.8795,88.691436
3,ATSI,15031.6409,387,221333.8795,38.841449
4,BGE,6466.6505,165,221333.8795,39.191821
5,COMED,32706.3414,799,221333.8795,40.934094
6,DAY,5556.6531,121,221333.8795,45.922753
7,DEOK,5458.3808,211,221333.8795,25.869103
8,DOM,20083.2488,814,221333.8795,24.672296
9,DUQ,-491.2684,47,221333.8795,-10.452519


## PnL in a cash flow sense

In [156]:
df_revenue.head()

Unnamed: 0,ID_PATH_FLOWMONTH,ISO,PEAKTYPE,HEDGETYPE,SOURCEID,SINKID,CLUSTERID_LOW_SRC,CLUSTERID_LOW_SNK,SOURCE_ZONE,SINK_ZONE,...,ENTRY_TYPE,FLOW_TYPE,CLEARED_ENTRY,OFFER_EXIT,EXIT_TYPE,CLEARED_EXIT,COST,REVENUE_MONTH,REVENUE,PROFIT
0,-9222514428509594973,PJMISO,ONPEAK,Obligation,50754,1552843818,METED:39,METED:39,METED,METED,...,SELL,CF,1,-810.498,BUY,0,-623.46,2023-12-01,-344.0512,-279.4088
1,-9220810376192278441,PJMISO,ONPEAK,Obligation,40243783,32418595,AEP:331,AEP:81,AEP,AEP,...,BUY,PF,1,17.472,SELL,0,13.44,2024-03-01,14.0992,0.6592
2,-9220066552863087576,PJMISO,WEPEAK,Obligation,27677106,19623499,APS:26,19623499,APS,APS,...,SELL,CF,1,-84.292,BUY,1,-64.84,2023-07-01,-87.02,22.18
3,-9219936064532111128,PJMISO,OFF7X8,Obligation,50641,1048039,PPL:52,PPL:21,PPL,PPL,...,SELL,PF,1,26.628,BUY,1,38.04,2023-07-01,1.81,36.23
4,-9212397093433755705,PJMISO,OFF7X8,Obligation,51231,50769,51231,PENELEC:47,PENELEC,PENELEC,...,SELL,CF,1,-35.61,BUY,1,-35.61,2024-03-01,-218.94,183.33


In [157]:
df_entry = df_revenue.filter(items=[
    'ID_PATH_FLOWMONTH',
    'ISO',
    'PEAKTYPE',
    'HEDGETYPE',
    'SOURCEID',
    'SINKID',
    'FLOWMONTH',
    'ENTRY_TYPE',
    'AUCTION_MONTH',
    'COST'
]).copy()
df_entry.rename(columns={'AUCTION_MONTH': 'MONTH_CF'}, inplace=True)
df_entry.rename(columns={'COST': 'CASHFLOW'}, inplace=True)
## If it was a long FW position, cost is negative (out-flow)
df_entry['CASHFLOW'] = np.where(
    df_entry['ENTRY_TYPE'] == 'SELL',
    df_entry['CASHFLOW'],
    df_entry['CASHFLOW']*(-1)
)

df_exit = df_revenue.filter(items=[
    'ID_PATH_FLOWMONTH',
    'ISO',
    'PEAKTYPE',
    'HEDGETYPE',
    'SOURCEID',
    'SINKID',
    'FLOWMONTH',
    'ENTRY_TYPE',
    'REVENUE_MONTH',
    'REVENUE'
]).copy()
df_exit.rename(columns={'REVENUE_MONTH': 'MONTH_CF'}, inplace=True)
df_exit.rename(columns={'REVENUE': 'CASHFLOW'}, inplace=True)

## If it was a long FW position, revenue is positive (in-flow)
df_exit['CASHFLOW'] = np.where(
    df_exit['ENTRY_TYPE'] == 'BUY',
    df_exit['CASHFLOW'],
    df_exit['CASHFLOW']*(-1)
)

df_pnl_cashflow = pd.concat([df_entry, df_exit]).reset_index(drop=True)
df_pnl_cashflow.head()

Unnamed: 0,ID_PATH_FLOWMONTH,ISO,PEAKTYPE,HEDGETYPE,SOURCEID,SINKID,FLOWMONTH,ENTRY_TYPE,MONTH_CF,CASHFLOW
0,-9222514428509594973,PJMISO,ONPEAK,Obligation,50754,1552843818,2023-12-01,SELL,2023-05-01,-623.46
1,-9220810376192278441,PJMISO,ONPEAK,Obligation,40243783,32418595,2024-03-01,BUY,2023-05-01,-13.44
2,-9220066552863087576,PJMISO,WEPEAK,Obligation,27677106,19623499,2024-02-01,SELL,2023-05-01,-64.84
3,-9219936064532111128,PJMISO,OFF7X8,Obligation,50641,1048039,2023-10-01,SELL,2023-05-01,38.04
4,-9212397093433755705,PJMISO,OFF7X8,Obligation,51231,50769,2024-04-01,SELL,2023-05-01,-35.61


In [158]:
df_pnl_cashflow = df_pnl_cashflow.groupby('MONTH_CF').agg(
    PROFIT = ('CASHFLOW', 'sum'),
    TRADE_COUNT = ('ID_PATH_FLOWMONTH', 'count')
).reset_index()

df_pnl_cashflow = df_pnl_cashflow.sort_values(by=['MONTH_CF'])
df_pnl_cashflow['PROFIT_CUMSUM'] = df_pnl_cashflow['PROFIT'].cumsum()

df_pnl_cashflow['PROFIT_SCALED'] = df_pnl_cashflow['PROFIT'] / df_pnl_cashflow['TRADE_COUNT']
df_pnl_cashflow['PROFITSCALED_CUMSUM'] = df_pnl_cashflow['PROFIT_SCALED'].cumsum()

df_pnl_cashflow.head(20)

Unnamed: 0,MONTH_CF,PROFIT,TRADE_COUNT,PROFIT_CUMSUM,PROFIT_SCALED,PROFITSCALED_CUMSUM
0,2023-05-01,-2250.29,1361,-2250.29,-1.653409,-1.653409
1,2023-06-01,16563.19,1175,14312.9,14.096332,12.442923
2,2023-07-01,29309.14,1870,43622.04,15.673337,28.11626
3,2023-08-01,29946.54,872,73568.58,34.342362,62.458622
4,2023-09-01,28033.9068,858,101602.4868,32.673551,95.132173
5,2023-10-01,6671.2428,770,108273.7296,8.663952,103.796125
6,2023-11-01,-1422.0918,1669,106851.6378,-0.852062,102.944062
7,2023-12-01,-15900.8292,564,90950.8086,-28.19296,74.751103
8,2024-01-01,-15494.018,566,75456.7906,-27.37459,47.376513
9,2024-02-01,-11316.3912,450,64140.3994,-25.147536,22.228977


In [159]:
print(f"Sum of profit column from flow-month PnL: {df_pnl_flowmonth['PROFIT'].sum()}")
print(f"Sum of profit column from cash-flow-month PnL: {df_pnl_cashflow['PROFIT'].sum()}")

Sum of profit column from flow-month PnL: 204848.42220000003
Sum of profit column from cash-flow-month PnL: 204848.42220000003


# Get portfolios to look at

In [225]:
df_holdings.head()

Unnamed: 0,ID_PATH_FLOWMONTH,ISO,PEAKTYPE,HEDGETYPE,SOURCEID,SINKID,CLUSTERID_LOW_SRC,CLUSTERID_LOW_SNK,SOURCE_ZONE,SINK_ZONE,...,OFFER_EXIT,EXIT_TYPE,CLEARED_EXIT,WINRATE,RANK_WINRATE,LONGSHORT_SRC,LONGSHORT_SNK,LONGCLUSTER,SHORTCLUSTER,index
0,-9222514428509594973,PJMISO,ONPEAK,Obligation,50754,1552843818,METED:39,METED:39,METED,METED,...,-810.498,BUY,0,1.0,1,1,-1,METED:39,METED:39,80.0
1,-9220810376192278441,PJMISO,ONPEAK,Obligation,40243783,32418595,AEP:331,AEP:81,AEP,AEP,...,17.472,SELL,0,0.0,9437,-1,1,AEP:81,AEP:331,358.0
2,-9220066552863087576,PJMISO,WEPEAK,Obligation,27677106,19623499,APS:26,19623499,APS,APS,...,-84.292,BUY,1,1.0,1,1,-1,APS:26,19623499,460.0
3,-9219936064532111128,PJMISO,OFF7X8,Obligation,50641,1048039,PPL:52,PPL:21,PPL,PPL,...,26.628,BUY,1,1.0,1,1,-1,PPL:52,PPL:21,524.0
4,-9212397093433755705,PJMISO,OFF7X8,Obligation,51231,50769,51231,PENELEC:47,PENELEC,PENELEC,...,-35.61,BUY,1,1.0,11,1,-1,51231,PENELEC:47,1359.0


In [229]:
flowmonths = sorted(df_holdings['FLOWMONTH'].unique())
print(flowmonths)

## Worst: Nov 2023
## Best: Jan 2023
print(flowmonths[2])
print(flowmonths[4])

[Timestamp('2023-09-01 00:00:00'), Timestamp('2023-10-01 00:00:00'), Timestamp('2023-11-01 00:00:00'), Timestamp('2023-12-01 00:00:00'), Timestamp('2024-01-01 00:00:00'), Timestamp('2024-02-01 00:00:00'), Timestamp('2024-03-01 00:00:00'), Timestamp('2024-04-01 00:00:00'), Timestamp('2024-05-01 00:00:00')]
2023-11-01 00:00:00
2024-01-01 00:00:00


In [None]:
thismonth = flowmonths[1]
print(thismonth)

df_takealook = df_holdings[df_holdings['FLOWMONTH']==flowmonths[1]].copy()
df_takealook['PROFIT'] = np.where(
    df_takealook['ENTRY_TYPE']=='BUY',
    (df_takealook['CONG']-df_takealook['MCP']) * df_takealook['CLEARED_ENTRY'],
    (df_takealook['MCP']-df_takealook['CONG'])  * df_takealook['CLEARED_ENTRY']
)
print(df_takealook['PROFIT'].sum())

df_takealook = df_takealook.filter(items=[
    'FLOWMONTH',
    'AUCTION_MONTH',
    'PEAKTYPE',
    'HEDGETYPE',
    'SOURCEID',
    'SINKID',
    'ENTRY_TYPE',
    'BID_ENTRY',
    # 'MWS_ROUNDED2'
])

print(df_takealook.dtypes)

## YE auction_dates are one month ahead of VB auction_months
df_takealook['AUCTION_MONTH'] = df_takealook['AUCTION_MONTH'] + pd.DateOffset(months=1)
df_takealook.rename(columns={'AUCTION_MONTH': 'AUCTION_DATE'}, inplace=True)

df_takealook.rename(columns={'FLOWMONTH': 'CONTRACT_STARTDATE'}, inplace=True)
df_takealook.rename(columns={'ENTRY_TYPE': 'TRADE_TYPE'}, inplace=True)
df_takealook.rename(columns={'BID_ENTRY': 'BID'}, inplace=True)
# df_takealook.rename(columns={'MWS_ROUNDED2': 'PATHSIZE'}, inplace=True)
df_takealook['CONTRACT_TYPE'] = "M"

# df_takealook.to_csv('~/Downloads/sasha_bidstrat_Aug24.csv', index=False)

df_takealook.head()

2023-10-01 00:00:00
-53669.2396
FLOWMONTH        datetime64[ns]
AUCTION_MONTH    datetime64[ns]
PEAKTYPE                 object
HEDGETYPE                object
SOURCEID                 object
SINKID                   object
ENTRY_TYPE               object
BID_ENTRY               float64
dtype: object


Unnamed: 0,CONTRACT_STARTDATE,AUCTION_DATE,PEAKTYPE,HEDGETYPE,SOURCEID,SINKID,TRADE_TYPE,BID,CONTRACT_TYPE
8,2023-10-01,2023-06-01,OFF7X8,Obligation,50641,1048039,SELL,-43.531898,M
17,2023-10-01,2023-06-01,ONPEAK,Obligation,33092311,1268571933,BUY,163.796453,M
23,2023-10-01,2023-06-01,WEPEAK,Obligation,1348263397,2156109754,BUY,-12.038309,M
26,2023-10-01,2023-06-01,ONPEAK,Obligation,1123180723,32412317,SELL,20.339806,M
42,2023-10-01,2023-06-01,OFF7X8,Obligation,34497127,123901555,BUY,114.756304,M
