In [201]:
import pandas as pd
import numpy as np

from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split, cross_validate, KFold, LeaveOneOut
from sklearn.preprocessing import StandardScaler, normalize, MinMaxScaler, PowerTransformer

from scipy.stats.mstats import winsorize

import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter

from datetime import datetime

import seaborn as sns

# Data processing

## Get data

In [202]:
## Load the exploratory extract; column dtypes are whatever pandas infers from the CSV
df_raw = pd.read_csv(filepath_or_buffer='exploratory.csv')

## Show the inferred schema
raw_dtypes = df_raw.dtypes
print(raw_dtypes)

ID_PATH_FLOWMONTH           int64
ISO                        object
PEAKTYPE                   object
HEDGETYPE                  object
SOURCEID                    int64
SINKID                      int64
MW_NET                    float64
MW_BUY                    float64
MW_SELL                   float64
DC_MCP_WAVG               float64
DC_AUCTION_TYPE_FIRST      object
DC_AUCTION_MONTH_FIRST     object
FW_DC_FIRST                 int64
FLOWMONTH                  object
AUCTION_ID                 object
AUCTION_MONTH              object
AUCTION_TYPE               object
FW                          int64
MCP                       float64
CONG                      float64
FUTURE_AUCTION_ID          object
FUTURE_AUCTION_MONTH       object
FUTURE_AUCTION_TYPE        object
FUTURE_FW                 float64
FUTURE_MCP                float64
dtype: object


In [203]:
## Preview the first rows of the raw extract
df_raw.head(n=5)

Unnamed: 0,ID_PATH_FLOWMONTH,ISO,PEAKTYPE,HEDGETYPE,SOURCEID,SINKID,MW_NET,MW_BUY,MW_SELL,DC_MCP_WAVG,...,AUCTION_MONTH,AUCTION_TYPE,FW,MCP,CONG,FUTURE_AUCTION_ID,FUTURE_AUCTION_MONTH,FUTURE_AUCTION_TYPE,FUTURE_FW,FUTURE_MCP
0,-9223337983664651781,PJMISO,WEPEAK,Obligation,51252,659771,40.0,40.0,0.0,-16.14,...,2023-02-01,Mar,1,-0.07,0.2016,PJMISO:Apr:1:2023-03-01,2023-03-01,Apr,0.0,1.05
1,-9223234284026271016,PJMISO,OFF7X8,Obligation,1369012492,1348264767,16.4,16.4,0.0,21.965167,...,2023-05-01,Jun,2,23.79,46.4896,PJMISO:Aug:1:2023-07-01,2023-07-01,Aug,0.0,19.41
2,-9223234284026271016,PJMISO,OFF7X8,Obligation,1369012492,1348264767,16.4,16.4,0.0,21.965167,...,2023-05-01,Jun,2,23.79,46.4896,PJMISO:Jul:1:2023-06-01,2023-06-01,Jul,1.0,20.41
3,-9223234284026271016,PJMISO,OFF7X8,Obligation,1369012492,1348264767,16.4,16.4,0.0,21.965167,...,2023-06-01,Jul,1,20.41,46.4896,PJMISO:Aug:1:2023-07-01,2023-07-01,Aug,0.0,19.41
4,-9223116829435080393,PJMISO,OFF7X8,Obligation,32417779,33092303,30.0,30.0,0.0,-26.95,...,2023-08-01,Sep,3,-5.88,-0.4024,PJMISO:Dec:1:2023-11-01,2023-11-01,Dec,0.0,-4.56


In [204]:
## Summary statistics of the numeric columns (quartiles spelled out; these are the defaults)
df_raw.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,ID_PATH_FLOWMONTH,SOURCEID,SINKID,MW_NET,MW_BUY,MW_SELL,DC_MCP_WAVG,FW_DC_FIRST,FW,MCP,CONG,FUTURE_FW,FUTURE_MCP
count,2023091.0,2023091.0,2023091.0,2023091.0,2023091.0,2023091.0,2023091.0,2023091.0,2023091.0,2023091.0,2023091.0,2019640.0,2019640.0
mean,3.260349e+16,438486500.0,447223500.0,10.54111,14.28585,3.744738,26.16037,8.869535,5.589818,32.99981,16.19917,2.311615,29.40816
std,5.322809e+18,692626500.0,691540500.0,18.37164,16.40692,8.016405,321.3635,2.451281,2.603479,339.6788,564.0391,2.196493,323.2869
min,-9.223338e+18,48824.0,48824.0,-182.8,0.0,0.0,-5760.43,2.0,1.0,-6740.72,-27043.18,0.0,-10979.64
25%,-4.577779e+18,21601790.0,31065430.0,0.0,4.2,0.0,-25.19202,7.0,4.0,-24.41,-30.0184,0.0,-25.05
50%,8.05898e+16,38368010.0,44460860.0,8.0,9.1,0.0,-3.11,9.0,6.0,1.72,0.2592,2.0,2.11
75%,4.634187e+18,1084391000.0,1084391000.0,16.5,18.8,5.1,16.11641,11.0,8.0,35.15,37.5776,4.0,37.31
max,9.223273e+18,2156114000.0,2156114000.0,471.8,471.8,182.8,13378.85,12.0,11.0,13690.67,26366.86,10.0,17803.15


## Placeholder for subsetting

In [205]:
## Row filters
net_position_nonzero = df_raw['MW_NET'].ne(0) ## Drop paths that they bought up and totally exited (like an annual flip)
dc_price_nonzero = df_raw['DC_MCP_WAVG'].ne(0)
is_obligation = df_raw['HEDGETYPE'].eq('Obligation')

## Columns carried forward. AUCTION_ID and FUTURE_AUCTION_ID are not carried forward.
sample_columns = [
    'ID_PATH_FLOWMONTH',
    'ISO',
    'PEAKTYPE',
    'HEDGETYPE',
    'SOURCEID',
    'SINKID',
    'MW_NET',
    'MW_BUY',
    'MW_SELL',
    'DC_MCP_WAVG',
    'DC_AUCTION_TYPE_FIRST',
    'DC_AUCTION_MONTH_FIRST',
    'FW_DC_FIRST',
    'FLOWMONTH',
    'AUCTION_MONTH',
    'AUCTION_TYPE',
    'FW',
    'MCP',
    'CONG',
    'FUTURE_AUCTION_MONTH',
    'FUTURE_AUCTION_TYPE',
    'FUTURE_FW',
    'FUTURE_MCP',
]

## Filter the rows, detach from df_raw, then keep only the listed columns
df_sample = (
    df_raw[net_position_nonzero & dc_price_nonzero & is_obligation]
    .copy()
    .filter(items=sample_columns)
)

df_sample.head()

Unnamed: 0,ID_PATH_FLOWMONTH,ISO,PEAKTYPE,HEDGETYPE,SOURCEID,SINKID,MW_NET,MW_BUY,MW_SELL,DC_MCP_WAVG,...,FLOWMONTH,AUCTION_MONTH,AUCTION_TYPE,FW,MCP,CONG,FUTURE_AUCTION_MONTH,FUTURE_AUCTION_TYPE,FUTURE_FW,FUTURE_MCP
0,-9223337983664651781,PJMISO,WEPEAK,Obligation,51252,659771,40.0,40.0,0.0,-16.14,...,2023-04-01,2023-02-01,Mar,1,-0.07,0.2016,2023-03-01,Apr,0.0,1.05
1,-9223234284026271016,PJMISO,OFF7X8,Obligation,1369012492,1348264767,16.4,16.4,0.0,21.965167,...,2023-08-01,2023-05-01,Jun,2,23.79,46.4896,2023-07-01,Aug,0.0,19.41
2,-9223234284026271016,PJMISO,OFF7X8,Obligation,1369012492,1348264767,16.4,16.4,0.0,21.965167,...,2023-08-01,2023-05-01,Jun,2,23.79,46.4896,2023-06-01,Jul,1.0,20.41
3,-9223234284026271016,PJMISO,OFF7X8,Obligation,1369012492,1348264767,16.4,16.4,0.0,21.965167,...,2023-08-01,2023-06-01,Jul,1,20.41,46.4896,2023-07-01,Aug,0.0,19.41
4,-9223116829435080393,PJMISO,OFF7X8,Obligation,32417779,33092303,30.0,30.0,0.0,-26.95,...,2023-12-01,2023-08-01,Sep,3,-5.88,-0.4024,2023-11-01,Dec,0.0,-4.56


# Apply bid strategy

In [206]:
df_bids = df_sample.copy()

## Parse the month columns (YYYY-MM-DD strings in the CSV) into datetimes
for date_col in ['AUCTION_MONTH', 'FUTURE_AUCTION_MONTH', 'FLOWMONTH']:
    df_bids[date_col] = pd.to_datetime(df_bids[date_col], format='%Y-%m-%d')

## Entry bid = DC's weighted-average clearing price times one of these multipliers.
## Note the multiplier scales the magnitude: for a negative DC price, 1.2x is a more
## negative number and 0.8x is a less negative number.
LONG_BID_MULTIPLIER = 1.2
SHORT_BID_MULTIPLIER = 0.8

dc_is_buyer = df_bids['MW_BUY'] >= df_bids['MW_SELL']
dc_is_seller = df_bids['MW_BUY'] < df_bids['MW_SELL']
dc_price_positive = df_bids['DC_MCP_WAVG'] > 0
dc_price_negative = df_bids['DC_MCP_WAVG'] < 0

## Want to bid at a "premium" to DC: means bidding higher on long and bidding lower on short
conditions = [
    dc_is_buyer & dc_price_positive, ## DC Buy, PF MCP --> go long
    dc_is_buyer & dc_price_negative, ## DC Buy, CF MCP --> go short
    dc_is_seller & dc_price_positive, ## DC Sell, PF MCP --> go short
    dc_is_seller & dc_price_negative, ## DC Sell, CF MCP --> go long
]

choices = [
    df_bids['DC_MCP_WAVG'] * LONG_BID_MULTIPLIER,
    df_bids['DC_MCP_WAVG'] * SHORT_BID_MULTIPLIER,
    df_bids['DC_MCP_WAVG'] * SHORT_BID_MULTIPLIER,
    df_bids['DC_MCP_WAVG'] * LONG_BID_MULTIPLIER,
]

df_bids['BID_ENTRY'] = np.select(conditions, choices, default=np.nan)

## Rows that match no rule get no bid and are dropped.
## .copy() makes the result an independent frame, so the column assignments below
## never write into a slice of the previous frame (no SettingWithCopyWarning).
df_bids = df_bids[df_bids['BID_ENTRY'].notna()].copy()

## Our entry side mirrors DC's net side; flow direction is taken from the sign of this auction's MCP
df_bids['ENTRY_TYPE'] = np.where(df_bids['MW_BUY'] >= df_bids['MW_SELL'], 'BUY', 'SELL')
df_bids['FLOW_TYPE'] = np.where(df_bids['MCP'] >= 0, 'PF', 'CF')

## Get an indicator for whether entry cleared:
## a BUY clears when the bid is above MCP, a SELL clears when the bid is below MCP
buy_entry_clears = (df_bids['ENTRY_TYPE'] == 'BUY') & (df_bids['BID_ENTRY'] > df_bids['MCP'])
sell_entry_clears = (df_bids['ENTRY_TYPE'] == 'SELL') & (df_bids['BID_ENTRY'] < df_bids['MCP'])
df_bids['CLEARED_ENTRY'] = (buy_entry_clears | sell_entry_clears).astype(int)

df_bids.head()

Unnamed: 0,ID_PATH_FLOWMONTH,ISO,PEAKTYPE,HEDGETYPE,SOURCEID,SINKID,MW_NET,MW_BUY,MW_SELL,DC_MCP_WAVG,...,MCP,CONG,FUTURE_AUCTION_MONTH,FUTURE_AUCTION_TYPE,FUTURE_FW,FUTURE_MCP,BID_ENTRY,ENTRY_TYPE,FLOW_TYPE,CLEARED_ENTRY
0,-9223337983664651781,PJMISO,WEPEAK,Obligation,51252,659771,40.0,40.0,0.0,-16.14,...,-0.07,0.2016,2023-03-01,Apr,0.0,1.05,-12.912,BUY,CF,0
1,-9223234284026271016,PJMISO,OFF7X8,Obligation,1369012492,1348264767,16.4,16.4,0.0,21.965167,...,23.79,46.4896,2023-07-01,Aug,0.0,19.41,26.358201,BUY,PF,1
2,-9223234284026271016,PJMISO,OFF7X8,Obligation,1369012492,1348264767,16.4,16.4,0.0,21.965167,...,23.79,46.4896,2023-06-01,Jul,1.0,20.41,26.358201,BUY,PF,1
3,-9223234284026271016,PJMISO,OFF7X8,Obligation,1369012492,1348264767,16.4,16.4,0.0,21.965167,...,20.41,46.4896,2023-07-01,Aug,0.0,19.41,26.358201,BUY,PF,1
4,-9223116829435080393,PJMISO,OFF7X8,Obligation,32417779,33092303,30.0,30.0,0.0,-26.95,...,-5.88,-0.4024,2023-11-01,Dec,0.0,-4.56,-21.56,BUY,CF,0


In [207]:
## We can also tabulate exits now. If our bid clears, the OFFER_EXIT is based on MCP (which is entry cost)

## Want to try to exit at a greater premium if we are more forward from flow
## If forward >= 2 --> 30% premium, FW1 --> 20%, FW0 --> exit at cost
## Rows matching none of these (e.g. a missing FUTURE_FW) get np.select's default of 0
future_fw = df_bids['FUTURE_FW']
df_bids['OFFER_PREMIUM'] = np.select(
    [future_fw >= 2, future_fw == 1, future_fw == 0],
    [30, 20, 0],
    default=0
)

## The exit offer marks the entry price (MCP) up or down by the premium
premium_fraction = df_bids['OFFER_PREMIUM']/100
offer_marked_up = df_bids['MCP'] * (1 + premium_fraction)
offer_marked_down = df_bids['MCP'] * (1 - premium_fraction)

entry_is_buy = df_bids['ENTRY_TYPE'] == 'BUY'
entry_is_sell = df_bids['ENTRY_TYPE'] == 'SELL'
flow_is_pf = df_bids['FLOW_TYPE'] == 'PF'
flow_is_cf = df_bids['FLOW_TYPE'] == 'CF'

conditions_exit = [
    entry_is_buy & flow_is_pf, ## Long
    entry_is_buy & flow_is_cf, ## Short
    entry_is_sell & flow_is_pf, ## Short
    entry_is_sell & flow_is_cf, ## Long
]

choices_exit = [
    offer_marked_up, ## Long: sell for more than we bought
    offer_marked_down, ## Short: buy back for less than we bought
    offer_marked_down, ## Short: buy back for less than we bought (want to buy it back for less than we sold it)
    offer_marked_up, ## Long: sell for more than we bought (want someone to pay us more to take it back)
]

## Exit offer price; NaN when no rule matches
df_bids['OFFER_EXIT'] = np.select(conditions_exit, choices_exit, default=np.nan)

## The exit is the opposite side of the entry
df_bids['EXIT_TYPE'] = np.where(entry_is_buy, 'SELL', 'BUY')

## Get an indicator for whether the exit cleared in the future auction:
## a BUY exit clears when the offer is above FUTURE_MCP, a SELL exit when it is below
buy_exit_clears = (df_bids['EXIT_TYPE'] == 'BUY') & (df_bids['OFFER_EXIT'] > df_bids['FUTURE_MCP'])
sell_exit_clears = (df_bids['EXIT_TYPE'] == 'SELL') & (df_bids['OFFER_EXIT'] < df_bids['FUTURE_MCP'])
df_bids['CLEARED_EXIT'] = np.where(buy_exit_clears | sell_exit_clears, 1, 0).astype(int)

df_bids.head()

Unnamed: 0,ID_PATH_FLOWMONTH,ISO,PEAKTYPE,HEDGETYPE,SOURCEID,SINKID,MW_NET,MW_BUY,MW_SELL,DC_MCP_WAVG,...,FUTURE_FW,FUTURE_MCP,BID_ENTRY,ENTRY_TYPE,FLOW_TYPE,CLEARED_ENTRY,OFFER_PREMIUM,OFFER_EXIT,EXIT_TYPE,CLEARED_EXIT
0,-9223337983664651781,PJMISO,WEPEAK,Obligation,51252,659771,40.0,40.0,0.0,-16.14,...,0.0,1.05,-12.912,BUY,CF,0,0,-0.07,SELL,1
1,-9223234284026271016,PJMISO,OFF7X8,Obligation,1369012492,1348264767,16.4,16.4,0.0,21.965167,...,0.0,19.41,26.358201,BUY,PF,1,0,23.79,SELL,0
2,-9223234284026271016,PJMISO,OFF7X8,Obligation,1369012492,1348264767,16.4,16.4,0.0,21.965167,...,1.0,20.41,26.358201,BUY,PF,1,20,28.548,SELL,0
3,-9223234284026271016,PJMISO,OFF7X8,Obligation,1369012492,1348264767,16.4,16.4,0.0,21.965167,...,0.0,19.41,26.358201,BUY,PF,1,0,20.41,SELL,0
4,-9223116829435080393,PJMISO,OFF7X8,Obligation,32417779,33092303,30.0,30.0,0.0,-26.95,...,0.0,-4.56,-21.56,BUY,CF,0,0,-5.88,SELL,1


# Get portfolio outcomes

## Get trades and holdings

In [208]:
## df_bids is really a big set of potential trades

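## To look at our portfolio we will do the following iteratively for each AUCTION_MONTH:
## 1) Consider potential trades:
##      a) Consider path-flowmonths that are not in our holdings
##      b) Subset to FW >= 3
## 2) See what clears, add it to *Holdings*
## 3) For each cleared trade, pick its exit: the first future auction where the exit clears, otherwise the first future auction on record
## 4) Move on to the next AUCTION_MONTH & repeat
## Costs and revenue are computed afterwards from the resulting holdings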

In [209]:
## Preview the potential trades before building holdings
df_bids.head(n=5)

Unnamed: 0,ID_PATH_FLOWMONTH,ISO,PEAKTYPE,HEDGETYPE,SOURCEID,SINKID,MW_NET,MW_BUY,MW_SELL,DC_MCP_WAVG,...,FUTURE_FW,FUTURE_MCP,BID_ENTRY,ENTRY_TYPE,FLOW_TYPE,CLEARED_ENTRY,OFFER_PREMIUM,OFFER_EXIT,EXIT_TYPE,CLEARED_EXIT
0,-9223337983664651781,PJMISO,WEPEAK,Obligation,51252,659771,40.0,40.0,0.0,-16.14,...,0.0,1.05,-12.912,BUY,CF,0,0,-0.07,SELL,1
1,-9223234284026271016,PJMISO,OFF7X8,Obligation,1369012492,1348264767,16.4,16.4,0.0,21.965167,...,0.0,19.41,26.358201,BUY,PF,1,0,23.79,SELL,0
2,-9223234284026271016,PJMISO,OFF7X8,Obligation,1369012492,1348264767,16.4,16.4,0.0,21.965167,...,1.0,20.41,26.358201,BUY,PF,1,20,28.548,SELL,0
3,-9223234284026271016,PJMISO,OFF7X8,Obligation,1369012492,1348264767,16.4,16.4,0.0,21.965167,...,0.0,19.41,26.358201,BUY,PF,1,0,20.41,SELL,0
4,-9223116829435080393,PJMISO,OFF7X8,Obligation,32417779,33092303,30.0,30.0,0.0,-26.95,...,0.0,-4.56,-21.56,BUY,CF,0,0,-5.88,SELL,1


In [210]:
## Build holdings by walking the auctions in chronological order.
## A path-flowmonth enters holdings at most once: after it is picked up in one auction
## it is excluded from the potential trades of every later auction.
MIN_ENTRY_FW = 3 ## Only enter when FW >= 3, so there are enough later auctions to sell into

sorted_auction_months = sorted(df_bids['AUCTION_MONTH'].unique())
held_path_flowmonths = set() ## ID_PATH_FLOWMONTH values already in holdings
holdings_by_month = [] ## One frame of new holdings per auction month; concatenated once at the end

for auction_month in sorted_auction_months:

    ## 1) Consider potential trades
    ## 1a) Get broad set of potential trades that we are not already holding
    df_potential_trades = df_bids[
        (df_bids['AUCTION_MONTH'] == auction_month) &
        (~df_bids['ID_PATH_FLOWMONTH'].isin(held_path_flowmonths))
    ]

    ## Rows with a missing FUTURE_FW are kept: those positions simply have to be held

    ## 1b) Only consider trades with enough opportunities to sell
    df_potential_trades = df_potential_trades[df_potential_trades['FW'] >= MIN_ENTRY_FW]
    if df_potential_trades.empty:
        ## Some auction months have nothing at FW >= 3 (e.g. standing in March; planning year is June-June)
        continue

    ## 2) See what clears: only keep what clears
    df_potential_trades = df_potential_trades[df_potential_trades['CLEARED_ENTRY'] == 1]

    ## 3) Get where we exit trades
    ## Sort so that, within each path-flowmonth, exits that clear come first and earlier
    ## future auctions come before later ones; then keep the first row of each group.
    ## drop_duplicates keeps that row whole, whereas groupby().first() would take the
    ## first non-null value of each column separately and could mix several rows.
    df_first_rows = (
        df_potential_trades
        .sort_values(
            by=['ID_PATH_FLOWMONTH', 'AUCTION_MONTH', 'CLEARED_EXIT', 'FUTURE_AUCTION_MONTH'],
            ascending=[True, True, False, True]
        )
        .drop_duplicates(subset=['ID_PATH_FLOWMONTH', 'AUCTION_MONTH'], keep='first')
    )
    if df_first_rows.empty:
        print(f"Month with empty df_first_rows: {auction_month}")
        continue

    ## Add the kept trades to holdings
    holdings_by_month.append(df_first_rows)
    held_path_flowmonths.update(df_first_rows['ID_PATH_FLOWMONTH'].tolist())

## Concatenate once; this avoids growing a frame inside the loop and the
## FutureWarning pandas raises when concatenating onto an empty frame
if holdings_by_month:
    df_holdings = pd.concat(holdings_by_month, ignore_index=True)
else:
    df_holdings = pd.DataFrame(columns=df_bids.columns)



  df_holdings = pd.concat([df_holdings, df_first_rows]).reset_index(drop=True)


In [211]:
## Preview the holdings that cost and revenue will be computed from
df_holdings.head(n=5)

Unnamed: 0,ID_PATH_FLOWMONTH,ISO,PEAKTYPE,HEDGETYPE,SOURCEID,SINKID,MW_NET,MW_BUY,MW_SELL,DC_MCP_WAVG,...,FUTURE_FW,FUTURE_MCP,BID_ENTRY,ENTRY_TYPE,FLOW_TYPE,CLEARED_ENTRY,OFFER_PREMIUM,OFFER_EXIT,EXIT_TYPE,CLEARED_EXIT
0,-9222893321979192545,PJMISO,OFF7X8,Obligation,71856761,34887787,6.9,6.9,0.0,-47.046907,...,7.0,-26.41,-37.637525,BUY,CF,1,30,-27.93,SELL,1
1,-9222514428509594973,PJMISO,ONPEAK,Obligation,50754,1552843818,-10.0,0.0,10.0,-725.411719,...,1.0,-960.0,-870.494062,SELL,CF,1,20,-748.152,BUY,1
2,-9221875954374263028,PJMISO,WEPEAK,Obligation,51241,1356162213,8.0,8.0,0.0,-13.399222,...,0.0,-15.11,-10.719378,BUY,CF,1,0,-22.94,SELL,1
3,-9221774299757105218,PJMISO,ONPEAK,Obligation,135389799,338269,30.0,30.0,0.0,448.187725,...,9.0,846.35,537.82527,BUY,PF,1,30,584.532,SELL,1
4,-9221237361624466890,PJMISO,ONPEAK,Obligation,50403,2155502045,20.0,20.0,0.0,148.274847,...,6.0,113.13,177.929816,BUY,PF,1,30,109.824,SELL,1


## Calculate revenue

In [212]:
## Columns needed to value each holding: identifiers, the entry auction, the chosen
## future auction, and the entry/exit bid outcomes
revenue_columns = [
    'ID_PATH_FLOWMONTH',
    'ISO',
    'PEAKTYPE',
    'HEDGETYPE',
    'SOURCEID',
    'SINKID',
    'FLOWMONTH',
    'AUCTION_MONTH',
    'AUCTION_TYPE',
    'FW',
    'MCP',
    'CONG',
    'FUTURE_AUCTION_MONTH',
    'FUTURE_AUCTION_TYPE',
    'FUTURE_FW',
    'FUTURE_MCP',
    'BID_ENTRY',
    'ENTRY_TYPE',
    'FLOW_TYPE',
    'CLEARED_ENTRY',
    'OFFER_EXIT',
    'EXIT_TYPE',
    'CLEARED_EXIT',
]

## Independent copy so the columns added later do not touch df_holdings
df_revenue = df_holdings.filter(items=revenue_columns).copy()

df_revenue.head()

Unnamed: 0,ID_PATH_FLOWMONTH,ISO,PEAKTYPE,HEDGETYPE,SOURCEID,SINKID,FLOWMONTH,AUCTION_MONTH,AUCTION_TYPE,FW,...,FUTURE_AUCTION_TYPE,FUTURE_FW,FUTURE_MCP,BID_ENTRY,ENTRY_TYPE,FLOW_TYPE,CLEARED_ENTRY,OFFER_EXIT,EXIT_TYPE,CLEARED_EXIT
0,-9222893321979192545,PJMISO,OFF7X8,Obligation,71856761,34887787,2024-05-01,2023-05-01,Jun,11,...,Oct,7.0,-26.41,-37.637525,BUY,CF,1,-27.93,SELL,1
1,-9222514428509594973,PJMISO,ONPEAK,Obligation,50754,1552843818,2023-12-01,2023-05-01,Jun,6,...,Nov,1.0,-960.0,-870.494062,SELL,CF,1,-748.152,BUY,1
2,-9221875954374263028,PJMISO,WEPEAK,Obligation,51241,1356162213,2024-04-01,2023-05-01,Jun,10,...,Apr,0.0,-15.11,-10.719378,BUY,CF,1,-22.94,SELL,1
3,-9221774299757105218,PJMISO,ONPEAK,Obligation,135389799,338269,2024-05-01,2023-05-01,Jun,11,...,Aug,9.0,846.35,537.82527,BUY,PF,1,584.532,SELL,1
4,-9221237361624466890,PJMISO,ONPEAK,Obligation,50403,2155502045,2024-05-01,2023-05-01,Jun,11,...,Nov,6.0,113.13,177.929816,BUY,PF,1,109.824,SELL,1


In [213]:
## Every holding must have cleared its entry bid (holdings are built only from CLEARED_ENTRY == 1 rows).
## Fail loudly if that ever stops being true instead of relying on an empty preview.
df_uncleared_entries = df_revenue[df_revenue['CLEARED_ENTRY'] == 0]
assert df_uncleared_entries.empty, f"{len(df_uncleared_entries)} holdings did not clear their entry bid"

df_uncleared_entries.head()

Unnamed: 0,ID_PATH_FLOWMONTH,ISO,PEAKTYPE,HEDGETYPE,SOURCEID,SINKID,FLOWMONTH,AUCTION_MONTH,AUCTION_TYPE,FW,...,FUTURE_AUCTION_TYPE,FUTURE_FW,FUTURE_MCP,BID_ENTRY,ENTRY_TYPE,FLOW_TYPE,CLEARED_ENTRY,OFFER_EXIT,EXIT_TYPE,CLEARED_EXIT


In [214]:
## Entry is valued at the clearing price of the entry auction
df_revenue['COST'] = df_revenue['MCP']

## If the exit offer cleared, revenue is FUTURE_MCP booked in the future auction month;
## otherwise revenue is CONG booked in the flow month
exit_cleared = df_revenue['CLEARED_EXIT'].eq(1)

df_revenue['REVENUE_MONTH'] = np.where(
    exit_cleared,
    df_revenue['FUTURE_AUCTION_MONTH'],
    df_revenue['FLOWMONTH']
)
df_revenue['REVENUE'] = np.where(
    exit_cleared,
    df_revenue['FUTURE_MCP'],
    df_revenue['CONG']
)

## A BUY entry earns revenue minus cost; a SELL entry earns cost minus revenue
entry_was_buy = df_revenue['ENTRY_TYPE'].eq('BUY')
margin_if_bought = df_revenue['REVENUE'] - df_revenue['COST']
margin_if_sold = df_revenue['COST'] - df_revenue['REVENUE']
df_revenue['PROFIT'] = np.where(entry_was_buy, margin_if_bought, margin_if_sold)

In [215]:
## Preview the holdings with COST, REVENUE_MONTH, REVENUE and PROFIT attached
df_revenue.head(n=5)

Unnamed: 0,ID_PATH_FLOWMONTH,ISO,PEAKTYPE,HEDGETYPE,SOURCEID,SINKID,FLOWMONTH,AUCTION_MONTH,AUCTION_TYPE,FW,...,ENTRY_TYPE,FLOW_TYPE,CLEARED_ENTRY,OFFER_EXIT,EXIT_TYPE,CLEARED_EXIT,COST,REVENUE_MONTH,REVENUE,PROFIT
0,-9222893321979192545,PJMISO,OFF7X8,Obligation,71856761,34887787,2024-05-01,2023-05-01,Jun,11,...,BUY,CF,1,-27.93,SELL,1,-39.9,2023-09-01,-26.41,13.49
1,-9222514428509594973,PJMISO,ONPEAK,Obligation,50754,1552843818,2023-12-01,2023-05-01,Jun,6,...,SELL,CF,1,-748.152,BUY,1,-623.46,2023-10-01,-960.0,336.54
2,-9221875954374263028,PJMISO,WEPEAK,Obligation,51241,1356162213,2024-04-01,2023-05-01,Jun,10,...,BUY,CF,1,-22.94,SELL,1,-22.94,2024-03-01,-15.11,7.83
3,-9221774299757105218,PJMISO,ONPEAK,Obligation,135389799,338269,2024-05-01,2023-05-01,Jun,11,...,BUY,PF,1,584.532,SELL,1,449.64,2023-07-01,846.35,396.71
4,-9221237361624466890,PJMISO,ONPEAK,Obligation,50403,2155502045,2024-05-01,2023-05-01,Jun,11,...,BUY,PF,1,109.824,SELL,1,84.48,2023-10-01,113.13,28.65


# Profit and Loss

## PnL by flow month

In [216]:
## Total profit and number of holdings per flow month, in chronological order, with
## running totals of both the profit and the per-trade average profit
df_pnl_flowmonth = (
    df_revenue
    .groupby('FLOWMONTH')
    .agg(
        PROFIT=('PROFIT', 'sum'),
        TRADE_COUNT=('ID_PATH_FLOWMONTH', 'count')
    )
    .reset_index()
    .sort_values(by=['FLOWMONTH'])
    .assign(
        PROFIT_CUMSUM=lambda pnl: pnl['PROFIT'].cumsum(),
        PROFIT_SCALED=lambda pnl: pnl['PROFIT'] / pnl['TRADE_COUNT'],
        PROFITSCALED_CUMSUM=lambda pnl: pnl['PROFIT_SCALED'].cumsum()
    )
)

df_pnl_flowmonth.head(12)

Unnamed: 0,FLOWMONTH,PROFIT,TRADE_COUNT,PROFIT_CUMSUM,PROFIT_SCALED,PROFITSCALED_CUMSUM
0,2023-09-01,-33429.1412,2507,-33429.1412,-13.33432,-13.33432
1,2023-10-01,-29562.4072,3186,-62991.5484,-9.278847,-22.613168
2,2023-11-01,3272.9162,4070,-59718.6322,0.804156,-21.809011
3,2023-12-01,41890.348,4452,-17828.2842,9.409332,-12.399679
4,2024-01-01,172462.7448,4520,154634.4606,38.155475,25.755796
5,2024-02-01,201279.682,5099,355914.1426,39.474344,65.23014
6,2024-03-01,71780.8148,6532,427694.9574,10.989102,76.219242
7,2024-04-01,128455.8084,7506,556150.7658,17.11375,93.332992
8,2024-05-01,278296.8,7911,834447.5658,35.17846,128.511453


## PnL in a cash flow sense

In [217]:
## Same per-holding frame as above; it is the input to the cash-flow view
df_revenue.head(n=5)

Unnamed: 0,ID_PATH_FLOWMONTH,ISO,PEAKTYPE,HEDGETYPE,SOURCEID,SINKID,FLOWMONTH,AUCTION_MONTH,AUCTION_TYPE,FW,...,ENTRY_TYPE,FLOW_TYPE,CLEARED_ENTRY,OFFER_EXIT,EXIT_TYPE,CLEARED_EXIT,COST,REVENUE_MONTH,REVENUE,PROFIT
0,-9222893321979192545,PJMISO,OFF7X8,Obligation,71856761,34887787,2024-05-01,2023-05-01,Jun,11,...,BUY,CF,1,-27.93,SELL,1,-39.9,2023-09-01,-26.41,13.49
1,-9222514428509594973,PJMISO,ONPEAK,Obligation,50754,1552843818,2023-12-01,2023-05-01,Jun,6,...,SELL,CF,1,-748.152,BUY,1,-623.46,2023-10-01,-960.0,336.54
2,-9221875954374263028,PJMISO,WEPEAK,Obligation,51241,1356162213,2024-04-01,2023-05-01,Jun,10,...,BUY,CF,1,-22.94,SELL,1,-22.94,2024-03-01,-15.11,7.83
3,-9221774299757105218,PJMISO,ONPEAK,Obligation,135389799,338269,2024-05-01,2023-05-01,Jun,11,...,BUY,PF,1,584.532,SELL,1,449.64,2023-07-01,846.35,396.71
4,-9221237361624466890,PJMISO,ONPEAK,Obligation,50403,2155502045,2024-05-01,2023-05-01,Jun,11,...,BUY,PF,1,109.824,SELL,1,84.48,2023-10-01,113.13,28.65


In [218]:
## Each holding contributes two cash-flow legs: one in the entry auction month and one
## in the revenue month. Both legs share these identifying columns.
leg_id_columns = [
    'ID_PATH_FLOWMONTH',
    'ISO',
    'PEAKTYPE',
    'HEDGETYPE',
    'SOURCEID',
    'SINKID',
    'FLOWMONTH',
    'ENTRY_TYPE',
]

## Entry leg: COST booked in AUCTION_MONTH
df_entry = (
    df_revenue
    .filter(items=leg_id_columns + ['AUCTION_MONTH', 'COST'])
    .rename(columns={'AUCTION_MONTH': 'MONTH_CF', 'COST': 'CASHFLOW'})
    .copy()
)
## If it was a long FW position, cost is negative (out-flow): SELL keeps the sign, BUY flips it
df_entry['CASHFLOW'] = np.where(
    df_entry['ENTRY_TYPE'] == 'SELL',
    df_entry['CASHFLOW'],
    -df_entry['CASHFLOW']
)

## Exit leg: REVENUE booked in REVENUE_MONTH
df_exit = (
    df_revenue
    .filter(items=leg_id_columns + ['REVENUE_MONTH', 'REVENUE'])
    .rename(columns={'REVENUE_MONTH': 'MONTH_CF', 'REVENUE': 'CASHFLOW'})
    .copy()
)
## If it was a long FW position, revenue is positive (in-flow): BUY keeps the sign, SELL flips it
df_exit['CASHFLOW'] = np.where(
    df_exit['ENTRY_TYPE'] == 'BUY',
    df_exit['CASHFLOW'],
    -df_exit['CASHFLOW']
)

## Stack entry legs on top of exit legs
df_pnl_cashflow = pd.concat([df_entry, df_exit]).reset_index(drop=True)
df_pnl_cashflow.head()

Unnamed: 0,ID_PATH_FLOWMONTH,ISO,PEAKTYPE,HEDGETYPE,SOURCEID,SINKID,FLOWMONTH,ENTRY_TYPE,MONTH_CF,CASHFLOW
0,-9222893321979192545,PJMISO,OFF7X8,Obligation,71856761,34887787,2024-05-01,BUY,2023-05-01,39.9
1,-9222514428509594973,PJMISO,ONPEAK,Obligation,50754,1552843818,2023-12-01,SELL,2023-05-01,-623.46
2,-9221875954374263028,PJMISO,WEPEAK,Obligation,51241,1356162213,2024-04-01,BUY,2023-05-01,22.94
3,-9221774299757105218,PJMISO,ONPEAK,Obligation,135389799,338269,2024-05-01,BUY,2023-05-01,-449.64
4,-9221237361624466890,PJMISO,ONPEAK,Obligation,50403,2155502045,2024-05-01,BUY,2023-05-01,-84.48


In [219]:
## Net cash flow per calendar month, in chronological order, with running totals.
## TRADE_COUNT here counts cash-flow legs (entry and exit legs are stacked in the input), not holdings.
## NOTE: this rebinds df_pnl_cashflow from the leg-level frame to the monthly summary, so
## re-running this cell requires re-running the cell that builds the legs first.
df_pnl_cashflow = (
    df_pnl_cashflow
    .groupby('MONTH_CF')
    .agg(
        PROFIT=('CASHFLOW', 'sum'),
        TRADE_COUNT=('ID_PATH_FLOWMONTH', 'count')
    )
    .reset_index()
    .sort_values(by=['MONTH_CF'])
    .assign(
        PROFIT_CUMSUM=lambda pnl: pnl['PROFIT'].cumsum(),
        PROFIT_SCALED=lambda pnl: pnl['PROFIT'] / pnl['TRADE_COUNT'],
        PROFITSCALED_CUMSUM=lambda pnl: pnl['PROFIT_SCALED'].cumsum()
    )
)

df_pnl_cashflow.head(20)

Unnamed: 0,MONTH_CF,PROFIT,TRADE_COUNT,PROFIT_CUMSUM,PROFIT_SCALED,PROFITSCALED_CUMSUM
0,2023-05-01,-482997.61,21995,-482997.61,-21.959428,-21.959428
1,2023-06-01,-145194.56,13533,-628192.17,-10.728926,-32.688354
2,2023-07-01,-4960.98,11723,-633153.15,-0.423183,-33.111537
3,2023-08-01,199458.13,8160,-433695.02,24.443398,-8.668139
4,2023-09-01,170748.8888,7315,-262946.1312,23.342295,14.674156
5,2023-10-01,295144.8228,6924,32198.6916,42.626346,57.300502
6,2023-11-01,-61018.8838,5850,-28820.1922,-10.430578,46.869924
7,2023-12-01,116006.028,4982,87185.8358,23.285032,70.154956
8,2024-01-01,-6406.3852,3239,80779.4506,-1.97789,68.177066
9,2024-02-01,-63093.738,2165,17685.7126,-29.142604,39.034462


In [197]:
## The two PnL views aggregate the same holdings, so their totals should agree up to float rounding
print(
    f"Sum of profit column from flow-month PnL: {df_pnl_flowmonth['PROFIT'].sum()}",
    f"Sum of profit column from cash-flow-month PnL: {df_pnl_cashflow['PROFIT'].sum()}",
    sep="\n"
)

Sum of profit column from flow-month PnL: 834447.5658
Sum of profit column from cash-flow-month PnL: 834447.5658000002
