In [201]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt
from scipy import stats
import numpy as np

In [202]:
# Read the ';'-separated price and trade files for days -1, 0 and 1, then stack
# each kind into a single frame with a fresh 0..n-1 index.
# (Single-day alternative: read 'round2/prices_round_2_day_1.csv' and
# 'round2/trades_round_2_day_1_nn.csv' directly.)
price_frames = []
trade_frames = []
for i in range(-1, 2):
    price_frames.append(pd.read_csv(f'round2/prices_round_2_day_{i}.csv', sep=';'))
    trade_frames.append(pd.read_csv(f'round2/trades_round_2_day_{i}_nn.csv', sep=';'))
prices = pd.concat(price_frames, ignore_index=True)
trades = pd.concat(trade_frames, ignore_index=True)

In [203]:
# Volume-weighted mid of the top of book: each side's best price is weighted by
# the volume resting on the opposite side.
bid_px, ask_px = prices['bid_price_1'], prices['ask_price_1']
bid_vol, ask_vol = prices['bid_volume_1'], prices['ask_volume_1']
prices['swmid'] = (bid_px * ask_vol + ask_px * bid_vol) / (ask_vol + bid_vol)

In [204]:
def _product_rows(name):
    """Return an independent copy of the `prices` rows for one product, re-indexed from zero."""
    is_product = prices["product"] == name
    return prices.loc[is_product].reset_index(drop=True).copy()


croissants = _product_rows("CROISSANTS")
jams = _product_rows("JAMS")
djembes = _product_rows("DJEMBES")
basket1 = _product_rows('PICNIC_BASKET1')
basket2 = _product_rows('PICNIC_BASKET2')

In [205]:
# Each frame now holds a single product, so its 'product' column is dropped.
croissants = croissants.drop(columns=['product'])
jams = jams.drop(columns=['product'])
djembes = djembes.drop(columns=['product'])
basket1 = basket1.drop(columns=['product'])
basket2 = basket2.drop(columns=['product'])

In [206]:
def _suffix_columns(frame, suffix):
    """Append `suffix` to every column name except the 'timestamp' and 'day' keys."""
    renamed = {}
    for col in frame.columns:
        if col not in ['timestamp', 'day']:
            renamed[col] = col + suffix
    return frame.rename(columns=renamed)


# NOTE: re-running this cell appends the suffix a second time; re-run from the split cell instead.
croissants = _suffix_columns(croissants, '_croissants')
jams = _suffix_columns(jams, '_jams')
djembes = _suffix_columns(djembes, '_djembes')
basket1 = _suffix_columns(basket1, '_basket1')
basket2 = _suffix_columns(basket2, '_basket2')

`PICNIC_BASKET1` contains three products: 

1. Six (6) `CROISSANTS`
2. Three (3) `JAMS`
3. One (1) `DJEMBES`

`PICNIC_BASKET2` contains just two products: 

1. Four (4) `CROISSANTS`
2. Two (2) `JAMS`

Aside from the Picnic Baskets, you can now also trade the three products individually on the island exchange. 

Position limits for the newly introduced products:

- `CROISSANTS`: 250
- `JAMS`: 350
- `DJEMBES`: 60
- `PICNIC_BASKET1`: 60
- `PICNIC_BASKET2`: 100

In [207]:
# Join croissants, jams, djembes, basket1 and basket2 on (day, timestamp),
# folding one frame at a time into the running result.
mk = croissants
for other in (jams, djembes, basket1, basket2):
    mk = mk.merge(other, on=['day', 'timestamp'])

In [208]:
# Units of each component per basket (see the basket composition above).
synth1_weights = {'croissants': 6, 'jams': 3, 'djembes': 1}
synth2_weights = {'croissants': 4, 'jams': 2}

# Synthetic basket price = weighted sum of the component swmids.
mk['swmid_synth1'] = sum(
    mk[f'swmid_{product}'] * value for product, value in synth1_weights.items()
)
mk['swmid_synth2'] = sum(
    mk[f'swmid_{product}'] * value for product, value in synth2_weights.items()
)

In [209]:
# Spread = quoted basket swmid minus the synthetic basket built from its components.
mk['swmid_spread1'] = mk['swmid_basket1'].sub(mk['swmid_synth1'])
mk['swmid_spread2'] = mk['swmid_basket2'].sub(mk['swmid_synth2'])

In [195]:
# Z-score of spread1. Note the two windows differ: the mean uses 300 rows, the
# standard deviation only 50.
_spread1_series = mk['swmid_spread1']
mk['spread1_sma'] = _spread1_series.rolling(300).mean()
mk['spread1_std'] = _spread1_series.rolling(50).std()
mk['spread1_zscore'] = _spread1_series.sub(mk['spread1_sma']).div(mk['spread1_std'])

In [196]:
# Spread1 and its rolling mean share the left axis; the z-score is drawn against
# its own right-hand axis.
#
# `mk` stacks several days and its rows are keyed by (day, timestamp), so the raw
# timestamp presumably restarts each day. Plotting against it would draw the days
# on top of each other, so the row index (load order) is used as the x-axis.
row_index = mk.index

fig = go.Figure()
fig.add_trace(go.Scatter(x=row_index, y=mk['swmid_spread1'], mode='lines', name='SWMid Spread1'))
fig.add_trace(go.Scatter(x=row_index, y=mk['spread1_sma'], mode='lines', name='Spread1 SMA'))
fig.add_trace(go.Scatter(x=row_index, y=mk['spread1_zscore'], mode='lines', name='Spread1 Z-Score', yaxis='y2'))

fig.update_layout(
    title='Spread1 Z-Score',
    xaxis_title='Row',
    # The left axis carries the spread and its SMA, not the z-score.
    yaxis_title='Spread',
    yaxis2=dict(title='Z Score', overlaying='y', side='right'),
)
fig.show()

In [None]:
'''
What is the period between the valleys and peaks in the spread plot above?

If we're trading in such spiky regions, we'd want to sell at the peaks and buy at the valleys.

'''

In [183]:
# Summary statistics of the spread1 z-score (NaN rows are excluded from the count).
mk.loc[:, 'spread1_zscore'].describe()

count    9701.000000
mean        0.252552
std         3.484058
min       -10.157279
25%        -2.210694
50%         0.050785
75%         2.689442
max        17.789029
Name: spread1_zscore, dtype: float64

day0 spread1 zscore summary
count    9701.000000
mean        0.322448
std         3.353061
min        -9.998620
25%        -2.123352
50%         0.228565
75%         2.708731
max        11.672497
Name: spread1_zscore, dtype: float64

day1
day0 spread1 zscore summary
count    9701.000000
mean        0.322448
std         3.353061
min        -9.998620
25%        -2.123352
50%         0.228565
75%         2.708731
max        11.672497
Name: spread1_zscore, dtype: float64

day1



In [None]:
def _as_backtest_frame(column):
    """Copy 'timestamp' and one spread column out of `mk`.

    The spread column is exposed twice, as 'swmid' and as 'spread'.
    """
    frame = mk[['timestamp', column]].copy()
    frame = frame.rename(columns={column: 'swmid'})
    frame['spread'] = frame['swmid'].copy()
    return frame


spread = _as_backtest_frame('swmid_spread1')
spread2 = _as_backtest_frame('swmid_spread2')

In [152]:
from tqdm import tqdm

def cross_spread(cash, quantity, cost_per_unit=10):
    """Deduct a flat per-unit cost from `cash` for trading `quantity` units.

    Parameters
    ----------
    cash : number
        Cash balance before the cost is applied.
    quantity : number
        Signed traded quantity; only its magnitude matters.
    cost_per_unit : number, default 10
        Cost charged per unit traded. The default reproduces the previously
        hard-coded value.

    Returns
    -------
    number
        `cash` reduced by ``abs(quantity) * cost_per_unit``.
    """
    return cash - abs(quantity) * cost_per_unit

def backtest(spread, thresh, target_position, std_window, sma_window, verbose=False):
    """Replay a z-score threshold strategy over `spread` and return its PnL path.

    The z-score of the 'spread' column is ``(spread - rolling mean) / rolling std``,
    with independent window lengths for the mean and the standard deviation.
    When the z-score exceeds `thresh` the position is moved to ``-target_position``;
    when it drops below ``-thresh`` it is moved to ``+target_position``. Trades are
    filled at the row's 'swmid' and each fill is additionally charged through
    ``cross_spread``.

    Parameters
    ----------
    spread : pandas.DataFrame
        Must contain 'spread' and 'swmid' columns ('timestamp' too when `verbose`).
        It is modified in place: the columns ``f'std{std_window}'``,
        ``f'sma{sma_window}'`` and 'spread_z' are added or overwritten.
    thresh : number
        Absolute z-score level that triggers a trade.
    target_position : number
        Size of the long/short position held after a trade.
    std_window, sma_window : int
        Rolling window lengths for the standard deviation and the mean.
    verbose : bool, default False
        Print every trade and the final PnL.

    Returns
    -------
    list
        Mark-to-market PnL (``cash + position * swmid``) after each processed row.
        The row whose index label is 0 is skipped, so with a default RangeIndex the
        list has ``len(spread) - 1`` entries (empty for a frame of 0 or 1 rows).
    """
    std_col = f'std{std_window}'
    sma_col = f'sma{sma_window}'
    spread[std_col] = spread['spread'].rolling(window=std_window).std()
    spread[sma_col] = spread['spread'].rolling(window=sma_window).mean()
    z_score = (spread['spread'].to_numpy() - spread[sma_col]) / spread[std_col].to_numpy()
    spread['spread_z'] = z_score

    # Pull the columns out as arrays once: iterating plain arrays is far cheaper
    # than building a Series per row with iterrows().
    mids = spread['swmid'].to_numpy(dtype=float)
    z_values = spread['spread_z'].to_numpy(dtype=float)
    # 'timestamp' is only needed for the trade log.
    timestamps = spread['timestamp'].to_numpy() if verbose else None

    cash = 0
    position = 0
    pnl_hist = []
    for i, (index, swmid, z) in enumerate(zip(spread.index, mids, z_values)):
        if index == 0:
            continue

        # NaN z-scores (window not yet full, or 0/0) fail both comparisons, so no trade.
        if z > thresh and position != -target_position:
            quantity = -target_position - position
            cash -= quantity * swmid
            cash = cross_spread(cash, quantity)
            position = -target_position

            if verbose:
                print(f"SELL {quantity} AT PRICE {swmid} AT TIME {timestamps[i]}")

        if z < -thresh and position != target_position:
            quantity = target_position - position
            cash -= quantity * swmid
            cash = cross_spread(cash, quantity)
            position = target_position

            if verbose:
                print(f"BUY {quantity} FOR PRICE {swmid} AT TIME {timestamps[i]}")

        pnl_hist.append(cash + position * swmid)

    # Guard: with no processed rows there is no final PnL to report.
    if verbose and pnl_hist:
        print(f"PNL: {pnl_hist[-1]}")

    return pnl_hist


In [163]:
# Exhaustive grid over the entry threshold and the two rolling windows; the
# position size has a single candidate. Every result keeps its full PnL path.
position_opt = [60]
thresh_opt = [1, 2, 3, 5, 6, 7, 7.5, 8, 9, 10, 15, 20, 25]
std_window_opt = [10, 20, 25, 30, 35, 40, 50]
sma_window_opt = [10, 20, 25, 30, 35, 40, 50, 75, 100, 125, 150, 200, 300, 500]

opt = []
for thresh in tqdm(thresh_opt):
    opt.extend(
        {
            "thresh": thresh,
            "position": position,
            "std_window": std_window,
            "sma_window": sma_window,
            "pnl": backtest(spread2, thresh, position, std_window, sma_window),
        }
        for std_window in std_window_opt
        for sma_window in sma_window_opt
        for position in position_opt
    )

100%|██████████| 13/13 [02:55<00:00, 13.46s/it]


In [164]:
# Average level of spread1 over the loaded data.
spread.loc[:, 'spread'].mean()

np.float64(43.922489021349016)

In [165]:
# Rank the grid-search results by final PnL (best first); later cells rely on
# `opt` staying sorted in place.
opt.sort(key=lambda result: result['pnl'][-1], reverse=True)
top_3_pnl = opt[:3]
pnl_graph = top_3_pnl[0]['pnl']

# backtest() skips the row labelled 0, so the PnL path is shorter than the frame.
# Pair it with the trailing index labels so every point sits on the row it was
# computed for instead of one row early.
pnl_index = spread.index[max(len(spread.index) - len(pnl_graph), 0):]

fig = go.Figure()
fig.add_trace(go.Scatter(x=pnl_index, y=pnl_graph, mode='lines', name='PnL'))
fig.update_layout(title='PnL of the best parameter set', xaxis_title='Row', yaxis_title='PnL')
fig.show()

In [167]:
# Report the three best parameter sets; assumes `opt` is already sorted best-first.
top_3_pnl = opt[:3]
top_pnl_params = top_3_pnl[0]
summary_lines = [
    f"thresh: {params['thresh']}, std_window: {params['std_window']}, sma_window: {params['sma_window']}, pnl: {params['pnl'][-1]}"
    for params in top_3_pnl
]
print("\n".join(summary_lines))

thresh: 8, std_window: 25, sma_window: 125, pnl: 12486.761331593298
thresh: 10, std_window: 25, sma_window: 150, pnl: 11953.98168192296
thresh: 5, std_window: 20, sma_window: 35, pnl: 10530.0


spread1
thresh: 7, std_window: 35, sma_window: 125, pnl: 24212.82352941198
thresh: 7, std_window: 50, sma_window: 200, pnl: 21864.714339038765
thresh: 5, std_window: 50, sma_window: 125, pnl: 21512.814052237227

spread2
thresh: 8, std_window: 25, sma_window: 125, pnl: 12486.761331593298
thresh: 10, std_window: 25, sma_window: 150, pnl: 11953.98168192296
thresh: 5, std_window: 20, sma_window: 35, pnl: 10530.0

In [166]:
# One PnL figure per parameter set for the first five entries of `opt`.
for o in opt[:5]:
    # backtest() skips the row labelled 0, so align the shorter PnL path with the
    # trailing index labels rather than plotting it one row early.
    pnl_index = spread.index[max(len(spread.index) - len(o['pnl']), 0):]

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=pnl_index, y=o['pnl'], mode='lines', name='PnL'))
    fig.update_layout(
        title=f"Thresh: {o['thresh']}, Position: {o['position']}, Std Window: {o['std_window']}, Sma Window: {o['sma_window']}, PnL: {o['pnl'][-1]}",
        xaxis_title='Row',
        yaxis_title='PnL',
    )
    fig.show()
    