In [None]:
import os
import random
import plotly.graph_objects as go
import plotly.express as px
import seaborn as sns
import pandas as pd
import numpy as np
import lib.helpers as hlp
import lib.backtest as backtest
import lib.plotting as plotting
import lib.kelly as kelly
import lib.random_path_generation as rpg


In [None]:
# --- Input data ---------------------------------------------------------------
# Historical prices, loaded through the project helper.
historical_df = hlp.get_historical_prices_df()

# Cached backtest results, read from the project's input-data directory.
flow2_cached_results = pd.read_csv(
    os.path.join(hlp.INPUT_DATA_DIR, "backtested_mc_125.csv"),
    sep=',',
)

# --- Tunable parameters -------------------------------------------------------
# Histogram bins. Anything np.histogram accepts would do: a bin count, a string
# such as 'auto', or explicit bin edges (as here).
n_bins = np.linspace(-1, 5, 1000)

# Number of paths to use in the Monte Carlo convolution.
monte_carlo_paths = 10000

# Number of util points on the [0, 1] segment; passed to kelly.get_curve_df.
bonding_curve_resolution = 100

# Backtest length in days.
simulation_length_days = 100

# Number of random return paths sampled for the backtest.
number_of_paths = 20

# Relative width of a cluster: a cluster's upper premium bound is its first
# member's premium multiplied by (1 + this threshold).
clusterization_premium_relative_threshold = 0.5

In [None]:
# Scatter of the cached backtest results: worst-case drawdown against median CAGR.
hue = 'option_type'  # alternatives: 'strike_pct', 'duration', 'util'
x_axis = "100_percentile_max_drawdown"
y_axis = '50_percentile_cagr'

# px.scatter builds the complete figure, so no empty go.Figure() is created first.
fig = px.scatter(
    flow2_cached_results,
    x=x_axis,
    y=y_axis,
    color=hue,
    symbol="asset_label",
)
fig.update_layout(
    showlegend=False,
    plot_bgcolor='rgba(0,0,0,0)',  # fully transparent plot background
    xaxis_title=x_axis,
    yaxis_title=y_axis,
    xaxis_range=(0, 1),
)
fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)

fig.show()

In [None]:
# Target risk/reward point and the half-width of the accepted window around it.
max_loss_pick = 0.25
max_loss_radius = 0.05
cagr_pick = 0.05
cagr_radius = 0.05

# Keep only the instruments strictly inside the (drawdown, cagr) window.
worst_drawdown = flow2_cached_results['100_percentile_max_drawdown']
median_cagr = flow2_cached_results['50_percentile_cagr']
drawdown_in_window = (
    worst_drawdown.gt(max_loss_pick - max_loss_radius)
    & worst_drawdown.lt(max_loss_pick + max_loss_radius)
)
cagr_in_window = (
    median_cagr.gt(cagr_pick - cagr_radius)
    & median_cagr.lt(cagr_pick + cagr_radius)
)
picked_instruments = flow2_cached_results[drawdown_in_window & cagr_in_window].reset_index(drop=True)

# Cached results without an option type column are treated as puts.
if 'option_type' not in picked_instruments:
    picked_instruments['option_type'] = 'put'

# One row per (asset, strike, duration, option type) combination.
picked_instruments = picked_instruments.drop_duplicates(
    subset=['asset', 'strike_pct', 'duration', 'option_type']
)
print(picked_instruments)

In [None]:
# Build, for every picked instrument, its price/returns series and Kelly bonding curve.

# Object-valued columns that are filled (or reserved) below.
for placeholder_column in ('price_series', 'returns_series', 'kelly_curve_df',
                           'min_premium', 'max_premium'):
    picked_instruments[placeholder_column] = None

# Characters removed from the stored kelly_params text before it is split into numbers.
brackets_and_commas = str.maketrans('', '', '[],')

for idx, instrument in picked_instruments.iterrows():
    # Prices of the instrument's asset strictly between its start and end dates.
    in_window = (historical_df.index > instrument['date_start']) & \
                (historical_df.index < instrument['date_end'])
    prices = historical_df[in_window][instrument['asset']]
    picked_instruments.at[idx, 'price_series'] = prices
    # The first pct_change value is NaN, so it is dropped.
    picked_instruments.at[idx, 'returns_series'] = prices.pct_change()[1:]

    # kelly_params is parsed from its text form (e.g. "[a, b, c]" or "[a b c]")
    # into a list of floats.
    params_text = str(instrument["kelly_params"]).translate(brackets_and_commas)
    kelly_params = [float(token) for token in params_text.split(" ") if token != '']
    picked_instruments.at[idx, "kelly_params"] = kelly_params

    curve_df = kelly.get_curve_df(*kelly_params, bonding_curve_resolution)
    picked_instruments.at[idx, "kelly_curve_df"] = curve_df

    # Premiums at the first and last points of the curve, used for clustering later.
    curve_premiums = curve_df['premium'].values
    picked_instruments.at[idx, 'left_premium'] = curve_premiums[0]
    picked_instruments.at[idx, 'right_premium'] = curve_premiums[-1]


In [None]:
# Greedy clustering of the bonding curves by their end-point premiums.
# Instruments are visited in ascending order of left premium; each one joins the
# first existing cluster whose left/right premium bounds contain it, otherwise it
# opens a new cluster whose bounds start at its own premiums.
picked_instruments = picked_instruments.sort_values(by='left_premium')
picked_instruments['cluster'] = None
cluster_info_dicts = {}

upper_bound_factor = 1 + clusterization_premium_relative_threshold

for idx, instrument in picked_instruments.iterrows():
    left = instrument['left_premium']
    right = instrument['right_premium']

    for cluster_id, bounds in cluster_info_dicts.items():
        fits_left = bounds['min_left'] <= left <= bounds['max_left']
        if fits_left and bounds['min_right'] <= right <= bounds['max_right']:
            picked_instruments.at[idx, 'cluster'] = cluster_id
            break
    else:
        # No existing cluster matched: this instrument seeds the next one.
        new_cluster_id = len(cluster_info_dicts)
        picked_instruments.at[idx, 'cluster'] = new_cluster_id
        cluster_info_dicts[new_cluster_id] = {
            'min_left': left,
            'max_left': left * upper_bound_factor,
            'min_right': right,
            'max_right': right * upper_bound_factor,
        }
        
        
                

In [None]:
# Give every cluster one random RGB colour and copy it onto each of its instruments.
rgb_int = lambda: random.randint(0,255)

cluster_colors = {}
picked_instruments['color'] = None
for idx, cluster_id in picked_instruments['cluster'].items():
    # A colour is drawn the first time a cluster is seen and reused afterwards.
    if cluster_id not in cluster_colors:
        cluster_colors[cluster_id] = f'rgb({rgb_int()}, {rgb_int()}, {rgb_int()})'
    picked_instruments.at[idx, 'color'] = cluster_colors[cluster_id]
    

In [None]:
# One line per instrument: premium as a function of util, coloured by cluster.
fig = go.Figure()

for curve_index, row in picked_instruments.iterrows():
    curve_df = row['kelly_curve_df']
    curve_trace = go.Scatter(
        x=curve_df["util"],
        y=curve_df["premium"],
        mode="lines",
        # Legend label: cluster id plus the curve's left/right premiums.
        name=f'Cl={row["cluster"]}_lp={round(row["left_premium"], 3)}_rp={round(row["right_premium"], 3)}',
        line_color=row['color'],
    )
    fig.add_trace(curve_trace)

fig.show()

In [None]:
# Pick the largest cluster and keep only its instruments.
# value_counts() is ordered by descending size, so its first entry is the largest
# cluster (the first one listed wins on ties).
cluster_sizes = picked_instruments['cluster'].value_counts()
if cluster_sizes.empty:
    # No instrument was assigned to any cluster.
    largest_cluster = None
    largest_cluster_len = 0
else:
    largest_cluster = cluster_sizes.index[0]
    largest_cluster_len = cluster_sizes.iloc[0]
# To inspect a different cluster, override the choice here, e.g. largest_cluster = 4

print(f'largest_cluster: {largest_cluster}, largest_cluster_len: {largest_cluster_len}')

# reset_index() returns a new, independent frame (the old labels are kept in an
# 'index' column). Resetting in place on the filtered slice would mutate a
# possible view of picked_instruments and trigger SettingWithCopyWarning when
# columns are added to clustered_instruments later on.
clustered_instruments = picked_instruments[picked_instruments['cluster'] == largest_cluster].reset_index()

print(clustered_instruments)

In [None]:
# Build one returns column per distinct asset of the cluster, then draw the random
# row positions that define each simulated path.
returns_df = pd.DataFrame()
assets_proccessed = []

for position, asset_returns in clustered_instruments['returns_series'].items():
    asset_name = clustered_instruments.at[position, 'asset']
    if asset_name in assets_proccessed:
        # Several instruments can share an asset; its returns are needed only once.
        continue

    # The column is temporarily named after the instrument's row label.
    asset_column = pd.DataFrame(asset_returns.rename(position))
    if returns_df.empty:
        returns_df = asset_column
    else:
        returns_df = returns_df.merge(asset_column, how='outer', on='date')
    assets_proccessed.append(asset_name)

# Keep only the dates on which every asset has a return.
returns_df = returns_df.dropna()

# Replace the temporary row-label column names with the asset names.
label_to_asset = {
    label: clustered_instruments.at[label, 'asset'] for label in returns_df.columns
}
returns_df = returns_df.rename(columns=label_to_asset)

# For each path, simulation_length_days row positions drawn with replacement.
random_returns_indexes_path_dict = {
    path_id: np.random.randint(0, len(returns_df), simulation_length_days)
    for path_id in range(number_of_paths)
}

In [None]:
# Finally create Monte Carlo returns paths and backtest.
#
# For every instrument of the chosen cluster:
#   1. build the util list and the matching premiums,
#   2. assemble the daily return paths from the pre-drawn random row positions,
#   3. compound the daily returns into per-option-cycle returns,
#   4. turn cycle returns into option payoffs placed on their expiration days,
#   5. add the instrument's daily payoff to the running total.
# The summed payoffs (+1) are then compounded into the bankroll paths.
monte_carlo_generated_returns_df = pd.DataFrame(columns=assets_proccessed)  # need only for testing
path_num = 1  # need only for testing
util_dict = {x: 0.1 for x in clustered_instruments.index.values}

clustered_instruments['daily_returns_paths'] = None
clustered_instruments['cycles_returns_paths'] = None
clustered_instruments['payoff_daily'] = None

total_payoff_df = pd.DataFrame()
bankroll_df = pd.DataFrame()

for index, row in clustered_instruments.iterrows():
    duration = row['duration']
    if duration <= 0:
        # A non-positive duration would make the cycle loop below spin forever.
        raise ValueError(f"instrument {index}: duration must be positive, got {duration}")

    # Create Utils lists
    ##########################################################################
    utils = rpg.generate_utils_list(simulation_length_days,
                                    initial_util=util_dict[index],
                                    duration=duration, randomize=False)
    ##########################################################################
    # Premiums Calculation
    ##########################################################################
    # The fit params of the last row are used for every instrument (per the
    # original note: "the highest curve").
    premiums, kelly_curve_df, fit_params = kelly.get_premiums_list_with_all_calculations(
        utils,
        premium_offset=0.,
        fit_params=clustered_instruments.at[len(util_dict) - 1, 'kelly_params']
    )
    ##########################################################################

    # Create daily returns paths
    ##########################################################################
    # Every path reuses the same pre-drawn row positions for all instruments.
    path_list = [
        returns_df.iloc[random_returns_indexes_path_dict[path_id]][row['asset']].values
        for path_id in range(number_of_paths)
    ]
    monte_carlo_generated_returns_df[row['asset']] = path_list[path_num]
    ##########################################################################
    # Convert daily returns path to the option expiration cycles returns path
    ##########################################################################
    # Rows are days, columns are paths; values are gross returns (1 + r).
    daily_paths_df = pd.DataFrame(path_list).T + 1

    # The rows are collected in a list and concatenated once; DataFrame.append
    # was removed in pandas 2.0.
    cycle_rows = []
    cycle_start = 0
    # NOTE(review): the strict '<' skips a final cycle that ends exactly on the
    # last simulated day - confirm whether '<=' was intended.
    while cycle_start + duration < simulation_length_days:
        cycle_rows.append(daily_paths_df[cycle_start:cycle_start + duration].cumprod().tail(1))
        cycle_start += duration

    if cycle_rows:
        cycles_returns_paths_df = pd.concat(cycle_rows, ignore_index=True)
    else:
        cycles_returns_paths_df = pd.DataFrame(columns=daily_paths_df.columns)
    cycles_returns_paths_df = cycles_returns_paths_df - 1

    clustered_instruments.at[index, 'daily_returns_paths'] = daily_paths_df - 1
    clustered_instruments.at[index, 'cycles_returns_paths'] = cycles_returns_paths_df
    ##########################################################################
    # Actual Backtest
    ##########################################################################
    # calculate payoffs at the option expiration with 0 premia
    if row['option_type'] == 'call':
        payout_function = kelly.call_option_payout
    else:
        payout_function = kelly.put_option_payout
    payoff_cycles_df = cycles_returns_paths_df.applymap(
        payout_function, strike=row['strike'], premium=0
    )

    # Place each cycle's payoff on the last day of that cycle ...
    payoff_cycles_df.index = range(
        duration - 1, duration * len(payoff_cycles_df) + duration - 1, duration
    )
    # ... and use zero payoff on every other day.
    payoff_days_df = payoff_cycles_df.reindex(index=range(simulation_length_days)).fillna(0)
    payoff_days_df = (payoff_days_df.add(premiums, axis=0)).multiply(
        utils, axis=0
    )
    clustered_instruments.at[index, 'payoff_daily'] = payoff_days_df
    ##########################################################################

    # Accumulate without aliasing: an in-place '+=' on the first instrument's
    # frame would overwrite its stored 'payoff_daily' with the running total.
    if total_payoff_df.empty:
        total_payoff_df = payoff_days_df.copy()
    else:
        total_payoff_df = total_payoff_df + payoff_days_df

total_payoff_df = total_payoff_df + 1
bankroll_df = total_payoff_df.cumprod(axis=0)

In [None]:
# Pass the compounded bankroll paths (one column per simulated path) to the
# project's plotting helper.
plotting.plot_bankroll_df(bankroll_df)

In [None]:
# Summarise the simulated bankroll so it can be compared with the window the
# cluster was picked from: percentiles of max drawdown and of CAGR.
summary_percentiles = (0, 25, 50, 75, 100)

max_drawdown_percentiles = backtest.calculate_percentiles(
    backtest.calculate_max_drawdown, bankroll_df, percentiles=summary_percentiles
)
cagr_percentiles = backtest.calculate_percentiles(
    backtest.calculate_cagr, bankroll_df, percentiles=summary_percentiles
)

# Position 2 of the requested percentiles is the 50th, i.e. the median.
print(f"median_drawdown = {round(max_drawdown_percentiles[2], 2)}")
print(f"median_cagr = {round(cagr_percentiles[2], 2)}")


In [None]:
# Plot the cached results again, outline the risk/reward window the instruments
# were picked from, and mark where the simulated cluster portfolio landed.

# px.scatter builds the complete figure, so no empty go.Figure() is created first.
fig = px.scatter(
    flow2_cached_results,
    x="100_percentile_max_drawdown",
    y="50_percentile_cagr",
    symbol="asset_label",
)
fig.update_layout(
    showlegend=False,
    plot_bgcolor='rgba(0,0,0,0)',  # fully transparent plot background
    xaxis_title='max_drawdown',
    yaxis_title='cagr',
    xaxis_range=(0, 1),
)
fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)

# Corners of the pick window in data coordinates.
window_left = max_loss_pick - max_loss_radius
window_right = max_loss_pick + max_loss_radius
window_bottom = cagr_pick - cagr_radius
window_top = cagr_pick + cagr_radius

# The window is drawn as four red line segments: (x0, y0, x1, y1).
window_edges = [
    (window_left, window_top, window_right, window_top),        # top edge
    (window_left, window_bottom, window_right, window_bottom),  # bottom edge
    (window_left, window_bottom, window_left, window_top),      # left edge
    (window_right, window_bottom, window_right, window_top),    # right edge
]
for edge_x0, edge_y0, edge_x1, edge_y1 in window_edges:
    fig.add_shape(type='line',
                  x0=edge_x0,
                  y0=edge_y0,
                  x1=edge_x1,
                  y1=edge_y1,
                  line=dict(color='Red',),
                  xref='x',
                  yref='y')

# Short, thick green stroke at (max of max drawdown, median CAGR) of the
# simulated bankroll: index 4 is the 100th percentile, index 2 the 50th.
fig.add_shape(type='line',
              x0=max_drawdown_percentiles[4],
              y0=cagr_percentiles[2],
              x1=max_drawdown_percentiles[4]+0.01,
              y1=cagr_percentiles[2]-0.001,
              line=dict(color='Green',width=10),
              xref='x',
              yref='y')

fig.show()

In [None]:
# KDE plot: joint density of max drawdown and CAGR across the bankroll paths.

# One record per bankroll path, collected in a list and turned into a frame once;
# DataFrame.append was removed in pandas 2.0 (and was quadratic inside a loop).
kde_records = []
for col in bankroll_df:
    max_drawdown = backtest.calculate_max_drawdown(bankroll_df[col])
    cagr = backtest.calculate_cagr(bankroll_df[col])
    kde_records.append({'cagr': cagr, 'max_drawdown': max_drawdown})

# Explicit columns keep the frame's schema stable even with no paths.
kde_df = pd.DataFrame(kde_records, columns=['cagr', 'max_drawdown'])

# fill=True replaces the deprecated shade=True.
sns.kdeplot(data=kde_df, x='max_drawdown', y='cagr',
            color='b', fill=True, cmap='Blues', alpha=0.6)
