In [1]:
import os
import sys
# Put the directory two levels above the notebook's working directory on
# sys.path (presumably the project root, so the `experts` / `exploration`
# imports below resolve). os.path handles the separator on every platform.
path = os.path.dirname(os.path.dirname(os.path.abspath('')))
print(path)
# Guard against piling up duplicate entries when the cell is re-run.
if path not in sys.path:
    sys.path.append(path)

import pickle
import pandas as pd
import json
import random, tqdm
import numpy as np

from experts.alpha import alpha
from exploration.explore import explore
from exploration.outils import stats

import plotly.express as px

/home/mora/Documents/projects/Bandits


In [2]:
# Read the ticker file and keep only the list stored under the "sp500" key.
with open('../../../datasharing/tickers.json', 'r') as fh:
    tickers = json.load(fh)["sp500"]

In [3]:
# Load the pickled dict that maps each ticker to its full price-history frame.
# NOTE(review): pickle.load can execute arbitrary code; only load files that
# come from a trusted source.
with open('../../../datasharing/hist_stock_data.pkl', 'rb') as file:
    df_hst_tickers = pickle.load(file)

# Use the "Close" column as the price series to analyse.
label = 'Close'

# Build one wide frame (one column per ticker in `tickers`) with a single
# concat instead of growing the frame inside a loop. Series.rename returns a
# new object, so the frames stored in df_hst_tickers are left untouched.
df_close_stocks = pd.concat(
    [df_hst_tickers[ticker][label].rename(ticker) for ticker in tickers],
    axis=1,
).rename_axis('Date')

# Expose the original index as a 'Date' column (later cells filter on it) and
# index the frame by a datetime 'date'.
df_close_stocks = df_close_stocks.reset_index()
df_close_stocks['date'] = pd.to_datetime(df_close_stocks['Date'])
df_close_stocks = df_close_stocks.set_index('date')

# Down-sample to 2-week bins, keeping the last observation of each bin.
df_2w = df_close_stocks.resample("2W").last()

  df_close_stocks['date'] = df_close_stocks.index
  df_close_stocks.reset_index(inplace = True)
  df_close_stocks['date'] = pd.to_datetime(df_close_stocks['Date'])


In [4]:
# Create the functions that compute the statistics
def get_stats(df_close_stocks, tickers, years = None, inter = None, freq = 'M', min_els = 6):
    """
    Compute, for each ticker, the average return and the mean/std ratio
    (Sharpe ratio without a risk-free rate) of its percentage changes.

    Args:
        df_close_stocks (pd.DataFrame) - one price column per ticker. A
            datetime 'Date' column is required when `years` is given, and the
            index must be usable by `pct_change(freq=...)`.
        tickers (list) - the tickers (column names) to compute the statistics for
        years (list) - calendar years to keep; takes precedence over `inter`
        inter (slice) - positional selector passed to `.iloc`; only used when
            `years` is empty or None
        freq (str) - the frequency for the statistics (Y,M,W,2W, ...)
        min_els (int) - the minimum number of returns a ticker must have
            after the frequency postprocessing.

    Returns:
        dict - {ticker: {"sr": ratio, "av_ret": mean return}}. Tickers with
        fewer than `min_els` returns are left out.
    """
    # The row selection does not depend on the ticker, so do it once up front.
    if years:
        tmp_df = df_close_stocks[df_close_stocks['Date'].dt.year.isin(years)]
    elif inter is not None:
        tmp_df = df_close_stocks.iloc[inter]
    else:
        # No filter requested: use the whole frame instead of failing with a
        # NameError on an undefined selection.
        tmp_df = df_close_stocks

    st_stats = {}
    for ticker in tickers:
        pc_m = tmp_df[ticker].pct_change(periods = 1, freq = freq).dropna()

        # Discard the ticker if there is not enough data.
        if pc_m.shape[0] < min_els:
            continue

        av_ret = pc_m.mean()
        sr = av_ret / pc_m.std()
        st_stats[ticker] = {"sr": sr, 'av_ret': av_ret}

    return st_stats

def df_and_sort(st_stats, col):
    """
    Turn a {stock: {metric: value}} dictionary into a dataframe with one row
    per stock, ordered from the highest to the lowest value of one metric.
    Args:
        st_stats (dict) - The dictionary containing the statistics.
        col (str) - The name of the column used for sorting.
    """
    # The outer keys arrive as columns; transpose so each stock becomes a row.
    per_stock = pd.DataFrame(st_stats).T
    return per_stock.sort_values(by=col, ascending=False)


def create_stock_groups(lst_stocks: list, basket_size: int,
                        max_stocks: int):
    """
    Randomly split stocks into groups (baskets) of basket_size elements.

    Args:
        lst_stocks (list) : Stocks to draw from. The input is not modified.
        basket_size (int): The number of stocks in each basket; the last
            basket may be smaller.
        max_stocks (int): The maximum number of stocks placed into baskets.

    Returns:
        dict: 'stocks_dict' holds the shuffled list of all stocks and
        'groups' maps the start offset of each basket to its list of stocks.

    Raises:
        ValueError: if basket_size is smaller than 1.
    """
    if basket_size < 1:
        raise ValueError("basket_size must be a positive integer")

    # Shuffle a private copy of the argument: the function must depend only
    # on what the caller passes in and must not reorder the caller's data.
    shuffled = list(lst_stocks)
    random.shuffle(shuffled)

    # Never place more than max_stocks stocks, nor more than are available,
    # so no empty or oversized trailing basket is produced.
    selected = shuffled[:max(max_stocks, 0)]
    groups = {start: selected[start:start + basket_size]
              for start in range(0, len(selected), basket_size)}

    return {'stocks_dict': shuffled, 'groups': groups}


def simulate_year(df_series, years: list, n_sims: int,
                  stock_groups: dict, exploration_rate: float,
                  p_alpha: float):
    """
    Run the bandit exploration on every stock group for the given years and
    average the per-group results.

    Args:
        df_series (pd.DataFrame): price frame with a datetime 'Date' column
            and one column per stock.
        years (list): calendar years whose rows are simulated.
        n_sims (int): forwarded as the first argument of `explore.execute`.
        stock_groups (dict): must hold a 'groups' mapping of
            {group id: list of stock column names}.
        exploration_rate (float): forwarded as "exploration_rate".
        p_alpha (float): forwarded as "alpha".

    Returns:
        pd.Series: indexed by "alpha", "expl", "mean" and "std", holding the
        column-wise mean over all simulated groups. Every entry is NaN when
        no group has data for the requested years.
    """
    columns = ["alpha", "expl", "mean", "std"]
    df_test = df_series[df_series['Date'].dt.year.isin(years)]
    lines = []

    for group, lst_stocks in stock_groups['groups'].items():

        # Period-over-period returns of the group's stocks; rows with a
        # missing value are dropped.
        df_bandits = df_test[lst_stocks].pct_change(1).dropna()

        # Nothing to simulate for this group in the selected years.
        if df_bandits.shape[0] == 0:
            continue

        arguments = {"exploration_rate": exploration_rate,
                     "n_bandit": df_bandits.shape[1],
                     "steps": df_bandits.shape[0],
                     "alpha": p_alpha,
                     "df_bandits": df_bandits
                    }

        # A fresh explorer per group, so results of one group are not mixed
        # with another's.
        exp = explore(alpha.execute)
        exp.execute(n_sims, **arguments)
        # NOTE(review): `stats` is assumed to return a scalar (mean, std)
        # pair for the simulated returns - confirm in exploration.outils.
        mean, std = stats(exp.rets_compo)
        lines.append([arguments["alpha"],
                      arguments["exploration_rate"],
                      mean,
                      std])

    if not lines:
        # Every group was skipped: return an all-NaN result instead of
        # failing while labelling the columns of an empty frame.
        return pd.Series(float("nan"), index=columns)

    df_rets = pd.DataFrame(lines, columns=columns)

    return df_rets.mean()

In [5]:
# Build, for every test year, baskets drawn from the best-ranked stocks of
# the ten preceding years.
max_stocks = 120
stock_groups = {}
basket_size = 5
metric = 'sr' # choose between 'av_ret' or 'sr'

# Seed the shuffle so the baskets are reproducible across runs.
SEED = 42
random.seed(SEED)

for year in tqdm.tqdm(range(2013,2024)):

    # Rank the tickers using only the ten years before `year`.
    years_tr = list(range(year - 10, year))
    stats_tr = get_stats(df_close_stocks, tickers, years = years_tr)
    df_stats_tr = df_and_sort(stats_tr, metric)

    # Keep the top `max_stocks` tickers. A plain list (rather than the
    # index's underlying array) keeps an in-place shuffle from writing into
    # df_stats_tr's index. The selection is handed over explicitly as the
    # candidate list.
    tst = list(df_stats_tr.index[0:max_stocks])
    stock_groups[year] = create_stock_groups(tst,
                                             basket_size,
                                             max_stocks)

100%|███████████████████████████████████████████| 11/11 [00:42<00:00,  3.90s/it]


In [None]:
# Grid search over (exploration rate, alpha) for every test year.
# The grids are built from integer steps and rounded: np.arange with a float
# step accumulates representation error, which leaks into the group keys and
# makes the number of grid points depend on rounding.
exploration_grid = np.round(np.arange(1, 15) * 0.01, 2)   # 0.01, 0.02, ..., 0.14
alpha_grid = np.round(np.arange(1, 30) * 0.005, 3)        # 0.005, 0.010, ..., 0.145

final_stats = []

for year in tqdm.tqdm(range(2013,2024)):
    for exploration_rate in exploration_grid:
        for p_alpha in alpha_grid:
            st_year = simulate_year(df_2w, [year], n_sims = 5,
                                    stock_groups = stock_groups[year],
                                    exploration_rate = exploration_rate,
                                    p_alpha = p_alpha
                                   )
            # One record per (year, exploration rate, alpha) combination.
            tmp_year = {"year":year,"ex_rate":exploration_rate,
                        "alpha":p_alpha, "return": st_year['mean']}
            final_stats.append(tmp_year)

 64%|████████████████████████████                | 7/11 [05:44<03:16, 49.15s/it]

In [None]:
# One row per (year, ex_rate, alpha) record produced by the sweep.
df_res = pd.DataFrame(final_stats)

# Average the return over the years for each (ex_rate, alpha) pair.
df_groups = (
    df_res
    .groupby(['ex_rate', 'alpha'])['return']
    .mean()
    .reset_index()
)

In [None]:
# Persist the aggregated grid, then reload it from disk so that the cells
# below work on exactly what was saved.
path_csv = '../../../datasharing/df_groups_alpha.csv'

df_groups.to_csv(path_csv, index=False)
df_groups = pd.read_csv(path_csv)

df_groups.head()

In [None]:
# Averaged return of every (ex_rate, alpha) row of df_groups, in row order.
# Title and axis labels make the figure readable on its own.
ax = df_groups['return'].plot(title="Mean return per (ex_rate, alpha) grid row")
ax.set_xlabel("grid row index")
ax.set_ylabel("mean return");

In [None]:
# Heatmap of the averaged return over the (ex_rate, alpha) grid.
heatmap_options = dict(x="ex_rate", y="alpha", z="return", histfunc="avg")
fig = px.density_heatmap(df_groups, **heatmap_options)
fig.show()

In [None]:
# Re-run every test year with one fixed (exploration rate, alpha) setting.
exploration_rate = 0.01
p_alpha = 0.05

s_final_state = []
for year in tqdm.tqdm(range(2013, 2024)):
    # Collect the averaged result Series of each year, in chronological order.
    s_final_state.append(
        simulate_year(
            df_2w,
            [year],
            n_sims=5,
            stock_groups=stock_groups[year],
            exploration_rate=exploration_rate,
            p_alpha=p_alpha,
        )
    )

In [None]:
# Stack the per-year result Series collected in s_final_state into one frame:
# one row per simulated year (in loop order), one column per Series entry.
df_year = pd.DataFrame(s_final_state)

In [None]:
# Summarise the per-year mean returns.
yearly_returns = df_year['mean']

mean_year = yearly_returns.mean()
std_year = yearly_returns.std()

# Compound the yearly returns: running product of (1 + return).
com_ret = yearly_returns.add(1).cumprod()
com_ret_last = com_ret.iloc[-1]

In [None]:
# Report the summary. The labels state what each number actually is: the mean
# and the standard deviation are taken across the per-year mean returns, and
# the last figure is the running product of (1 + return) after the final year.
print("Mean of the per-year mean returns ", mean_year)
print("Std of the per-year mean returns ", std_year)
print("Compounded growth factor after the final year ", com_ret_last)
com_ret.plot(title="Compounded growth factor by simulated year")