# Stock index statistics

This notebook shows how to compute stock index statistics.

In [None]:
import pandas as pd
import os
import pickle 
from src.analyzer import StockIndexAnalyzer

# Load the pickled data set from disk.
# NOTE: unpickling can execute arbitrary code, so FILEPATH must only ever
# point at a trusted, locally produced file.
FILEPATH = "data/all_indexes_2006-01-01_2021-12-31.pickle"
with open(FILEPATH, mode="rb") as pickle_file:
    data = pickle.load(pickle_file)
    
# Groups of index tickers. Tickers in a trailing comment are currently
# excluded from their group.
indexes_usa = [
    "SPX", "CCMP", "RIY", "RTY",
    "RAY", "RLV", "RLG", "NBI",
]
indexes_sp500 = [
    "S5COND", "S5CONS", "S5ENRS", "S5FINL", "S5HLTH",
    "S5INFT", "S5MATR", "S5TELS", "S5UTIL", "S5INDU",
]  # 'S5RLST'
indexes_eu = [
    "DAX", "CAC", "UKX", "BEL20",
    "IBEX", "KFX", "OMX", "SMI",
]
indexes_apac = ["AS51"]  # 'HSI', 'STI'
indexes_jp = ["NKY", "TPX"]
indexes_bric = [
    "IBOV", "NIFTY", "MXIN", "SHCOMP", "SHSZ300",
]  # 'RTSI$'

# Concatenate every group, in the order declared above, into a single list.
indexes_all = [
    *indexes_usa,
    *indexes_sp500,
    *indexes_eu,
    *indexes_apac,
    *indexes_jp,
    *indexes_bric,
]

In [None]:
#df = data['RTSI$']#['0118440Q UQ']
#df

Loop over all indexes to compute the empirical statistics (mean, median and mode). The results are saved into dictionaries keyed by index name.

In [None]:
# Date window passed to every StockIndexAnalyzer.
start_date = "2006-12-29"
end_date = "2021-12-31"

# Sampler settings forwarded to StockIndexAnalyzer.pymc3_fit.
MCMC_DRAWS = 10000
MCMC_TUNE = 5000

# Per-index outputs, keyed by index name.
results_expt = {}   # histogram statistics
results_mcmc = {}   # fit histogram with MCMC
results_scipy = {}  # fit histogram with scipy

for index_name in indexes_all:

    print(f"Processing {index_name} index")

    stock_analyzer = StockIndexAnalyzer(
        prices=data[index_name],
        stock_index=index_name,
        start_date=start_date,
        end_date=end_date,
    )

    ### Part 1. Compute empirical stock index distribution parameter ###
    #stock_analyzer.plot_histogram()
    stock_analyzer.plot_histogram_fit(save_data=False)

    # Read each empirical statistic once; they are reused for the ratios below.
    mean_expt = stock_analyzer.mean_expt
    median_expt = stock_analyzer.median_expt
    mode_expt = stock_analyzer.mode_expt

    results_expt[index_name] = {
        'category': stock_analyzer.category,
        'years': stock_analyzer.nyears,
        'n_stocks': len(stock_analyzer.tickers),
        'n_stocks_data': len(stock_analyzer.mu),
        'mean': mean_expt,
        'median': median_expt,
        'mode': mode_expt,
        'mean/median': mean_expt / median_expt,
        'mean/mode': mean_expt / mode_expt,
    }

    results_scipy[index_name] = stock_analyzer.compare_stats()

    ### Part 2: fit index histogram with MCMC ###
    mcmc_stats = stock_analyzer.pymc3_fit(draws=MCMC_DRAWS, tune=MCMC_TUNE)
    # Store the fit result right away; the extra keys below are added to the
    # same object through the `mcmc_stats` alias.
    results_mcmc[index_name] = mcmc_stats

    summary = stock_analyzer.find_best_distribution()
    print(summary)

    # Single-step .loc lookup instead of chained row-then-column indexing.
    # NOTE(review): raises KeyError if `summary` has no 'lognorm' row - confirm
    # that find_best_distribution always includes it.
    mcmc_stats['lognorm error'] = summary.loc['lognorm', 'sumsquare_error']

    # Rank the candidates once and reuse the top row for both fields.
    best_fit = summary.sort_values('sumsquare_error').iloc[0]
    mcmc_stats['best distr'] = best_fit.name
    mcmc_stats['best distr error'] = best_fit['sumsquare_error']

    ### Part 3: estimate QQ plot
    stock_analyzer.plot_qq()
    stock_analyzer.plot_qq_seaborn()

Transform each results dictionary into a DataFrame. Transpose the table so that the indexes are the rows.

In [None]:
# Columns rounded to two decimals.
col_dec2 = [
    'logn mean', 'logn median', 'logn mode',
    'logn mu', 'logn sigma', 'logn sigma2', 'C',
]

# Build the frame from the results dictionary, then transpose it.
df_scipy = pd.DataFrame(results_scipy).transpose()

# Cast the selected columns to float before rounding them.
scipy_rounded = df_scipy[col_dec2].astype(float).round(2)
df_scipy[col_dec2] = scipy_rounded
df_scipy

In [None]:
# Column groups and the number of decimals each group is rounded to.
col_dec2 = [
    'logn mean', 'logn median', 'logn mode',
    'logn mu', 'logn sigma', 'logn sigma2', 'C',
]
col_dec3 = ['muh', 'sigmah', 'sigma']
col_dec4 = ['lognorm error', 'best distr error']

# Build the frame from the results dictionary, transpose it, and drop the
# three "std" columns.
std_columns = ['muh std', 'sigma std', 'sigmah std']
df_mcmc = pd.DataFrame(results_mcmc).transpose().drop(columns=std_columns)

# Cast each group to float and round it to its own precision.
for cols, ndigits in ((col_dec2, 2), (col_dec3, 3), (col_dec4, 4)):
    df_mcmc[cols] = df_mcmc[cols].astype(float).round(ndigits)
df_mcmc

In [None]:
# Columns rounded to two decimals.
col_dec = [
    'mean', 'mode', 'median',
    'mean/median', 'mean/mode',
]

# Build the frame from the results dictionary, then transpose it.
df_expt = pd.DataFrame(results_expt).transpose()

# Cast the selected columns to float before rounding them.
expt_rounded = df_expt[col_dec].astype(float).round(2)
df_expt[col_dec] = expt_rounded
df_expt

In [None]:
#df_expt.to_latex()

In [None]:
#df_mcmc.to_latex()

Save the DataFrames with the results tables to CSV files.

In [None]:
# Output folder; created on demand.
DIR = 'results'
os.makedirs(DIR, exist_ok=True)

# The file-name suffix comes from the `stock_analyzer` left over from the last
# iteration of the processing loop, so that loop must have run first.
nyears = stock_analyzer.nyears

# One CSV per results table. The 'emprirical' spelling is kept as-is so the
# output file name does not change.
tables_to_save = (
    ('emprirical', df_expt),
    ('mcmc', df_mcmc),
    ('scipy', df_scipy),
)
for label, frame in tables_to_save:
    frame.to_csv(f'{DIR}/data_{label}_{nyears}.csv', header=True)