# Total return distributions

We examine the distribution of stock index returns using historical stock price data from 2006 to 2021 and quantify the difference between average returns and typical returns (approximated by a mode or median) for major stock indexes. We show that the log-normal distribution provides a reasonable fit for the total returns for most world stock indexes and highlight the limitations of this model.

Import libraries and load data:

In [None]:
import os
import pickle
import scipy

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
%matplotlib inline

from src.analyzer import StockIndexAnalyzer
from src.tables import prepare_table

# First and last calendar year of the analysis window.
START_YEAR, END_YEAR = 2006, 2021

# Switch for the optional Markov chain Monte-Carlo fit (False = skip it).
mcmc = False

# Location of the pickled price data for all indexes.
FILEPATH = "data/all_indexes_2006-01-01_2021-12-31.pickle"

# NOTE: unpickling can execute arbitrary code -- only load files you trust.
with open(FILEPATH, mode="rb") as f:
    data = pickle.load(f)

Define groups of indexes:

In [None]:
# United States indexes
indexes_usa = [
    "SPX", "CCMP", "RIY", "RTY",
    "RAY", "RLV", "RLG", "NBI",
]

# S&P 500 sector indexes
indexes_sp500 = [
    "S5COND", "S5CONS", "S5ENRS", "S5FINL", "S5HLTH",
    "S5INFT", "S5MATR", "S5TELS", "S5UTIL", "S5INDU",
]

# European indexes
indexes_eu = [
    "DAX", "CAC", "UKX", "BEL20",
    "IBEX", "KFX", "OMX", "SMI",
]

# Asia Pacific indexes
indexes_apac = ["AS51"]

# Japanese indexes
indexes_jp = ["NKY", "TPX"]

# BRIC indexes
indexes_bric = ["IBOV", "NIFTY", "MXIN", "SHCOMP", "SHSZ300"]

# Every index, concatenated in the order the groups are declared above.
indexes_all = [
    *indexes_usa,
    *indexes_sp500,
    *indexes_eu,
    *indexes_apac,
    *indexes_jp,
    *indexes_bric,
]

Loop over all indexes to compute empirical statistics (mean, median and mode). The results are saved into dictionaries.

- Plot a histogram with left and right tails being grouped into cumulative bins.
- Fit it with KDE
- Find empirical distribution parameters: mean, mode, etc.
- Find the best distribution fit
- Plot QQ plots (empirical VS theoretical distribution). 

Optional:

- Plot selected stock price evolution
- Markov chain Monte-Carlo

In [None]:
# Per-index result containers, keyed by index name.
results_expt = dict()   # empirical histogram statistics
results_mcmc = dict()   # MCMC fit of the histogram (filled only when `mcmc` is set)
results_scipy = dict()  # scipy fit of the histogram (the fit call below is disabled)

# Random seed, to ensure reproducibility of the QQ plot with seaborn.
SEED = 99
np.random.seed(SEED)

for index_name in indexes_all:

    print(f"Processing {index_name} index")

    # The analyzer computes total returns and estimates the parameters of
    # the empirical distribution.
    stock_analyzer = StockIndexAnalyzer(
        prices=data[index_name],
        stock_index=index_name,
        start_year=START_YEAR,
        end_year=END_YEAR,
    )

    ### Part 1: empirical histogram and distribution fit ###
    # Histogram with the left and right tails grouped into cumulative bins.
    stock_analyzer._plot_histogram()

    results_expt[index_name] = stock_analyzer._empirical_distribution_table()

    # The scipy fit is currently switched off, so `results_scipy` stays empty:
    # results_scipy[index_name] = stock_analyzer._scipy_fit()

    summary = stock_analyzer._fitter_find_best_distribution()

    ### Part 2: Markov Chain Monte Carlo to determine best fit parameters ###
    if mcmc:
        print("Starting MCMC ... ")
        results_mcmc[index_name] = stock_analyzer._pymc3_fit(draws=10000, tune=5000)

        # Attach the fit-quality figures from `summary` to the MCMC record.
        fit_record = results_mcmc[index_name]
        fit_record['lognorm error'] = summary.loc['lognorm']['sumsquare_error']

        # Row with the smallest sum-of-squares error; its label is the
        # distribution name.
        best_fit = summary.sort_values('sumsquare_error').iloc[0]
        fit_record['best distr'] = best_fit.name
        fit_record['best distr error'] = best_fit['sumsquare_error']

    ### Part 3: QQ plots (the package can be statsmodels or seaborn) ###
    stock_analyzer._generate_qq_plots(package='statsmodels')

Transform the results dictionaries into DataFrames and save them as LaTeX files in the `tables` folder.

In [None]:
# Export the result tables as LaTeX files into the `tables` directory.
DIR = 'tables'
os.makedirs(DIR, exist_ok=True)

# NOTE(review): `stock_analyzer` is whatever analyzer the processing loop
# created last; this presumes `nyears` is the same for every index -- confirm.
df_expt = prepare_table(results=results_expt, mode='empirical', nyears=stock_analyzer.nyears)
df_expt.to_latex(os.path.join(DIR, "empirical.tex"), index=False)

# The scipy fit in the processing loop is commented out, which leaves
# `results_scipy` empty. Build its table only when there is something to
# export, mirroring the guard on the MCMC table below.
if results_scipy:
    df_scipy = prepare_table(results=results_scipy, mode='scipy', nyears=stock_analyzer.nyears)
    df_scipy.to_latex(os.path.join(DIR, "scipy.tex"), index=False)

# The MCMC results exist only when the MCMC step was enabled.
if mcmc:
    df_mcmc = prepare_table(results=results_mcmc, mode='mcmc', nyears=stock_analyzer.nyears)
    df_mcmc.to_latex(os.path.join(DIR, "mcmc.tex"), index=False)