## Appendix: average volatility vs correlation on monthly data

Load data

In [1]:
import os
import pickle
import warnings

import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.io as pio
from sklearn.exceptions import DataConversionWarning
from sklearn.linear_model import HuberRegressor

# Keep scikit-learn's data-conversion warnings out of the cell output.
warnings.filterwarnings(action="ignore", category=DataConversionWarning)

# NOTE: unpickling can execute arbitrary code, so only load files you trust.
# `data` is used below as a mapping from index name to a per-index DataFrame.
with open('data/correlation_statistics_mean_monthly.pkl', mode='rb') as fh:
    data = pickle.load(fh)

Plot each index on a separate figure and fit the scatter plot with a Huber regression.

Notations: 

    - red dashed line: maximum correlation coefficient.
    - black solid line: Huber linear regression.

In [4]:
def get_huber_coefficients(X, y):
    """Fit a Huber linear regression of ``y`` on ``X`` and predict on the same ``X``.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Regressors. Only the coefficient of the first feature is returned.
    y : array-like of shape (n_samples,) or (n_samples, 1)
        Targets. A column vector is flattened before fitting, so scikit-learn
        does not have to convert it (and warn about the conversion).

    Returns
    -------
    a : float
        Coefficient of the first feature (the slope for a single regressor).
    b : float
        Intercept of the fitted model.
    y_pred : numpy.ndarray
        In-sample predictions for ``X``.
    """
    # HuberRegressor expects a 1-D target; flatten (n, 1) inputs explicitly.
    huber = HuberRegressor().fit(X, np.ravel(y))
    y_pred = huber.predict(X)

    a = huber.coef_[0]
    b = huber.intercept_
    return a, b, y_pred

# Fitted Huber coefficients per index: {index: {'a': slope, 'b': intercept}}.
huber_dict = {}

# Factor that turns the stored volatility into the annualized percentage
# shown on the x axis.
annualization = np.sqrt(252.) * 100.

# write_image does not create missing directories, so make sure the target exists.
os.makedirs('figures/monthly', exist_ok=True)

for index, df in data.items():

    fig = go.Figure()

    # One marker trace per year, so every year gets its own colour and legend entry.
    for year in df['year'].unique():
        df_year = df[df['year'] == year]
        # Vectorized equivalent of formatting "<month_abbr> <year>" row by row.
        hover_texts = df_year['month_abbr'].astype(str) + ' ' + df_year['year'].astype(str)
        fig.add_trace(go.Scatter(x=annualization * df_year['volatility'],
                                 y=df_year['correlation'],
                                 mode='markers',
                                 # Trace names are strings; str() also covers NumPy integer years.
                                 name=str(year),
                                 text=hover_texts))

    # Maximum correlation: horizontal dashed red line across the plotted x range.
    y_max = df['correlation'].max()
    fig.add_shape(
        type="line",
        x0=0.,
        x1=150.,
        y0=y_max,
        y1=y_max,
        line=dict(color="red", width=2, dash="dash"),
        name='max correlation',)

    # Huber linear regression of correlation on annualized volatility.
    # X must be 2-D (n_samples, 1); y is passed 1-D, which is what scikit-learn expects.
    X = (annualization * df['volatility']).to_numpy().reshape(-1, 1)
    y = df['correlation'].to_numpy()

    a, b, y_pred = get_huber_coefficients(X, y)
    huber_dict[index] = {'a': a, 'b': b}

    # Draw the fitted line from left to right instead of in data order.
    order = np.argsort(X.ravel())
    fig.add_trace(go.Scatter(x=X.ravel()[order],
                             y=np.ravel(y_pred)[order],
                             mode='lines',
                             name='Huber regression',
                             line=dict(color='black')))

    # The fitted equation is not annotated on the figure: Plotly has rendering
    # issues with LaTeX. The coefficients are kept in huber_dict instead.

    fig.update_layout(title=f'Average volatility vs average correlation for the {index} index.',
                      xaxis_title='Averaged Annualized Volatility, %',
                      yaxis_title='Average correlation coefficient',
                      yaxis_range=[0., 1.],
                      xaxis_range=[0, 150.],
                      xaxis=dict(
                          title_font=dict(size=20),
                          tickfont=dict(size=16)),
                      yaxis=dict(
                          title_font=dict(size=20),
                          tickfont=dict(size=16)))

    # Figures are written to disk; call fig.show() here for interactive inspection.
    fig.write_image(f'figures/monthly/{index}_volatility_vs_correlation_monthly_means.png', scale=1)

Huber regression coefficients:

In [None]:
# huber_dict is keyed by index, so the raw frame has one column per index;
# transpose it to get one row per index with the coefficients 'a' and 'b' as columns.
df_huber = pd.DataFrame(huber_dict).transpose()
df_huber

Concatenate all data together for convenience:

In [6]:
# Stack the per-index frames into one long frame.
# - The dictionary keys become the outer index level.
# - The inner level (each frame's own row labels) is dropped directly, so the
#   result does not depend on the auto-generated 'level_1' column name.
# - The unnamed key level is then moved into a regular column, which pandas
#   names 'index'.
stacked_df = (
    pd.concat(data, axis=0, keys=list(data.keys()))
    .reset_index(level=1, drop=True)
    .reset_index()
)

Plot data by year:

In [7]:
# Factor that turns the stored volatility into the annualized percentage
# shown on the x axis.
annualization = np.sqrt(252.) * 100.

# write_image does not create missing directories, so make sure the target exists.
os.makedirs('figures/monthly', exist_ok=True)

for year in stacked_df['year'].unique():

    fig = go.Figure()

    # Filter the year once; the inner loop only has to split it by index.
    in_year = stacked_df[stacked_df['year'] == year]

    # Iterate over every index (not only those present this year) so that the
    # trace order, and with it the default colour per index, is the same in every figure.
    for index in stacked_df['index'].unique():
        subset = in_year[in_year['index'] == index]

        # Vectorized "<index>, <month_abbr> <year>" labels; also well-defined for empty subsets.
        hover_texts = (subset['index'].astype(str) + ', '
                       + subset['month_abbr'].astype(str) + ' '
                       + subset['year'].astype(str))
        fig.add_trace(go.Scatter(x=annualization * subset['volatility'],
                                 y=subset['correlation'],
                                 text=hover_texts,
                                 mode='markers',
                                 name=f'{index}'))

    fig.update_layout(title=f'Average volatility vs average correlation for all indices in {year}.',
                      xaxis_title='Averaged Annualized Volatility, %',
                      yaxis_title='Average correlation coefficient',
                      yaxis_range=[0., 1.],
                      xaxis_range=[0, 150.],
                      xaxis=dict(
                          title_font=dict(size=20),
                          tickfont=dict(size=16)),
                      yaxis=dict(
                          title_font=dict(size=20),
                          tickfont=dict(size=16)))

    # Figures are written to disk; call fig.show() here for interactive inspection.
    fig.write_image(f'figures/monthly/all_indices_volatility_vs_correlation_monthly_means_{year}.png', scale=1)

Plot all data together:

In [8]:
# Factor that turns the stored volatility into the annualized percentage
# shown on the x axis.
annualization = np.sqrt(252.) * 100.

# write_image does not create missing directories, so make sure the target exists.
os.makedirs('figures/monthly', exist_ok=True)

fig = go.Figure()

# One marker trace per index, covering all years.
for index in stacked_df['index'].unique():
    # Filter once per index instead of re-evaluating the mask for every argument.
    subset = stacked_df[stacked_df['index'] == index]

    # Vectorized "<index>, <month_abbr> <year>" hover labels.
    hover_texts = (subset['index'].astype(str) + ', '
                   + subset['month_abbr'].astype(str) + ' '
                   + subset['year'].astype(str))
    fig.add_trace(go.Scatter(x=annualization * subset['volatility'],
                             y=subset['correlation'],
                             text=hover_texts,
                             mode='markers',
                             name=f'{index}'))

fig.update_layout(title='Average volatility vs average correlation for all indices',
                  xaxis_title='Averaged Annualized Volatility, %',
                  yaxis_title='Average correlation coefficient',
                  yaxis_range=[0., 1.],
                  xaxis_range=[0, 150.],
                  xaxis=dict(
                      title_font=dict(size=20),
                      tickfont=dict(size=16)),
                  yaxis=dict(
                      title_font=dict(size=20),
                      tickfont=dict(size=16)))

# The figure is written to disk; call fig.show() here for interactive inspection.
fig.write_image('figures/monthly/all-indice_volatility_vs_correlation_monthly_means.png', scale=1)