In [None]:
import pandas as pd
import numpy as np

# For PCA
from statsmodels.multivariate import pca

# For plotting and formatting the plots
import matplotlib.pyplot as plt
import matplotlib.ticker as mtkr
import matplotlib.dates as mdts

In [None]:
# FUNCTION TO MAKE A SCATTER PLOT WITH EQUAL AXIS LENGTHS

def plot_equal_lengths_scatters(data, x_name, y_name):
    """
    Draw a scatter plot of two columns on axes that share one symmetric range.

    Dashed reference lines are drawn through x=0 and y=0. After plotting, both axes are set to (-lim, +lim),
    where lim is the largest absolute value among the automatically chosen x- and y-limits.

    data: pandas DataFrame containing x_name and y_name as columns.
    x_name: String; the column plotted on the x-axis (also used as the x-axis label).
    y_name: String; the column plotted on the y-axis (also used as the y-axis label).

    Return: The matplotlib.Figure and matplotlib.axes.Axes for the plot.
    """
    # Long-dash pattern shared by the two zero lines.
    dash_pattern = (0, (10, 6))

    fig, ax = plt.subplots(nrows=1, ncols=1)
    ax.grid(visible=True, linestyle='dashed', lw=0.35, color='lightgray')
    ax.axhline(y=0, color='black', linestyle=dash_pattern, lw=0.5)
    ax.axvline(x=0, color='black', linestyle=dash_pattern, lw=0.5)
    ax.scatter(x=data[x_name], y=data[y_name], s=15, marker='o', c='gainsboro', edgecolors='darkgrey')
    ax.set_xlabel(xlabel=x_name)
    ax.set_ylabel(ylabel=y_name)

    # The widest automatic limit in any direction becomes the half-width of both axes.
    half_width = np.abs([*ax.get_xlim(), *ax.get_ylim()]).max()
    ax.set_xlim(left=-half_width, right=half_width)
    ax.set_ylim(bottom=-half_width, top=half_width)

    return fig, ax

# FUNCTION TO MAKE A TIME SERIES PLOT ON ONE AXIS

def plot_timeseries_one_axis(data, y_label=None):
    """
    This function plots a time series on a single axis, with x-axis ticks formatted as 'Mon-yy'.

    data: pandas DataFrame with dates on the index. Every column is drawn as one line.
    y_label: A string providing the label to use for the y-axis. Optional. Defaults to None.

    Return: The matplotlib.Figure and matplotlib.axes.Axes for the plot.
    """
    fig, ax = plt.subplots(nrows=1, ncols=1)
    ax.grid(visible=True, linestyle='dashed', lw=0.35, color='lightgray')
    # x_compat=True keeps the x-axis in matplotlib date units. Without it pandas switches regular-frequency
    # date indexes to its own period-based axis units, which the matplotlib DateFormatter set below would
    # render as wrong dates.
    data.plot(ax=ax, lw=1., x_compat=True)
    if y_label is not None:
        ax.set_ylabel(ylabel=y_label)
    ax.xaxis.set_major_formatter(mdts.DateFormatter('%b-%y'))
    ax.legend(loc='best', framealpha=0)
    return fig, ax

# FUNCTION TO MAKE A TIME SERIES PLOT ON A PRIMARY AND A SECONDARY AXIS

def plot_timeseries_two_axes(data, cols_primary, cols_secondary, y_label_primary=None, y_label_secondary=None):
    """
    This function plots time series on a primary and secondary axis. (The maximum number of series that can be plotted
    on primary and secondary axes together is 10, the length of the fixed colour list.)

    data: pandas DataFrame with dates on the index.
    cols_primary: A list of columns to plot on the primary y-axis. A single non-list value is wrapped in a list.
    cols_secondary: A list of columns to plot on the secondary y-axis. A single non-list value is wrapped in a list.
    y_label_primary: A string providing the label to use for the primary y-axis. Optional. Defaults to None.
    y_label_secondary: A string providing the label to use for the secondary y-axis. Optional. Defaults to None.

    Return: The matplotlib.Figure, and the two matplotlib.axes.Axes objects corresponding to the primary and
    secondary axes.
    """
    colours = ['black', 'silver', 'lightcoral', 'red', 'peru', 'olivedrab', 'limegreen', 'deepskyblue', 'royalblue',
               'darkorchid']

    if not isinstance(cols_primary, list):
        cols_primary = [cols_primary]
    if not isinstance(cols_secondary, list):
        cols_secondary = [cols_secondary]

    # Suffix each series name with the axis it is read against, so the combined legend is unambiguous.
    data_primary = data[cols_primary].rename(
        mapper={c: ' '.join([c, '(left)']) for c in cols_primary},
        axis=1)
    data_secondary = data[cols_secondary].rename(
        mapper={c: ' '.join([c, '(right)']) for c in cols_secondary},
        axis=1)

    n_primary = data_primary.shape[1]
    n_secondary = data_secondary.shape[1]

    fig, ax = plt.subplots(nrows=1, ncols=1)
    ax.grid(visible=True, linestyle='dashed', lw=0.35, color='lightgray')
    # x_compat=True keeps both axes in matplotlib date units. Without it pandas switches regular-frequency
    # date indexes to its own period-based axis units, which the matplotlib DateFormatter set below would
    # render as wrong dates.
    data_primary.plot(ax=ax, lw=1., color=colours[:n_primary], x_compat=True)
    if y_label_primary is not None:
        ax.set_ylabel(ylabel=y_label_primary)
    ax.legend().set_visible(False)

    ax2 = ax.twinx()
    ax2.grid(visible=False)
    # The secondary series continue through the colour list where the primary series stopped.
    data_secondary.plot(ax=ax2, lw=1., color=colours[n_primary:(n_primary + n_secondary)], x_compat=True)
    if y_label_secondary is not None:
        ax2.set_ylabel(ylabel=y_label_secondary)
    ax2.legend().set_visible(False)

    ax.xaxis.set_major_formatter(mdts.DateFormatter('%b-%y'))
    # The per-axis legends are hidden above; build a single legend covering the lines of both axes.
    lns = ax.lines + ax2.lines
    labs = [line.get_label() for line in lns]
    ax.legend(lns, labs, loc='best', framealpha=0)

    return fig, ax, ax2

In [None]:
# Load the four input CSV files; in each, the first column becomes the row index.
# NOTE(review): the second and third paths are identical, so SSM_pca_changes is read from the same file as
# TAM_pca_changes - confirm whether a different file was intended for SSM.
# NOTE(review): no date parsing is requested for the index, while the time-series plotting helpers document
# a date index - confirm whether parse_dates is needed (and which date format the files use).
MA_pca_changes, TAM_pca_changes, SSM_pca_changes, factors = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (
        'Copie de Multi-asset PCA_CHANGES=4w.csv',
        'Copie de Traditional Assets_Multi-asset PCA_CHANGES=4w.csv',
        'Copie de Traditional Assets_Multi-asset PCA_CHANGES=4w.csv',
        'Copie de equity_factor_performance_data.csv',
    )
)

In [None]:
# Quick look at the factor data using pandas' default plot of the frame.
factors.plot()

In [None]:
# Column-wise z-score: subtract each column's mean, then divide by that column's standard deviation.
MA_pca_changes_normalised = MA_pca_changes.sub(MA_pca_changes.mean()).div(MA_pca_changes.std())

In [None]:
# Display the normalised changes.
MA_pca_changes_normalised

In [None]:
# Dictionary view of the normalised frame: each column name maps to the list of that column's values.
MA_as_dict = MA_pca_changes_normalised.to_dict(orient='list')

In [None]:
# Display the list of normalised values stored under the 'MSCI World' column.
MA_as_dict['MSCI World']

In [None]:
def format_colour(x):
    """
    Return the CSS colour rule for one table cell.

    x: A numeric cell value.

    Return: 'color:red' when x is below zero, otherwise 'color:lightgreen'.
    """
    if x < 0.:
        return 'color:red'
    return 'color:lightgreen'

In [None]:
# Correlation matrix of the normalised changes, shown as whole percentages and coloured by sign.
(
    MA_pca_changes_normalised
    .corr()
    .style
    .format(formatter='{:.0%}')
    .applymap(func=format_colour)
)

In [None]:
# PERFORM PCA; OBTAIN THE LOADING MATRIX & SCORES

# Fit the statsmodels PCA on the normalised changes, then pull out its loading matrix and component scores.
pca_model = pca.PCA(MA_pca_changes_normalised.values, standardize=True, normalize=False)
loadings, scores = pca_model.loadings, pca_model.scores

In [None]:
# Keep the loadings of the first 6 principal components.
# statsmodels stores `loadings` as (variables x components), so the components are selected along axis 1.
# Slicing rows and transposing would instead pick the first 6 variables and swap the two axes.
# Reading from pca_model (rather than re-slicing `loadings`) keeps this cell safe to re-run.
loadings = pca_model.loadings[:, :6]

In [None]:
# Label the loading matrix: rows take the input column names, columns are numbered 'PC 1', 'PC 2', ...
loadings_df = pd.DataFrame(
    data=loadings,
    index=MA_pca_changes_normalised.columns,
    columns=[f'PC {k}' for k in range(1, loadings.shape[1] + 1)])

# Signed two-decimal display, coloured by sign.
loadings_df.style.format(formatter='{:+.2f}').applymap(func=format_colour)

In [None]:
# Display the raw score array returned by the PCA model.
scores

In [None]:
# Label the component scores: rows take the index of the normalised changes, columns reuse the
# 'PC k' labels of loadings_df.
# `scores` holds one column per fitted component, which can be more than the components kept in
# loadings_df. It is trimmed to the same leading components so the data matches the column labels;
# passing the full array raises a shape-mismatch ValueError whenever the counts differ.
scores_df = pd.DataFrame(
    index=MA_pca_changes_normalised.index,
    columns=loadings_df.columns,
    data=scores[:, :loadings_df.shape[1]])

# Signed four-decimal display, coloured by sign.
scores_df.style.format(formatter='{:+.4f}').applymap(func=format_colour)

In [None]:
# PLOT A TIME SERIES OF CUMULATED PRINCIPAL COMPONENT SCORES

# Running total of each component's scores down the rows, drawn as one line per component.
scores_cum_df = scores_df.cumsum(axis=0)
fig_1, ax_1 = plot_timeseries_one_axis(scores_cum_df, y_label=None)

In [None]:
pca = PCA()
principal_components = pca.fit_transform(MA_pca_changes)

In [None]:
loadings_df = pd.DataFrame(pca.components_[:6], columns=MA_pca_changes.columns)
print("Loadings of the first 6 Components:")
print(loadings_df)