# Compute XY-Rankings

In [1]:
import numpy as np
import pandas as pd
import networkx as nx

from PMFG_mod import PMFG

from time import time
import timeit

#%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# S&P constituents return data, read from Excel and indexed by the
# 'Names Date' column.
# NOTE: despite the variable name these are simple returns, not log returns.
log_returns_df = pd.read_excel("data/historical_2016_2019.xlsx", index_col='Names Date')

# Column labels of the return table, exposed under both names.
ticker_names = log_returns_df.columns
stock_names = ticker_names

# Quick sanity summary: table size and the first/last index entries.
df_shape = log_returns_df.shape
print(f"There are {df_shape[0]} rows and {df_shape[1]} columns in the dataset.")
print(f"Data timeperiod covers: {log_returns_df.index[0]} to {log_returns_df.index[-1]}")

There are 1006 rows and 485 columns in the dataset.
Data timeperiod covers: 2016-01-04 00:00:00 to 2019-12-31 00:00:00


In [3]:
def XY_ranking(G):
    """Rank the nodes of ``G`` by two composite centrality scores.

    Parameters
    ----------
    G : networkx graph
        Every edge must carry a ``'weight'`` attribute (presumably a
        correlation coefficient -- confirm against the caller).

    Returns
    -------
    (X, Y, XY) : tuple of pandas.Series
        Each Series is indexed by node and holds ranks (pandas default
        ``rank()``):
        * ``X``  -- rank of (PageRank rank + betweenness rank)
        * ``Y``  -- rank of (negated-eccentricity rank + closeness rank
          + eigenvector-centrality rank)
        * ``XY`` -- rank of ``X + Y``
    """
    # Similarity graph: edge weights shifted to 1 + w. Used for the
    # measures that read the weight as connection strength.
    similarity_graph = nx.Graph()
    similarity_graph.add_weighted_edges_from(
        (a, b, 1 + attrs['weight']) for a, b, attrs in G.edges(data=True)
    )
    # PageRank is almost identical to weighted degree here (original author's note).
    pagerank_scores = nx.pagerank(similarity_graph, weight='weight')
    eigenvector_scores = nx.eigenvector_centrality(
        similarity_graph, weight='weight', max_iter=1000
    )
    PG_ranking = pd.Series(pagerank_scores).rank()
    EC_ranking = pd.Series(eigenvector_scores).rank()

    # Distance graph: edge weights mapped to sqrt(2 * (1 - w)). Used for
    # the path-based measures.
    distance_graph = nx.Graph()
    distance_graph.add_weighted_edges_from(
        (a, b, np.sqrt(2 * (1 - attrs['weight']))) for a, b, attrs in G.edges(data=True)
    )
    # Eccentricity is called without a weight argument and is negated before
    # ranking, so a smaller eccentricity yields a larger rank.
    eccentricity_scores = pd.Series(nx.eccentricity(distance_graph))
    NE_ranking = (-eccentricity_scores).rank()
    CLO_ranking = pd.Series(
        nx.closeness_centrality(distance_graph, distance='weight')
    ).rank()
    BC_ranking = pd.Series(
        nx.betweenness_centrality(distance_graph, weight='weight')
    ).rank()

    # Combine the individual rankings into the two composite scores.
    X = (PG_ranking + BC_ranking).rank()
    Y = (NE_ranking + CLO_ranking + EC_ranking).rank()
    XY = (X + Y).rank()
    return X, Y, XY


In [4]:
# wrap the whole correlation construction into one function?
def shrinkage_estimator(corr, shrinkage_coef):
    """Shrink a square matrix linearly towards the identity.

    Parameters
    ----------
    corr : array-like or pandas.DataFrame
        Matrix to shrink; its number of columns sets the identity size.
    shrinkage_coef : float
        Weight placed on the identity target; ``1 - shrinkage_coef`` is
        placed on ``corr``.

    Returns
    -------
    ``corr * (1 - shrinkage_coef) + I * shrinkage_coef``, in the same
    container type that the arithmetic on ``corr`` produces.
    """
    n_cols = corr.shape[1]
    identity = np.eye(n_cols)
    kept_part = corr * (1 - shrinkage_coef)
    target_part = identity * shrinkage_coef
    return kept_part + target_part

class XY_constructor:
    """Build time series of X / Y / XY node rankings on a rolling window.

    Parameters
    ----------
    basket_update_frequency : int
        Number of rows between two recomputations of the rankings.
    rolling_window : int
        Number of rows (strictly before the current row) used to estimate
        the correlation matrix.
    shrinkage_coef : float, optional
        Shrinkage weight passed to ``shrinkage_estimator`` (default 1e-4).
    tol_ratio : float, optional
        ``tol_ratio`` passed to ``PMFG(...).compute`` (default .03).
    """

    def __init__(self, basket_update_frequency, rolling_window,
                 shrinkage_coef=1e-4, tol_ratio=.03):
        self.basket_update_frequency = basket_update_frequency
        self.rolling_window          = rolling_window
        self.shrinkage_coef          = shrinkage_coef
        self.tol_ratio               = tol_ratio

    def get_XYs(self, data, verbose = False):
        """Compute the rankings for every row of ``data``.

        Parameters
        ----------
        data : pandas.DataFrame
            One row per date, one column per asset.
        verbose : bool, optional
            If True, print the date and wall-clock time of each recomputation.

        Returns
        -------
        (df_Xs, df_Ys, df_XYs) : tuple of pandas.DataFrame
            Frames with the same index and columns as ``data``. Rankings are
            recomputed every ``basket_update_frequency`` rows and carried
            forward in between; rows before the first window are back-filled
            from the first computed ranking.
        """
        T = data.shape[0]

        # Use the columns of the frame actually passed in rather than a
        # notebook-level global, so the method works on any input frame.
        columns = data.columns
        df_Xs  = pd.DataFrame(columns=columns, index=data.index)
        df_Ys  = pd.DataFrame(columns=columns, index=data.index)
        df_XYs = pd.DataFrame(columns=columns, index=data.index)

        # NOTE(review): the loop stops at T-2, so the final row is never
        # assigned and stays NaN after the back-fill -- confirm this is intended.
        for t in range(self.rolling_window, T-1):
            # The first iteration always satisfies this test, so X, Y and XY
            # are defined before they are first written below.
            if (t - self.rolling_window) % self.basket_update_frequency == 0:
                started = time()
                window = data.iloc[t - self.rolling_window:t]
                corr   = shrinkage_estimator(window.corr(), self.shrinkage_coef)
                # Zero the diagonal before building the graph.
                G      = nx.from_pandas_adjacency(corr - np.diag(np.diag(corr)))
                G      = PMFG(G).compute(tol_ratio=self.tol_ratio)   # do not save dense corr-network
                X,Y,XY = XY_ranking(G)
                elapsed = time() - started

                if verbose:
                    print('\nRecomputing filtered network at ', data.index[t])
                    print('Time taken: %.2f\n' %elapsed)

            # might want to save only @ dates when the basket is rebalanced
            df_Xs.loc[data.index[t]]  = X
            df_Ys.loc[data.index[t]]  = Y
            df_XYs.loc[data.index[t]] = XY

        # DataFrame.bfill() replaces the deprecated fillna(method='bfill').
        df_Xs  = df_Xs.bfill()
        df_Ys  = df_Ys.bfill()
        df_XYs = df_XYs.bfill()

        return df_Xs, df_Ys, df_XYs
    


In [5]:
# Quarterly rebalance (every 63 rows) with a one-year (252 rows) rolling
# estimation window; 126 rows was the alternative window tried previously.
# The original note also mentions a basket size of 30, which is not used
# anywhere in this cell.
update_freq = 63
rolling_window = 252

# Build the constructor and compute the X, Y and XY rankings over the full
# return table, printing progress for each recomputation.
t_back = XY_constructor(
    basket_update_frequency=update_freq,
    rolling_window=rolling_window,
)
tXs, tYs, tXYs = t_back.get_XYs(data=log_returns_df, verbose=True)


Recomputing filtered network at  2017-01-03 00:00:00
Time taken: 60.05


Recomputing filtered network at  2017-04-04 00:00:00
Time taken: 56.15


Recomputing filtered network at  2017-07-05 00:00:00
Time taken: 45.57


Recomputing filtered network at  2017-10-03 00:00:00
Time taken: 36.22


Recomputing filtered network at  2018-01-03 00:00:00
Time taken: 40.94


Recomputing filtered network at  2018-04-05 00:00:00
Time taken: 62.15


Recomputing filtered network at  2018-07-05 00:00:00
Time taken: 58.33


Recomputing filtered network at  2018-10-03 00:00:00
Time taken: 58.15


Recomputing filtered network at  2019-01-04 00:00:00
Time taken: 63.34


Recomputing filtered network at  2019-04-05 00:00:00
Time taken: 67.74


Recomputing filtered network at  2019-07-08 00:00:00
Time taken: 68.01


Recomputing filtered network at  2019-10-04 00:00:00
Time taken: 65.72



In [6]:
# Persist the three ranking tables as CSV files, labelling the index column 'Date'.
tXs.to_csv(path_or_buf='data/X_rankings.csv', index_label='Date')
tYs.to_csv(path_or_buf='data/Y_rankings.csv', index_label='Date')
tXYs.to_csv(path_or_buf='data/XY_rankings.csv', index_label='Date')
