# Calibration of the Average Oracle eigenvalues

2021-10-20

author: Christian Bongiorno (christian.bongiorno@centralesupelec.fr)
Adapted by Pau Autrand and Thomas Bienaimé for this project

Note: the computation takes about 2 s.

In [51]:
import os
# To speed up multiprocessing. Force numpy to work on a single thread
os.environ["OMP_NUM_THREADS"] = "1"
from multiprocessing import Pool
import matplotlib.pyplot as plt

In [52]:
import numpy as np
import pandas as pd
import pickle

In [53]:
def get_sortest_eig(C):
    '''
    Eigen-decompose a symmetric matrix and return the spectrum in
    ascending order of eigenvalue.

    input
        C: correlation matrix (passed to np.linalg.eigh)

    output:
        eigenvalues sorted from smallest to largest
        eigenvectors as columns, reordered to match the eigenvalues
    '''

    eigvals, eigvecs = np.linalg.eigh(C)

    # Explicit ascending sort; columns of eigvecs follow the same permutation
    # so that eigvecs[:, k] still belongs to eigvals[k].
    order = eigvals.argsort()
    return eigvals[order], eigvecs[:, order]

In [54]:
def selectData(t, dtin, dtout):
    '''
    Cut a window around index t out of the module-level array C (along its
    third axis) and split it into an in-sample and an out-of-sample part.

    input
        t: index of today
        dtin: in-sample window size
        dtout: out-of-sample window size
    output
        cin: slices t-dtin .. t-1 of C (in-sample)
        cout: slices t .. t+dtout-1 of C (out-of-sample)
    '''

    # Full window [t-dtin, t+dtout) taken from the global C
    window = C[:, :, slice(t - dtin, t + dtout)]

    # The first dtin slices are in-sample, the remainder is out-of-sample
    cin = window[:, :, :dtin]
    cout = window[:, :, dtin:]
    return cin, cout


In [55]:
def get_Oracle(t):
    '''
    Oracle eigenvalues for day t: the out-of-sample matrix expressed in the
    eigenbasis of the in-sample matrix, keeping only the diagonal.

    input
        t: index of today

    output:
        oracle eigenvalues (one per eigenvector, ordered like the ascending
        in-sample eigenvalues)
    '''

    # One-slice windows on each side of t (window sizes are fixed to 1 here)
    in_stack, out_stack = selectData(t, 1, 1)

    # Each window holds a single matrix; take it out of the stack
    corr_in = in_stack[:, :, 0]
    corr_out = out_stack[:, :, 0]

    # Only the in-sample eigenvectors are needed, the eigenvalues are unused
    _, eigvecs = get_sortest_eig(corr_in)

    # Diagonal of V^T C_out V
    projected = eigvecs.T @ corr_out @ eigvecs
    return projected.diagonal()

In [56]:

def compute_AO(dtin,dtout,ncpu=None):
    '''
    Compute the Average-Oracle eigenvalues by averaging get_Oracle(t) over
    all available days, evaluated in parallel with a process pool.

    input
        dtin: in-sample window size; first usable day is t = dtin
        dtout: out-of-sample window size; last usable day is t = Tmax-dtout-1
        ncpu: number of worker processes (None lets Pool pick its default)

    output:
        AO: element-wise NaN-ignoring mean of the per-day oracle eigenvalues

    Note: dtin and dtout only delimit the range of days here; get_Oracle
    itself calls selectData with fixed window sizes of 1.
    '''

    # Available days for the calibration (read from the module-level Tmax)
    avail_t = np.arange(dtin,Tmax-dtout)

    # One task per day, as plain Python ints
    conf_input = avail_t.tolist()

    # The context manager guarantees the workers are released even if a task
    # raises. No explicit chunksize: the previous fixed value of 1000 exceeded
    # the number of days, which put every task in a single chunk handled by
    # one worker. Pool.map's default splits the work across all workers.
    with Pool(processes=ncpu) as p:
        AO = p.map(get_Oracle,conf_input)

    # Drop days for which no oracle was produced
    AO = [i for i in AO if i is not None]

    # Average-Oracle eigenvalues
    AO = np.nanmean(AO,axis=0)

    print(AO)
    return AO

# you can now save the AO values

In [57]:
# Hold out the last 257 time steps: from our data, we separated calibration
# for the average oracle and use of it (4 year calibration, 1 year use (2022)).
last_year_start_index = -257

# Load the correlation array and keep only the calibration part (third axis)
with open('../data/allstocks_correlation.pickle', 'rb') as pickle_file:
    C = pickle.load(pickle_file)[:, :, :last_year_start_index]

# Total number of days used: 85% of the calibration data (integer floor)
Tmax = (C.shape[2] * 85) // 100
Tmax

866

In [58]:
# Size of the first axis of C, presumably the number of stocks (matrix
# dimension); confirm against the data file. Any fixed value can be used instead.
N=C.shape[0] # or choose any fixed value
# Bare expression: displays N when run as a notebook cell
N

33

In [59]:

# In-sample and out-of-sample window sizes used to delimit the available days
dtin = dtout = 1
Nsamples = Tmax

# NOTE(review): the file name mentions "Tin35" although dtin is 1 here;
# confirm the intended name before reusing this file.
file_AO = "AO_Tin35.csv"

# Calibrate the Average-Oracle eigenvalues and save them as a one-column CSV
AO = compute_AO(dtin, dtout)
pd.DataFrame(AO).to_csv(file_AO)

[0.83584979 0.81668341 0.8145342  0.82095916 0.81433646 0.82512308
 0.83413734 0.84070549 0.84484508 0.84771816 0.85179381 0.85979779
 0.86729106 0.87169289 0.87851656 0.88239558 0.89059653 0.89070808
 0.9020965  0.91185598 0.91946512 0.92520947 0.93139761 0.94431868
 0.94640136 0.95517777 0.95742562 0.97082556 0.98296382 1.0127625
 1.04179951 1.15641257 4.15420347]
