# import, data read

In [4]:
import pandas as pd, numpy as np
import scipy
import matplotlib.pyplot as plt
from scipy.linalg import sqrtm

In [5]:
df = pd.read_csv('market_data_binance.by_minute_ALL_2022-09-19T04:00:00Z_2022-09-20T03:59:00Z.csv')
df['time'] = pd.to_datetime(df['timestamp'],unit='s')
df_close = df.pivot(index='time', columns='symbol', values='close').resample('10min').last()

In [74]:
symbols = ['YFIIUSDT', 'ETHUSDT', 'PAXGUSDT']  # , 'BIFIUSDT', 'BNBUSDT'
df_close_symbols = df_close[symbols]

# portmanteau

In [68]:
def calc_autocov_matrix(arr, lag):
    m = arr.shape[0]  # sample size.
    return 1 / (m - lag - 1) * (arr[lag:] - np.nanmean(arr[lag:], axis=0)).T @ (arr[:m-lag] - np.nanmean(arr[:m-lag], axis=0))

In [27]:
def autocov_matrix_calc(arr, lag):
    # sample size.
    m = arr.shape[0]
    arr_demeaned = arr - np.nanmean(arr, axis=0)
    return 1 / (m - lag - 1) * arr_demeaned[lag:].T @ arr_demeaned[:-lag]; 

In [55]:
def portmanteau_gep(df, lags):
    assert lags > 0
    rho = df.cov().values
    rho_inv_sqrt = np.linalg.inv(sqrtm(rho))

    pmt_matrix = 0;
    for i in range(1, lags+1):
        autocov = calc_autocov_matrix(df.values, i)
        pmt_matrix += np.square(rho_inv_sqrt @ autocov @ rho_inv_sqrt)
    pmt_matrix /= lags;

    eigenvalues, eigenvectors = np.linalg.eig(pmt_matrix)
    asc = np.argsort(eigenvalues)
    eigenvalues, eigenvectors = eigenvalues[asc], eigenvectors[:, asc]

    wgts = rho_inv_sqrt @ eigenvectors

    return np.real(eigenvalues), np.real(wgts)

In [75]:
df_close_symbols

symbol,YFIIUSDT,ETHUSDT,PAXGUSDT
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2022-09-19 04:00:00,834.2,1300.92,1659.0
2022-09-19 04:10:00,834.6,1303.61,1659.0
2022-09-19 04:20:00,835.4,1303.85,1660.0
2022-09-19 04:30:00,835.0,1304.09,1659.0
2022-09-19 04:40:00,837.4,1305.19,1659.0
...,...,...,...
2022-09-20 03:10:00,879.8,1349.10,1666.0
2022-09-20 03:20:00,874.4,1344.29,1666.0
2022-09-20 03:30:00,879.8,1349.84,1667.0
2022-09-20 03:40:00,880.4,1353.94,1667.0


In [76]:
portmanteau_gep(df_close_symbols, 1)

(array([0.55945359, 0.69833252, 0.86015399]),
 array([[ 0.03981983,  0.16496209, -0.08249003],
        [-0.10793552, -0.0994292 ,  0.09367988],
        [ 0.55981155,  0.0763796 , -0.08819786]]))

In [77]:
(df_close_symbols - df_close_symbols.mean(0)).cov()

symbol,YFIIUSDT,ETHUSDT,PAXGUSDT
symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
YFIIUSDT,369.217708,547.929429,78.480051
ETHUSDT,547.929429,900.177659,137.954222
PAXGUSDT,78.480051,137.954222,24.920697


In [78]:
df = df_close_symbols
lags = 2

#df_normalized = df - df.mean(0);
rho = df.cov().values
rho_inv_sqrt = np.linalg.inv(sqrtm(rho))

pmt_matrix = 0;
for i in range(1, lags):
    autocov = calc_autocov_matrix(df.values, i)
    pmt_matrix += np.square(rho_inv_sqrt @ autocov @ rho_inv_sqrt)
pmt_matrix /= lags;

eigenvalues, eigenvectors = np.linalg.eig(pmt_matrix)
asc = np.argsort(eigenvalues)
eigenvalues, eigenvectors = eigenvalues[asc], eigenvectors[:, asc]

wgts = rho_inv_sqrt @ eigenvectors

np.real(eigenvalues), np.real(wgts)

(array([0.27972679, 0.34916626, 0.430077  ]),
 array([[ 0.03981983,  0.16496209, -0.08249003],
        [-0.10793552, -0.0994292 ,  0.09367988],
        [ 0.55981155,  0.0763796 , -0.08819786]]))

In [79]:
wgts

array([[ 0.03981983,  0.16496209, -0.08249003],
       [-0.10793552, -0.0994292 ,  0.09367988],
       [ 0.55981155,  0.0763796 , -0.08819786]])

In [92]:
sum(abs(wgts[:,0]))

0.7075669068199595

In [80]:
asc

array([2, 1, 0])

In [81]:
eigenvalues

array([0.27972679, 0.34916626, 0.430077  ])

In [82]:
eigenvectors

array([[-0.1221195 ,  0.99655103,  0.02584774],
       [-0.01997348, -0.08134926,  0.99846494],
       [ 0.99231441,  0.01638136,  0.04898632]])