# Tensor Decomposition of Returns and Factor data

In [1]:
import numpy as np
from hottbox.core import Tensor
import yfinance as yf
import alphalens
import pandas as pd
from hottbox.pdtools import tensor_to_pd, pd_to_tensor
from hottbox.utils.generation import residual_tensor
from hottbox.algorithms.decomposition import TTSVD, HOSVD, HOOI, CPD
from hottbox.metrics import residual_rel_error, mse, rmse, mape
from hottbox.rank import rank_estimation as re
from matplotlib import pyplot as plt

In [2]:
# Fetch daily price data for the 20-ticker universe between the given dates.
pan = yf.download(
    "GOOGL AAPL AMD TSLA NFLX TXN AMAT AMZN INTC NVDA ADBE CSCO V IBM QCOM MSFT XLNX HPQ VZ T",
    start="2011-05-20",
    end="2021-05-20",
)
# Alternative ticker universe, kept for reference:
# pan = yf.download("BAC JPM GS C MS HSBC BNPQY BCS LYG UBS CS WFC RY SCHW TD HDB USB PNC TFC BNS", start="2011-05-20", end="2021-05-20")

[*********************100%***********************]  20 of 20 completed


In [3]:
# Display the "Adj Close" block of the downloaded data (one column per ticker).
pan["Adj Close"]

Unnamed: 0_level_0,AAPL,ADBE,AMAT,AMD,AMZN,CSCO,GOOGL,HPQ,IBM,INTC,MSFT,NFLX,NVDA,QCOM,T,TSLA,TXN,V,VZ,XLNX
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2011-05-19,10.457952,35.410000,11.807164,8.640000,198.800003,12.413521,265.890900,12.441459,121.125946,17.450867,19.808411,34.675713,16.341516,43.385468,17.975750,5.640000,26.947958,18.545631,23.718208,29.181328
2011-05-20,10.294883,35.310001,11.609413,8.620000,198.649994,12.316658,262.277283,12.389801,120.820641,17.213638,19.624115,35.128571,16.626440,43.446041,17.929951,5.594000,26.971077,18.217299,23.610163,28.946644
2011-05-23,10.269693,34.430000,11.362230,8.490000,196.220001,12.182540,259.454468,12.331264,119.471565,16.946764,19.367695,35.404285,16.608053,42.688881,17.792555,5.364000,26.393129,17.912233,23.419500,28.185965
2011-05-24,10.201826,34.139999,11.296312,8.390000,193.270004,12.122929,259.389374,12.386359,119.279854,16.761433,19.351664,35.371429,16.672396,42.635868,17.838360,5.344000,26.262125,18.070585,23.451271,27.878454
2011-05-25,10.342788,34.130001,11.419907,8.430000,192.259995,12.063319,260.095093,12.400136,119.109428,16.813324,19.383720,37.067142,16.874594,43.316139,17.746759,5.796000,26.416248,18.338381,23.133501,27.935095
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-05-13,124.970001,474.160004,119.710686,73.089996,3161.469971,52.490002,2229.040039,32.389999,144.169998,54.009998,242.470108,486.660004,546.609985,127.110001,32.209999,571.690002,178.990005,223.740005,58.810001,114.610001
2021-05-14,127.449997,486.559998,124.612114,74.589996,3222.899902,52.900002,2278.379883,32.919998,144.679993,55.349998,247.578308,493.369995,569.719971,130.149994,32.240002,589.739990,183.270004,226.940002,58.689999,118.000000
2021-05-17,126.269997,482.739990,123.354317,74.650002,3270.389893,52.939999,2288.919922,33.040001,145.110001,55.330002,244.615158,488.940002,566.619995,129.800003,31.370001,576.830017,180.830002,226.440002,57.939999,118.139999
2021-05-18,124.849998,480.619995,121.597382,74.440002,3232.280029,52.930000,2262.469971,32.340000,143.910004,54.840000,242.520004,486.279999,560.630005,128.910004,29.549999,577.869995,178.869995,225.570007,57.180000,117.809998


In [4]:
# Daily log returns: first difference of the log of the adjusted close,
# stacked into a long Series with one row per (date, asset) pair.
returns = (
    pan["Adj Close"]
    .apply(np.log)
    .diff(1)
    .stack()
)
returns.index = returns.index.set_names(['date', 'asset'])
# Round trip through the wide layout and back to long form. The wide layout
# is where the index could be made timezone-aware if needed:
# returns.index = returns.index.tz_localize('UTC')
returns = returns.unstack().stack()
returns

date        asset
2011-05-20  AAPL    -0.015716
            ADBE    -0.002828
            AMAT    -0.016890
            AMD     -0.002318
            AMZN    -0.000755
                       ...   
2021-05-19  TSLA    -0.025253
            TXN      0.015808
            V       -0.004354
            VZ      -0.004557
            XLNX     0.024732
Length: 50320, dtype: float64

In [5]:
# Re-download with an earlier start date so the 5-row look-back below has
# history available. NOTE: this overwrites `pan` from the earlier cells.
pan = yf.download(
    "GOOGL AAPL AMD TSLA NFLX TXN AMAT AMZN INTC NVDA ADBE CSCO V IBM QCOM MSFT XLNX HPQ VZ T",
    start="2011-05-14",
    end="2021-05-20",
)
# Alternative ticker universe, kept for reference:
# pan = yf.download("BAC JPM GS C MS HSBC BNPQY BCS LYG UBS CS WFC RY SCHW TD HDB USB PNC TFC BNS", start="2011-05-14", end="2021-05-20")

# Factor = negated 5-row percentage change of the open price, in long form.
predictive_factor = (-pan['Open'].pct_change(5)).stack()
predictive_factor.index = predictive_factor.index.set_names(['date', 'asset'])

# Open prices without the first row, on a UTC-localized index.
pricing = pan['Open'].iloc[1:]
pricing.index = pricing.index.tz_localize('UTC')

# Round trip through the wide layout and back to long form; the wide layout
# is where the index could be localized if needed:
# factor.index = factor.index.tz_localize('UTC')
predictive_factor = predictive_factor.unstack().stack()

predictive_factor

[*********************100%***********************]  20 of 20 completed


date        asset
2011-05-20  AAPL     0.017647
            ADBE     0.008112
            AMAT     0.050532
            AMD      0.038117
            AMZN     0.037676
                       ...   
2021-05-19  TSLA     0.082889
            TXN      0.024765
            V        0.003890
            VZ       0.029700
            XLNX     0.023755
Length: 50320, dtype: float64

In [6]:
# Re-download with a still earlier start date so the 10-row look-back below
# has history available. NOTE: this overwrites `pan` once more.
pan = yf.download(
    "GOOGL AAPL AMD TSLA NFLX TXN AMAT AMZN INTC NVDA ADBE CSCO V IBM QCOM MSFT XLNX HPQ VZ T",
    start="2011-05-07",
    end="2021-05-20",
)
# Alternative ticker universe, kept for reference:
# pan = yf.download("BAC JPM GS C MS HSBC BNPQY BCS LYG UBS CS WFC RY SCHW TD HDB USB PNC TFC BNS", start="2011-05-14", end="2021-05-20")

# Factor = 10-row percentage change of the open price, in long form.
non_predictive_factor = pan["Open"].pct_change(10).stack()
non_predictive_factor.index = non_predictive_factor.index.set_names(['date', 'asset'])
non_predictive_factor

[*********************100%***********************]  20 of 20 completed


date        asset
2011-05-20  AAPL    -0.028969
            ADBE     0.064226
            AMAT    -0.062377
            AMD     -0.046667
            AMZN    -0.005776
                       ...   
2021-05-19  TSLA    -0.188691
            TXN     -0.031877
            V       -0.047424
            VZ      -0.035988
            XLNX    -0.065998
Length: 50320, dtype: float64

In [7]:
from datetime import datetime

# Gather the three long-form series into one table. The Series values are
# aligned on their shared (date, asset) index; the 'Date' and 'Asset' lists
# are taken from the index of `returns`.
data = {
    'Date': list(returns.index.get_level_values('date')),
    'Asset': list(returns.index.get_level_values('asset')),
    'Returns': returns,
    'Predictive Factor': predictive_factor,
    'Non-Predictive Factor': non_predictive_factor,
}

# Every field is moved into the row index, so the frame has no data columns;
# later cells read the values back with `index.get_level_values(...)`.
index_fields = ["Date", "Asset", "Returns", "Predictive Factor", "Non-Predictive Factor"]
df_original = pd.DataFrame.from_dict(data).set_index(index_fields)
df_original

Date,Asset,Returns,Predictive Factor,Non-Predictive Factor
2011-05-20,AAPL,-0.015716,0.017647,-0.028969
2011-05-20,ADBE,-0.002828,0.008112,0.064226
2011-05-20,AMAT,-0.016890,0.050532,-0.062377
2011-05-20,AMD,-0.002318,0.038117,-0.046667
2011-05-20,AMZN,-0.000755,0.037676,-0.005776
...,...,...,...,...
2021-05-19,TSLA,-0.025253,0.082889,-0.188691
2021-05-19,TXN,0.015808,0.024765,-0.031877
2021-05-19,V,-0.004354,0.003890,-0.047424
2021-05-19,VZ,-0.004557,0.029700,-0.035988


In [8]:
# Build a nested list shaped (date, asset, feature) from the long-form index
# of `df_original`, with features ordered
# [Returns, Predictive Factor, Non-Predictive Factor].
#
# The level values are extracted once up front: calling
# `index.get_level_values(...)` for every row builds a fresh Index each time,
# which makes the loop quadratic in the number of rows.
level_index = df_original.index
returns_values = level_index.get_level_values('Returns')
predictive_values = level_index.get_level_values('Predictive Factor')
non_predictive_values = level_index.get_level_values('Non-Predictive Factor')

assets_per_date = 20  # rows are grouped in consecutive runs of 20 assets per date

tensor = []
tensor_date = []
for ret, pred, non_pred in zip(returns_values, predictive_values, non_predictive_values):
    # A full group is flushed only when the first row of the next group arrives.
    if len(tensor_date) == assets_per_date:
        tensor.append(tensor_date)
        tensor_date = []
    tensor_date.append([ret, pred, non_pred])

# NOTE(review): the group still held in `tensor_date` when the loop ends is
# never appended, so the final date is absent from `tensor`. The per-stock
# RMSE cell compensates by dropping the last original observation ([:-1]),
# so the two must be changed together if the final date is to be included.

In [9]:
# View the nested list as a NumPy array (shown as the cell output).
np.array(tensor)

array([[[-0.01571574,  0.01764742, -0.02896853],
        [-0.00282801,  0.00811191,  0.06422567],
        [-0.01689023,  0.05053193, -0.06237688],
        ...,
        [-0.01786265,  0.00825005,  0.01070059],
        [-0.00456579,  0.00693144, -0.00560596],
        [-0.00807478,  0.00689465,  0.00139041]],

       [[-0.00244979,  0.02721112, -0.05142865],
        [-0.02523796,  0.01167093,  0.04139175],
        [-0.02152152,  0.05945947, -0.08421051],
        ...,
        [-0.0168877 ,  0.0260266 , -0.02578037],
        [-0.00810822,  0.01368756, -0.01421671],
        [-0.02663012,  0.00674541, -0.01284914]],

       [[-0.00663043, -0.01054217, -0.03837878],
        [-0.00845859, -0.00494336,  0.01348982],
        [-0.00581839,  0.0475207 , -0.08652579],
        ...,
        [ 0.00880159,  0.03496242, -0.03701387],
        [ 0.00135567, -0.00027096, -0.00992486],
        [-0.01097   ,  0.00936696, -0.02350308]],

       ...,

       [[ 0.01965039,  0.0351548 , -0.04196387],
        [ 0

In [10]:
# Wrap the (date, asset, feature) array in a hottbox Tensor and print its summary.
tensor_values = np.array(tensor)
tensor_data = Tensor(tensor_values)
print(tensor_data)

This tensor is of order 3 and consists of 150900 elements.
Sizes and names of its modes are (2515, 20, 3) and ['mode-0', 'mode-1', 'mode-2'] respectively.


In [11]:
# Estimate the Kruskal rank for CPD from a hand-picked list of candidate ranks.
candidate_ranks = [1, 13, 15, 21, 36, 43]
re.rankest(tensor_data, candidate_ranks)

(43,)

In [12]:
# Estimate the multilinear rank of the tensor, as a guide for the Tucker
# (HOSVD / HOOI) decompositions. `re` is hottbox.rank.rank_estimation here,
# not the standard-library regex module.
re.mlrank(tensor_data)

(60, 20, 3)

# CPD

In [13]:
# Create the CPD algorithm with its default settings; the bare name echoes
# the configuration. NOTE: `alg` is reassigned in each later section.
alg = CPD()
alg

CPD(epsilon=0.01, init='svd', max_iter=50, random_state=None, tol=0.0001,
    verbose=False)

In [14]:
# Fit the CPD at the chosen Kruskal rank, then summarise the result:
# the representation itself, the factor-matrix shapes, and the core tensor.
kruskal_rank = (18,)
tensor_cpd = alg.decompose(tensor_data, rank=kruskal_rank)

algorithm_header = "\tOutput of the {} algorithm:".format(alg.name)
print(algorithm_header)
print(tensor_cpd)

print('\n\tFactor matrices')
for mode, fmat in enumerate(tensor_cpd.fmat):
    factor_shape = fmat.shape
    print('Mode-{} factor matrix is of shape {}'.format(mode, factor_shape))

print('\n\tCore tensor')
cpd_core = tensor_cpd.core
print(cpd_core)
cpd_core.data

	Output of the CPD algorithm:
Kruskal representation of a tensor with rank=(18,).
Factor matrices represent properties: ['mode-0', 'mode-1', 'mode-2']
With corresponding latent components described by (2515, 20, 3) features respectively.

	Factor matrices
Mode-0 factor matrix is of shape (2515, 18)
Mode-1 factor matrix is of shape (20, 18)
Mode-2 factor matrix is of shape (3, 18)

	Core tensor
This tensor is of order 3 and consists of 5832 elements.
Sizes and names of its modes are (18, 18, 18) and ['mode-0', 'mode-1', 'mode-2'] respectively.


array([[[1., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 1., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 1., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       ...,

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 1., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0.

In [15]:
# Show the mode-1 CPD factor matrix (the second entry of `tensor_cpd.fmat`).
# The header uses '\n\t' like the other section headers; '\F' is not a valid
# escape sequence and would print a literal backslash.
print('\n\tMode-1 factor matrix')
tensor_cpd.fmat[1]


\Factor tensor


array([[-8.82314364e-02,  2.00694346e+00,  1.41449875e-01,
        -5.64280346e-03,  6.67179030e-01, -1.20132367e+00,
        -5.37878274e-02,  1.72098899e+00, -1.18861485e-01,
        -1.20661512e+00,  8.11024242e-01,  4.02375563e-01,
        -1.29573578e+00, -2.90057794e-01,  1.30363157e-01,
        -1.02184630e-01, -9.42151921e-02, -2.04168714e+00],
       [ 7.21178968e-01,  1.06011598e+00, -1.58532699e-01,
         6.10360075e-01,  6.75664427e-01, -1.25184201e+00,
         9.18376948e-01, -8.88379728e-02,  8.83287831e-03,
        -9.09335397e-01,  9.86323881e-01, -4.41416440e-01,
        -9.09927893e-01, -1.49524709e-01,  9.92489680e-02,
        -3.67251357e-01, -6.63601209e-01, -1.59153932e+00],
       [ 4.34140872e-01, -3.61994916e-03,  1.29886620e-01,
         1.10078938e+00,  1.39567649e+00, -1.76665120e+00,
        -2.91499676e-01,  1.07480552e-02,  1.81157436e-01,
        -2.66568138e+00,  6.94900930e-01, -1.67477878e-01,
        -1.94163388e+00,  2.02660774e+00, -1.73969664e

In [16]:
# Report the shape and order of the full tensor that the CPD represents.
shape_message = 'The shape of the underlying tensor is {}'
order_message = 'The order of the underlying tensor is {}'
full_shape, order = tensor_cpd.ft_shape, tensor_cpd.order
print(shape_message.format(full_shape))
print(order_message.format(order))

The shape of the underlying tensor is (2515, 20, 3)
The order of the underlying tensor is 3


In [17]:
# Rebuild the full tensor from the CPD factors, print its summary and show
# the raw values. NOTE: `tensor_full` is overwritten by each later section.
tensor_full = tensor_cpd.reconstruct()
print(tensor_full)
tensor_full.data

This tensor is of order 3 and consists of 150900 elements.
Sizes and names of its modes are (2515, 20, 3) and ['mode-0', 'mode-1', 'mode-2'] respectively.


array([[[-3.14253983e-03,  2.85915780e-02, -2.74933810e-02],
        [-1.61012717e-04,  1.23182838e-02,  4.79664595e-03],
        [-8.50023988e-03,  4.87616549e-02, -5.42566479e-02],
        ...,
        [-9.82081357e-04,  1.70324600e-02, -6.83691661e-03],
        [ 1.18603733e-03, -4.42211185e-03,  1.33171583e-02],
        [-3.17034510e-03,  3.82195976e-03,  9.87490488e-03]],

       [[-1.15591003e-02,  3.92022698e-02, -5.44140088e-02],
        [-1.09659112e-02,  1.50760854e-02, -1.27505481e-02],
        [-1.96803534e-02,  5.34464639e-02, -7.65523283e-02],
        ...,
        [-9.10357407e-03,  2.35250570e-02, -2.57524657e-02],
        [-1.10827503e-03,  7.10819070e-05,  1.86897894e-03],
        [-1.16359906e-02,  4.26051648e-03, -7.03943985e-03]],

       [[-3.11475470e-03,  9.82807150e-03, -3.90970930e-02],
        [-1.62091876e-03, -1.59370086e-03, -2.13539987e-02],
        [-8.53938372e-03,  3.37692155e-02, -8.77306509e-02],
        ...,
        [-1.83476618e-03,  5.19422055e-03,

# Tucker Decomposition

# HOSVD

In [18]:
# Replace `alg` with an HOSVD instance (default settings) and echo its configuration.
alg = HOSVD()
alg

HOSVD(process=(), verbose=False)

In [19]:
# Fit a Tucker decomposition via HOSVD at the chosen multilinear rank, then
# summarise the representation, its factor-matrix shapes and its core tensor.
ml_rank = (18, 18, 2)
tensor_tkd_hosvd = alg.decompose(tensor_data, ml_rank)

algorithm_header = "\tOutput of the {} algorithm:".format(alg.name)
print(algorithm_header)
print(tensor_tkd_hosvd)

print('\n\tFactor matrices')
for mode, fmat in enumerate(tensor_tkd_hosvd.fmat):
    factor_shape = fmat.shape
    print('Mode-{} factor matrix is of shape {}'.format(mode, factor_shape))

print('\n\tCore tensor')
hosvd_core = tensor_tkd_hosvd.core
print(hosvd_core)
hosvd_core.data

	Output of the HOSVD algorithm:
Tucker representation of a tensor with multi-linear rank=(18, 18, 2).
Factor matrices represent properties: ['mode-0', 'mode-1', 'mode-2']
With corresponding latent components described by (2515, 20, 3) features respectively.

	Factor matrices
Mode-0 factor matrix is of shape (2515, 18)
Mode-1 factor matrix is of shape (20, 18)
Mode-2 factor matrix is of shape (3, 2)

	Core tensor
This tensor is of order 3 and consists of 648 elements.
Sizes and names of its modes are (18, 18, 2) and ['mode-0', 'mode-1', 'mode-2'] respectively.


array([[[-1.07265658e+01, -5.15141257e-02],
        [-1.05415785e-01, -5.44512830e-02],
        [-6.20352381e-02,  4.60152455e-02],
        [ 1.17024880e-01,  2.53559802e-03],
        [-2.35127635e-02, -2.52176443e-02],
        [ 9.51448138e-03, -4.78771869e-02],
        [-8.52255187e-04, -4.15958851e-02],
        [ 2.00472886e-02,  9.13701797e-02],
        [-3.46651654e-02,  5.95859535e-02],
        [ 4.17889722e-03, -1.09596162e-02],
        [ 6.08438170e-03,  1.04539638e-02],
        [ 3.94492896e-03,  2.75732529e-02],
        [ 4.87468670e-02, -2.12690672e-02],
        [ 3.16843032e-02, -4.57840028e-02],
        [ 1.63638576e-02,  2.57208698e-02],
        [ 1.33340314e-02,  3.57935031e-02],
        [-1.61166672e-02,  6.89995467e-04],
        [ 1.78880418e-02, -1.02192938e-02]],

       [[ 5.46121087e-02,  4.56532072e-01],
        [-5.95007021e+00,  5.74394401e-02],
        [ 2.08513778e-01,  4.07534872e-04],
        [ 5.51840342e-02, -1.05136238e-01],
        [-3.83511163e-02,  5.4

In [20]:
# Report the shape and order of the full tensor that the HOSVD result represents.
shape_message = 'The shape of the underlying tensor is {}'
order_message = 'The order of the underlying tensor is {}'
full_shape, order = tensor_tkd_hosvd.ft_shape, tensor_tkd_hosvd.order
print(shape_message.format(full_shape))
print(order_message.format(order))

The shape of the underlying tensor is (2515, 20, 3)
The order of the underlying tensor is 3


In [21]:
# List the shape of every HOSVD factor matrix, then display the matrices.
hosvd_factors = tensor_tkd_hosvd.fmat
print('\n\tFactor matrices')
for mode, fmat in enumerate(hosvd_factors):
    factor_shape = fmat.shape
    print('Mode-{} factor matrix is of shape {}'.format(mode, factor_shape))
tensor_tkd_hosvd.fmat


	Factor matrices
Mode-0 factor matrix is of shape (2515, 18)
Mode-1 factor matrix is of shape (20, 18)
Mode-2 factor matrix is of shape (3, 2)


[array([[ 0.01002118, -0.01961588, -0.0050248 , ..., -0.00290194,
          0.03286297,  0.00941265],
        [ 0.01430942, -0.01885868,  0.00148562, ..., -0.00416606,
          0.02188033,  0.0023603 ],
        [ 0.01118301, -0.01476375,  0.01436571, ...,  0.00239003,
          0.0071859 , -0.00563392],
        ...,
        [ 0.03046917, -0.00234603,  0.01092601, ...,  0.01061467,
         -0.01266561,  0.03587705],
        [ 0.02965309,  0.00658548,  0.01772144, ...,  0.01324464,
         -0.0102266 ,  0.02195001],
        [ 0.01032203,  0.01235885,  0.01941285, ..., -0.00197566,
         -0.00620362,  0.02885035]]),
 array([[-1.74245452e-01,  3.59434164e-02, -5.13463050e-02,
         -1.35533809e-01,  2.56005561e-01, -1.46623907e-01,
          9.76763674e-02,  2.49588957e-01, -4.08688492e-01,
          6.76767729e-01, -1.31913712e-01, -1.68396377e-01,
          2.74821645e-01, -7.91333686e-02, -1.53606946e-01,
          2.35443238e-02,  6.93218828e-02,  7.24574499e-02],
        [-1.

In [22]:
# Rebuild the full tensor from the HOSVD representation, print its summary
# and show the raw values (overwrites `tensor_full` from the CPD section).
tensor_full = tensor_tkd_hosvd.reconstruct()
print(tensor_full)
tensor_full.data

This tensor is of order 3 and consists of 150900 elements.
Sizes and names of its modes are (2515, 20, 3) and ['mode-0', 'mode-1', 'mode-2'] respectively.


array([[[-3.15980433e-03,  3.37980106e-02, -3.69870148e-02],
        [-5.70055954e-04,  2.92134361e-03,  1.27135350e-02],
        [-4.45266048e-03,  4.80040898e-02, -5.44239949e-02],
        ...,
        [-1.24051248e-03,  1.03500593e-02,  3.29468080e-03],
        [-3.73978955e-04,  2.46127806e-03,  5.01545691e-03],
        [-1.35423463e-03,  1.11418633e-02,  4.55514777e-03]],

       [[-3.69117270e-03,  4.20753420e-02, -5.90382830e-02],
        [-1.43606316e-03,  1.46277998e-02, -1.23377393e-02],
        [-4.72841253e-03,  5.40219889e-02, -7.63806231e-02],
        ...,
        [-1.98470654e-03,  2.08548688e-02, -2.09489991e-02],
        [-5.85092391e-04,  5.63415837e-03, -3.03924001e-03],
        [-1.15588889e-03,  1.16264963e-02, -9.03073837e-03]],

       [[-3.09838563e-04,  8.65401669e-03, -3.62205156e-02],
        [ 6.18735513e-04, -1.23275772e-03, -2.56286679e-02],
        [-2.30611588e-03,  3.44991170e-02, -8.70088666e-02],
        ...,
        [-1.82783437e-04,  6.82246650e-03,

# HOOI

In [23]:
# Replace `alg` with an HOOI instance (default settings) and echo its configuration.
alg = HOOI()
alg

HOOI(epsilon=0.01, init='hosvd', max_iter=50, process=(),
     random_state=None, tol=0.0001, verbose=False)

In [24]:
# Fit a Tucker decomposition via HOOI at the same multilinear rank as the
# HOSVD section, then summarise the representation, its factor-matrix shapes
# and its core tensor.
ml_rank = (18, 18, 2)
tensor_tkd_hooi = alg.decompose(tensor_data, ml_rank)

algorithm_header = "\tOutput of the {} algorithm:".format(alg.name)
print(algorithm_header)
print(tensor_tkd_hooi)

print('\n\tFactor matrices')
for mode, fmat in enumerate(tensor_tkd_hooi.fmat):
    factor_shape = fmat.shape
    print('Mode-{} factor matrix is of shape {}'.format(mode, factor_shape))

print('\n\tCore tensor')
hooi_core = tensor_tkd_hooi.core
print(hooi_core)
hooi_core.data

	Output of the HOOI algorithm:
Tucker representation of a tensor with multi-linear rank=(18, 18, 2).
Factor matrices represent properties: ['mode-0', 'mode-1', 'mode-2']
With corresponding latent components described by (2515, 20, 3) features respectively.

	Factor matrices
Mode-0 factor matrix is of shape (2515, 18)
Mode-1 factor matrix is of shape (20, 18)
Mode-2 factor matrix is of shape (3, 2)

	Core tensor
This tensor is of order 3 and consists of 648 elements.
Sizes and names of its modes are (18, 18, 2) and ['mode-0', 'mode-1', 'mode-2'] respectively.


array([[[-1.07278261e+01,  5.57870942e-02],
        [-4.10008113e-02,  5.51624527e-02],
        [-4.23033580e-02, -4.46243293e-02],
        [-4.43376617e-02,  2.41005199e-03],
        [ 9.31342562e-03, -1.94037826e-02],
        [-4.89671450e-03,  4.82170520e-02],
        [-1.03763282e-02, -3.57691955e-02],
        [-5.68909930e-03, -9.38614868e-02],
        [-2.61757205e-02, -6.42019185e-02],
        [ 8.55967326e-03,  3.70121242e-04],
        [ 7.01820054e-03,  2.96681274e-03],
        [ 7.86605847e-03,  2.58354031e-02],
        [-3.26499594e-02, -3.19873767e-02],
        [ 8.21707170e-03,  2.90628841e-02],
        [ 2.83929277e-03,  3.17361908e-02],
        [-2.25882162e-02, -5.74742304e-03],
        [ 9.08263062e-03, -3.39684470e-02],
        [ 4.12470511e-03, -1.99868037e-02]],

       [[ 1.98327642e-02, -4.57062475e-01],
        [-5.95278164e+00, -5.14727925e-02],
        [ 7.79966608e-02,  1.21953562e-03],
        [-1.16719969e-01, -1.02934119e-01],
        [ 2.85358389e-02,  4.9

In [25]:
# Report the shape and order of the full tensor that the HOOI result represents.
shape_message = 'The shape of the underlying tensor is {}'
order_message = 'The order of the underlying tensor is {}'
full_shape, order = tensor_tkd_hooi.ft_shape, tensor_tkd_hooi.order
print(shape_message.format(full_shape))
print(order_message.format(order))

The shape of the underlying tensor is (2515, 20, 3)
The order of the underlying tensor is 3


In [26]:
# Display the list of HOOI factor matrices (one per mode).
tensor_tkd_hooi.fmat

[array([[ 0.01000867, -0.01959655, -0.0051046 , ...,  0.03322368,
         -0.01001669, -0.01979307],
        [ 0.01430525, -0.01887538,  0.00131725, ...,  0.02209852,
         -0.00226812, -0.02142148],
        [ 0.01118891, -0.01478457,  0.01430477, ...,  0.00730451,
          0.00571663, -0.01300862],
        ...,
        [ 0.03047045, -0.00221764,  0.01121088, ..., -0.01389224,
         -0.0365665 , -0.01105748],
        [ 0.02965175,  0.00659402,  0.01770557, ..., -0.01002143,
         -0.02153019, -0.01440644],
        [ 0.01031621,  0.01233411,  0.01928529, ..., -0.00540358,
         -0.02824837,  0.0251995 ]]),
 array([[-1.73072489e-01,  3.72721333e-02, -5.38277723e-02,
          1.34774921e-01, -2.77219457e-01, -1.54248582e-01,
         -1.04600239e-01,  2.10279521e-01, -4.85018891e-01,
          6.28031946e-01, -6.28941866e-02,  1.65358080e-01,
         -2.17865827e-01, -1.83546959e-01,  1.78305118e-01,
         -2.11584535e-02,  7.40766806e-02, -1.07542881e-01],
        [-1.

In [27]:
# Rebuild the full tensor from the HOOI representation, print its summary
# and show the raw values (overwrites `tensor_full` from the HOSVD section).
tensor_full = tensor_tkd_hooi.reconstruct()
print(tensor_full)
tensor_full.data

This tensor is of order 3 and consists of 150900 elements.
Sizes and names of its modes are (2515, 20, 3) and ['mode-0', 'mode-1', 'mode-2'] respectively.


array([[[-2.74612375e-03,  2.97405135e-02, -3.47036141e-02],
        [ 4.28706758e-04, -8.48687738e-03,  3.02671921e-02],
        [-4.17274990e-03,  4.52027601e-02, -5.28091852e-02],
        ...,
        [-9.89085141e-04,  8.28880767e-03,  3.16402827e-03],
        [ 1.94885509e-04, -3.51091243e-03,  1.15151067e-02],
        [-1.15978015e-03,  1.00516352e-02,  1.56155167e-03]],

       [[-3.27737028e-03,  3.77626331e-02, -5.60833205e-02],
        [-4.04691123e-04,  2.59421845e-03,  6.44813005e-03],
        [-4.39202328e-03,  5.06133889e-02, -7.52058211e-02],
        ...,
        [-1.71212690e-03,  1.84628703e-02, -2.11230066e-02],
        [-1.71664131e-06, -5.78690248e-04,  3.83944109e-03],
        [-9.78559019e-04,  1.06129646e-02, -1.24644377e-02]],

       [[-7.62676648e-05,  6.03113948e-03, -3.46126568e-02],
        [ 1.17940810e-03, -8.16834192e-03, -1.48622504e-02],
        [-2.14854701e-03,  3.23241369e-02, -8.56904758e-02],
        ...,
        [-6.83225933e-05,  5.57875389e-03,

# TTSVD

In [28]:
# Replace `alg` with a TTSVD instance (default settings) and echo its configuration.
alg = TTSVD()
alg

TTSVD(verbose=False)

In [30]:
# Fit a tensor-train decomposition at the chosen TT-rank, then print the
# representation followed by the summary and raw values of every core.
tt_rank = (40,2)
tensor_tt = alg.decompose(tensor_data, tt_rank)

algorithm_header = "\tOutput of the {} algorithm:".format(alg.name)
print(algorithm_header)
print(tensor_tt)

for i, core in enumerate(tensor_tt.cores):
    core_title = '\n\tTT-Core #{}'.format(i)
    print(core_title)
    print(core)
    print(core.data)

	Output of the TTSVD algorithm:
Tensor train representation of a tensor with tt-rank=(40, 2).
Shape of this representation in the full format is (2515, 20, 3).
Physical modes of its cores represent properties: ['mode-0', 'mode-1', 'mode-2']

	TT-Core #0
This tensor is of order 2 and consists of 100600 elements.
Sizes and names of its modes are (2515, 40) and ['mode-0', 'mode-1'] respectively.
[[-0.01002118 -0.01961588 -0.0050248  ... -0.0103126  -0.0030772
  -0.03163549]
 [-0.01430942 -0.01885868  0.00148562 ... -0.01013152 -0.01089099
  -0.02816942]
 [-0.01118301 -0.01476375  0.01436571 ...  0.00393695 -0.02973984
  -0.01827306]
 ...
 [-0.03046917 -0.00234603  0.01092601 ...  0.00087909  0.0052764
  -0.00356889]
 [-0.02965309  0.00658548  0.01772144 ... -0.01475543 -0.00525716
   0.0020202 ]
 [-0.01032203  0.01235885  0.01941285 ... -0.01901488  0.02444567
   0.00482025]]

	TT-Core #1
This tensor is of order 3 and consists of 1600 elements.
Sizes and names of its modes are (40, 20, 2)

In [None]:
# Report the shape and order of the full tensor that the TT result represents.
shape_message = 'The shape of the underlying tensor is {}'
order_message = 'The order of the underlying tensor is {}'
full_shape, order = tensor_tt.ft_shape, tensor_tt.order
print(shape_message.format(full_shape))
print(order_message.format(order))

In [None]:
# Display the cores of the tensor-train representation. A TT decomposition is
# described by its cores (as iterated in the TTSVD cell), not by factor
# matrices, so `.cores` is the attribute to inspect here.
tensor_tt.cores

In [None]:
# Rebuild the full tensor from the TT representation, print its summary
# and show the raw values (overwrites `tensor_full` from the HOOI section).
tensor_full = tensor_tt.reconstruct()
print(tensor_full)
tensor_full.data

# Evaluating results of tensor decomposition

In [None]:
# Reconstruction quality of the CPD approximation against the original tensor.
# The tensor entries are of order 1e-2, so squared errors are far smaller
# than 0.01. Four significant digits are printed because a fixed two-decimal
# format would show such values as 0.00 and hide any difference between
# the algorithms.
rel_error = residual_rel_error(tensor_data, tensor_cpd)
print('Relative error of CPD approximation = {:.4g}'.format(rel_error))

mse_error = mse(tensor_data, tensor_cpd)
print('MSE error of CPD approximation = {:.4g}'.format(mse_error))

rmse_error = rmse(tensor_data, tensor_cpd)
print('RMSE error of CPD approximation = {:.4g}'.format(rmse_error))

mape_error = mape(tensor_data, tensor_cpd)
print('MAPE error of CPD approximation = {:.4g}'.format(mape_error))

In [None]:
# Reconstruction quality of the HOSVD approximation against the original tensor.
# The tensor entries are of order 1e-2, so squared errors are far smaller
# than 0.01. Four significant digits are printed because a fixed two-decimal
# format would show such values as 0.00 and hide any difference between
# the algorithms.
rel_error = residual_rel_error(tensor_data, tensor_tkd_hosvd)
print('Relative error of HOSVD approximation = {:.4g}'.format(rel_error))

mse_error = mse(tensor_data, tensor_tkd_hosvd)
print('MSE error of HOSVD approximation = {:.4g}'.format(mse_error))

rmse_error = rmse(tensor_data, tensor_tkd_hosvd)
print('RMSE error of HOSVD approximation = {:.4g}'.format(rmse_error))

mape_error = mape(tensor_data, tensor_tkd_hosvd)
print('MAPE error of HOSVD approximation = {:.4g}'.format(mape_error))

In [None]:
# Reconstruction quality of the HOOI approximation against the original tensor.
# The tensor entries are of order 1e-2, so squared errors are far smaller
# than 0.01. Four significant digits are printed because a fixed two-decimal
# format would show such values as 0.00 and hide any difference between
# the algorithms.
rel_error = residual_rel_error(tensor_data, tensor_tkd_hooi)
print('Relative error of HOOI approximation = {:.4g}'.format(rel_error))

mse_error = mse(tensor_data, tensor_tkd_hooi)
print('MSE error of HOOI approximation = {:.4g}'.format(mse_error))

rmse_error = rmse(tensor_data, tensor_tkd_hooi)
print('RMSE error of HOOI approximation = {:.4g}'.format(rmse_error))

mape_error = mape(tensor_data, tensor_tkd_hooi)
print('MAPE error of HOOI approximation = {:.4g}'.format(mape_error))

In [None]:
# Reconstruction quality of the TT approximation against the original tensor.
# The tensor entries are of order 1e-2, so squared errors are far smaller
# than 0.01. Four significant digits are printed because a fixed two-decimal
# format would show such values as 0.00 and hide any difference between
# the algorithms.
rel_error = residual_rel_error(tensor_data, tensor_tt)
print('Relative error of TT approximation = {:.4g}'.format(rel_error))

mse_error = mse(tensor_data, tensor_tt)
print('MSE error of TT approximation = {:.4g}'.format(mse_error))

rmse_error = rmse(tensor_data, tensor_tt)
print('RMSE error of TT approximation = {:.4g}'.format(rmse_error))

mape_error = mape(tensor_data, tensor_tt)
print('MAPE error of TT approximation = {:.4g}'.format(mape_error))

# Plotting reconstructed signals

In [None]:
# Flatten the TT reconstruction into a long DataFrame: one sub-frame per
# slice along the first axis, keyed by that slice's position, so rows carry
# a two-level index and the columns are the entries of the last axis.
arr = tensor_tt.reconstruct().data
slice_frames = [pd.DataFrame(date_slice) for date_slice in arr]
slice_keys = np.arange(arr.shape[0])
df = pd.concat(slice_frames, keys=slice_keys)
df

In [None]:
# Reconstructed returns, as one chronological list per asset.
# Column 0 of `df` holds the returns feature. Unstacking the inner index
# level gives a (date position x asset position) table in one vectorized
# step, instead of rebuilding `df.loc[:, 0]` and doing a MultiIndex lookup
# for every single element. Working from `df` alone also avoids depending
# on the global `arr`, which later cells reassign.
reconstructed_returns_wide = df[0].unstack()
reconstructed_returns = [
    reconstructed_returns_wide[asset_pos].tolist()
    for asset_pos in reconstructed_returns_wide.columns
]

In [None]:
# Reconstructed predictive factor, as one chronological list per asset.
# Column 1 of `df` holds the predictive-factor feature. Unstacking the inner
# index level gives a (date position x asset position) table in one
# vectorized step, instead of rebuilding `df.loc[:, 1]` and doing a
# MultiIndex lookup for every single element. Working from `df` alone also
# avoids depending on the global `arr`, which later cells reassign.
reconstructed_pred_wide = df[1].unstack()
reconstructed_pred = [
    reconstructed_pred_wide[asset_pos].tolist()
    for asset_pos in reconstructed_pred_wide.columns
]

In [None]:
# Reconstructed non-predictive factor, as one chronological list per asset.
# Column 2 of `df` holds the non-predictive-factor feature. Unstacking the
# inner index level gives a (date position x asset position) table in one
# vectorized step, instead of rebuilding `df.loc[:, 2]` and doing a
# MultiIndex lookup for every single element. Working from `df` alone also
# avoids depending on the global `arr`, which later cells reassign.
reconstructed_nonpred_wide = df[2].unstack()
reconstructed_nonpred = [
    reconstructed_nonpred_wide[asset_pos].tolist()
    for asset_pos in reconstructed_nonpred_wide.columns
]

In [None]:
# Original returns, as one chronological list per asset (assets in the
# column order of the pivoted table, i.e. sorted by ticker).
# Pivoting on the 'asset' level replaces integer indexing with a stride of
# 20 on a label-indexed Series: that relied on pandas' deprecated positional
# fallback and on every date holding exactly 20 rows in the same order.
# The global `arr` is deliberately left untouched so the reconstruction
# cells can still be re-run after this one.
original_returns_wide = returns.unstack('asset')
original_returns = [
    original_returns_wide.iloc[:, asset_pos].tolist()
    for asset_pos in range(original_returns_wide.shape[1])
]

In [None]:
# Original predictive factor, as one chronological list per asset (assets in
# the column order of the pivoted table, i.e. sorted by ticker).
# Pivoting on the 'asset' level replaces integer indexing with a stride of
# 20 on a label-indexed Series: that relied on pandas' deprecated positional
# fallback and on every date holding exactly 20 rows in the same order.
# The global `arr` is deliberately left untouched so the reconstruction
# cells can still be re-run after this one.
original_pred_wide = predictive_factor.unstack('asset')
original_pred = [
    original_pred_wide.iloc[:, asset_pos].tolist()
    for asset_pos in range(original_pred_wide.shape[1])
]

In [None]:
# Original non-predictive factor, as one chronological list per asset (assets
# in the column order of the pivoted table, i.e. sorted by ticker).
# Pivoting on the 'asset' level replaces integer indexing with a stride of
# 20 on a label-indexed Series: that relied on pandas' deprecated positional
# fallback and on every date holding exactly 20 rows in the same order.
# The global `arr` is deliberately left untouched so the reconstruction
# cells can still be re-run after this one.
original_nonpred_wide = non_predictive_factor.unstack('asset')
original_nonpred = [
    original_nonpred_wide.iloc[:, asset_pos].tolist()
    for asset_pos in range(original_nonpred_wide.shape[1])
]

In [None]:
# Overlay the original and reconstructed series of one asset, one panel per
# feature. Change `asset_position` to inspect a different asset; positions
# follow the per-asset ordering of the original_* / reconstructed_* lists.
asset_position = 2

panels = [
    ("Returns", original_returns, reconstructed_returns),
    ("Predictive Factors", original_pred, reconstructed_pred),
    ("Non-Predictive Factors", original_nonpred, reconstructed_nonpred),
]

fig, ax = plt.subplots(len(panels), figsize=(20, 15))
for axis, (title, original, reconstructed) in zip(ax, panels):
    axis.plot(original[asset_position])
    axis.plot(reconstructed[asset_position])
    axis.legend(["Original", "Reconstructed"])
    axis.grid()
    axis.set_title(title)
    # Series are plotted against their position in the list, not calendar dates.
    axis.set_xlabel("Observation index (chronological)")

In [None]:
from sklearn.metrics import mean_squared_error

# Per-stock RMSE between the original and the reconstructed series,
# for each of the three features.
returns_rms = {}
pred_rms = {}
non_pred_rms = {}

# Sorted so that position i matches the alphabetical asset order used by the
# original_* / reconstructed_* lists.
stocks=["GOOGL", "AAPL", "AMD", "TSLA", "NFLX", "TXN", "AMAT", "AMZN", "INTC", "NVDA", "ADBE", "CSCO", "V", "IBM", "QCOM", "MSFT", "XLNX", "HPQ", "VZ", "T"]
stocks = sorted(stocks)


def _rmse(original, reconstructed):
    """Return the RMSE between two series, comparing only the overlapping prefix.

    The tensor-building cell leaves out the final date, so the reconstruction
    is shorter than the original series. The original is truncated to the
    reconstruction's length instead of hard-coding ``[:-1]``.
    The square root is taken explicitly because the ``squared=False`` argument
    of ``mean_squared_error`` is deprecated and removed in recent scikit-learn.
    """
    aligned_original = original[:len(reconstructed)]
    return np.sqrt(mean_squared_error(aligned_original, reconstructed))


for i, ticker in enumerate(stocks):
    returns_rms[ticker] = _rmse(original_returns[i], reconstructed_returns[i])
    pred_rms[ticker] = _rmse(original_pred[i], reconstructed_pred[i])
    non_pred_rms[ticker] = _rmse(original_nonpred[i], reconstructed_nonpred[i])

In [None]:
# Per-ticker RMSE of the reconstructed returns.
returns_rms

In [None]:
# Per-ticker RMSE of the reconstructed predictive factor.
pred_rms

In [None]:
# Per-ticker RMSE of the reconstructed non-predictive factor.
non_pred_rms