Accepts ma feature space and provides an introspective results to identify characteristics and guide further analysis. A first layer, high level review of a given dataset to assist in building useful models.

# Time series analysis

In [1]:
import sys, os
from pathlib import Path
from datetime import datetime, date

current_path = Path.cwd().resolve()
project_root = current_path.parents[2]  
sys.path.append(str(project_root))
project_root

WindowsPath('C:/Users/micha/OneDrive/Documents/code/pyfi')

In [2]:
from pyfi.lib.retrievers import fred
data = fred.get_matrix(ids = ['VIXCLS', *fred.IDS_STANDARD])
# data

In [9]:
fred.get_release_id_from_series_name('GDP')
fred.get_release_dates('53').iloc[-10:]

Unnamed: 0,release_id,date
825,53,2024-02-28
826,53,2024-03-28
827,53,2024-04-25
828,53,2024-05-30
829,53,2024-06-27
830,53,2024-07-25
831,53,2024-08-29
832,53,2024-09-26
833,53,2024-10-02
834,53,2024-10-30


In [7]:
fred.get_matrix(ids=['GDP']).iloc[-10:]

Unnamed: 0_level_0,GDP
date,Unnamed: 1_level_1
2022-04-01,25805.791
2022-07-01,26272.011
2022-10-01,26734.277
2023-01-01,27164.359
2023-04-01,27453.815
2023-07-01,27967.697
2023-10-01,28296.967
2024-01-01,28624.069
2024-04-01,29016.714
2024-07-01,29349.924


In [3]:
from pyfi.lib.datasci.time_series.timeseries import TimeSeries, Frequency, AggFunc
ts = TimeSeries(df=data, dep_var = 'GDP')
ts


<pyfi.lib.datasci.time_series.timeseries.TimeSeries at 0x1f52b16d810>

In [4]:
ts.describe()



Unnamed: 0,VIXCLS,GDP,CPIAUCSL,UNRATE,BAMLH0A0HYM2,SP500
mean,19.476622,7379.807868,121.100909,5.686334,5.302562,3316.336277
median,17.63,4444.094,108.85,5.5,4.65,2979.39
std,7.847576,7685.636801,86.715145,1.709642,2.547828,1058.386902
var,61.584457,59069013.032196,7519.516354,2.922875,6.491427,1120182.835047
skew,2.181118,1.008879,0.435281,0.860028,2.501414,0.514679
kurtosis,8.502122,-0.009631,-1.12564,1.070466,9.232034,-0.809094
cv,0.402923,1.041441,0.716057,0.300658,0.48049,0.319143
min,9.14,243.164,21.48,2.5,2.41,1829.08
max,82.69,29349.924,315.454,14.8,21.82,6001.35
range,73.55,29106.76,293.974,12.3,19.41,4172.27


In [5]:
ts.group(frequency=Frequency.MONTHLY, aggfunc=AggFunc.LAST)

In [6]:
ts.df = ts.df.dropna(how='any', axis=0)

In [7]:
ts.df.head()

Unnamed: 0_level_0,VIXCLS,GDP,CPIAUCSL,UNRATE,BAMLH0A0HYM2,SP500
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-01-31,20.97,18063.529,234.747,5.7,5.26,1994.99
2015-04-30,14.55,18279.784,236.222,5.4,4.59,2085.51
2015-07-31,12.12,18401.626,238.034,5.2,5.36,2103.84
2015-10-31,15.07,18435.137,237.733,5.0,5.9,2079.36
2016-01-31,20.2,18525.933,237.652,4.8,7.77,1940.24


In [8]:
transformed_df, transformations_applied = ts.naive_transform()

In [9]:
ts.df.head()

Unnamed: 0_level_0,VIXCLS_log,GDP_scale,CPIAUCSL_scale,UNRATE_log,BAMLH0A0HYM2_log,SP500_scale
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-01-31,3.089678,-1.265455,-1.203651,1.902108,1.83418,-1.28645
2015-04-30,2.744061,-1.203833,-1.14497,1.856298,1.720979,-1.195251
2015-07-31,2.574138,-1.169115,-1.072882,1.824549,1.850028,-1.176784
2015-10-31,2.776954,-1.159566,-1.084857,1.791759,1.931521,-1.201447
2016-01-31,3.054001,-1.133694,-1.088079,1.757858,2.171337,-1.341611


In [10]:
transformations_applied

{'VIXCLS_log': ['log'],
 'GDP_scale': ['scale'],
 'CPIAUCSL_scale': ['scale'],
 'UNRATE_log': ['log'],
 'BAMLH0A0HYM2_log': ['log'],
 'SP500_scale': ['scale']}

In [11]:
ts.get_stationarity()

Unnamed: 0,id,ADF Statistic,p-value,lags,n-obs,bool
0,VIXCLS_log,-1.716504,0.42263,8.0,30.0,False
1,GDP_scale,1.041473,0.994685,0.0,38.0,False
2,CPIAUCSL_scale,0.200527,0.972291,2.0,36.0,False
3,UNRATE_log,-2.932281,0.041721,0.0,38.0,True
4,BAMLH0A0HYM2_log,-3.153544,0.022836,3.0,35.0,True
5,SP500_scale,2.029735,0.99871,9.0,29.0,False


In [12]:
coef, pval, tall = ts.get_correlation()

In [13]:
coef

Unnamed: 0,VIXCLS_log,GDP_scale,CPIAUCSL_scale,UNRATE_log,BAMLH0A0HYM2_log,SP500_scale
VIXCLS_log,1.0,0.221845,0.23301,0.435626,0.30707,0.272487
GDP_scale,0.221845,1.0,0.994724,-0.373852,-0.431881,0.949623
CPIAUCSL_scale,0.23301,0.994724,1.0,-0.343274,-0.368945,0.930346
UNRATE_log,0.435626,-0.373852,-0.343274,1.0,0.502011,-0.22474
BAMLH0A0HYM2_log,0.30707,-0.431881,-0.368945,0.502011,1.0,-0.518552
SP500_scale,0.272487,0.949623,0.930346,-0.22474,-0.518552,1.0


In [14]:
pval

Unnamed: 0,VIXCLS_log,GDP_scale,CPIAUCSL_scale,UNRATE_log,BAMLH0A0HYM2_log,SP500_scale
VIXCLS_log,0.0,0.174677,0.153425,0.005573,0.057239,0.0933
GDP_scale,0.174677,0.0,0.0,0.019054,0.006043,0.0
CPIAUCSL_scale,0.153425,0.0,0.0,0.032396,0.020816,0.0
UNRATE_log,0.005573,0.019054,0.032396,0.0,0.001129,0.168975
BAMLH0A0HYM2_log,0.057239,0.006043,0.020816,0.001129,0.0,0.00072
SP500_scale,0.0933,0.0,0.0,0.168975,0.00072,0.0


In [15]:
tall

Unnamed: 0,Feature_1,Feature_2,Coefficient,P_Value
0,VIXCLS_log,VIXCLS_log,1.0,
1,VIXCLS_log,GDP_scale,0.221845,0.1746772
2,VIXCLS_log,CPIAUCSL_scale,0.23301,0.1534246
3,VIXCLS_log,UNRATE_log,0.435626,0.00557278
4,VIXCLS_log,BAMLH0A0HYM2_log,0.30707,0.0572389
5,VIXCLS_log,SP500_scale,0.272487,0.0933001
6,GDP_scale,GDP_scale,1.0,
7,GDP_scale,CPIAUCSL_scale,0.994724,3.3731559999999996e-38
8,GDP_scale,UNRATE_log,-0.373852,0.01905423
9,GDP_scale,BAMLH0A0HYM2_log,-0.431881,0.006043154


In [16]:
coef, pval, tall = ts.get_cointegration()

In [17]:
coef

Unnamed: 0,VIXCLS_log,GDP_scale,CPIAUCSL_scale,UNRATE_log,BAMLH0A0HYM2_log,SP500_scale
VIXCLS_log,,-1.817339,-1.84814,-3.231868,-2.145962,-1.702583
GDP_scale,-0.37339,,-3.656479,0.120171,0.189242,-3.173143
CPIAUCSL_scale,-0.090394,-3.615594,,-0.159126,-0.834647,-1.052647
UNRATE_log,-3.168683,-2.997613,-3.090854,,-2.632266,-2.960132
BAMLH0A0HYM2_log,-2.29137,-2.780332,-2.821817,-3.527387,,-2.793203
SP500_scale,0.584814,-2.849108,-0.774093,0.286892,-0.377418,


In [18]:
pval

Unnamed: 0,VIXCLS_log,GDP_scale,CPIAUCSL_scale,UNRATE_log,BAMLH0A0HYM2_log,SP500_scale
VIXCLS_log,,0.621262,0.606058,0.064758,0.452379,0.675724
GDP_scale,0.972218,,0.020823,0.988334,0.98945,0.074535
CPIAUCSL_scale,0.983565,0.023434,,0.981435,0.929996,0.892399
UNRATE_log,0.075323,0.110774,0.090154,,0.224879,0.119989
BAMLH0A0HYM2_log,0.377867,0.172023,0.158842,0.030044,,0.167857
SP500_scale,0.99314,0.150559,0.938024,0.99074,0.971999,


In [19]:
tall

Unnamed: 0,Feature_1,Feature_2,Coefficient,P_Value
0,VIXCLS_log,VIXCLS_log,1.0,
1,VIXCLS_log,GDP_scale,0.221845,0.1746772
2,VIXCLS_log,CPIAUCSL_scale,0.23301,0.1534246
3,VIXCLS_log,UNRATE_log,0.435626,0.00557278
4,VIXCLS_log,BAMLH0A0HYM2_log,0.30707,0.0572389
5,VIXCLS_log,SP500_scale,0.272487,0.0933001
6,GDP_scale,GDP_scale,1.0,
7,GDP_scale,CPIAUCSL_scale,0.994724,3.3731559999999996e-38
8,GDP_scale,UNRATE_log,-0.373852,0.01905423
9,GDP_scale,BAMLH0A0HYM2_log,-0.431881,0.006043154


In [20]:
ts.decompose(var=ts.num_cols[0]).head()

KeyError: 'VIXCLS'

In [None]:
ratio, z_score, adf = ts.get_feature_ratios()

In [None]:
ratio.head()#.dropna(how = 'any', axis=0)

NameError: name 'ratio' is not defined

In [None]:
z_score.head()

Unnamed: 0_level_0,VIXCLS/GDP,VIXCLS/CPIAUCSL,VIXCLS/UNRATE,VIXCLS/BAMLH0A0HYM2,VIXCLS/SP500,GDP/CPIAUCSL,GDP/UNRATE,GDP/BAMLH0A0HYM2,GDP/SP500,CPIAUCSL/UNRATE,CPIAUCSL/BAMLH0A0HYM2,CPIAUCSL/SP500,UNRATE/BAMLH0A0HYM2,UNRATE/SP500,BAMLH0A0HYM2/SP500
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2015-01-31,1.09199,0.793006,-0.230632,-0.203126,1.947546,-1.435874,-1.16651,-1.234743,1.534677,-1.117689,-1.192285,1.623118,-0.086152,1.39378,1.429803
2015-04-30,-0.086547,-0.297827,-0.895469,-0.731432,0.443442,-1.352673,-1.042722,-0.905226,1.293371,-0.972354,-0.776203,1.394037,0.195471,1.087598,0.869645
2015-07-31,-0.530019,-0.71777,-1.141059,-1.319235,-0.07393,-1.36744,-0.954722,-1.235339,1.27798,-0.857104,-1.205639,1.387499,-0.43052,0.953147,1.315533
2015-10-31,-0.017117,-0.227197,-0.679672,-1.129685,0.55864,-1.321758,-0.8698,-1.420735,1.37731,-0.756616,-1.456148,1.449311,-0.802774,0.876466,1.687969
2016-01-31,0.864191,0.62263,0.126864,-1.100251,1.904887,-1.243628,-0.770966,-1.865571,1.946565,-0.645153,-2.047051,1.873939,-1.499688,0.955719,3.188629
2016-04-30,0.052594,-0.136655,-0.636161,-1.14654,0.70944,-1.178554,-0.880149,-1.487698,1.539343,-0.795511,-1.566241,1.521571,-0.882271,0.950523,1.905776
2016-07-31,-0.628032,-0.775961,-1.045067,-1.432482,-0.201551,-1.103585,-0.727205,-1.303128,1.232256,-0.6162,-1.340298,1.247955,-0.814523,0.65212,1.405598
2016-10-31,0.229059,0.055227,-0.363879,-0.534402,0.889055,-1.050025,-0.750218,-0.962109,1.47123,-0.655136,-0.911943,1.416462,-0.346131,0.762216,1.009057
2017-01-31,-0.648706,-0.784658,-0.992294,-0.842968,-0.286491,-1.016669,-0.632016,-0.401991,1.03917,-0.513343,-0.201605,1.062641,0.191009,0.485357,0.296706
2017-04-30,-0.859462,-0.977656,-1.054403,-0.944912,-0.594225,-0.928118,-0.451163,-0.232552,0.78289,-0.305379,-0.007548,0.829313,0.12989,0.23738,0.094558


In [None]:
adf

Unnamed: 0,id,ADF Statistic,p-value,lags,n-obs,Critical Value 1%,Critical Value 5%,Critical Value 10%,bool
0,VIXCLS/GDP,-3.437264,0.009752,0.0,38.0,-3.615509,-2.941262,-2.6092,True
1,VIXCLS/CPIAUCSL,-3.276342,0.01597,0.0,38.0,-3.615509,-2.941262,-2.6092,True
2,VIXCLS/UNRATE,-3.173595,0.021565,0.0,38.0,-3.615509,-2.941262,-2.6092,True
3,VIXCLS/BAMLH0A0HYM2,-2.639684,0.08507,0.0,38.0,-3.615509,-2.941262,-2.6092,False
4,VIXCLS/SP500,-1.659515,0.452083,5.0,33.0,-3.646135,-2.954127,-2.615968,False
5,GDP/CPIAUCSL,-0.605225,0.869797,0.0,38.0,-3.615509,-2.941262,-2.6092,False
6,GDP/UNRATE,-1.653931,0.454991,0.0,38.0,-3.615509,-2.941262,-2.6092,False
7,GDP/BAMLH0A0HYM2,-1.013004,0.748492,4.0,34.0,-3.639224,-2.95123,-2.614447,False
8,GDP/SP500,-0.983466,0.759247,0.0,38.0,-3.615509,-2.941262,-2.6092,False
9,CPIAUCSL/UNRATE,-1.924813,0.32048,0.0,38.0,-3.615509,-2.941262,-2.6092,False


# Regression

In [None]:
summary, spread, spread_z, spread_adf = ts.get_regression_spread()

            VIXCLS        GDP  CPIAUCSL  UNRATE  BAMLH0A0HYM2    SP500
dt                                                                    
2015-01-31   20.97  18063.529   234.747     5.7          5.26  1994.99
2015-04-30   14.55  18279.784   236.222     5.4          4.59  2085.51
2015-07-31   12.12  18401.626   238.034     5.2          5.36  2103.84
2015-10-31   15.07  18435.137   237.733     5.0          5.90  2079.36
2016-01-31   20.20  18525.933   237.652     4.8          7.77  1940.24
2016-04-30   15.70  18711.702   238.992     5.1          6.21  2065.30
2016-07-31   11.87  18892.639   240.101     4.8          5.69  2173.60
2016-10-31   17.06  19089.379   241.741     4.9          4.91  2126.15
2017-01-31   11.99  19280.084   243.618     4.7          4.00  2278.87
2017-04-30   10.82  19438.643   244.193     4.4          3.81  2384.20
2017-07-31   10.26  19692.595   244.243     4.3          3.61  2470.30
2017-10-31   10.18  20037.088   246.626     4.2          3.51  2575.26
2018-0

In [None]:
summary

Unnamed: 0,ts1,ts2,alpha,beta,f_statistic,r_squared,p_value_intercept,p_value_coefficient,id
0,BAMLH0A0HYM2,CPIAUCSL,301.44,-8.33,5.73,0.13,0.0,0.02,BAMLH0A0HYM2_CPIAUCSL
1,BAMLH0A0HYM2,GDP,28468.29,-1362.79,8.36,0.18,0.0,0.01,BAMLH0A0HYM2_GDP
2,BAMLH0A0HYM2,SP500,5230.82,-447.64,12.24,0.25,0.0,0.0,BAMLH0A0HYM2_SP500
3,BAMLH0A0HYM2,UNRATE,0.43,1.0,15.34,0.29,0.71,0.0,BAMLH0A0HYM2_UNRATE
4,BAMLH0A0HYM2,VIXCLS,9.92,1.91,4.05,0.1,0.03,0.05,BAMLH0A0HYM2_VIXCLS
5,CPIAUCSL,BAMLH0A0HYM2,8.64,-0.02,5.73,0.13,0.0,0.02,CPIAUCSL_BAMLH0A0HYM2
6,CPIAUCSL,GDP,-14298.88,138.88,3478.51,0.99,0.0,0.0,CPIAUCSL_GDP
7,CPIAUCSL,SP500,-6463.49,36.74,238.18,0.87,0.0,0.0,CPIAUCSL_SP500
8,CPIAUCSL,UNRATE,10.27,-0.02,2.53,0.06,0.01,0.12,CPIAUCSL_UNRATE
9,CPIAUCSL,VIXCLS,5.36,0.05,1.27,0.03,0.64,0.27,CPIAUCSL_VIXCLS


In [None]:
spread

Unnamed: 0,date,id,value
0,2015-01-31,BAMLH0A0HYM2_CPIAUCSL,-22.895766
1,2015-04-30,BAMLH0A0HYM2_CPIAUCSL,-26.999456
2,2015-07-31,BAMLH0A0HYM2_CPIAUCSL,-18.776125
3,2015-10-31,BAMLH0A0HYM2_CPIAUCSL,-14.580867
4,2016-01-31,BAMLH0A0HYM2_CPIAUCSL,0.908508
...,...,...,...
1165,2023-07-31,VIXCLS_UNRATE,-0.603965
1166,2023-10-31,VIXCLS_UNRATE,-0.988271
1167,2024-01-31,VIXCLS_UNRATE,-0.513211
1168,2024-04-30,VIXCLS_UNRATE,-0.510461


In [None]:
spread_z

Unnamed: 0,date,id,value
0,2015-01-31,BAMLH0A0HYM2_CPIAUCSL,-0.966234
1,2015-04-30,BAMLH0A0HYM2_CPIAUCSL,-1.139415
2,2015-07-31,BAMLH0A0HYM2_CPIAUCSL,-0.792379
3,2015-10-31,BAMLH0A0HYM2_CPIAUCSL,-0.615333
4,2016-01-31,BAMLH0A0HYM2_CPIAUCSL,0.038340
...,...,...,...
1165,2023-07-31,VIXCLS_UNRATE,-0.336506
1166,2023-10-31,VIXCLS_UNRATE,-0.550626
1167,2024-01-31,VIXCLS_UNRATE,-0.285941
1168,2024-04-30,VIXCLS_UNRATE,-0.284409


In [None]:
spread_adf

Unnamed: 0,id,ADF Statistic,p-value,lags,n-obs,Critical Value 1%,Critical Value 5%,Critical Value 10%,bool
0,BAMLH0A0HYM2_CPIAUCSL,-0.339725,0.919687,1.0,37.0,-3.620918,-2.943539,-2.6104,False
1,BAMLH0A0HYM2_GDP,-0.416,0.907393,1.0,37.0,-3.620918,-2.943539,-2.6104,False
2,BAMLH0A0HYM2_SP500,-0.315241,0.923318,1.0,37.0,-3.620918,-2.943539,-2.6104,False
3,BAMLH0A0HYM2_UNRATE,-3.005319,0.034401,0.0,38.0,-3.615509,-2.941262,-2.6092,True
4,BAMLH0A0HYM2_VIXCLS,-2.823739,0.054962,0.0,38.0,-3.615509,-2.941262,-2.6092,False
5,CPIAUCSL_BAMLH0A0HYM2,-3.074832,0.028473,0.0,38.0,-3.615509,-2.941262,-2.6092,True
6,CPIAUCSL_GDP,-3.606562,0.005637,0.0,38.0,-3.615509,-2.941262,-2.6092,True
7,CPIAUCSL_SP500,-1.21082,0.668924,9.0,29.0,-3.67906,-2.967882,-2.623158,False
8,CPIAUCSL_UNRATE,-3.552296,0.006741,0.0,38.0,-3.615509,-2.941262,-2.6092,True
9,CPIAUCSL_VIXCLS,-3.224302,0.01862,0.0,38.0,-3.615509,-2.941262,-2.6092,True


# Feature Engine