## Feature Selection

In [1]:
import pandas as pd
import numpy as np
from statsmodels.tsa.stattools import adfuller
import matplotlib.pyplot as plt
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler

  from pandas.core import datetools


In [2]:
# Load the dataset from CSV, using its 'time' column as the row index

df = pd.read_csv('cap1_df.csv', index_col='time')

In [3]:
# Standardize every column of df with scikit-learn's StandardScaler
# NOTE(review): df_scaled is not referenced by any later cell shown in this
# notebook (the differencing step reads the unscaled df) — confirm that is intended.

sc_x = StandardScaler()
scaled_values = sc_x.fit_transform(df)
df_scaled = pd.DataFrame(scaled_values, index=df.index, columns=df.columns)

In [4]:
# Perform differencing to make the series stationary

# Create difference function, with default value of lag 24
def difference(dataset, interval=24):
    """Return the lag-``interval`` differences of a one-dimensional sequence.

    Element ``k`` of the result is the value at position ``k + interval``
    minus the value at position ``k``.

    Values are read by position through a NumPy array instead of
    ``dataset[i]``: on a pandas Series whose index holds integers, ``[]``
    with an integer is a label lookup, so positional access has to be made
    explicit to get the intended rows.

    Parameters
    ----------
    dataset : sequence, numpy array or pandas Series
        The observations, in order.
    interval : int, default 24
        Number of positions between the two values being subtracted.

    Returns
    -------
    list
        ``len(dataset) - interval`` differences; empty when the input has
        no more than ``interval`` items.

    Raises
    ------
    ValueError
        If ``interval`` is negative.
    """
    if interval < 0:
        raise ValueError('interval must be non-negative')
    values = np.asarray(dataset)
    # Clamp at zero so a series shorter than the lag yields an empty result
    n_pairs = max(len(values) - interval, 0)
    earlier = values[:n_pairs]
    later = values[interval:]
    return list(later - earlier)

# Differencing the dataset
inter_d = 24
# DataFrame.diff subtracts from each row the row inter_d positions earlier and
# leaves the first inter_d rows as NaN. That is the same table as NaN-padding
# difference(df[col], inter_d) for every column, but it is built in a single
# vectorized call instead of concatenating one column at a time in a loop.
df_diff = df.diff(periods=inter_d)


In [5]:
# Run the augmented Dickey-Fuller test on each differenced series to check for stationarity

# Keep only the rows without NaN (differencing leaves NaN in the leading rows)
diff = df_diff.dropna()

for column in diff.columns:
    adf_output = adfuller(diff[column].values)
    adf_stat, p_value = adf_output[0], adf_output[1]
    # The ANSI escape codes print the column name in bold
    print('\033[1m' + column + '\033[0m')
    print('ADF Statistic:', adf_stat)
    print('p-value:', p_value)

[1mXEM[0m
ADF Statistic: -6.83150095951
p-value: 1.88858182501e-09
[1mIOT[0m
ADF Statistic: -5.98245899634
p-value: 1.82481756577e-07
[1mamd[0m
ADF Statistic: -5.01907813356
p-value: 2.03424427636e-05
[1mnvda[0m
ADF Statistic: -5.49441702533
p-value: 2.13933248844e-06
[1mmc_iota[0m
ADF Statistic: -5.6864627114
p-value: 8.25761702993e-07
[1mmc_nem[0m
ADF Statistic: -5.91136047062
p-value: 2.63416200312e-07
[1mgg_crypto[0m
ADF Statistic: -5.64278120984
p-value: 1.02742466525e-06
[1mgg_nem_p[0m
ADF Statistic: -6.9377547564
p-value: 1.043980964e-09
[1mgg_iota_p[0m
ADF Statistic: -6.95764227342
p-value: 9.33941718838e-10
[1mgg_bitcoin_p[0m
ADF Statistic: -6.09183701313
p-value: 1.03202549508e-07
[1mgg_nem_w[0m
ADF Statistic: -7.02679923165
p-value: 6.33382076644e-10
[1mgg_iota_w[0m
ADF Statistic: -6.46430742298
p-value: 1.41774221815e-08
[1mgg_gpu[0m
ADF Statistic: -5.40719447168
p-value: 3.27132126135e-06
[1mrd_CrypCurr_g[0m
ADF Statistic: -6.16210166326
p-value

In [6]:
# Split the differenced data into predictors (X) and train/test targets

predictor_cols = [
    'IOT', 'amd', 'nvda', 'mc_iota', 'mc_nem', 'gg_crypto',
    'gg_nem_p', 'gg_iota_p', 'gg_bitcoin_p', 'gg_nem_w', 'gg_iota_w',
    'gg_gpu', 'rd_CrypCurr_g', 'rd_nem_g', 'rd_Iota_g',
]
# X is kept whole here; the cells that use it slice it at row 1310 themselves
X = diff[predictor_cols]
# The target 'XEM' is split by position: first 1310 rows train, the rest test
Ytrain = diff[['XEM']].iloc[:1310]
Ytest = diff[['XEM']].iloc[1310:]

In [7]:
# Fit a Ridge regression on the training rows and print each predictor's coefficient
# NOTE(review): `normalize=True` is not accepted by recent scikit-learn
# releases — confirm the installed version before re-running this cell.

ridge = Ridge(alpha=0.1, normalize=True)
ridge.fit(X[:1310], Ytrain)
coef = list(ridge.coef_)
# coef[0] is walked in step with X's columns, one weight per predictor
count = 0  # running tally of printed coefficients, left in the notebook namespace
for name, weight in zip(X.columns, coef[0]):
    print(name, ':', weight)
    count += 1

IOT : 0.145924325373
amd : 0.000453795679276
nvda : 0.00044978898306
mc_iota : -1.76028646061e-11
mc_nem : 3.8710608909e-11
gg_crypto : -7.26360715452e-05
gg_nem_p : 1.06398857884e-05
gg_iota_p : 4.27204867118e-05
gg_bitcoin_p : -0.000112799003464
gg_nem_w : 0.000104502545265
gg_iota_w : -0.000166305719068
gg_gpu : 4.7757483564e-05
rd_CrypCurr_g : 6.64618024296e-06
rd_nem_g : 0.000101917798405
rd_Iota_g : -5.20553266801e-06


In [8]:
# Selected features: the seven predictors with the largest absolute
# Ridge coefficients in the output printed above

sig = [
    'IOT',
    'amd',
    'nvda',
    'gg_bitcoin_p',
    'gg_nem_w',
    'gg_iota_w',
    'rd_nem_g',
]

In [9]:
# Constructing dataframe

# Select the chosen predictors by label in one step. Passing columns=[sig]
# (a list nested inside a list) to the DataFrame constructor would be read as
# MultiIndex levels rather than seven flat column names, so plain label
# selection is used; .copy() makes X_sig independent of X.
X_sig = X[sig].copy()

# Save dataframe
# 1310 is the same row position used to split Ytrain / Ytest
n_train = 1310
X_sig.iloc[:n_train].to_csv('X_train.csv')
X_sig.iloc[n_train:].to_csv('X_test.csv')
Ytrain.to_csv('Y_train.csv')
Ytest.to_csv('Y_test.csv')

### Conclusions: 
We performed Ridge Regression on 15 predictors. The smaller the absolute value of a coefficient, the less significant the predictor is. We selected the 7 predictors with the largest absolute coefficients for our model. 