In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import statsmodels.api as sm
from statsmodels.regression.rolling import RollingOLS

from sklearn.model_selection import cross_val_score, train_test_split,GridSearchCV
from sklearn.metrics import plot_confusion_matrix, plot_roc_curve, accuracy_score, recall_score, precision_score, f1_score
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.preprocessing import StandardScaler

%matplotlib inline

In [2]:
# Read the formatted CSV, taking its first column as the row index, and keep
# only the rows that have no missing values.
df = pd.read_csv('APPL_formatted.csv', index_col=0).dropna()

In [3]:
# Replace the row labels read from the CSV with their parsed datetime
# equivalents so the frame can be sliced by date.
datetime_index = pd.to_datetime(df.index)
df.index = datetime_index

In [4]:
# Columns excluded from the working frame used for the rest of the analysis.
dropped_columns = [
    'sma252', 'sma50', 'ema22zone', 'ema252zone', 'ema50', 'BBU_5_2.0',
    'ema7zone', 'sma22', 'ema50zone', 'BBL_5_2.0', 'BBM_5_2.0', 'ema22',
    'sma7', 'ema252',
]
df2 = df.drop(columns=dropped_columns)

In [5]:
# Pairwise correlations between every column of df2.
# `corrs` is the long form of the full matrix: one row per ordered pair, so it
# contains the self-pairs and both orientations of every pair.
# The value column keeps its historical name 'R2', although it holds the
# correlation coefficient itself, not a squared value.
corr_matrix = df2.corr()
corrs = corr_matrix.stack().reset_index()
corrs.columns = ['1', '2', 'R2']

# Keep only the strict upper triangle of the matrix so that each unordered
# pair appears exactly once and the diagonal (self-correlation) is excluded
# structurally. This replaces two heuristics that could misreport pairs:
#   * "every other row after sorting" only removes mirrored duplicates when
#     the two copies end up adjacent, which ties in the values can break;
#   * an upper cap of .999 also hid distinct columns that are (almost)
#     perfectly collinear -- precisely the pairs this table is meant to expose.
upper_mask = np.triu(np.ones(corr_matrix.shape, dtype=bool), k=1)
pairs = corr_matrix.where(upper_mask).stack().reset_index()
pairs.columns = ['1', '2', 'R2']

# Strongly correlated pairs, strongest first. Masked (NaN) cells never satisfy
# the comparison, so they are dropped here even if stack() kept them.
# NOTE(review): only positive correlations are reported; compare on
# pairs.R2.abs() if strongly negative pairs should be flagged as well.
correlations = (
    pairs[pairs.R2 > .8]
    .sort_values('R2', ascending=False)
    .reset_index(drop=True)
)

In [6]:
# Display the table of highly correlated column pairs built in the previous cell.
correlations

Unnamed: 0,1,2,R2


In [7]:
# Restrict the working frame to rows dated strictly after the cutoff.
after_cutoff = df2.index > '2010-1-1'
df2 = df2.loc[after_cutoff]

In [8]:
# Features: every column moved down by one row, so each row of X holds the
# previous row's values. Rows containing NaN (at least the first one, which
# the shift empties) are removed.
lagged = df2.shift(1)
X = lagged.dropna()

# Target: the un-shifted 'adjustedclose', skipping the first row so that it
# lines up positionally with X.
y = df2.iloc[1:]['adjustedclose']

In [9]:
# Row counts for an 80% / 20% split of X, each rounded to the nearest integer.
n_rows = len(X)
train_num, test_num = round(n_rows * .8), round(n_rows * .2)
print(train_num, test_num)

2410 602


In [10]:
# Positional split without shuffling: the first `train_num` rows form the
# training set and the last `test_num` rows form the test set, for features
# and target alike.
X_train, y_train = X.iloc[:train_num], y.iloc[:train_num]
X_test, y_test = X.iloc[-test_num:], y.iloc[-test_num:]

In [11]:
# Standardise the features. The scaler is fitted on the training rows only and
# then applied unchanged to the test rows, so the test set does not influence
# the scaling parameters.
ss = StandardScaler()
X_train_scaled = ss.fit_transform(X_train)
X_test_scaled = ss.transform(X_test)

# Rebuild DataFrames from the scaled arrays, restoring both the column names
# and the original row index. Without `index=` the frames would get a fresh
# 0..n-1 index and no longer align with y_train / y_test, which keep the
# original row labels.
X_train_scaled = pd.DataFrame(X_train_scaled, columns=X_train.columns, index=X_train.index)
X_test_scaled = pd.DataFrame(X_test_scaled, columns=X_test.columns, index=X_test.index)

In [12]:
# Give the scaled training features the same row labels as y_train so the two
# align, then add a constant (intercept) column for the regression.
X_train_scaled.index = y_train.index
exog = sm.add_constant(X_train_scaled)

In [20]:
# Fit an OLS regression of y_train on exog over a rolling window of 22 rows.
rols = RollingOLS(endog=y_train, exog=exog, window=22)
rres = rols.fit()

# Average of the per-window adjusted R² values (the mean skips NaN entries).
# NOTE(review): the per-window R² shown in the next cell contains large
# negative values (e.g. -176), which suggests some window fits are poorly
# conditioned -- confirm before relying on this average.
adjusted_r2_by_window = rres.rsquared_adj
rsquared = adjusted_r2_by_window.mean()

In [24]:
# Inspect the per-window R² series from the rolling fit. In the output shown
# below, the first rows are NaN and several later windows are strongly negative.
# NOTE(review): verify that the window size is adequate for the number of
# regressors before interpreting these values.
rres.rsquared

date
2010-01-05           NaN
2010-01-06           NaN
2010-01-07           NaN
2010-01-08           NaN
2010-01-11           NaN
                 ...    
2019-07-26    -13.210137
2019-07-29     -3.352417
2019-07-30   -176.200971
2019-07-31      0.763068
2019-08-01           NaN
Length: 2410, dtype: float64

In [22]:
# Work on a copy of the rolling coefficient table so the fit results stay
# untouched: relabel its rows 1..n by position, then drop the rows that
# contain NaN coefficients.
row_labels = np.arange(1, len(rres.params) + 1)
params = rres.params.copy()
params.index = row_labels
params = params.dropna()