In [1]:
import numpy as np
import pandas as pd
import ta
import seaborn as sn
import matplotlib.pyplot as plt

from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier

from ta import add_all_ta_features
from ta import momentum
from ta.utils import dropna

In [2]:
#pip install xgboost

In [3]:
pip install autofeat

Note: you may need to restart the kernel to use updated packages.


In [4]:
def get_data(company_symbol):
    return pd.read_csv("dataset/"+company_symbol+".csv")

In [5]:
data = get_data("NFLX")
data

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,5/23/2002,1.156429,1.242857,1.145714,1.196429,1.196429,104790000
1,5/24/2002,1.214286,1.225000,1.197143,1.210000,1.210000,11104800
2,5/28/2002,1.213571,1.232143,1.157143,1.157143,1.157143,6609400
3,5/29/2002,1.164286,1.164286,1.085714,1.103571,1.103571,6757800
4,5/30/2002,1.107857,1.107857,1.071429,1.071429,1.071429,10154200
...,...,...,...,...,...,...,...
4890,10/25/2021,663.739990,675.880005,657.070007,671.659973,671.659973,3833500
4891,10/26/2021,673.760010,676.489990,662.770020,668.520020,668.520020,2904800
4892,10/27/2021,669.000000,671.409973,661.849976,662.919983,662.919983,2276900
4893,10/28/2021,670.950012,676.799988,668.030029,674.049988,674.049988,2859400


In [6]:
data.dtypes

Date          object
Open         float64
High         float64
Low          float64
Close        float64
Adj Close    float64
Volume         int64
dtype: object

In [7]:
data.isna().any()

Date         False
Open         False
High         False
Low          False
Close        False
Adj Close    False
Volume       False
dtype: bool

In [8]:
data[data.columns[data.isna().any()]]

0
1
2
3
4
...
4890
4891
4892
4893
4894


In [9]:
cond1 = data.Date.isna() # aapl['Date'].isna()
cond2 = data.Date.str.contains(r'^\s*$', na=False)
cond3 = data.Date == ''

data_checked = data.assign(cond1= cond1, cond2= cond2, cond3= cond3)
print (data_checked)

            Date        Open        High         Low       Close   Adj Close  \
0      5/23/2002    1.156429    1.242857    1.145714    1.196429    1.196429   
1      5/24/2002    1.214286    1.225000    1.197143    1.210000    1.210000   
2      5/28/2002    1.213571    1.232143    1.157143    1.157143    1.157143   
3      5/29/2002    1.164286    1.164286    1.085714    1.103571    1.103571   
4      5/30/2002    1.107857    1.107857    1.071429    1.071429    1.071429   
...          ...         ...         ...         ...         ...         ...   
4890  10/25/2021  663.739990  675.880005  657.070007  671.659973  671.659973   
4891  10/26/2021  673.760010  676.489990  662.770020  668.520020  668.520020   
4892  10/27/2021  669.000000  671.409973  661.849976  662.919983  662.919983   
4893  10/28/2021  670.950012  676.799988  668.030029  674.049988  674.049988   
4894  10/29/2021  673.059998  690.969971  671.239990  690.309998  690.309998   

         Volume  cond1  cond2  cond3  


In [10]:
def simple_moving_average_5(close):
    return close.rolling(5, min_periods=1).mean()

In [11]:
def simple_moving_average_10(close):
    return close.rolling(10, min_periods=1).mean()

In [12]:
def stochastic_k(high, low, close):
    return (((close-low.rolling(14).min())/(high.rolling(14).max()-low.rolling(14).min()))*100)

In [13]:
def stochastic_d(stochastic_k):
    return stochastic_k.rolling(3,min_periods=1).mean()

In [14]:
def larry_williams_r(high, low, close):
    return (((high.rolling(14).max()-close)/(high.rolling(14).max()-low.rolling(14).min()))*100)

In [15]:
def rate_of_change(close):
    return ((close-close.shift(12))/close.shift(12))

In [16]:
def price_volume_trend(close, volume):
    return (((close-close.shift(1))/close.shift(1))*volume)

In [17]:
def accumulation_distribution_oscillator(high, low, close):
    return((high-close.shift(1))/(high-low))

In [18]:
def weighted_moving_average_10(close):
    return ((10*close+9*close.shift(1)+8*close.shift(2)+7*close.shift(3)
             +6*close.shift(4)+5*close.shift(5)+4*close.shift(6)+3*close.shift(7)
             +2*close.shift(8)+close.shift(9))/(10+9+8+7+6+5+4+3+2+1))

In [19]:
def exponential_moving_average_12(close):
    return (close.ewm(span=12, adjust=False).mean())

In [20]:
def exponential_moving_average_26(close):
    return (close.ewm(span=26, adjust=False).mean())

In [21]:
def moving_average_convergence_divergence(ema_12, ema_26):
    return (ema_12 - ema_26)

In [22]:
def momentum(close):
    return ((close/close.shift(14))*100)

In [23]:
def change(close):
    return (close.shift(-1) - close)

In [24]:
def direction(change):
    return (np.where(change>0, 1, 0))

In [25]:
def relative_strength_index(direction):
    return (100-(100/(1+(direction.rolling(14).sum()/14)/(14-direction.rolling(14).sum())/14)))

In [26]:
def channel_commodity_index(df, ndays): 
    df['TP'] = (df['High'] + df['Low'] + df['Close']) / 3 
    df['sma'] = df['TP'].rolling(ndays).mean()
    df['mad'] = df['TP'].rolling(ndays).apply(lambda x: pd.Series(x).mad())
    df['CCI'] = (df['TP'] - df['sma']) / (0.015 * df['mad']) 
    return df['CCI']

In [27]:
def disparity_index(close):
    return (((close - close.rolling(14).mean())/(close.rolling(14).mean()))*100)

In [28]:
def get_adx(high, low, close, lookback):
    plus_dm = high.diff()
    minus_dm = low.diff()
    plus_dm[plus_dm < 0] = 0
    minus_dm[minus_dm > 0] = 0
    
    tr1 = pd.DataFrame(high - low)
    tr2 = pd.DataFrame(abs(high - close.shift(1)))
    tr3 = pd.DataFrame(abs(low - close.shift(1)))
    frames = [tr1, tr2, tr3]
    tr = pd.concat(frames, axis = 1, join = 'inner').max(axis = 1)
    atr = tr.rolling(lookback).mean()
    
    plus_di = 100 * (plus_dm.ewm(alpha = 1/lookback).mean() / atr)
    minus_di = abs(100 * (minus_dm.ewm(alpha = 1/lookback).mean() / atr))
    dx = (abs(plus_di - minus_di) / abs(plus_di + minus_di)) * 100
    adx = ((dx.shift(1) * (lookback - 1)) + dx) / lookback
    adx_smooth = adx.ewm(alpha = 1/lookback).mean()
    return adx_smooth

In [29]:
def aroon(close):
    return (ta.trend.AroonIndicator(close, 25, True).aroon_indicator())

In [30]:
def compute_all_indicators(data):
    data['SMA5'] = simple_moving_average_5(data['Close'])
    data['SMA10'] = simple_moving_average_10(data['Close'])
    data['StochasticK'] = stochastic_k(data['High'], data['Low'], data['Close'])
    data['StochasticD'] = stochastic_d(data['StochasticK'])
    data['LarryWilliamsR'] = larry_williams_r(data['High'], data['Low'], data['Close'])
    data['ROC'] = rate_of_change(data['Close'])
    data['PVT'] = price_volume_trend(data['Close'], data['Volume'])
    data['ADO'] = accumulation_distribution_oscillator(data['High'], data['Low'], data['Close'])
    data['WMA10'] = weighted_moving_average_10(data['Close'])
    data['EMA12'] = exponential_moving_average_12(data['Close'])
    data['EMA26'] = exponential_moving_average_26(data['Close'])
    data['MACD'] = moving_average_convergence_divergence(data['EMA12'], data['EMA26'])
    data['Momentum'] = momentum(data['Close'])
    data['Change'] = change(data['Close'])
    data['Direction'] = direction(data['Change'])
    data['RSI'] = relative_strength_index(data['Direction'])
    data['CCI'] = channel_commodity_index(data, 14)
    data['DI'] = disparity_index(data['Close'])
    data['ADX'] = get_adx(data['High'], data['Low'], data['Close'], 14)
    data['Aroon'] = aroon(data['Close'])    
    return data

In [31]:
complete_data = compute_all_indicators(data)
complete_data

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,SMA5,SMA10,StochasticK,...,Change,Direction,RSI,TP,sma,mad,CCI,DI,ADX,Aroon
0,5/23/2002,1.156429,1.242857,1.145714,1.196429,1.196429,104790000,1.196429,1.196429,,...,0.013571,1,,1.195000,,,,,,0.0
1,5/24/2002,1.214286,1.225000,1.197143,1.210000,1.210000,11104800,1.203215,1.203215,,...,-0.052857,0,,1.210714,,,,,,4.0
2,5/28/2002,1.213571,1.232143,1.157143,1.157143,1.157143,6609400,1.187857,1.187857,,...,-0.053572,0,,1.182143,,,,,,-4.0
3,5/29/2002,1.164286,1.164286,1.085714,1.103571,1.103571,6757800,1.166786,1.166786,,...,-0.032142,0,,1.117857,,,,,,-8.0
4,5/30/2002,1.107857,1.107857,1.071429,1.071429,1.071429,10154200,1.147714,1.147714,,...,0.005000,1,,1.083572,,,,,,-12.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4890,10/25/2021,663.739990,675.880005,657.070007,671.659973,671.659973,3833500,650.747998,640.849994,92.814518,...,-3.139953,0,0.675676,668.203328,637.748567,8.702045,233.314986,5.191655,30.298693,96.0
4891,10/26/2021,673.760010,676.489990,662.770020,668.520020,668.520020,2904800,656.652002,645.207996,86.568968,...,-5.600037,0,0.675676,669.260010,640.187855,11.809801,164.113158,4.356440,30.769399,76.0
4892,10/27/2021,669.000000,671.409973,661.849976,662.919983,662.919983,2276900,664.207996,648.523993,77.131758,...,11.130005,1,0.675676,665.393311,642.260234,13.930309,110.708606,3.125012,31.225713,76.0
4893,10/28/2021,670.950012,676.799988,668.030029,674.049988,674.049988,2859400,668.385999,652.548993,95.389771,...,16.260010,1,0.910010,672.960002,644.916425,16.039255,116.562258,4.376381,31.577094,88.0


In [32]:
complete_data = complete_data.dropna()

In [33]:
complete_data

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,SMA5,SMA10,StochasticK,...,Change,Direction,RSI,TP,sma,mad,CCI,DI,ADX,Aroon
14,6/13/2002,1.104286,1.122143,1.080000,1.082857,1.082857,2567600,1.120857,1.125643,7.110768,...,-0.096428,0,0.282646,1.095000,1.135578,0.031001,-87.262252,-4.041967,6.531289,-12.0
15,6/14/2002,1.082143,1.085714,0.980000,0.986429,0.986429,4783800,1.094429,1.116643,2.549744,...,-0.064286,0,0.282646,1.017381,1.121769,0.031939,-217.891706,-11.332225,9.143382,-56.0
16,6/17/2002,1.013571,1.059286,0.917857,0.922143,0.922143,4855200,1.047571,1.096000,1.363726,...,-0.011429,0,0.282646,0.966429,1.106361,0.041992,-222.155602,-15.840926,19.140010,-60.0
17,6/18/2002,0.938571,0.939286,0.842143,0.910714,0.910714,10808000,0.999000,1.075286,17.582308,...,0.040715,1,0.381194,0.897381,1.090612,0.058975,-218.434212,-15.825737,27.026513,-64.0
18,6/19/2002,0.911429,0.996429,0.892857,0.951429,0.951429,5482400,0.970714,1.055714,28.022051,...,0.005714,1,0.381194,0.946905,1.080850,0.071526,-124.845930,-11.360356,33.442196,-64.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4889,10/22/2021,651.809998,665.460022,651.809998,664.780029,664.780029,6179700,644.009998,636.387994,98.839008,...,6.879944,1,0.910010,660.683350,634.831425,5.952110,289.554743,4.545124,30.296476,92.0
4890,10/25/2021,663.739990,675.880005,657.070007,671.659973,671.659973,3833500,650.747998,640.849994,92.814518,...,-3.139953,0,0.675676,668.203328,637.748567,8.702045,233.314986,5.191655,30.298693,96.0
4891,10/26/2021,673.760010,676.489990,662.770020,668.520020,668.520020,2904800,656.652002,645.207996,86.568968,...,-5.600037,0,0.675676,669.260010,640.187855,11.809801,164.113158,4.356440,30.769399,76.0
4892,10/27/2021,669.000000,671.409973,661.849976,662.919983,662.919983,2276900,664.207996,648.523993,77.131758,...,11.130005,1,0.675676,665.393311,642.260234,13.930309,110.708606,3.125012,31.225713,76.0


In [34]:
complete_data = complete_data.drop(['TP', 'sma', 'mad', 'Change'], axis=1)

In [35]:
complete_data

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,SMA5,SMA10,StochasticK,...,EMA12,EMA26,MACD,Momentum,Direction,RSI,CCI,DI,ADX,Aroon
14,6/13/2002,1.104286,1.122143,1.080000,1.082857,1.082857,2567600,1.120857,1.125643,7.110768,...,1.130077,1.149996,-0.019919,90.507418,0,0.282646,-87.262252,-4.041967,6.531289,-12.0
15,6/14/2002,1.082143,1.085714,0.980000,0.986429,0.986429,4783800,1.094429,1.116643,2.549744,...,1.107977,1.137880,-0.029902,81.523058,0,0.282646,-217.891706,-11.332225,9.143382,-56.0
16,6/17/2002,1.013571,1.059286,0.917857,0.922143,0.922143,4855200,1.047571,1.096000,1.363726,...,1.079387,1.121899,-0.042512,79.691361,0,0.282646,-222.155602,-15.840926,19.140010,-60.0
17,6/18/2002,0.938571,0.939286,0.842143,0.910714,0.910714,10808000,0.999000,1.075286,17.582308,...,1.053438,1.106256,-0.052818,82.524278,1,0.381194,-218.434212,-15.825737,27.026513,-64.0
18,6/19/2002,0.911429,0.996429,0.892857,0.951429,0.951429,5482400,0.970714,1.055714,28.022051,...,1.037744,1.094787,-0.057043,88.800004,1,0.381194,-124.845930,-11.360356,33.442196,-64.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4889,10/22/2021,651.809998,665.460022,651.809998,664.780029,664.780029,6179700,644.009998,636.387994,98.839008,...,636.218990,619.660215,16.558775,110.181496,1,0.910010,289.554743,4.545124,30.296476,92.0
4890,10/25/2021,663.739990,675.880005,657.070007,671.659973,671.659973,3833500,650.747998,640.849994,92.814518,...,641.671449,623.512049,18.159400,105.804883,0,0.675676,233.314986,5.191655,30.298693,96.0
4891,10/26/2021,673.760010,676.489990,662.770020,668.520020,668.520020,2904800,656.652002,645.207996,86.568968,...,645.801998,626.845973,18.956025,104.603356,0,0.675676,164.113158,4.356440,30.769399,76.0
4892,10/27/2021,669.000000,671.409973,661.849976,662.919983,662.919983,2276900,664.207996,648.523993,77.131758,...,648.435534,629.518122,18.917412,104.917308,1,0.675676,110.708606,3.125012,31.225713,76.0


In [None]:
#corrMatrix = complete_data.corr()
#print(corrMatrix)

In [None]:
#sn.heatmap(corrMatrix, annot=False)
#plt.show()

In [None]:
#corr_pairs = corrMatrix.unstack()
#corr_pairs

In [None]:
#sorted_pairs = corr_pairs.sort_values(kind="quicksort")
#sorted_pairs

In [None]:
#strong_pairs = sorted_pairs[abs(sorted_pairs) > 0.5]

#print(strong_pairs)

In [None]:
## strong_pairs[strong_pairs.index[0][0] == strong_pairs.index[0][1]]

#removed_diagonal = [(i, j) for (i, j) in strong_pairs.index if i!=j]
#len(removed_diagonal)

In [None]:
## # Create correlation matrix
## corr_matrix = complete_data.corr().abs()

## # Select upper triangle of correlation matrix
## upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(np.bool8))

## # Find index of feature columns with correlation greater than 0.95
## to_drop = [column for column in upper.columns if any(upper[column] > 0.95)]
## to_drop

In [None]:
## Find index of feature columns with correlation greater than 0.8

#correlated_features = set()
#for i in range(len(corrMatrix.columns)):
 #   for j in range(i):
  #      if abs(corrMatrix.iloc[i, j]) > 0.8:
   #         colname = corrMatrix.columns[i]
    #        correlated_features.add(colname)

In [None]:
#correlated_features

In [None]:
#complete_data = complete_data.drop(labels=correlated_features, axis=1)

In [None]:
#complete_data

In [36]:
target = complete_data['Direction']
complete_data = complete_data.drop(['Date', 'Direction'], axis=1)
complete_data

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume,SMA5,SMA10,StochasticK,StochasticD,...,WMA10,EMA12,EMA26,MACD,Momentum,RSI,CCI,DI,ADX,Aroon
14,1.104286,1.122143,1.080000,1.082857,1.082857,2567600,1.120857,1.125643,7.110768,9.805238,...,1.125195,1.130077,1.149996,-0.019919,90.507418,0.282646,-87.262252,-4.041967,6.531289,-12.0
15,1.082143,1.085714,0.980000,0.986429,0.986429,4783800,1.094429,1.116643,2.549744,7.386740,...,1.099883,1.107977,1.137880,-0.029902,81.523058,0.282646,-217.891706,-11.332225,9.143382,-56.0
16,1.013571,1.059286,0.917857,0.922143,0.922143,4855200,1.047571,1.096000,1.363726,3.674746,...,1.064520,1.079387,1.121899,-0.042512,79.691361,0.282646,-222.155602,-15.840926,19.140010,-60.0
17,0.938571,0.939286,0.842143,0.910714,0.910714,10808000,0.999000,1.075286,17.582308,7.165259,...,1.030831,1.053438,1.106256,-0.052818,82.524278,0.381194,-218.434212,-15.825737,27.026513,-64.0
18,0.911429,0.996429,0.892857,0.951429,0.951429,5482400,0.970714,1.055714,28.022051,15.656028,...,1.008312,1.037744,1.094787,-0.057043,88.800004,0.381194,-124.845930,-11.360356,33.442196,-64.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4889,651.809998,665.460022,651.809998,664.780029,664.780029,6179700,644.009998,636.387994,98.839008,85.267842,...,641.291452,636.218990,619.660215,16.558775,110.181496,0.910010,289.554743,4.545124,30.296476,92.0
4890,663.739990,675.880005,657.070007,671.659973,671.659973,3833500,650.747998,640.849994,92.814518,96.740267,...,647.704539,641.671449,623.512049,18.159400,105.804883,0.675676,233.314986,5.191655,30.298693,96.0
4891,673.760010,676.489990,662.770020,668.520020,668.520020,2904800,656.652002,645.207996,86.568968,92.740831,...,652.735453,645.801998,626.845973,18.956025,104.603356,0.675676,164.113158,4.356440,30.769399,76.0
4892,669.000000,671.409973,661.849976,662.919983,662.919983,2276900,664.207996,648.523993,77.131758,85.505081,...,655.955814,648.435534,629.518122,18.917412,104.917308,0.675676,110.708606,3.125012,31.225713,76.0


### autofeat Classification

In [42]:
from autofeat import AutoFeatClassifier
X_train, X_test, y_train, y_test = train_test_split(complete_data,target,test_size=0.3)
model = AutoFeatClassifier()
df = model.fit_transform(X_train, y_train)
y_pred = model.predict(X_test)

In [48]:
df_test = model.transform(X_test)
model.score(df_test,y_test)

0.49043715846994534

In [49]:
df

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume,SMA5,SMA10,StochasticK,StochasticD,...,DI,ADX,Aroon,sqrt(RSI)/Momentum,sqrt(StochasticD)/RSI,sqrt(LarryWilliamsR)*RSI,Momentum**3/RSI,ADO*MACD**2,ADX*Abs(ROC),sqrt(StochasticK)*log(RSI)
0,89.242859,89.732857,88.644287,88.808571,88.808571,6237700.0,89.119998,89.008141,82.260137,84.442831,...,0.441085,54.630275,64.0,0.006649,18.102878,2.138006,2.424342e+06,6.023377,0.749191,-6.149586
1,367.149994,374.489990,360.000000,364.579987,364.579987,17427300.0,349.006000,337.586999,84.408428,81.537481,...,7.228958,34.239647,24.0,0.006876,17.788727,2.004374,2.192028e+06,21.352008,1.491663,-6.229369
2,3.874286,3.875714,3.752857,3.794286,3.794286,5618900.0,3.890857,3.903857,13.063192,41.653093,...,-3.017650,13.137066,-28.0,0.007452,12.714224,4.732990,1.721686e+06,0.000635,0.252260,-2.450619
3,343.500000,343.559998,335.850006,338.619995,338.619995,5016000.0,336.870001,335.175000,72.169586,72.883212,...,1.785280,29.272088,68.0,0.006999,16.818218,2.677895,2.078355e+06,31.538602,1.386148,-5.760077
4,30.549999,30.615713,29.000000,29.524286,29.524286,40524400.0,30.488286,32.347000,8.237940,16.060339,...,-8.393277,31.061655,52.0,0.005608,14.178660,2.707533,3.013708e+06,0.065297,2.182657,-3.626651
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3411,4.445714,4.614286,4.366429,4.575000,4.575000,26857600.0,4.428000,4.217714,96.085383,89.269692,...,10.363076,24.917934,68.0,0.008948,7.501922,2.491863,1.566320e+06,0.047238,4.168881,2.261118
3412,5.300000,5.382857,5.157143,5.195000,5.195000,32034800.0,5.199714,5.206786,57.858884,59.529619,...,2.880644,42.978525,56.0,0.003974,37.883321,1.322122,7.188940e+06,0.077226,5.037268,-12.104030
3413,102.430000,105.500000,101.720001,104.040001,104.040001,19775100.0,109.842000,111.046000,13.495479,17.631293,...,-7.682946,15.698793,-96.0,0.006963,11.015280,3.545406,1.829057e+06,-3.459945,1.987389,-3.543007
3414,4.340000,4.474286,4.325714,4.412857,4.412857,14311500.0,4.255429,4.076000,89.787155,90.271512,...,9.006124,18.638699,80.0,0.006276,18.717230,1.622210,2.882267e+06,0.000411,2.138979,-6.424780


In [50]:
df_test

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume,SMA5,SMA10,StochasticK,StochasticD,...,DI,ADX,Aroon,sqrt(RSI)/Momentum,sqrt(StochasticD)/RSI,sqrt(LarryWilliamsR)*RSI,Momentum**3/RSI,ADO*MACD**2,ADX*Abs(ROC),sqrt(StochasticK)*log(RSI)
0,30.150000,30.474285,29.959999,30.212856,30.212856,21245700.0,30.506000,28.777143,84.474107,85.412193,...,6.681123,21.010869,76.0,0.006451,18.206487,2.000148,2.653978e+06,0.683303,1.932071,-6.231792
1,115.730003,116.419998,113.510002,115.209999,115.209999,6746800.0,114.479999,117.370999,29.900980,27.144021,...,-3.040541,29.785443,16.0,0.005762,18.432953,2.366454,2.778779e+06,0.302079,1.735901,-6.909381
2,0.917857,0.935714,0.910714,0.917857,0.917857,285600.0,0.922286,0.929428,53.469143,60.152848,...,-1.940480,16.691617,36.0,0.006853,15.278979,3.462616,2.213707e+06,0.000016,1.092547,-4.957957
3,353.230011,357.309998,349.010010,351.829987,351.829987,7970900.0,345.744000,348.229999,49.905732,39.317963,...,-0.039374,38.887027,-60.0,0.008764,9.280198,4.782250,1.220998e+06,90.814860,0.993044,-2.769542
4,8.744286,9.070000,8.728571,9.051429,9.051429,7203700.0,8.820572,8.802000,92.461065,86.022838,...,6.971533,29.689117,52.0,0.007662,10.192029,2.498626,2.120811e+06,0.089518,7.970595,-0.906753
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1459,8.707143,9.104286,8.671429,8.992857,8.992857,35578200.0,9.525429,9.785428,12.096757,10.873540,...,-10.581480,51.613944,-96.0,0.005547,16.190750,1.909506,2.643519e+06,0.083560,9.679279,-5.534513
1460,3.208571,3.301429,3.182857,3.285714,3.285714,7024500.0,3.170857,3.153857,94.179630,70.152906,...,4.011876,19.825076,92.0,0.006849,16.500194,1.224642,2.217602e+06,0.000180,0.751492,-6.580057
1461,1.600000,1.624286,1.572857,1.578571,1.578571,8273300.0,1.661714,1.695714,1.544324,8.524518,...,-8.287912,17.074821,-56.0,0.006428,10.329820,2.804546,2.001821e+06,0.000035,2.218389,-1.570239
1462,3.912857,3.912857,3.800000,3.841429,3.841429,8615600.0,3.952000,3.989571,11.600115,23.119782,...,-3.703882,15.735316,-72.0,0.007594,9.472358,4.772650,1.627085e+06,0.000711,0.465639,-2.309311


### Generate New Features Based on autofeat Results

In [88]:
import math

In [103]:
def sqrt_rsi_momentum(rsi, momentum):
    return (np.sqrt(rsi)/momentum)

In [104]:
def sqrt_stochastic_d_rsi(stochastic_d, rsi):
    return(np.sqrt(stochastic_d)/rsi)

In [105]:
def sqrt_lwr_rsi (larry_williams_r, rsi):
    return (np.sqrt(larry_williams_r)*rsi)

In [106]:
def momentum_3_rsi(momentum, rsi):
    return ((momentum**3)/rsi)

In [107]:
def ado_macd_2(ado, macd):
    return (ado*(macd**2))

In [108]:
def adx_abs_roc(adx, roc):
    return (adx*np.abs(roc))

In [109]:
def sqrt_stochastic_k_log_rsi (stochastic_k, rsi):
    return ((np.sqrt(stochastic_k))*np.log(rsi))

In [110]:
def compute_adv_indicators(complete_data):
    complete_data['SqrtRSI/Momentum'] = sqrt_rsi_momentum(complete_data['RSI'], complete_data['Momentum'])
    complete_data['SqrtStochasticD/RSI'] = sqrt_stochastic_d_rsi(complete_data['StochasticD'], complete_data['RSI'])
    complete_data['SqrtLarryWilliamsR*RSI'] = sqrt_lwr_rsi(complete_data['LarryWilliamsR'], complete_data['RSI'])
    complete_data['Momentum**3/RSI'] = momentum_3_rsi(complete_data['Momentum'], complete_data['RSI'])
    complete_data['ADO*MACD**2'] = ado_macd_2(complete_data['ADO'], complete_data['MACD'])
    complete_data['ADX*AbsROC'] = adx_abs_roc(complete_data['ADX'], complete_data['ROC'])
    complete_data['SqrtStochasticK*LogRSI'] = sqrt_stochastic_k_log_rsi(complete_data['StochasticK'], complete_data['RSI'])   
    return complete_data

In [111]:
enhanced_data = compute_adv_indicators(complete_data)
enhanced_data

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume,SMA5,SMA10,StochasticK,StochasticD,...,DI,ADX,Aroon,SqrtRSI/Momentum,SqrtStochasticD/RSI,SqrtLarryWilliamsR*RSI,Momentum**3/RSI,ADO*MACD**2,ADX*AbsROC,SqrtStochasticK*LogRSI
14,1.104286,1.122143,1.080000,1.082857,1.082857,2567600,1.120857,1.125643,7.110768,9.805238,...,-4.041967,6.531289,-12.0,0.005874,11.078652,2.724111,2.623073e+06,0.000276,0.419294,-3.369416
15,1.082143,1.085714,0.980000,0.986429,0.986429,4783800,1.094429,1.116643,2.549744,7.386740,...,-11.332225,9.143382,-56.0,0.006521,9.615774,2.790189,1.916899e+06,0.000024,0.970553,-2.017645
16,1.013571,1.059286,0.917857,0.922143,0.922143,4855200,1.047571,1.096000,1.363726,3.674746,...,-15.840926,19.140010,-60.0,0.006671,6.782214,2.807117,1.790571e+06,0.000931,2.666845,-1.475571
17,0.938571,0.939286,0.842143,0.910714,0.910714,10808000,0.999000,1.075286,17.582308,7.165259,...,-15.825737,27.026513,-64.0,0.007482,7.022139,3.460643,1.474343e+06,0.000492,4.160700,-4.044043
18,0.911429,0.996429,0.892857,0.951429,0.951429,5482400,0.970714,1.055714,28.022051,15.656028,...,-11.360356,33.442196,-64.0,0.006953,10.379926,3.234046,1.836929e+06,0.002693,5.249131,-5.105377
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4889,651.809998,665.460022,651.809998,664.780029,664.780029,6179700,644.009998,636.387994,98.839008,85.267842,...,4.545124,30.296476,92.0,0.008658,10.147205,0.980530,1.469873e+06,247.075573,1.217361,-0.937506
4890,663.739990,675.880005,657.070007,671.659973,671.659973,3833500,650.747998,640.849994,92.814518,96.740267,...,5.191655,30.298693,96.0,0.007769,14.556781,1.811199,1.752988e+06,194.597054,1.908983,-3.776945
4891,673.760010,676.489990,662.770020,668.520020,668.520020,2904800,656.652002,645.207996,86.568968,92.740831,...,4.356440,30.769399,76.0,0.007858,14.252702,2.476241,1.693942e+06,126.499863,1.744052,-3.647655
4892,669.000000,671.409973,661.849976,662.919983,662.919983,2276900,664.207996,648.523993,77.131758,85.505081,...,3.125012,31.225713,76.0,0.007835,13.685406,3.231132,1.709240e+06,108.182370,1.786774,-3.443097


### autofeat Feature Selection

In [112]:
from autofeat import FeatureSelector
fsel = FeatureSelector(verbose=1)
selected_data = fsel.fit_transform(pd.DataFrame(enhanced_data), pd.DataFrame(target))

  return f(**kwargs)


[featsel] Scaling data...done.
[featsel] Feature selection run 1/5
[featsel] Feature selection run 2/5
[featsel] Feature selection run 3/5
[featsel] Feature selection run 4/5
[featsel] Feature selection run 5/5
[featsel] 10 features after 5 feature selection runs
[featsel] 10 features after correlation filtering
[featsel] 10 features after noise filtering


In [113]:
selected_data

Unnamed: 0,SqrtRSI/Momentum,SqrtStochasticD/RSI,SqrtLarryWilliamsR*RSI,ADO*MACD**2,SqrtStochasticK*LogRSI,ADX*AbsROC,ROC,MACD,Aroon,Momentum**3/RSI
0,0.005874,11.078652,2.724111,0.000276,-3.369416,0.419294,-0.064198,-0.019919,-12.0,2.623073e+06
1,0.006521,9.615774,2.790189,0.000024,-2.017645,0.970553,-0.106148,-0.029902,-56.0,1.916899e+06
2,0.006671,6.782214,2.807117,0.000931,-1.475571,2.666845,-0.139334,-0.042512,-60.0,1.790571e+06
3,0.007482,7.022139,3.460643,0.000492,-4.044043,4.160700,-0.153949,-0.052818,-64.0,1.474343e+06
4,0.006953,10.379926,3.234046,0.002693,-5.105377,5.249131,-0.156961,-0.057043,-64.0,1.836929e+06
...,...,...,...,...,...,...,...,...,...,...
4875,0.008658,10.147205,0.980530,247.075573,-0.937506,1.217361,0.040182,16.558775,92.0,1.469873e+06
4876,0.007769,14.556781,1.811199,194.597054,-3.776945,1.908983,0.063005,18.159400,96.0,1.752988e+06
4877,0.007858,14.252702,2.476241,126.499863,-3.647655,1.744052,0.056681,18.956025,76.0,1.693942e+06
4878,0.007835,13.685406,3.231132,108.182370,-3.443097,1.786774,0.057221,18.917412,76.0,1.709240e+06


In [189]:
selected_scaled_data = scaler.transform(selected_data)

In [190]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(selected_scaled_data,target,test_size=0.3)

In [191]:
from sklearn.preprocessing import StandardScaler

In [192]:
scaler = StandardScaler()  
scaler.fit(X_train)

X_train = scaler.transform(X_train)  
X_test = scaler.transform(X_test) 

In [199]:
selected_scaled_data_df = pd.DataFrame(selected_scaled_data)
selected_scaled_data_1250 = selected_scaled_data_df.tail(1250)
selected_scaled_data_250 = selected_scaled_data_df.tail(250)
selected_scaled_data_125 = selected_scaled_data_df.tail(125)
target_1250 = target.tail(1250)
target_250 = target.tail(250)
target_125 = target.tail(125)

In [200]:
X_train_1250, X_test_1250, y_train_1250, y_test_1250 = train_test_split(selected_scaled_data_1250,target_1250,test_size=0.3)

In [201]:
X_train_250, X_test_250, y_train_250, y_test_250 = train_test_split(selected_scaled_data_250,target_250,test_size=0.3)

In [202]:
X_train_125, X_test_125, y_train_125, y_test_125 = train_test_split(selected_scaled_data_125,target_125,test_size=0.3)

### Logistic Regression

#### Max Duration

In [139]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

In [140]:
lr = LogisticRegression(solver='liblinear', multi_class='ovr')
trained_model_lr = lr.fit(X_train, y_train)
predictions_lr = trained_model_lr.predict(X_test)

Train_accuracy_lr = accuracy_score(y_train,trained_model_lr.predict(X_train))
Test_accuracy_lr = accuracy_score(y_test, predictions_lr)
Confusion_matrix_lr = confusion_matrix(y_test,predictions_lr)

In [181]:
Train_accuracy_lr

0.6495901639344263

In [142]:
Test_accuracy_lr

0.6495901639344263

#### 5 Years

In [252]:
trained_model_lr_1250 = lr.fit(X_train_1250, y_train_1250)
predictions_lr_1250 = trained_model_lr_1250.predict(X_test_1250)

Train_accuracy_lr_1250 = accuracy_score(y_train_1250,trained_model_lr_1250.predict(X_train_1250))
Test_accuracy_lr_1250 = accuracy_score(y_test_1250, predictions_lr_1250)
Confusion_matrix_lr_1250 = confusion_matrix(y_test_1250,predictions_lr_1250)

In [253]:
Train_accuracy_lr_1250

0.45714285714285713

In [254]:
Test_accuracy_lr_1250

0.512

#### 1 Year

In [256]:
trained_model_lr_250 = lr.fit(X_train_250, y_train_250)
predictions_lr_250 = trained_model_lr_250.predict(X_test_250)

Train_accuracy_lr_250 = accuracy_score(y_train_250,trained_model_lr_250.predict(X_train_250))
Test_accuracy_lr_250 = accuracy_score(y_test_250, predictions_lr_250)
Confusion_matrix_lr_250 = confusion_matrix(y_test_250,predictions_lr_250)

In [257]:
Train_accuracy_lr_250

0.7142857142857143

In [258]:
Test_accuracy_lr_250

0.5866666666666667

#### 6 Months

In [259]:
trained_model_lr_125 = lr.fit(X_train_125, y_train_125)
predictions_lr_125 = trained_model_lr_125.predict(X_test_125)

Train_accuracy_lr_125 = accuracy_score(y_train_125,trained_model_lr_125.predict(X_train_125))
Test_accuracy_lr_125 = accuracy_score(y_test_125, predictions_lr_125)
Confusion_matrix_lr_125 = confusion_matrix(y_test_125,predictions_lr_125)

In [260]:
Train_accuracy_lr_125

0.632183908045977

In [261]:
Test_accuracy_lr_125

0.5526315789473685

#### Last 100 Days based on Best Model - Max Duration

In [262]:
last100 = selected_scaled_data_df.tail(100)

In [263]:
predictions_lr_100 = trained_model_lr.predict(last100)
predictions_lr_100

array([1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

### SVM

In [264]:
svm = SVC(gamma='auto')
trained_model_svm = svm.fit(X_train, y_train)
predictions_svm = trained_model_svm.predict(X_test)

Train_accuracy_svm = accuracy_score(y_train,trained_model_svm.predict(X_train))
Test_accuracy_svm = accuracy_score(y_test, predictions_svm)
Confusion_matrix_svm = confusion_matrix(y_test,predictions_svm)

In [265]:
Train_accuracy_svm

0.6612997658079626

In [266]:
Test_accuracy_svm

0.6550546448087432

#### 5 Years

In [267]:
trained_model_svm_1250 = svm.fit(X_train_1250, y_train_1250)
predictions_svm_1250 = trained_model_svm_1250.predict(X_test_1250)

Train_accuracy_svm_1250 = accuracy_score(y_train_1250,trained_model_svm_1250.predict(X_train_1250))
Test_accuracy_svm_1250 = accuracy_score(y_test_1250, predictions_svm_1250)
Confusion_matrix_svm_1250 = confusion_matrix(y_test_1250,predictions_svm_1250)

In [268]:
Train_accuracy_svm_1250

1.0

In [269]:
Test_accuracy_svm_1250

0.488

#### 1 Year

In [270]:
trained_model_svm_250 = svm.fit(X_train_250, y_train_250)
predictions_svm_250 = trained_model_svm_1250.predict(X_test_250)

Train_accuracy_svm_250 = accuracy_score(y_train_250,trained_model_svm_250.predict(X_train_250))
Test_accuracy_svm_250 = accuracy_score(y_test_250, predictions_svm_250)
Confusion_matrix_svm_250 = confusion_matrix(y_test_250,predictions_svm_250)

In [271]:
Train_accuracy_svm_250

1.0

In [272]:
Test_accuracy_svm_250

0.5333333333333333

#### 6 Months

In [273]:
trained_model_svm_125 = svm.fit(X_train_125, y_train_125)
predictions_svm_125 = trained_model_svm_1250.predict(X_test_125)

Train_accuracy_svm_125 = accuracy_score(y_train_125,trained_model_svm_125.predict(X_train_125))
Test_accuracy_svm_125 = accuracy_score(y_test_125, predictions_svm_125)
Confusion_matrix_svm_125 = confusion_matrix(y_test_125,predictions_svm_125)

In [274]:
Train_accuracy_svm_125

1.0

In [275]:
Test_accuracy_svm_125

0.6578947368421053

#### Last 100 Days based on Best Model - Max Duration

In [276]:
predictions_svm_100 = trained_model_svm.predict(last100)
predictions_svm_100

array([1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1,
       1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0,
       0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1,
       1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1,
       1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1])

### RandomForest

In [277]:
rf = RandomForestClassifier(n_estimators=10)
trained_model_rf = rf.fit(X_train, y_train)
predictions_rf = trained_model_rf.predict(X_test)

Train_accuracy_rf = accuracy_score(y_train, trained_model_rf.predict(X_train))
Test_accuracy_rf = accuracy_score(y_test, predictions_rf)
Confusion_matrix_rf = confusion_matrix(y_test, predictions_rf)

In [278]:
Train_accuracy_rf

0.9844847775175644

In [279]:
Test_accuracy_rf

0.60724043715847

#### 5 Years

In [280]:
trained_model_rf_1250 = rf.fit(X_train_1250, y_train_1250)
predictions_rf_1250 = trained_model_rf_1250.predict(X_test_1250)

Train_accuracy_rf_1250 = accuracy_score(y_train_1250,trained_model_rf_1250.predict(X_train_1250))
Test_accuracy_rf_1250 = accuracy_score(y_test_1250, predictions_rf_1250)
Confusion_matrix_rf_1250 = confusion_matrix(y_test_1250,predictions_rf_1250)

In [281]:
Train_accuracy_rf_1250

0.9714285714285714

In [282]:
Test_accuracy_rf_1250

0.584

#### 1 Year

In [283]:
trained_model_rf_250 = rf.fit(X_train_250, y_train_250)
predictions_rf_250 = trained_model_rf_250.predict(X_test_250)

Train_accuracy_rf_250 = accuracy_score(y_train_250,trained_model_rf_250.predict(X_train_250))
Test_accuracy_rf_250 = accuracy_score(y_test_250, predictions_rf_250)
Confusion_matrix_rf_250 = confusion_matrix(y_test_250,predictions_rf_250)

In [284]:
Train_accuracy_rf_250

0.9942857142857143

In [285]:
Test_accuracy_rf_250

0.6133333333333333

#### 6 Months

In [286]:
trained_model_rf_125 = rf.fit(X_train_125, y_train_125)
predictions_rf_125 = trained_model_rf_125.predict(X_test_125)

Train_accuracy_rf_125 = accuracy_score(y_train_125,trained_model_rf_125.predict(X_train_125))
Test_accuracy_rf_125 = accuracy_score(y_test_125, predictions_rf_125)
Confusion_matrix_rf_125 = confusion_matrix(y_test_125,predictions_rf_125)

In [287]:
Train_accuracy_rf_125

0.9885057471264368

In [288]:
Test_accuracy_rf_125

0.7105263157894737

#### Last 100 Days based on Best Model

In [289]:
predictions_rf_100 = trained_model_rf.predict(last100)
predictions_rf_100

array([1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1,
       1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1,
       1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
       1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1])

### KNN

In [290]:
knn = KNeighborsClassifier(n_neighbors=5)
trained_model_knn = knn.fit(X_train, y_train)
predictions_knn = trained_model_knn.predict(X_test)

Train_accuracy_knn = accuracy_score(y_train, trained_model_knn.predict(X_train))
Test_accuracy_knn = accuracy_score(y_test, predictions_knn)
Confusion_matrix_knn = confusion_matrix(y_test, predictions_knn)

In [291]:
Train_accuracy_knn

0.7379976580796253

In [292]:
Test_accuracy_knn

0.5956284153005464

#### 5 Years

In [293]:
trained_model_knn_1250 = knn.fit(X_train_1250, y_train_1250)
predictions_knn_1250 = trained_model_knn_1250.predict(X_test_1250)

Train_accuracy_knn_1250 = accuracy_score(y_train_1250,trained_model_knn_1250.predict(X_train_1250))
Test_accuracy_knn_1250 = accuracy_score(y_test_1250, predictions_knn_1250)
Confusion_matrix_knn_1250 = confusion_matrix(y_test_1250,predictions_knn_1250)

In [294]:
Train_accuracy_knn_1250

0.7474285714285714

In [295]:
Test_accuracy_knn_1250

0.5866666666666667

#### 1 Year

In [296]:
trained_model_knn_250 = knn.fit(X_train_250, y_train_250)
predictions_knn_250 = trained_model_knn_250.predict(X_test_250)

Train_accuracy_knn_250 = accuracy_score(y_train_250,trained_model_knn_250.predict(X_train_250))
Test_accuracy_knn_250 = accuracy_score(y_test_250, predictions_knn_250)
Confusion_matrix_knn_250 = confusion_matrix(y_test_250,predictions_knn_250)

In [297]:
Train_accuracy_knn_250

0.7485714285714286

In [298]:
Test_accuracy_knn_250

0.56

#### 6 Months

In [299]:
trained_model_knn_125 = knn.fit(X_train_125, y_train_125)
predictions_knn_125 = trained_model_knn_125.predict(X_test_125)

Train_accuracy_knn_125 = accuracy_score(y_train_125,trained_model_knn_125.predict(X_train_125))
Test_accuracy_knn_125 = accuracy_score(y_test_125, predictions_knn_125)
Confusion_matrix_knn_125 = confusion_matrix(y_test_125,predictions_knn_125)

In [300]:
Train_accuracy_knn_125

0.7931034482758621

In [301]:
Test_accuracy_knn_125

0.5789473684210527

#### Last 100 Days based on Best Model

In [302]:
predictions_knn_100 = trained_model_knn.predict(last100)
predictions_knn_100

array([1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0,
       1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0,
       1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1])

### XGBoost

In [175]:
xgb = XGBClassifier(use_label_encoder=False)
trained_model_xgb = xgb.fit(X_train, y_train)
predictions_xgb = trained_model_xgb.predict(X_test)

Train_accuracy_xgb = accuracy_score(y_train, trained_model_xgb.predict(X_train))
Test_accuracy_xgb = accuracy_score(y_test, predictions_xgb)
Confusion_matrix_xgb = confusion_matrix(y_test, predictions_xgb)



In [176]:
Train_accuracy_xgb

0.9847775175644028

In [177]:
Test_accuracy_xgb

0.6243169398907104

#### 5 Years

In [303]:
trained_model_xgb_1250 = xgb.fit(X_train_1250, y_train_1250)
predictions_xgb_1250 = trained_model_xgb_1250.predict(X_test_1250)

Train_accuracy_xgb_1250 = accuracy_score(y_train_1250,trained_model_xgb_1250.predict(X_train_1250))
Test_accuracy_xgb_1250 = accuracy_score(y_test_1250, predictions_xgb_1250)
Confusion_matrix_xgb_1250 = confusion_matrix(y_test_1250,predictions_xgb_1250)



In [304]:
Train_accuracy_xgb_1250

1.0

In [305]:
Test_accuracy_xgb_1250

0.6106666666666667

#### 1 Year

In [306]:
trained_model_xgb_250 = xgb.fit(X_train_250, y_train_250)
predictions_xgb_250 = trained_model_xgb_250.predict(X_test_250)

Train_accuracy_xgb_250 = accuracy_score(y_train_250,trained_model_xgb_250.predict(X_train_250))
Test_accuracy_xgb_250 = accuracy_score(y_test_250, predictions_xgb_250)
Confusion_matrix_xgb_250 = confusion_matrix(y_test_250,predictions_xgb_250)



In [307]:
Train_accuracy_xgb_250

1.0

In [308]:
Test_accuracy_xgb_250

0.6266666666666667

#### 6 Months

In [309]:
trained_model_xgb_125 = xgb.fit(X_train_125, y_train_125)
predictions_xgb_125 = trained_model_xgb_125.predict(X_test_125)

Train_accuracy_xgb_125 = accuracy_score(y_train_125,trained_model_xgb_125.predict(X_train_125))
Test_accuracy_xgb_125 = accuracy_score(y_test_125, predictions_xgb_125)
Confusion_matrix_xgb_125 = confusion_matrix(y_test_125,predictions_xgb_125)



In [310]:
Train_accuracy_xgb_125

1.0

In [311]:
Test_accuracy_xgb_125

0.7368421052631579

#### Last 100 Days based on Best Model

In [179]:
predictions_xgb_100 = trained_model_xgb.predict(last100)
predictions_xgb_100

array([1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1,
       1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0,
       1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0,
       0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1,
       1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1])

## KFold cross validation
### Basic example

In [None]:
from sklearn.model_selection import cross_val_score

cross_val_score(LogisticRegression(solver='liblinear',multi_class='ovr'), complete_data, target, cv=10)

In [None]:
cross_val_score(SVC(gamma='auto'), complete_data, target, cv=10)

In [None]:
cross_val_score(RandomForestClassifier(n_estimators=5), complete_data, target, cv=10)

In [None]:
cross_val_score(KNeighborsClassifier(n_neighbors=5), complete_data, target, cv=10)

In [None]:
cross_val_score(XGBClassifier(use_label_encoder=False), complete_data, target, cv=10)