In [1]:
import numpy as np
import pandas as pd
import ta
import seaborn as sn
import matplotlib.pyplot as plt

from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier

from ta import add_all_ta_features
from ta import momentum
from ta.utils import dropna

In [2]:
#pip install xgboost

In [3]:
pip install autofeat

Note: you may need to restart the kernel to use updated packages.


In [4]:
def get_data(company_symbol):
    return pd.read_csv("dataset/"+company_symbol+".csv")

In [5]:
data = get_data("AAPL")
data

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,12/12/1980,0.128348,0.128906,0.128348,0.128348,0.100453,469033600
1,12/15/1980,0.122210,0.122210,0.121652,0.121652,0.095213,175884800
2,12/16/1980,0.113281,0.113281,0.112723,0.112723,0.088224,105728000
3,12/17/1980,0.115513,0.116071,0.115513,0.115513,0.090408,86441600
4,12/18/1980,0.118862,0.119420,0.118862,0.118862,0.093029,73449600
...,...,...,...,...,...,...,...
10321,11/18/2021,153.710007,158.669998,153.050003,157.869995,157.869995,137827700
10322,11/19/2021,157.649994,161.020004,156.529999,160.550003,160.550003,117147500
10323,11/22/2021,161.679993,165.699997,161.000000,161.020004,161.020004,117467900
10324,11/23/2021,161.119995,161.800003,159.059998,161.410004,161.410004,95933900


In [6]:
data.dtypes

Date          object
Open         float64
High         float64
Low          float64
Close        float64
Adj Close    float64
Volume         int64
dtype: object

In [7]:
data.isna().any()

Date         False
Open         False
High         False
Low          False
Close        False
Adj Close    False
Volume       False
dtype: bool

In [8]:
data[data.columns[data.isna().any()]]

0
1
2
3
4
...
10321
10322
10323
10324
10325


In [9]:
cond1 = data.Date.isna() # aapl['Date'].isna()
cond2 = data.Date.str.contains(r'^\s*$', na=False)
cond3 = data.Date == ''

data_checked = data.assign(cond1= cond1, cond2= cond2, cond3= cond3)
print (data_checked)

             Date        Open        High         Low       Close   Adj Close  \
0      12/12/1980    0.128348    0.128906    0.128348    0.128348    0.100453   
1      12/15/1980    0.122210    0.122210    0.121652    0.121652    0.095213   
2      12/16/1980    0.113281    0.113281    0.112723    0.112723    0.088224   
3      12/17/1980    0.115513    0.116071    0.115513    0.115513    0.090408   
4      12/18/1980    0.118862    0.119420    0.118862    0.118862    0.093029   
...           ...         ...         ...         ...         ...         ...   
10321  11/18/2021  153.710007  158.669998  153.050003  157.869995  157.869995   
10322  11/19/2021  157.649994  161.020004  156.529999  160.550003  160.550003   
10323  11/22/2021  161.679993  165.699997  161.000000  161.020004  161.020004   
10324  11/23/2021  161.119995  161.800003  159.059998  161.410004  161.410004   
10325  11/24/2021  160.750000  162.139999  159.639999  161.940002  161.940002   

          Volume  cond1  co

In [10]:
def simple_moving_average_5(close):
    return close.rolling(5, min_periods=1).mean()

In [11]:
def simple_moving_average_10(close):
    return close.rolling(10, min_periods=1).mean()

In [12]:
def stochastic_k(high, low, close):
    return (((close-low.rolling(14).min())/(high.rolling(14).max()-low.rolling(14).min()))*100)

In [13]:
def stochastic_d(stochastic_k):
    return stochastic_k.rolling(3,min_periods=1).mean()

In [14]:
def larry_williams_r(high, low, close):
    return (((high.rolling(14).max()-close)/(high.rolling(14).max()-low.rolling(14).min()))*100)

In [15]:
def rate_of_change(close):
    return ((close-close.shift(12))/close.shift(12))

In [16]:
def price_volume_trend(close, volume):
    return (((close-close.shift(1))/close.shift(1))*volume)

In [17]:
def accumulation_distribution_oscillator(high, low, close):
    return((high-close.shift(1))/(high-low))

In [18]:
def weighted_moving_average_10(close):
    return ((10*close+9*close.shift(1)+8*close.shift(2)+7*close.shift(3)
             +6*close.shift(4)+5*close.shift(5)+4*close.shift(6)+3*close.shift(7)
             +2*close.shift(8)+close.shift(9))/(10+9+8+7+6+5+4+3+2+1))

In [19]:
def exponential_moving_average_12(close):
    return (close.ewm(span=12, adjust=False).mean())

In [20]:
def exponential_moving_average_26(close):
    return (close.ewm(span=26, adjust=False).mean())

In [21]:
def moving_average_convergence_divergence(ema_12, ema_26):
    return (ema_12 - ema_26)

In [22]:
def momentum(close):
    return ((close/close.shift(14))*100)

In [23]:
def change(close):
    return (close.shift(-1) - close)

In [24]:
def direction(change):
    return (np.where(change>0, 1, 0))

In [25]:
def relative_strength_index(direction):
    return (100-(100/(1+(direction.rolling(14).sum()/14)/(14-direction.rolling(14).sum())/14)))

In [26]:
def channel_commodity_index(df, ndays): 
    df['TP'] = (df['High'] + df['Low'] + df['Close']) / 3 
    df['sma'] = df['TP'].rolling(ndays).mean()
    df['mad'] = df['TP'].rolling(ndays).apply(lambda x: pd.Series(x).mad())
    df['CCI'] = (df['TP'] - df['sma']) / (0.015 * df['mad']) 
    return df['CCI']

In [27]:
def disparity_index(close):
    return (((close - close.rolling(14).mean())/(close.rolling(14).mean()))*100)

In [28]:
def get_adx(high, low, close, lookback):
    plus_dm = high.diff()
    minus_dm = low.diff()
    plus_dm[plus_dm < 0] = 0
    minus_dm[minus_dm > 0] = 0
    
    tr1 = pd.DataFrame(high - low)
    tr2 = pd.DataFrame(abs(high - close.shift(1)))
    tr3 = pd.DataFrame(abs(low - close.shift(1)))
    frames = [tr1, tr2, tr3]
    tr = pd.concat(frames, axis = 1, join = 'inner').max(axis = 1)
    atr = tr.rolling(lookback).mean()
    
    plus_di = 100 * (plus_dm.ewm(alpha = 1/lookback).mean() / atr)
    minus_di = abs(100 * (minus_dm.ewm(alpha = 1/lookback).mean() / atr))
    dx = (abs(plus_di - minus_di) / abs(plus_di + minus_di)) * 100
    adx = ((dx.shift(1) * (lookback - 1)) + dx) / lookback
    adx_smooth = adx.ewm(alpha = 1/lookback).mean()
    return adx_smooth

In [29]:
def aroon(close):
    return (ta.trend.AroonIndicator(close, 25, True).aroon_indicator())

In [30]:
def compute_all_indicators(data):
    data['SMA5'] = simple_moving_average_5(data['Close'])
    data['SMA10'] = simple_moving_average_10(data['Close'])
    data['StochasticK'] = stochastic_k(data['High'], data['Low'], data['Close'])
    data['StochasticD'] = stochastic_d(data['StochasticK'])
    data['LarryWilliamsR'] = larry_williams_r(data['High'], data['Low'], data['Close'])
    data['ROC'] = rate_of_change(data['Close'])
    data['PVT'] = price_volume_trend(data['Close'], data['Volume'])
    data['ADO'] = accumulation_distribution_oscillator(data['High'], data['Low'], data['Close'])
    data['WMA10'] = weighted_moving_average_10(data['Close'])
    data['EMA12'] = exponential_moving_average_12(data['Close'])
    data['EMA26'] = exponential_moving_average_26(data['Close'])
    data['MACD'] = moving_average_convergence_divergence(data['EMA12'], data['EMA26'])
    data['Momentum'] = momentum(data['Close'])
    data['Change'] = change(data['Close'])
    data['Direction'] = direction(data['Change'])
    data['RSI'] = relative_strength_index(data['Direction'])
    data['CCI'] = channel_commodity_index(data, 14)
    data['DI'] = disparity_index(data['Close'])
    data['ADX'] = get_adx(data['High'], data['Low'], data['Close'], 14)
    data['Aroon'] = aroon(data['Close'])    
    return data

In [31]:
complete_data = compute_all_indicators(data)
complete_data

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,SMA5,SMA10,StochasticK,...,Change,Direction,RSI,TP,sma,mad,CCI,DI,ADX,Aroon
0,12/12/1980,0.128348,0.128906,0.128348,0.128348,0.100453,469033600,0.128348,0.128348,,...,-0.006696,0,,0.128534,,,,,,0.0
1,12/15/1980,0.122210,0.122210,0.121652,0.121652,0.095213,175884800,0.125000,0.125000,,...,-0.008929,0,,0.121838,,,,,,-4.0
2,12/16/1980,0.113281,0.113281,0.112723,0.112723,0.088224,105728000,0.120908,0.120908,,...,0.002790,1,,0.112909,,,,,,-8.0
3,12/17/1980,0.115513,0.116071,0.115513,0.115513,0.090408,86441600,0.119559,0.119559,,...,0.003349,1,,0.115699,,,,,,-8.0
4,12/18/1980,0.118862,0.119420,0.118862,0.118862,0.093029,73449600,0.119420,0.119420,,...,0.007254,1,,0.119048,,,,,,-8.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10321,11/18/2021,153.710007,158.669998,153.050003,157.869995,157.869995,137827700,152.470001,151.067000,92.850734,...,2.680008,1,1.259446,156.529999,150.785238,1.340068,285.794377,4.643715,15.458962,96.0
10322,11/19/2021,157.649994,161.020004,156.529999,160.550003,160.550003,117147500,154.582001,151.994000,96.528798,...,0.470001,1,1.259446,159.366669,151.538572,2.062994,252.968849,5.839366,17.452579,96.0
10323,11/22/2021,161.679993,165.699997,161.000000,161.020004,161.020004,117467900,156.786001,153.052000,74.313981,...,0.390000,1,1.259446,162.573334,152.430953,3.129456,216.062687,5.602220,19.791095,28.0
10324,11/23/2021,161.119995,161.800003,159.059998,161.410004,161.410004,95933900,158.868002,154.112001,76.454485,...,0.529998,1,1.836394,160.756668,153.121190,3.825817,133.051798,5.368344,22.560291,32.0


In [32]:
complete_data = complete_data.dropna()

In [33]:
complete_data

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,SMA5,SMA10,StochasticK,...,Change,Direction,RSI,TP,sma,mad,CCI,DI,ADX,Aroon
14,1/5/1981,0.151228,0.151228,0.150670,0.150670,0.117924,35728000,0.154911,0.147433,78.162269,...,-0.006697,0,0.910010,0.150856,0.138991,0.015240,51.905087,8.558577,39.422947,32.0
15,1/6/1981,0.144531,0.144531,0.143973,0.143973,0.112683,45158400,0.151563,0.149219,64.367958,...,-0.006138,0,0.910010,0.144159,0.140585,0.014156,16.831178,2.555255,34.025492,32.0
16,1/7/1981,0.138393,0.138393,0.137835,0.137835,0.107879,55686400,0.147768,0.149777,48.781660,...,-0.002790,0,0.675676,0.138021,0.142379,0.012106,-23.998205,-3.055644,26.178222,32.0
17,1/8/1981,0.135603,0.135603,0.135045,0.135045,0.105695,39827200,0.144308,0.149498,38.158453,...,0.007254,1,0.675676,0.135231,0.143774,0.010511,-54.181713,-5.940912,19.002172,32.0
18,1/9/1981,0.142299,0.142857,0.142299,0.142299,0.111372,21504000,0.141964,0.149219,46.031972,...,-0.001116,0,0.507614,0.142485,0.145448,0.008807,-22.428997,-2.030802,15.894879,32.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10320,11/17/2021,151.000000,155.000000,150.990005,153.490005,153.490005,88807000,150.470001,150.376001,82.421470,...,4.379990,1,0.910010,153.160003,150.227143,1.070885,182.581771,2.130675,14.297264,96.0
10321,11/18/2021,153.710007,158.669998,153.050003,157.869995,157.869995,137827700,152.470001,151.067000,92.850734,...,2.680008,1,1.259446,156.529999,150.785238,1.340068,285.794377,4.643715,15.458962,96.0
10322,11/19/2021,157.649994,161.020004,156.529999,160.550003,160.550003,117147500,154.582001,151.994000,96.528798,...,0.470001,1,1.259446,159.366669,151.538572,2.062994,252.968849,5.839366,17.452579,96.0
10323,11/22/2021,161.679993,165.699997,161.000000,161.020004,161.020004,117467900,156.786001,153.052000,74.313981,...,0.390000,1,1.259446,162.573334,152.430953,3.129456,216.062687,5.602220,19.791095,28.0


In [34]:
complete_data = complete_data.drop(['TP', 'sma', 'mad', 'Change'], axis=1)

In [35]:
complete_data

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,SMA5,SMA10,StochasticK,...,EMA12,EMA26,MACD,Momentum,Direction,RSI,CCI,DI,ADX,Aroon
14,1/5/1981,0.151228,0.151228,0.150670,0.150670,0.117924,35728000,0.154911,0.147433,78.162269,...,0.145433,0.138131,0.007302,117.391779,0,0.910010,51.905087,8.558577,39.422947,32.0
15,1/6/1981,0.144531,0.144531,0.143973,0.143973,0.112683,45158400,0.151563,0.149219,64.367958,...,0.145208,0.138564,0.006645,118.348239,0,0.910010,16.831178,2.555255,34.025492,32.0
16,1/7/1981,0.138393,0.138393,0.137835,0.137835,0.107879,55686400,0.147768,0.149777,48.781660,...,0.144074,0.138510,0.005564,122.277619,0,0.675676,-23.998205,-3.055644,26.178222,32.0
17,1/8/1981,0.135603,0.135603,0.135045,0.135045,0.105695,39827200,0.144308,0.149498,38.158453,...,0.142685,0.138253,0.004432,116.908919,1,0.675676,-54.181713,-5.940912,19.002172,32.0
18,1/9/1981,0.142299,0.142857,0.142299,0.142299,0.111372,21504000,0.141964,0.149219,46.031972,...,0.142626,0.138553,0.004073,119.717824,0,0.507614,-22.428997,-2.030802,15.894879,32.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10320,11/17/2021,151.000000,155.000000,150.990005,153.490005,153.490005,88807000,150.470001,150.376001,82.421470,...,150.348567,149.181217,1.167349,100.603001,1,0.910010,182.581771,2.130675,14.297264,96.0
10321,11/18/2021,153.710007,158.669998,153.050003,157.869995,157.869995,137827700,152.470001,151.067000,92.850734,...,151.505710,149.824831,1.680879,105.387177,1,1.259446,285.794377,4.643715,15.458962,96.0
10322,11/19/2021,157.649994,161.020004,156.529999,160.550003,160.550003,117147500,154.582001,151.994000,96.528798,...,152.897139,150.619288,2.277852,107.780609,1,1.259446,252.968849,5.839366,17.452579,96.0
10323,11/22/2021,161.679993,165.699997,161.000000,161.020004,161.020004,117467900,156.786001,153.052000,74.313981,...,154.146811,151.389711,2.757100,107.332355,1,1.259446,216.062687,5.602220,19.791095,28.0


In [36]:
#corrMatrix = complete_data.corr()
#print(corrMatrix)

In [37]:
#sn.heatmap(corrMatrix, annot=False)
#plt.show()

In [38]:
#corr_pairs = corrMatrix.unstack()
#corr_pairs

In [39]:
#sorted_pairs = corr_pairs.sort_values(kind="quicksort")
#sorted_pairs

In [40]:
#strong_pairs = sorted_pairs[abs(sorted_pairs) > 0.5]

#print(strong_pairs)

In [41]:
## strong_pairs[strong_pairs.index[0][0] == strong_pairs.index[0][1]]

#removed_diagonal = [(i, j) for (i, j) in strong_pairs.index if i!=j]
#len(removed_diagonal)

In [42]:
## # Create correlation matrix
## corr_matrix = complete_data.corr().abs()

## # Select upper triangle of correlation matrix
## upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(np.bool8))

## # Find index of feature columns with correlation greater than 0.95
## to_drop = [column for column in upper.columns if any(upper[column] > 0.95)]
## to_drop

In [43]:
## Find index of feature columns with correlation greater than 0.8

#correlated_features = set()
#for i in range(len(corrMatrix.columns)):
 #   for j in range(i):
  #      if abs(corrMatrix.iloc[i, j]) > 0.8:
   #         colname = corrMatrix.columns[i]
    #        correlated_features.add(colname)

In [44]:
#correlated_features

In [45]:
#complete_data = complete_data.drop(labels=correlated_features, axis=1)

In [46]:
#complete_data

In [47]:
target = complete_data['Direction']
complete_data = complete_data.drop(['Date', 'Direction'], axis=1)
complete_data

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume,SMA5,SMA10,StochasticK,StochasticD,...,WMA10,EMA12,EMA26,MACD,Momentum,RSI,CCI,DI,ADX,Aroon
14,0.151228,0.151228,0.150670,0.150670,0.117924,35728000,0.154911,0.147433,78.162269,81.610332,...,0.151826,0.145433,0.138131,0.007302,117.391779,0.910010,51.905087,8.558577,39.422947,32.0
15,0.144531,0.144531,0.143973,0.143973,0.112683,45158400,0.151563,0.149219,64.367958,75.862874,...,0.151197,0.145208,0.138564,0.006645,118.348239,0.910010,16.831178,2.555255,34.025492,32.0
16,0.138393,0.138393,0.137835,0.137835,0.107879,55686400,0.147768,0.149777,48.781660,63.770629,...,0.149127,0.144074,0.138510,0.005564,122.277619,0.675676,-23.998205,-3.055644,26.178222,32.0
17,0.135603,0.135603,0.135045,0.135045,0.105695,39827200,0.144308,0.149498,38.158453,50.436024,...,0.146449,0.142685,0.138253,0.004432,116.908919,0.675676,-54.181713,-5.940912,19.002172,32.0
18,0.142299,0.142857,0.142299,0.142299,0.111372,21504000,0.141964,0.149219,46.031972,44.324028,...,0.145140,0.142626,0.138553,0.004073,119.717824,0.507614,-22.428997,-2.030802,15.894879,32.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10320,151.000000,155.000000,150.990005,153.490005,153.490005,88807000,150.470001,150.376001,82.421470,67.809125,...,150.522365,150.348567,149.181217,1.167349,100.603001,0.910010,182.581771,2.130675,14.297264,96.0
10321,153.710007,158.669998,153.050003,157.869995,157.869995,137827700,152.470001,151.067000,92.850734,81.057205,...,151.884909,151.505710,149.824831,1.680879,105.387177,1.259446,285.794377,4.643715,15.458962,96.0
10322,157.649994,161.020004,156.529999,160.550003,160.550003,117147500,154.582001,151.994000,96.528798,90.600334,...,153.609091,152.897139,150.619288,2.277852,107.780609,1.259446,252.968849,5.839366,17.452579,96.0
10323,161.679993,165.699997,161.000000,161.020004,161.020004,117467900,156.786001,153.052000,74.313981,87.897838,...,155.250183,154.146811,151.389711,2.757100,107.332355,1.259446,216.062687,5.602220,19.791095,28.0


### autofeat Classification

In [48]:
from sklearn.model_selection import train_test_split

In [49]:
from autofeat import AutoFeatClassifier
X_train, X_test, y_train, y_test = train_test_split(complete_data,target,test_size=0.3)
model = AutoFeatClassifier()
df = model.fit_transform(X_train, y_train)
y_pred = model.predict(X_test)

ValueError: Input contains NaN, infinity or a value too large for dtype('float64').

In [None]:
df_test = model.transform(X_test)
model.score(df_test,y_test)

In [None]:
df

In [None]:
df_test

### Generate New Features Based on autofeat Results

In [None]:
import math

In [None]:
def sqrt_rsi_momentum(rsi, momentum):
    return (np.sqrt(rsi)/momentum)

In [None]:
def sqrt_stochastic_d_rsi(stochastic_d, rsi):
    return(np.sqrt(stochastic_d)/rsi)

In [None]:
def sqrt_lwr_rsi (larry_williams_r, rsi):
    return (np.sqrt(larry_williams_r)*rsi)

In [None]:
def momentum_3_rsi(momentum, rsi):
    return ((momentum**3)/rsi)

In [None]:
def ado_macd_2(ado, macd):
    return (ado*(macd**2))

In [None]:
def adx_abs_roc(adx, roc):
    return (adx*np.abs(roc))

In [None]:
def sqrt_stochastic_k_log_rsi (stochastic_k, rsi):
    return ((np.sqrt(stochastic_k))*np.log(rsi))

In [None]:
def compute_adv_indicators(complete_data):
    complete_data['SqrtRSI/Momentum'] = sqrt_rsi_momentum(complete_data['RSI'], complete_data['Momentum'])
    complete_data['SqrtStochasticD/RSI'] = sqrt_stochastic_d_rsi(complete_data['StochasticD'], complete_data['RSI'])
    complete_data['SqrtLarryWilliamsR*RSI'] = sqrt_lwr_rsi(complete_data['LarryWilliamsR'], complete_data['RSI'])
    complete_data['Momentum**3/RSI'] = momentum_3_rsi(complete_data['Momentum'], complete_data['RSI'])
    complete_data['ADO*MACD**2'] = ado_macd_2(complete_data['ADO'], complete_data['MACD'])
    complete_data['ADX*AbsROC'] = adx_abs_roc(complete_data['ADX'], complete_data['ROC'])
    complete_data['SqrtStochasticK*LogRSI'] = sqrt_stochastic_k_log_rsi(complete_data['StochasticK'], complete_data['RSI'])   
    return complete_data

In [None]:
enhanced_data = compute_adv_indicators(complete_data)
enhanced_data

### autofeat Feature Selection

In [None]:
from autofeat import FeatureSelector
fsel = FeatureSelector(verbose=1)
selected_data = fsel.fit_transform(pd.DataFrame(enhanced_data), pd.DataFrame(target))

In [None]:
selected_data

In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
scaler = StandardScaler()  
scaler.fit(selected_data)

In [None]:

selected_scaled_data = scaler.transform(selected_data)

In [None]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(selected_scaled_data,target,test_size=0.3)

In [None]:
X_train = scaler.transform(X_train)  
X_test = scaler.transform(X_test) 

In [None]:
selected_scaled_data_df = pd.DataFrame(selected_scaled_data)
selected_scaled_data_1250 = selected_scaled_data_df.tail(1250)
selected_scaled_data_250 = selected_scaled_data_df.tail(250)
selected_scaled_data_125 = selected_scaled_data_df.tail(125)
target_1250 = target.tail(1250)
target_250 = target.tail(250)
target_125 = target.tail(125)

In [None]:
X_train_1250, X_test_1250, y_train_1250, y_test_1250 = train_test_split(selected_scaled_data_1250,target_1250,test_size=0.3)

In [None]:
X_train_250, X_test_250, y_train_250, y_test_250 = train_test_split(selected_scaled_data_250,target_250,test_size=0.3)

In [None]:
X_train_125, X_test_125, y_train_125, y_test_125 = train_test_split(selected_scaled_data_125,target_125,test_size=0.3)

### Logistic Regression

#### Max Duration

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

In [None]:
lr = LogisticRegression(solver='liblinear', multi_class='ovr')
trained_model_lr = lr.fit(X_train, y_train)
predictions_lr = trained_model_lr.predict(X_test)

Train_accuracy_lr = accuracy_score(y_train,trained_model_lr.predict(X_train))
Test_accuracy_lr = accuracy_score(y_test, predictions_lr)
Confusion_matrix_lr = confusion_matrix(y_test,predictions_lr)

In [None]:
Train_accuracy_lr

In [None]:
Test_accuracy_lr

#### 5 Years

In [None]:
trained_model_lr_1250 = lr.fit(X_train_1250, y_train_1250)
predictions_lr_1250 = trained_model_lr_1250.predict(X_test_1250)

Train_accuracy_lr_1250 = accuracy_score(y_train_1250,trained_model_lr_1250.predict(X_train_1250))
Test_accuracy_lr_1250 = accuracy_score(y_test_1250, predictions_lr_1250)
Confusion_matrix_lr_1250 = confusion_matrix(y_test_1250,predictions_lr_1250)

In [None]:
Train_accuracy_lr_1250

In [None]:
Test_accuracy_lr_1250

#### 1 Year

In [None]:
trained_model_lr_250 = lr.fit(X_train_250, y_train_250)
predictions_lr_250 = trained_model_lr_250.predict(X_test_250)

Train_accuracy_lr_250 = accuracy_score(y_train_250,trained_model_lr_250.predict(X_train_250))
Test_accuracy_lr_250 = accuracy_score(y_test_250, predictions_lr_250)
Confusion_matrix_lr_250 = confusion_matrix(y_test_250,predictions_lr_250)

In [None]:
Train_accuracy_lr_250

In [None]:
Test_accuracy_lr_250

#### 6 Months

In [None]:
trained_model_lr_125 = lr.fit(X_train_125, y_train_125)
predictions_lr_125 = trained_model_lr_125.predict(X_test_125)

Train_accuracy_lr_125 = accuracy_score(y_train_125,trained_model_lr_125.predict(X_train_125))
Test_accuracy_lr_125 = accuracy_score(y_test_125, predictions_lr_125)
Confusion_matrix_lr_125 = confusion_matrix(y_test_125,predictions_lr_125)

In [None]:
Train_accuracy_lr_125

In [None]:
Test_accuracy_lr_125

#### Last 100 Days based on Best Model - Max Duration

In [None]:
last100 = selected_scaled_data_df.tail(100)

In [None]:
predictions_lr_100 = trained_model_lr_1250.predict(last100)
predictions_lr_100

### SVM

In [None]:
svm = SVC(gamma='auto')
trained_model_svm = svm.fit(X_train, y_train)
predictions_svm = trained_model_svm.predict(X_test)

Train_accuracy_svm = accuracy_score(y_train,trained_model_svm.predict(X_train))
Test_accuracy_svm = accuracy_score(y_test, predictions_svm)
Confusion_matrix_svm = confusion_matrix(y_test,predictions_svm)

In [None]:
Train_accuracy_svm

In [None]:
Test_accuracy_svm

#### 5 Years

In [None]:
trained_model_svm_1250 = svm.fit(X_train_1250, y_train_1250)
predictions_svm_1250 = trained_model_svm_1250.predict(X_test_1250)

Train_accuracy_svm_1250 = accuracy_score(y_train_1250,trained_model_svm_1250.predict(X_train_1250))
Test_accuracy_svm_1250 = accuracy_score(y_test_1250, predictions_svm_1250)
Confusion_matrix_svm_1250 = confusion_matrix(y_test_1250,predictions_svm_1250)

In [None]:
Train_accuracy_svm_1250

In [None]:
Test_accuracy_svm_1250

#### 1 Year

In [None]:
trained_model_svm_250 = svm.fit(X_train_250, y_train_250)
predictions_svm_250 = trained_model_svm_1250.predict(X_test_250)

Train_accuracy_svm_250 = accuracy_score(y_train_250,trained_model_svm_250.predict(X_train_250))
Test_accuracy_svm_250 = accuracy_score(y_test_250, predictions_svm_250)
Confusion_matrix_svm_250 = confusion_matrix(y_test_250,predictions_svm_250)

In [None]:
Train_accuracy_svm_250

In [None]:
Test_accuracy_svm_250

#### 6 Months

In [None]:
trained_model_svm_125 = svm.fit(X_train_125, y_train_125)
predictions_svm_125 = trained_model_svm_1250.predict(X_test_125)

Train_accuracy_svm_125 = accuracy_score(y_train_125,trained_model_svm_125.predict(X_train_125))
Test_accuracy_svm_125 = accuracy_score(y_test_125, predictions_svm_125)
Confusion_matrix_svm_125 = confusion_matrix(y_test_125,predictions_svm_125)

In [None]:
Train_accuracy_svm_125

In [None]:
Test_accuracy_svm_125

#### Last 100 Days based on Best Model - Max Duration

In [None]:
predictions_svm_100 = trained_model_svm.predict(last100)
predictions_svm_100

### RandomForest

In [None]:
rf = RandomForestClassifier(n_estimators=10)
trained_model_rf = rf.fit(X_train, y_train)
predictions_rf = trained_model_rf.predict(X_test)

Train_accuracy_rf = accuracy_score(y_train, trained_model_rf.predict(X_train))
Test_accuracy_rf = accuracy_score(y_test, predictions_rf)
Confusion_matrix_rf = confusion_matrix(y_test, predictions_rf)

In [None]:
Train_accuracy_rf

In [None]:
Test_accuracy_rf

#### 5 Years

In [None]:
trained_model_rf_1250 = rf.fit(X_train_1250, y_train_1250)
predictions_rf_1250 = trained_model_rf_1250.predict(X_test_1250)

Train_accuracy_rf_1250 = accuracy_score(y_train_1250,trained_model_rf_1250.predict(X_train_1250))
Test_accuracy_rf_1250 = accuracy_score(y_test_1250, predictions_rf_1250)
Confusion_matrix_rf_1250 = confusion_matrix(y_test_1250,predictions_rf_1250)

In [None]:
Train_accuracy_rf_1250

In [None]:
Test_accuracy_rf_1250

#### 1 Year

In [None]:
trained_model_rf_250 = rf.fit(X_train_250, y_train_250)
predictions_rf_250 = trained_model_rf_250.predict(X_test_250)

Train_accuracy_rf_250 = accuracy_score(y_train_250,trained_model_rf_250.predict(X_train_250))
Test_accuracy_rf_250 = accuracy_score(y_test_250, predictions_rf_250)
Confusion_matrix_rf_250 = confusion_matrix(y_test_250,predictions_rf_250)

In [None]:
Train_accuracy_rf_250

In [None]:
Test_accuracy_rf_250

#### 6 Months

In [None]:
trained_model_rf_125 = rf.fit(X_train_125, y_train_125)
predictions_rf_125 = trained_model_rf_125.predict(X_test_125)

Train_accuracy_rf_125 = accuracy_score(y_train_125,trained_model_rf_125.predict(X_train_125))
Test_accuracy_rf_125 = accuracy_score(y_test_125, predictions_rf_125)
Confusion_matrix_rf_125 = confusion_matrix(y_test_125,predictions_rf_125)

In [None]:
Train_accuracy_rf_125

In [None]:
Test_accuracy_rf_125

#### Last 100 Days based on Best Model

In [None]:
predictions_rf_100 = trained_model_rf_125.predict(last100)
predictions_rf_100

### KNN

In [None]:
knn = KNeighborsClassifier(n_neighbors=5)
trained_model_knn = knn.fit(X_train, y_train)
predictions_knn = trained_model_knn.predict(X_test)

Train_accuracy_knn = accuracy_score(y_train, trained_model_knn.predict(X_train))
Test_accuracy_knn = accuracy_score(y_test, predictions_knn)
Confusion_matrix_knn = confusion_matrix(y_test, predictions_knn)

In [None]:
Train_accuracy_knn

In [None]:
Test_accuracy_knn

#### 5 Years

In [None]:
trained_model_knn_1250 = knn.fit(X_train_1250, y_train_1250)
predictions_knn_1250 = trained_model_knn_1250.predict(X_test_1250)

Train_accuracy_knn_1250 = accuracy_score(y_train_1250,trained_model_knn_1250.predict(X_train_1250))
Test_accuracy_knn_1250 = accuracy_score(y_test_1250, predictions_knn_1250)
Confusion_matrix_knn_1250 = confusion_matrix(y_test_1250,predictions_knn_1250)

In [None]:
Train_accuracy_knn_1250

In [None]:
Test_accuracy_knn_1250

#### 1 Year

In [None]:
trained_model_knn_250 = knn.fit(X_train_250, y_train_250)
predictions_knn_250 = trained_model_knn_250.predict(X_test_250)

Train_accuracy_knn_250 = accuracy_score(y_train_250,trained_model_knn_250.predict(X_train_250))
Test_accuracy_knn_250 = accuracy_score(y_test_250, predictions_knn_250)
Confusion_matrix_knn_250 = confusion_matrix(y_test_250,predictions_knn_250)

In [None]:
Train_accuracy_knn_250

In [None]:
Test_accuracy_knn_250

#### 6 Months

In [None]:
trained_model_knn_125 = knn.fit(X_train_125, y_train_125)
predictions_knn_125 = trained_model_knn_125.predict(X_test_125)

Train_accuracy_knn_125 = accuracy_score(y_train_125,trained_model_knn_125.predict(X_train_125))
Test_accuracy_knn_125 = accuracy_score(y_test_125, predictions_knn_125)
Confusion_matrix_knn_125 = confusion_matrix(y_test_125,predictions_knn_125)

In [None]:
Train_accuracy_knn_125

In [None]:
Test_accuracy_knn_125

#### Last 100 Days based on Best Model

In [None]:
predictions_knn_100 = trained_model_knn.predict(last100)
predictions_knn_100

### XGBoost

In [None]:
xgb = XGBClassifier(use_label_encoder=False)
trained_model_xgb = xgb.fit(X_train, y_train)
predictions_xgb = trained_model_xgb.predict(X_test)

Train_accuracy_xgb = accuracy_score(y_train, trained_model_xgb.predict(X_train))
Test_accuracy_xgb = accuracy_score(y_test, predictions_xgb)
Confusion_matrix_xgb = confusion_matrix(y_test, predictions_xgb)

In [None]:
Train_accuracy_xgb

In [None]:
Test_accuracy_xgb

#### 5 Years

In [None]:
trained_model_xgb_1250 = xgb.fit(X_train_1250, y_train_1250)
predictions_xgb_1250 = trained_model_xgb_1250.predict(X_test_1250)

Train_accuracy_xgb_1250 = accuracy_score(y_train_1250,trained_model_xgb_1250.predict(X_train_1250))
Test_accuracy_xgb_1250 = accuracy_score(y_test_1250, predictions_xgb_1250)
Confusion_matrix_xgb_1250 = confusion_matrix(y_test_1250,predictions_xgb_1250)

In [None]:
Train_accuracy_xgb_1250

In [None]:
Test_accuracy_xgb_1250

#### 1 Year

In [None]:
trained_model_xgb_250 = xgb.fit(X_train_250, y_train_250)
predictions_xgb_250 = trained_model_xgb_250.predict(X_test_250)

Train_accuracy_xgb_250 = accuracy_score(y_train_250,trained_model_xgb_250.predict(X_train_250))
Test_accuracy_xgb_250 = accuracy_score(y_test_250, predictions_xgb_250)
Confusion_matrix_xgb_250 = confusion_matrix(y_test_250,predictions_xgb_250)

In [None]:
Train_accuracy_xgb_250

In [None]:
Test_accuracy_xgb_250

#### 6 Months

In [None]:
trained_model_xgb_125 = xgb.fit(X_train_125, y_train_125)
predictions_xgb_125 = trained_model_xgb_125.predict(X_test_125)

Train_accuracy_xgb_125 = accuracy_score(y_train_125,trained_model_xgb_125.predict(X_train_125))
Test_accuracy_xgb_125 = accuracy_score(y_test_125, predictions_xgb_125)
Confusion_matrix_xgb_125 = confusion_matrix(y_test_125,predictions_xgb_125)

In [None]:
Train_accuracy_xgb_125

In [None]:
Test_accuracy_xgb_125

#### Last 100 Days based on Best Model

In [None]:
predictions_xgb_100 = trained_model_xgb.predict(last100)
predictions_xgb_100

## KFold cross validation
### Basic example

In [None]:
from sklearn.model_selection import cross_val_score

cross_val_score(LogisticRegression(solver='liblinear',multi_class='ovr'), complete_data, target, cv=10)

In [None]:
cross_val_score(SVC(gamma='auto'), complete_data, target, cv=10)

In [None]:
cross_val_score(RandomForestClassifier(n_estimators=5), complete_data, target, cv=10)

In [None]:
cross_val_score(KNeighborsClassifier(n_neighbors=5), complete_data, target, cv=10)

In [None]:
cross_val_score(XGBClassifier(use_label_encoder=False), complete_data, target, cv=10)