In [1]:
import numpy as np
import pandas as pd
import pywt
import matplotlib.pyplot as plt

import statsmodels.tsa.stattools as ts
from sklearn.preprocessing import normalize
from statsmodels.tsa.stattools import adfuller
import seaborn as sns

n=16

In [2]:
def getWeights_FFD(series,d,thres):
    w=[1.]
    for k in range(1,series.shape[0]):
        w_=-w[-1]/k*(d-k+1)
        w.append(w_)
    w1=[v for i,v in enumerate(w) if np.absolute(v)>thres]
    w1=np.array(w1[::-1]).reshape(-1,1)
    return w1


def fracDiff_FFD(series,d,thres=0.00006):
    w=getWeights_FFD(series,d,thres)
    width=len(w)-1
    df={}
    for name in series.columns:
        seriesF,df_=series[[name]].fillna(method='ffill').dropna(),pd.Series()
        for iloc1 in range(width,seriesF.shape[0]):
            loc0,loc1=seriesF.index[iloc1-width],seriesF.index[iloc1]
            if not np.isfinite(series.loc[loc1,name]):continue 
            df_[loc1]=np.dot(w.T,seriesF.loc[loc0:loc1])[0,0]
        df[name+"_fd"]=df_.copy(deep=True)
    df=pd.concat(df,axis=1)
    return df

In [3]:
def GetMin_FFD(Stock,thres,d_0,d_1,tick='Close'):
    from statsmodels.tsa.stattools import adfuller
    path,instName='ABSOLUTE_PATH_TO_YOUR_DATA', Stock
    out0=pd.DataFrame(columns=['adfStat','pVal','lags','nObs','95% conf','corr'])
    df0=pd.read_csv(path+instName+'.csv',index_col=['Date'],parse_dates=['Date'])
    df1=np.log(df0[[tick]]).resample('1D').last() 
    df1=df1.dropna()

    d0=d_0
    d1=d_1
    
    count=10
    
    delta =10
    while delta > 0.01:
        if count==10 or count==0:
            df2=fracDiff_FFD(df1,d0,thres)
            df2=adfuller(df2[tick+'_fd'],maxlag=1,regression='c',autolag=None)
            adS0= df2[0] 
        
        if count==10 or count==1:

            df2=fracDiff_FFD(df1,d1,thres)
            df2=adfuller(df2[tick+'_fd'],maxlag=1,regression='c',autolag=None)
            adS1= df2[0] 

        d2=(d0+d1)/2
        df2=fracDiff_FFD(df1,(d0+d1)/2,thres)
        corr=np.corrcoef(df1.loc[df2.index,tick],df2[tick+'_fd'])[0,1]
        df2=adfuller(df2[tick+'_fd'],maxlag=1,regression='c',autolag=None)
        adS3= df2[0] 
        
        if adS3>df2[4]['5%']:
            d0 = (d0+d1)/2
            count=0
        else:
            d1= (d0+d1)/2
            count=1
        
        delta = abs(adS3-df2[4]['5%'])
        print(delta, d2,adS3,corr)
        
    return d2




In [4]:
print(GetMin_FFD('stock_name',0.00005,0,1,'Close'))

111111
2222222
3333333
9.17365314545536 0.5 -12.036021897767611 0.6694795464901911
111111
2222222
3333333
0.37394660117695855 0.25 -3.236440013748731 0.9249061760062991
111111
2222222
3333333
1.5688149446308852 0.125 -1.2937450900979495 0.9841407281853407
111111
2222222
3333333
0.7224169304935373 0.1875 -2.1401209182901635 0.9619084614962062
111111
2222222
3333333
0.10185851300874615 0.21875 -2.7606577645075743 0.9466294063766781
111111
2222222
3333333
0.0650918285639217 0.234375 -2.9275966991691535 0.9357044876944031
111111
2222222
3333333
0.005744269105367739 0.2265625 -2.856766434417339 0.9409921965444354
0.2265625


In [5]:
print(GetMin_FFD('stock_name',0.00005,0,1,'Open'))

111111
2222222
3333333
10.327692609086732 0.5 -13.190061361398984 0.6590003383610292
111111
2222222
3333333
0.6267402445692287 0.25 -3.489233657141001 0.9242713336426573
111111
2222222
3333333
1.4701405108400658 0.125 -1.3924195238887689 0.9841142951013546
111111
2222222
3333333
0.5930770251882547 0.1875 -2.269460823595446 0.9617660753601409
111111
2222222
3333333
0.08699605179200853 0.21875 -2.949512329308329 0.9462936957470208
111111
2222222
3333333
0.32628323789204483 0.203125 -2.5362443818854925 0.9544949310450571
111111
2222222
3333333
0.1435076189486817 0.2109375 -2.7190142969452453 0.9506025042192969
111111
2222222
3333333
0.05386267416770618 0.21484375 -2.808656580281252 0.9485560515288239
111111
2222222
3333333
0.06253246654111466 0.216796875 -2.7999854625883516 0.9475147711384526
111111
2222222
3333333
0.09510621446041245 0.2177734375 -2.957623151952084 0.9468854942226697
111111
2222222
3333333
0.0013822434005916762 0.21728515625 -2.8638998417605217 0.9472141048441636
0.21728

In [10]:
print(GetMin_FFD('stock_name',0.00005,0,1,'High'))

111111
2222222
3333333
0.012072094847887627 0.22807397643 -2.850437631629379 0.9402161196828843
111111
2222222
3333333
0.012072094821578894 0.22807397643150001 -2.8504376316556876 0.9402161196819929
111111
2222222
3333333
0.01148208725479849 0.22807397643225003 -2.873991488487185 0.9401540302467787
111111
2222222
3333333
0.012072094814993495 0.22807397643187502 -2.850437631662273 0.94021611968177
111111
2222222
3333333
0.012072094811693468 0.2280739764320625 -2.850437631665573 0.9402161196816585
111111
2222222
3333333
0.011482087253151807 0.22807397643215627 -2.8739914884855384 0.9401540302468331
111111
2222222
3333333
0.011482087252330242 0.2280739764321094 -2.873991488484717 0.9401540302468624
111111
2222222
3333333
0.011482087251919015 0.22807397643208593 -2.8739914884843056 0.9401540302468757
111111
2222222
3333333
0.012072094811492295 0.22807397643207422 -2.8504376316657742 0.9402161196816514
111111
2222222
3333333
0.012072094811408363 0.22807397643208008 -2.850437631665858 0.9402

KeyboardInterrupt: 

In [11]:
print(GetMin_FFD('stock_name',0.00005,0,1,'Low'))

111111
2222222
3333333
9.390759546879533 0.5 -12.253128299191784 0.6658072866512323
111111
2222222
3333333
0.3847966050941882 0.25 -3.2472900176659607 0.9247560918447176
111111
2222222
3333333
1.572135684867882 0.125 -1.2904243498609527 0.9841146944650836
111111
2222222
3333333
0.7334465708984532 0.1875 -2.1290912778852475 0.9618064365401868
111111
2222222
3333333
0.08440426256538514 0.21875 -2.7781120149509353 0.9464140129624674
111111
2222222
3333333
0.05843885875080934 0.234375 -2.920943729356041 0.9355701448232719
111111
2222222
3333333
0.003862521350406656 0.2265625 -2.8586481821723 0.9408384836843372
0.2265625


In [17]:
def lowpassfilter(signal, thresh = 0.2, wavelet="sym5", mode='symmetric'):
    thresh = thresh*np.nanmax(signal)
    coeff = pywt.wavedec(signal, wavelet, mode)
    reconstructed_signal = pywt.waverec(coeff[:-4]+ [None] * 4, wavelet, mode)
    return reconstructed_signal

def lowpassfilter4(signal, thresh = 0.2, wavelet="bior1.1", mode='symmetric'):
#     thresh = thresh*np.nanmax(signal)
    coeff = pywt.wavedec(signal, wavelet, mode)
#     print(coeff, type(coeff),coeff[:-1]+ [None] * 1)   #, [np.array([v if i<8 else 0 for i,v in enumerate(coeff[0])])])
    reconstructed_signal = pywt.waverec(coeff[:-1]+ [None] * 1, wavelet, mode)
#     print(len(reconstructed_signal),len(signal))
    return reconstructed_signal

def wt_component(signal, wavelet="bior1.1", mode='symmetric'):
    coeff = pywt.wavedec(signal, wavelet, mode)
    return coeff 

def wt_component_haar(signal, wavelet="haar", mode='symmetric'):
    coeff = pywt.wavedec(signal, wavelet, mode)
    return coeff 

def wt_component_rbio(signal, wavelet="rbio6.8", mode='symmetric'):
    coeff = pywt.wavedec(signal, wavelet, mode)
    return coeff 

def wt_component_array(signal, wavelet="bior1.1", mode='symmetric'):
    coeff = pywt.wavedec(signal, wavelet, mode)
    coeff = pywt.coeffs_to_array(coeff, padding=0, axes=None)[0][:]
    coeff = normalize(coeff.reshape(1, -1), norm="l1")
    return coeff 

In [10]:
df = pd.read_csv('ABBSOLUTE_PATH_TO_YOUR_STOCK_DATA', parse_dates=['Date'], index_col=["Date"])
df_log=pd.DataFrame()
df_fd=pd.DataFrame()


In [11]:
df_log['Open']= np.log(df.Open)
df_log['High']= np.log(df.High)
df_log['Low']= np.log(df.Low)
df_log['Close']= np.log(df.Close)
df_log['Volume']= np.log(df.Volume)


In [12]:
df_fd=df

In [13]:
df_fd[['Open_dff']]=fracDiff_FFD(df_log[['Open']].resample('1D').last().dropna(),d=0.21728515625,thres=0.00005)
df_fd[['High_dff']]=fracDiff_FFD(df_log[['High']].resample('1D').last().dropna(),d=0.22807397643208566 ,thres=0.00005)
df_fd[['Low_dff']]=fracDiff_FFD(df_log[['Low']].resample('1D').last().dropna(),d=0.2265625,thres=0.00005)
df_fd[['Close_dff']]=fracDiff_FFD(df_log[['Close']].resample('1D').last().dropna(),d=0.2265625,thres=0.00005)

In [14]:
df_fd[['Open_log']]=np.log(df[['Open']])
df_fd[['High_log']]=np.log(df[['High']])
df_fd[['Low_log']]=np.log(df[['Low']])
df_fd[['Close_log']]=np.log(df[['Close']])
df_fd[['Volume_log']]=np.log(df[['Volume']])

In [None]:
df_fd['spring_effect']=np.nan
df_fd['spring_effect_Open']=np.nan
df_fd['spring_effect_High']=np.nan
df_fd['spring_effect_Low']=np.nan

df_fd['spring_effect_2']=np.nan
df_fd['spring_effect_3']=np.nan
df_fd['spring_effect_4']=np.nan
df_fd['spring_effect_5']=np.nan
df_fd['spring_effect_10']=np.nan
df_fd['spring_effect_11']=np.nan
df_fd['spring_effect_12']=np.nan
df_fd['spring_effect_13']=np.nan
df_fd['spring_effect_14']=np.nan
df_fd['spring_effect_15']=np.nan
df_fd['spring_effect_16']=np.nan

df_fd.loc[264:,['spring_effect']] = [[(df_fd['Close_dff'][i]-lowpassfilter(df_fd['Close_dff'][i-264+1:i+1])[264-1])/
                                  lowpassfilter(df_fd['Close_dff'][i+1-264:i+1])[264-1]] for i in range(264,len(df_fd))]
df_fd.loc[264:,['spring_effect_2']] = [[(df_fd['Close_dff'][i]-df_fd['Close_dff'][i-264+1:i+1].rolling(2).mean()[-1])/
                                  df_fd['Close_dff'][i-264+1:i+1].rolling(2).mean()[-1]] for i in range(264,len(df_fd))]
df_fd.loc[264:,['spring_effect_3']] = [[(df_fd['Close_dff'][i]-df_fd['Close_dff'][i-264+1:i+1].rolling(3).mean()[-1])/
                                  df_fd['Close_dff'][i-264+1:i+1].rolling(3).mean()[-1]] for i in range(264,len(df_fd))]
df_fd.loc[264:,['spring_effect_4']] = [[(df_fd['Close_dff'][i]-df_fd['Close_dff'][i-264+1:i+1].rolling(4).mean()[-1])/
                                  df_fd['Close_dff'][i-264+1:i+1].rolling(4).mean()[-1]] for i in range(264,len(df_fd))]
df_fd.loc[264:,['spring_effect_5']] = [[(df_fd['Close_dff'][i]-df_fd['Close_dff'][i-264+1:i+1].rolling(5).mean()[-1])/
                                  df_fd['Close_dff'][i-264+1:i+1].rolling(5).mean()[-1]] for i in range(264,len(df_fd))]
df_fd.loc[264:,['spring_effect_10']] = [[(df_fd['Close_dff'][i]-df_fd['Close_dff'][i-264+1:i+1].rolling(10).mean()[-1])/
                                  df_fd['Close_dff'][i-264+1:i+1].rolling(10).mean()[-1]] for i in range(264,len(df_fd))]
df_fd.loc[264:,['spring_effect_11']] = [[(df_fd['Close_dff'][i]-df_fd['Close_dff'][i-264+1:i+1].rolling(11).mean()[-1])/
                                  df_fd['Close_dff'][i-264+1:i+1].rolling(11).mean()[-1]] for i in range(264,len(df_fd))]
df_fd.loc[264:,['spring_effect_12']] = [[(df_fd['Close_dff'][i]-df_fd['Close_dff'][i-264+1:i+1].rolling(12).mean()[-1])/
                                  df_fd['Close_dff'][i-264+1:i+1].rolling(12).mean()[-1]] for i in range(264,len(df_fd))]
df_fd.loc[264:,['spring_effect_13']] = [[(df_fd['Close_dff'][i]-df_fd['Close_dff'][i-264+1:i+1].rolling(13).mean()[-1])/
                                  df_fd['Close_dff'][i-264+1:i+1].rolling(13).mean()[-1]] for i in range(264,len(df_fd))]
df_fd.loc[264:,['spring_effect_14']] = [[(df_fd['Close_dff'][i]-df_fd['Close_dff'][i-264+1:i+1].rolling(14).mean()[-1])/
                                  df_fd['Close_dff'][i-264+1:i+1].rolling(14).mean()[-1]] for i in range(264,len(df_fd))]
df_fd.loc[264:,['spring_effect_15']] = [[(df_fd['Close_dff'][i]-df_fd['Close_dff'][i-264+1:i+1].rolling(15).mean()[-1])/
                                  df_fd['Close_dff'][i-264+1:i+1].rolling(15).mean()[-1]] for i in range(264,len(df_fd))]
df_fd.loc[264:,['spring_effect_16']] = [[(df_fd['Close_dff'][i]-df_fd['Close_dff'][i-264+1:i+1].rolling(16).mean()[-1])/
                                  df_fd['Close_dff'][i-264+1:i+1].rolling(16).mean()[-1]] for i in range(264,len(df_fd))]

df_fd.loc[264:,['spring_effect_Open']] = [[(df_fd['Open_dff'][i]-lowpassfilter(df_fd['Open_dff'][i-264+1:i+1])[264-1])/
                                  lowpassfilter(df_fd['Open_dff'][i-264+1:i+1])[264-1]] for i in range(264,len(df_fd))]
df_fd.loc[264:,['spring_effect_Low']] = [[(df_fd['Low_dff'][i]-lowpassfilter(df_fd['Low_dff'][i-264+1:i+1])[264-1])/
                                  lowpassfilter(df_fd['Low_dff'][i-264+1:i+1])[264-1]] for i in range(264,len(df_fd))]
df_fd.loc[264:,['spring_effect_High']] = [[(df_fd['High_dff'][i]-lowpassfilter(df_fd['High_dff'][i-264+1:i+1])[264-1])/
                                  lowpassfilter(df_fd['High_dff'][i-264+1:i+1])[264-1]] for i in range(264,len(df_fd))]

In [19]:
df_fd['Smile_Close_2']=df_fd['Close_dff'].rolling(2).std().diff(periods=1)
df_fd['Smile_Close_3']=df_fd['Close_dff'].rolling(3).std().diff(periods=1)
df_fd['Smile_Close_4']=df_fd['Close_dff'].rolling(4).std().diff(periods=1)
df_fd['Smile_Close_5']=df_fd['Close_dff'].rolling(5).std().diff(periods=1)
df_fd['Smile_Close_6']=df_fd['Close_dff'].rolling(6).std().diff(periods=1)
df_fd['Smile_Close_7']=df_fd['Close_dff'].rolling(7).std().diff(periods=1)
df_fd['Smile_Close_8']=df_fd['Close_dff'].rolling(8).std().diff(periods=1)
df_fd['Smile_Close_9']=df_fd['Close_dff'].rolling(9).std().diff(periods=1)
df_fd['Smile_Close_10']=df_fd['Close_dff'].rolling(10).std().diff(periods=1)
df_fd['Smile_Close_11']=df_fd['Close_dff'].rolling(11).std().diff(periods=1)
df_fd['Smile_Close_12']=df_fd['Close_dff'].rolling(12).std().diff(periods=1)
df_fd['Smile_Close_13']=df_fd['Close_dff'].rolling(13).std().diff(periods=1)
df_fd['Smile_Close_14']=df_fd['Close_dff'].rolling(14).std().diff(periods=1)
df_fd['Smile_Close_15']=df_fd['Close_dff'].rolling(15).std().diff(periods=1)
df_fd['Smile_Close_16']=df_fd['Close_dff'].rolling(16).std().diff(periods=1)
df_fd['Smile_Close_17']=df_fd['Close_dff'].rolling(17).std().diff(periods=1)
df_fd['Smile_Close_18']=df_fd['Close_dff'].rolling(18).std().diff(periods=1)
df_fd['Smile_Close_19']=df_fd['Close_dff'].rolling(19).std().diff(periods=1)
df_fd['Smile_Close_20']=df_fd['Close_dff'].rolling(20).std().diff(periods=1)
df_fd['Smile_Close_21']=df_fd['Close_dff'].rolling(21).std().diff(periods=1)
df_fd['Smile_Close_22']=df_fd['Close_dff'].rolling(22).std().diff(periods=1)
df_fd['Smile_Close_30']=df_fd['Close_dff'].rolling(30).std().diff(periods=1)
df_fd['Smile_Close_40']=df_fd['Close_dff'].rolling(40).std().diff(periods=1)

In [30]:
df_fd['bin']= np.nan 

df_fd.loc[:-1,['bin']] = [[1] if (df_fd['Close_dff'][i+1]-df_fd['Close_dff'][i])>=0.01 else [-1] for i in range(0,len(df_fd)-1)]

In [31]:
df_fd1=df_fd

In [35]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn import tree
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import BaggingRegressor

from sklearn.gaussian_process.kernels import RBF
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor

from sklearn.naive_bayes import GaussianNB

from sklearn.utils import resample
from joblib import dump, load
from imblearn.ensemble import BalancedBaggingClassifier,BalancedRandomForestClassifier,BalanceCascade
from sklearn.multiclass import OneVsRestClassifier

In [37]:
df_fd=df_fd1

In [33]:
df_fd=df_fd.dropna()

In [102]:
df_fd.to_csv('~/PycharmProjects/Trading_1/Stock-Trading-Environment/data/features.csv')

In [None]:
from sklearn.utils import resample
from joblib import dump, load
from imblearn.ensemble import BalancedBaggingClassifier,BalancedRandomForestClassifier,BalanceCascade
from sklearn.multiclass import OneVsRestClassifier

    
for i in range(0,61):   # Optional : you can go for one model, i choose to average 61 models to improve accuracy and avoid overfitting  

        df1_100=df_fd[['spring_effect','spring_effect_Open','spring_effect_High', 'spring_effect_Low',
                   'spring_effect_2', 'spring_effect_3','spring_effect_4', 'spring_effect_5', 'spring_effect_10',
                   'spring_effect_11', 'spring_effect_12','spring_effect_13', 'spring_effect_14', 'spring_effect_15', 
                   'spring_effect_16', 'Smile_Close_2',  'Smile_Close_3', 'Smile_Close_4',
                   'Smile_Close_5', 'Smile_Close_6', 'Smile_Close_7', 'Smile_Close_8', 'Smile_Close_9', 'Smile_Close_10', 
                   'Smile_Close_11', 'Smile_Close_12', 'Smile_Close_13', 'Smile_Close_14', 'Smile_Close_15',
                   'Smile_Close_16', 'Smile_Close_17', 'Smile_Close_18', 'Smile_Close_19', 'Smile_Close_20',
                   'Smile_Close_21', 'Smile_Close_22', 'Smile_Close_30', 'Smile_Close_40',
                   
                    'bin']][:-48] 


        df1_100_p= df1_100[df1_100['bin']==1]
        df1_100_n= df1_100[df1_100['bin']==-1]
        
        df1_100_n_ = resample(df1_100_n, 
                                 replace=False,     # sample with replacement
                                 n_samples=int(df1_100.bin.value_counts()[1]),    # to match majority class
                                 random_state=None) # reproducible results
#         print(df1_100.bin.value_counts()[1])
#         mask1 = np.random.rand(len(df1_100_n)) < 0.19  # 1111111111111
#         df1_100_n_ = df1_100_n[mask1] # 111111111111111
        df1_100= pd.concat([df1_100_p,df1_100_n_]).sort_index()
        df1_100=df1_100.dropna()

#         print(df1_100.bin.value_counts())

    # for i in range(0,101):

        y_col_glass = 'bin'
        x_cols_glass = list(df1_100.columns.values)
        x_cols_glass.remove(y_col_glass)
        
        X_train = df1_100[x_cols_glass].values
        Y_train = df1_100[y_col_glass].values

        
    #     train_test_ratio = 0.96
#         df_train, df_test, X_train, Y_train, X_test, Y_test = get_train_test1(df1_100,aaa, y_col_glass, x_cols_glass)

    #     print(df_test)
    #     cls = BalancedBaggingClassifier(n_estimators=1000) #,random_state=777)
        cls = BalancedRandomForestClassifier(n_estimators=1000,class_weight='balanced_subsample', criterion='entropy')

    #     cls = BClassifier(n_estimators=1000)
        cls.fit(X_train, Y_train)
#         test_score = cls.score(X_test, Y_test)
        probas = cls.predict_proba(X_test) 
        print("Test Score for the dataset {}".format(i))

        df_proba.loc[L, 'pro_'+str(i)]=probas[:,1]
        dump(cls, 'models_1_'+str(i)+'.joblib')

In [256]:
df_proba.to_csv('~/PycharmProjects/Trading_1/Stock-Trading-Environment/data/proba_1.csv')

In [None]:
# For making predictions :

df = pd.read_csv('ABSOLUTE_PATH_TO_YOUR_CSV_FILE', parse_dates=['Date'], index_col=["Date"])[-1110:]
df_log=pd.DataFrame()
df_fd=pd.DataFrame()

df_log['Open']= np.log(df.Open)
df_log['High']= np.log(df.High)
df_log['Low']= np.log(df.Low)
df_log['Close']= np.log(df.Close)
df_log['Volume']= np.log(df.Volume)

df_fd=df

df_fd[['Open_dff']]=fracDiff_FFD(df_log[['Open']].resample('1D').last().dropna(),d=0.21728515625,thres=0.00005)
df_fd[['High_dff']]=fracDiff_FFD(df_log[['High']].resample('1D').last().dropna(),d=0.22807397643208566 ,thres=0.00005)
df_fd[['Low_dff']]=fracDiff_FFD(df_log[['Low']].resample('1D').last().dropna(),d=0.2265625,thres=0.00005)
df_fd[['Close_dff']]=fracDiff_FFD(df_log[['Close']].resample('1D').last().dropna(),d=0.2265625,thres=0.00005)

df_fd[['Open_log']]=np.log(df[['Open']])
df_fd[['High_log']]=np.log(df[['High']])
df_fd[['Low_log']]=np.log(df[['Low']])
df_fd[['Close_log']]=np.log(df[['Close']])
df_fd[['Volume_log']]=np.log(df[['Volume']])


print ('step1')
#############
df_fd['spring_effect']=np.nan
df_fd['spring_effect_Open']=np.nan
df_fd['spring_effect_High']=np.nan
df_fd['spring_effect_Low']=np.nan
df_fd['spring_effect_2']=np.nan
df_fd['spring_effect_3']=np.nan
df_fd['spring_effect_4']=np.nan
df_fd['spring_effect_5']=np.nan
df_fd['spring_effect_10']=np.nan
df_fd['spring_effect_11']=np.nan
df_fd['spring_effect_12']=np.nan
df_fd['spring_effect_13']=np.nan
df_fd['spring_effect_14']=np.nan
df_fd['spring_effect_15']=np.nan
df_fd['spring_effect_16']=np.nan


df_fd.loc[264:,['spring_effect']] = [[(df_fd['Close_dff'][i]-lowpassfilter(df_fd['Close_dff'][i-264+1:i+1])[264-1])/
                                  lowpassfilter(df_fd['Close_dff'][i+1-264:i+1])[264-1]] for i in range(264,len(df_fd))]
df_fd.loc[264:,['spring_effect_2']] = [[(df_fd['Close_dff'][i]-df_fd['Close_dff'][i-264+1:i+1].rolling(2).mean()[-1])/
                                  df_fd['Close_dff'][i-264+1:i+1].rolling(2).mean()[-1]] for i in range(264,len(df_fd))]
df_fd.loc[264:,['spring_effect_3']] = [[(df_fd['Close_dff'][i]-df_fd['Close_dff'][i-264+1:i+1].rolling(3).mean()[-1])/
                                  df_fd['Close_dff'][i-264+1:i+1].rolling(3).mean()[-1]] for i in range(264,len(df_fd))]
df_fd.loc[264:,['spring_effect_4']] = [[(df_fd['Close_dff'][i]-df_fd['Close_dff'][i-264+1:i+1].rolling(4).mean()[-1])/
                                  df_fd['Close_dff'][i-264+1:i+1].rolling(4).mean()[-1]] for i in range(264,len(df_fd))]
df_fd.loc[264:,['spring_effect_5']] = [[(df_fd['Close_dff'][i]-df_fd['Close_dff'][i-264+1:i+1].rolling(5).mean()[-1])/
                                  df_fd['Close_dff'][i-264+1:i+1].rolling(5).mean()[-1]] for i in range(264,len(df_fd))]
df_fd.loc[264:,['spring_effect_10']] = [[(df_fd['Close_dff'][i]-df_fd['Close_dff'][i-264+1:i+1].rolling(10).mean()[-1])/
                                  df_fd['Close_dff'][i-264+1:i+1].rolling(10).mean()[-1]] for i in range(264,len(df_fd))]
df_fd.loc[264:,['spring_effect_11']] = [[(df_fd['Close_dff'][i]-df_fd['Close_dff'][i-264+1:i+1].rolling(11).mean()[-1])/
                                  df_fd['Close_dff'][i-264+1:i+1].rolling(11).mean()[-1]] for i in range(264,len(df_fd))]
df_fd.loc[264:,['spring_effect_12']] = [[(df_fd['Close_dff'][i]-df_fd['Close_dff'][i-264+1:i+1].rolling(12).mean()[-1])/
                                  df_fd['Close_dff'][i-264+1:i+1].rolling(12).mean()[-1]] for i in range(264,len(df_fd))]
df_fd.loc[264:,['spring_effect_13']] = [[(df_fd['Close_dff'][i]-df_fd['Close_dff'][i-264+1:i+1].rolling(13).mean()[-1])/
                                  df_fd['Close_dff'][i-264+1:i+1].rolling(13).mean()[-1]] for i in range(264,len(df_fd))]
df_fd.loc[264:,['spring_effect_14']] = [[(df_fd['Close_dff'][i]-df_fd['Close_dff'][i-264+1:i+1].rolling(14).mean()[-1])/
                                  df_fd['Close_dff'][i-264+1:i+1].rolling(14).mean()[-1]] for i in range(264,len(df_fd))]
df_fd.loc[264:,['spring_effect_15']] = [[(df_fd['Close_dff'][i]-df_fd['Close_dff'][i-264+1:i+1].rolling(15).mean()[-1])/
                                  df_fd['Close_dff'][i-264+1:i+1].rolling(15).mean()[-1]] for i in range(264,len(df_fd))]
df_fd.loc[264:,['spring_effect_16']] = [[(df_fd['Close_dff'][i]-df_fd['Close_dff'][i-264+1:i+1].rolling(16).mean()[-1])/
                                  df_fd['Close_dff'][i-264+1:i+1].rolling(16).mean()[-1]] for i in range(264,len(df_fd))]

df_fd.loc[264:,['spring_effect_Open']] = [[(df_fd['Open_dff'][i]-lowpassfilter(df_fd['Open_dff'][i-264+1:i+1])[264-1])/
                                  lowpassfilter(df_fd['Open_dff'][i-264+1:i+1])[264-1]] for i in range(264,len(df_fd))]
df_fd.loc[264:,['spring_effect_Low']] = [[(df_fd['Low_dff'][i]-lowpassfilter(df_fd['Low_dff'][i-264+1:i+1])[264-1])/
                                  lowpassfilter(df_fd['Low_dff'][i-264+1:i+1])[264-1]] for i in range(264,len(df_fd))]
df_fd.loc[264:,['spring_effect_High']] = [[(df_fd['High_dff'][i]-lowpassfilter(df_fd['High_dff'][i-264+1:i+1])[264-1])/
                                  lowpassfilter(df_fd['High_dff'][i-264+1:i+1])[264-1]] for i in range(264,len(df_fd))]
print("spring done")

############
df_fd['Smile_Close_2']=df_fd['Close_dff'].rolling(2).std().diff(periods=1)
df_fd['Smile_Close_3']=df_fd['Close_dff'].rolling(3).std().diff(periods=1)
df_fd['Smile_Close_4']=df_fd['Close_dff'].rolling(4).std().diff(periods=1)
df_fd['Smile_Close_5']=df_fd['Close_dff'].rolling(5).std().diff(periods=1)
df_fd['Smile_Close_6']=df_fd['Close_dff'].rolling(6).std().diff(periods=1)
df_fd['Smile_Close_7']=df_fd['Close_dff'].rolling(7).std().diff(periods=1)
df_fd['Smile_Close_8']=df_fd['Close_dff'].rolling(8).std().diff(periods=1)
df_fd['Smile_Close_9']=df_fd['Close_dff'].rolling(9).std().diff(periods=1)
df_fd['Smile_Close_10']=df_fd['Close_dff'].rolling(10).std().diff(periods=1)
df_fd['Smile_Close_11']=df_fd['Close_dff'].rolling(11).std().diff(periods=1)
df_fd['Smile_Close_12']=df_fd['Close_dff'].rolling(12).std().diff(periods=1)
df_fd['Smile_Close_13']=df_fd['Close_dff'].rolling(13).std().diff(periods=1)
df_fd['Smile_Close_14']=df_fd['Close_dff'].rolling(14).std().diff(periods=1)
df_fd['Smile_Close_15']=df_fd['Close_dff'].rolling(15).std().diff(periods=1)
df_fd['Smile_Close_16']=df_fd['Close_dff'].rolling(16).std().diff(periods=1)
df_fd['Smile_Close_17']=df_fd['Close_dff'].rolling(17).std().diff(periods=1)
df_fd['Smile_Close_18']=df_fd['Close_dff'].rolling(18).std().diff(periods=1)
df_fd['Smile_Close_19']=df_fd['Close_dff'].rolling(19).std().diff(periods=1)
df_fd['Smile_Close_20']=df_fd['Close_dff'].rolling(20).std().diff(periods=1)
df_fd['Smile_Close_21']=df_fd['Close_dff'].rolling(21).std().diff(periods=1)
df_fd['Smile_Close_22']=df_fd['Close_dff'].rolling(22).std().diff(periods=1)
df_fd['Smile_Close_30']=df_fd['Close_dff'].rolling(30).std().diff(periods=1)
df_fd['Smile_Close_40']=df_fd['Close_dff'].rolling(40).std().diff(periods=1)


##################

df_fd['bin']= np.nan  
df_fd.loc[:-1,['bin']] = [[1] if (df_fd['Close_dff'][i+1]-df_fd['Close_dff'][i])>=0.01 else [-1] for i in range(0,len(df_fd)-1)]



In [None]:
df_test1= df_fd[['spring_effect',
                           'spring_effect_Open','spring_effect_High', 'spring_effect_Low',
                       'spring_effect_2', 'spring_effect_3','spring_effect_4', 'spring_effect_5', 'spring_effect_10',
                       'spring_effect_11', 'spring_effect_12','spring_effect_13', 'spring_effect_14', 'spring_effect_15', 
                       'spring_effect_16', 'Smile_Close_2',  'Smile_Close_3', 'Smile_Close_4',
                       'Smile_Close_5', 'Smile_Close_6', 'Smile_Close_7', 'Smile_Close_8', 'Smile_Close_9', 'Smile_Close_10', 
                       'Smile_Close_11', 'Smile_Close_12', 'Smile_Close_13', 'Smile_Close_14', 'Smile_Close_15',
                       'Smile_Close_16', 'Smile_Close_17', 'Smile_Close_18', 'Smile_Close_19', 'Smile_Close_20',
                       'Smile_Close_21', 'Smile_Close_22', 'Smile_Close_30', 'Smile_Close_40',
                        'bin' ]][-3:].copy() 
df2 = df_test1[['bin']].copy()

cls_rn_for = load('models_1'.joblib') 
y_col_glass = 'bin'
x_cols_glass = list(df_test1.columns.values)
x_cols_glass.remove(y_col_glass)

X_test1 = df_test1[x_cols_glass].values

predictions = cls_rn_for.predict(X_test1)

probas = cls_rn_for.predict_proba(X_test1) 

df2['pro_']= probas[:,1]


In [48]:
df2.to_csv('~/PycharmProjects/Trading_1/Stock-Trading-Environment/data/prediction_1.csv')