*** 1: Importing libraries ***

In [3]:
import investpy
from datetime import datetime
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn import metrics
from scipy import stats 
import numpy as np
import pandas as pd
import talib


***2:Fetching the data***



In [31]:
usdinr = investpy.get_currency_cross_historical_data(currency_cross='USD/INR', from_date='01/12/2019', to_date='29/10/2020')

dfusd = investpy.get_commodity_historical_data(commodity='Silver', from_date='01/12/2019', to_date='29/10/2020')

df = investpy.commodities.get_commodity_historical_data(commodity='MCX Silver',from_date='01/12/2019',to_date='29/10/2020', country = "India")

df['High'] = df['High'] / 1000
df['Low'] = df['Low'] / 1000
df['Open'] = df['Open'] / 1000
df['Close'] = df['Close'] / 1000

df.drop(['Currency'], axis = 1)

df['usdinr'] = usdinr['Close']

df['usdsilver'] = dfusd['Close']



In [7]:
print(df.tail(5))

              Open    High     Low   Close  Volume Currency  usdinr  usdsilver
Date                                                                          
2020-10-23  62.658  63.066  62.063  62.449   21726      INR  73.829     24.675
2020-10-26  61.720  62.480  61.251  61.906   25339      INR  73.960     24.420
2020-10-27  62.341  62.580  61.510  62.281   20830      INR  73.622     24.570
2020-10-28  62.060  62.500  59.100  60.138   38872      INR  74.180     23.359
2020-10-29  60.069  60.334  58.475  58.671   19977      INR  74.438     22.780


***3. Calculating the z-scores of the closing prices***

In [8]:
df['zscore'] = stats.zscore(df['Close'])

print(df['zscore'].tail(30))



Date
2020-09-17    1.727376
2020-09-18    1.699703
2020-09-21    1.014561
2020-09-22    1.003805
2020-09-23    0.719243
2020-09-24    0.838394
2020-09-25    0.775529
2020-09-28    0.918489
2020-09-29    1.134652
2020-09-30    0.868677
2020-10-01    0.996704
2020-10-05    1.079828
2020-10-06    0.936763
2020-10-07    0.920891
2020-10-08    0.931333
2020-10-09    1.178302
2020-10-12    1.200649
2020-10-13    0.933735
2020-10-14    1.044532
2020-10-15    1.037431
2020-10-16    1.052155
2020-10-19    1.095909
2020-10-20    1.203364
2020-10-21    1.256100
2020-10-22    1.150211
2020-10-23    1.132876
2020-10-26    1.076173
2020-10-27    1.115333
2020-10-28    0.891547
2020-10-29    0.738353
Name: zscore, dtype: float64


***4. Calculating the pivot points, support and resistance levels ***

In [9]:
pd.set_option('display.max_columns', 10)
PP = pd.Series((df['High'] + df['Low'] + df['Close']) / 3)  
R1 = pd.Series(2 * PP - df['Low'])  
S1 = pd.Series(2 * PP - df['High'])  
R2 = pd.Series(PP + df['High'] - df['Low'])  
S2 = pd.Series(PP - df['High'] + df['Low'])  
R3 = pd.Series(df['High'] + 2 * (PP - df['Low']))  
S3 = pd.Series(df['Low'] - 2 * (df['High'] - PP))  
psr = {'PP':PP, 'R1':R1, 'S1':S1, 'R2':R2, 'S2':S2, 'R3':R3, 'S3':S3}  
PSR = pd.DataFrame(psr)  
print(PSR.tail())


                   PP         R1         S1         R2         S2         R3  \
Date                                                                           
2020-10-23  62.526000  62.989000  61.986000  63.529000  61.523000  63.992000   
2020-10-26  61.879000  62.507000  61.278000  63.108000  60.650000  63.736000   
2020-10-27  62.123667  62.737333  61.667333  63.193667  61.053667  63.807333   
2020-10-28  60.579333  62.058667  58.658667  63.979333  57.179333  65.458667   
2020-10-29  59.160000  59.845000  57.986000  61.019000  57.301000  61.704000   

                   S3  
Date                   
2020-10-23  60.983000  
2020-10-26  60.049000  
2020-10-27  60.597333  
2020-10-28  55.258667  
2020-10-29  56.127000  


*** 5 Calculating high/low percentage change ***

In [10]:
df[ 'PCT_Change' ] = (df[ 'Close' ] - df[ 'Open' ]) / df[ 'Open' ]


In [11]:
df[ 'HL_PCT' ] = (df[ 'High' ] - df[ 'Low' ]) / df[ 'Close' ]
print(df.tail())


              Open    High     Low   Close  Volume  ...  usdinr  usdsilver  \
Date                                                ...                      
2020-10-23  62.658  63.066  62.063  62.449   21726  ...  73.829     24.675   
2020-10-26  61.720  62.480  61.251  61.906   25339  ...  73.960     24.420   
2020-10-27  62.341  62.580  61.510  62.281   20830  ...  73.622     24.570   
2020-10-28  62.060  62.500  59.100  60.138   38872  ...  74.180     23.359   
2020-10-29  60.069  60.334  58.475  58.671   19977  ...  74.438     22.780   

              zscore  PCT_Change    HL_PCT  
Date                                        
2020-10-23  1.132876   -0.003336  0.016061  
2020-10-26  1.076173    0.003014  0.019853  
2020-10-27  1.115333   -0.000962  0.017180  
2020-10-28  0.891547   -0.030970  0.056537  
2020-10-29  0.738353   -0.023273  0.031685  

[5 rows x 11 columns]


*** 6. Calculating the moving average ***

In [12]:

df[ '5d_ma' ] = round(df[ 'Close' ].rolling( window = 5 , min_periods = 0 ).mean(),2)
df[ '20d_ma' ] = round(df[ 'Close' ].rolling( window = 20 , min_periods = 0 ).mean(),2)
df['50d_ma'] = round(df[ 'Close' ].rolling(window = 50, min_periods = 0).mean(),2)
df['100d_ma'] = round(df[ 'Close' ].rolling(window = 100, min_periods = 0).mean(),2)
df['200d_ma'] = round(df[ 'Close' ].rolling(window = 200, min_periods = 0).mean(),2)

#df[ '20d_ma' ] = round(df.Close.ewm(span=21, adjust=False).mean(),2)
#df['50d_ma'] = round(df.Close.ewm(span=49, adjust=False).mean(),2)

#df['100d_ma'] = round(df.Close.ewm(span=98, adjust=False).mean(),2)
#df['200d_ma'] = round(df.Close.ewm(span=196, adjust=False).mean(),2)

print(df[['Close','5d_ma','20d_ma','50d_ma','100d_ma','200d_ma']].tail(7))



             Close  5d_ma  20d_ma  50d_ma  100d_ma  200d_ma
Date                                                       
2020-10-21  63.629  62.41   61.24   64.75    59.85    52.17
2020-10-22  62.615  62.63   61.44   64.67    59.99    52.24
2020-10-23  62.449  62.78   61.58   64.49    60.12    52.32
2020-10-26  61.906  62.74   61.73   64.39    60.27    52.40
2020-10-27  62.281  62.58   61.82   64.25    60.41    52.48
2020-10-28  60.138  61.88   61.70   64.06    60.53    52.55
2020-10-29  58.671  61.09   61.64   63.88    60.64    52.61


***7. Calculating standard deviation ***

In [13]:
df['Std_dev']= df['Close'].rolling(7).std()  
print(df['Std_dev'].tail(7))


Date
2020-10-21    1.042806
2020-10-22    0.820813
2020-10-23    0.756501
2020-10-26    0.692300
2020-10-27    0.604622
2020-10-28    1.110273
2020-10-29    1.688569
Name: Std_dev, dtype: float64


***8. Calculating the RSI level***

In [14]:
df['RSI'] = talib.RSI(df['Close'].values, timeperiod = 9)    
print(df[['RSI']].tail())


                  RSI
Date                 
2020-10-23  50.779743
2020-10-26  47.052882
2020-10-27  49.909126
2020-10-28  37.057182
2020-10-29  30.924481


***9. Calculating the williams % R ***

In [15]:
df['Williams %R'] = talib.WILLR(df['High'].values,df['Low'].values, df['Close'].values, 7)
 
print(df[['Williams %R']].tail(7))


            Williams %R
Date                   
2020-10-21   -10.376471
2020-10-22   -34.235294
2020-10-23   -38.141176
2020-10-26   -74.801244
2020-10-27   -61.838922
2020-10-28   -79.114688
2020-10-29   -96.496872


***10. Average Directional Index ***

In [16]:
df['ADX'] = talib.ADX(df['High'].values,df['Low'].values, df['Close'].values, 7)


print(df[['ADX']].tail(7))


                  ADX
Date                 
2020-10-21  18.008233
2020-10-22  16.015403
2020-10-23  14.307263
2020-10-26  14.954488
2020-10-27  15.141639
2020-10-28  19.828925
2020-10-29  24.610467


***11. MACD ***

In [17]:
ShortEMA = df.Close.ewm(span=12, adjust=False).mean() #AKA Fast moving average
#Calculate the Long Term Exponential Moving Average
LongEMA = df.Close.ewm(span=26, adjust=False).mean() #AKA Slow moving average
#Calculate the Moving Average Convergence/Divergence (MACD)
MACD = ShortEMA - LongEMA
#Calcualte the signal line
signal = MACD.ewm(span=9, adjust=False).mean()

df['MACD'] = MACD

df['Signal Line'] = signal

df['MACD_IND'] = df['MACD'] - df['Signal Line']

print(df[['MACD_IND']].tail())


            MACD_IND
Date                
2020-10-23  0.338732
2020-10-26  0.266362
2020-10-27  0.236591
2020-10-28  0.073847
2020-10-29 -0.120736


***12. Bollinger Bands***

In [18]:

from talib import MA_Type

upper, middle, lower = talib.BBANDS(df['Close'].values, matype=MA_Type.T3)

print(round(upper[-1],2),round(middle[-1],2),round(lower[-1],2))


64.54 61.62 58.69


In [19]:
df.dropna( inplace = True )


***Predicting the movement***

In [21]:

tslagret = pd.DataFrame(index=df.index)
tslagret["Today"] = df["Close"]
no_lags = 6
for i in range(0, no_lags):
    tslagret["Lag%s" % str(i + 1)] = df["Close"].shift(i + 1)
    

df_ret = pd.DataFrame(index=tslagret.index)
df_ret["Today"] = tslagret["Today"].pct_change()*100.0

for i in range(0, no_lags):
    df_ret["Lag%s" % str(i + 1)] = tslagret["Lag%s" % str(i + 1)].pct_change() * 100.0

df_ret.drop(df_ret.index[:7], inplace=True)


df_ret["Direction"] = np.sign(df_ret["Today"])

df_ret["Direction"] = np.where(df_ret["Direction"] <= 0, -1 , 1)

#df_ret["Direction"] = np.where(df['Close'].shift(-1) > df['Close'], 1,-1)



data = df_ret.copy()

X = data[["Lag1", "Lag2", "Lag3", "Lag4","Lag5","Lag6"]]

X['usdinr'] = df['usdinr']

X['usdsilver'] = df['usdsilver']

X["20d_ma"] = df[ '20d_ma' ]

X["50d_ma"] =  df['50d_ma']

X["100d_ma"] =  df['100d_ma']

X["200d_ma"] =  df['200d_ma']

X['Std_dev'] = df['Std_dev']

X['RSI'] = df['RSI']

X['Williams %R'] = df['Williams %R']

X['MACD_IND'] = df['MACD_IND'] 


y = data["Direction"]

print(X.tail())

data.describe()



                Lag1      Lag2      Lag3      Lag4      Lag5  ...  200d_ma  \
Date                                                          ...            
2020-10-23 -1.593613  0.800013  1.657138  0.679357  0.229138  ...    52.32   
2020-10-26 -0.265112 -1.593613  0.800013  1.657138  0.679357  ...    52.40   
2020-10-27 -0.869510 -0.265112 -1.593613  0.800013  1.657138  ...    52.48   
2020-10-28  0.605757 -0.869510 -0.265112 -1.593613  0.800013  ...    52.55   
2020-10-29 -3.440857  0.605757 -0.869510 -0.265112 -1.593613  ...    52.61   

             Std_dev        RSI  Williams %R  MACD_IND  
Date                                                    
2020-10-23  0.756501  50.779743   -38.141176  0.338732  
2020-10-26  0.692300  47.052882   -74.801244  0.266362  
2020-10-27  0.604622  49.909126   -61.838922  0.236591  
2020-10-28  1.110273  37.057182   -79.114688  0.073847  
2020-10-29  1.688569  30.924481   -96.496872 -0.120736  

[5 rows x 16 columns]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X['usdinr'] = df['usdinr']


Unnamed: 0,Today,Lag1,Lag2,Lag3,Lag4,Lag5,Lag6,Direction
count,213.0,213.0,213.0,213.0,213.0,213.0,213.0,213.0
mean,0.141321,0.151664,0.169252,0.169608,0.182378,0.191538,0.200151,0.089202
std,2.66247,2.65667,2.64515,2.645214,2.646739,2.648558,2.645707,0.99836
min,-11.221052,-11.221052,-11.221052,-11.221052,-11.221052,-11.221052,-11.221052,-1.0
25%,-0.903496,-0.86951,-0.850223,-0.850223,-0.844359,-0.844359,-0.816205,-1.0
50%,0.169328,0.169328,0.195046,0.195046,0.21262,0.219325,0.229138,1.0
75%,1.205706,1.205706,1.205706,1.205706,1.301824,1.323359,1.323359,1.0
max,7.031671,7.031671,7.031671,7.031671,7.031671,7.031671,7.031671,1.0


In [22]:
start_test = datetime(2020, 8, 1)

X_train = X[X.index < start_test]
X_test = X[X.index >= start_test]
y_train = y[y.index < start_test]
y_test = y[y.index >= start_test]


In [23]:

model = LogisticRegression()


In [24]:
model.fit(X_train, y_train)


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


LogisticRegression()

In [25]:
y_pred = model.predict(X_test)


In [26]:
print('Accuracy: ',metrics.accuracy_score(y_test, y_pred))


Accuracy:  0.8253968253968254


In [27]:
print(confusion_matrix(y_pred,y_test))


[[22  1]
 [10 30]]


In [28]:
comp = pd.DataFrame()
comp['y_test'] = y_test
comp['y_pred'] = y_pred


In [29]:

print(comp)

predict = model.predict(X_test.tail(1))
print(int(predict)) # 1: UP, -1: DOWN


            y_test  y_pred
Date                      
2020-08-03       1       1
2020-08-04       1       1
2020-08-05       1       1
2020-08-06       1       1
2020-08-07      -1      -1
...            ...     ...
2020-10-23      -1       1
2020-10-26      -1      -1
2020-10-27       1       1
2020-10-28      -1      -1
2020-10-29      -1      -1

[63 rows x 2 columns]
-1
