In [138]:
import pandas as pd
import pandas_ta as ta
import numpy as np

In [139]:
# Reading the data
columns = ['index', 'date', 'open', 'high', 'low', 'close', 'volume']
df = pd.read_csv('/content/Nifty 50_ONE_DAY.csv', names=columns)
df = df.drop(columns=['index', 'volume'])
df.head()

Unnamed: 0,date,open,high,low,close
0,2016-10-03T00:00:00+05:30,8666.15,8745.2,8635.0,8738.1
1,2016-10-04T00:00:00+05:30,8770.0,8783.65,8736.1,8769.15
2,2016-10-05T00:00:00+05:30,8806.35,8806.95,8731.4,8743.95
3,2016-10-06T00:00:00+05:30,8768.7,8781.15,8684.65,8709.55
4,2016-10-07T00:00:00+05:30,8721.7,8723.7,8663.8,8697.6


In [140]:
# Technical Indicators
df['ATR'] = df.ta.atr(length=20)
df['RSI'] = df.ta.rsi()
df['Average'] = df.ta.midprice(length=1)
df['MA40'] = df.ta.sma(length=40)
df['MA80'] = df.ta.sma(length=80)
df['MA160'] = df.ta.sma(length=160)

In [141]:
df.tail()

Unnamed: 0,date,open,high,low,close,ATR,RSI,Average,MA40,MA80,MA160
1755,2023-11-20T00:00:00+05:30,19731.15,19756.45,19670.5,19694.0,157.518013,58.279253,19713.475,19494.90625,19550.108125,19032.099062
1756,2023-11-21T00:00:00+05:30,19770.9,19829.1,19754.05,19783.4,156.397112,61.106924,19791.575,19497.635,19550.236875,19049.797188
1757,2023-11-22T00:00:00+05:30,19784.0,19825.55,19703.85,19811.85,154.662256,61.989814,19764.7,19501.0675,19551.6325,19066.866875
1758,2023-11-23T00:00:00+05:30,19828.45,19875.15,19786.75,19802.0,151.349143,61.469539,19830.95,19504.5,19553.6825,19082.130937
1759,2023-11-24T00:00:00+05:30,19809.6,19832.85,19768.85,19794.7,146.981686,61.06052,19800.85,19506.45625,19554.356875,19097.11


In [142]:
pipdiff = 150 #for TP
SLTPRatio = 1 #pipdiff/Ratio gives SL

def mytarget(barsupfront, df1):
    length = len(df1)
    high = list(df1['high'])
    low = list(df1['low'])
    close = list(df1['close'])
    open = list(df1['open'])
    trendcat = [None] * length

    for line in range (0,length-barsupfront-2):
        valueOpenLow = 0
        valueOpenHigh = 0
        for i in range(1,barsupfront+2):
            value1 = open[line+1]-low[line+i]
            value2 = open[line+1]-high[line+i]
            valueOpenLow = max(value1, valueOpenLow)
            valueOpenHigh = min(value2, valueOpenHigh)

            if ( (valueOpenLow >= pipdiff) and (-valueOpenHigh <= (pipdiff/SLTPRatio)) ):
                trendcat[line] = 1 #-1 downtrend
                break
            elif ( (valueOpenLow <= (pipdiff/SLTPRatio)) and (-valueOpenHigh >= pipdiff) ):
                trendcat[line] = 2 # uptrend
                break
            else:
                trendcat[line] = 0 # no clear trend

    return trendcat
df['mytarget'] = mytarget(100, df)
df.groupby('mytarget').count()


Unnamed: 0_level_0,date,open,high,low,close,ATR,RSI,Average,MA40,MA80,MA160
mytarget,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0.0,15,15,15,15,15,15,15,15,15,15,15
1.0,788,788,788,788,788,769,775,788,756,741,735
2.0,855,855,855,855,855,854,854,855,848,823,749


In [143]:
df.dropna(inplace=True)

In [144]:
df_model = df[['ATR', 'RSI','Average','MA40','MA80','MA160']]
X = df_model[:]
y = df['mytarget']

In [145]:
print(len(X), len(y))

1499 1499


In [146]:
# Splitting data into training and testing
train_index = int(0.8 * len(X))
X_train, X_test = X[:train_index], X[train_index:]
y_train, y_test = y[:train_index], y[train_index:]

In [147]:
# Training the model

from sklearn.neighbors import KNeighborsClassifier

model = KNeighborsClassifier(n_neighbors=200)
model.fit(X_train, y_train)

y_pred_train = model.predict(X_train)
y_pred_test = np.array(model.predict(X_test))

In [148]:
# Testing the model

from sklearn.metrics import accuracy_score
accuracy_train = accuracy_score(y_train, y_pred_train)
accuracy_test = accuracy_score(y_test, y_pred_test)
print("Accuracy train: %.2f%%" % (accuracy_train * 100.0))
print("Accuracy test: %.2f%%" % (accuracy_test * 100.0))

# Gambler Model for comparision
pred_test = np.random.choice([0, 1, 2], len(y_pred_test))
accuracy_test = accuracy_score(y_test, pred_test)
print("Accuracy Gambler: %.2f%%" % (accuracy_test * 100.0))

Accuracy train: 54.38%
Accuracy test: 52.00%
Accuracy Gambler: 36.00%
