In [3]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [4]:
# Load the dataset
df = pd.read_csv('pnb_stock_price.csv')

In [5]:
# Prepare the data for modeling
df['Change'] = df['Close Price'] - df['Open Price']
df['Direction'] = np.where(df['Change'] > 0, 1, -1)
df['Volatility'] = df['High Price']-df['Low Price']
df.index = pd.to_datetime(df['Date'])
df = df.drop(['Date', 'High Price', 'Low Price', 'Symbol', 'Series', 'Prev Close', 'Last Price', 'Average Price', 'Total Traded Quantity', 'Turnover', 'No. of Trades', 'Deliverable Qty'], axis=1)

# Rate of Change Indicator

The Formula for the Price Rate of Change Indicator Is:
n = 7
ROC = ((ClosingP - ClosingPn)/ClosingPn)*100

<!-- where:
ClosingP = Closing price of most recent period
ClosingPn = Closing price n periods before most recent period -->

In [6]:
# Rate of Change Indicator Code

def rateOfChange(ddf):
    count = 0
    tidel = 7  #time delay
    list = []

    for i in ddf['Close Price']:
        count += 1
        if(count<tidel):
            list.append(0.0)
        else:
            list.append(round(((i - ddf['Close Price'][count-tidel])/ddf['Close Price'][count-tidel])*100,2))
    return list


df['ROC'] = rateOfChange(df)

# Relative Strength Index
RSI = (100 - (100/(1-(AverageGain/AverageLoss))))
n=7

In [7]:
# Relative Strength Index Code

def relativeStrengthIndex(ddf):
    count=0
    nDelay = 7
    Gains = []
    Loss = []
    avgGain = []
    avgLoss = []

    #For Seperating Gain and Loss
    for i in ddf['Change']:
        count += 1
        if(i<0):
            Gains.append(0.0)
            Loss.append(round(i,2))
        else:
            Gains.append(round(i,2))
            Loss.append(0.0)

    #For Average Gain and Loss

    for i in range(0,len(Gains)):
        if(i<nDelay):
            avgGain.append(0.0)
            avgLoss.append(0.0)
        else:
            avgGain.append(sum(Gains[i-nDelay:i])/len(Gains[i-nDelay:i]))
            avgLoss.append(sum(Loss[i-nDelay:i])/len(Loss[i-nDelay:i]))


    #Calculate RSI
    RSI = []
    for i in range(0, len(Gains)):
        if(i<2*nDelay):
            RSI.append(0.0)
        else:
            RSI.append(round(100 - (100/(1-(avgGain[i]/(avgLoss[i]+0.000000001)))),2))

    return RSI


df['RSI'] = relativeStrengthIndex(df)

In [8]:
# Split the data into training and testing sets
X = df.drop('Direction', axis=1).values
y = df['Direction'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=4)

In [9]:
# Scale the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [10]:
# Create a Random Forest Classifier
clf = RandomForestClassifier(n_estimators=100)

In [11]:
# Train the classifier on the training data
clf.fit(X_train, y_train)

In [12]:
# Predict the labels of the test data
y_pred = clf.predict(X_test)

In [13]:
# Evaluate the accuracy of the model
accuracy = np.mean(y_pred == y_test)
print("Accuracy:", accuracy)


Accuracy: 1.0


# Testing Model with New Data

In [14]:
test_data = pd.read_csv('Latest_Test_PNBALLN.csv')

In [15]:
# Prepare the data for modeling
test_data['Change'] = test_data['Close Price'] - test_data['Open Price']
test_data['Direction'] = np.where(test_data['Change'] > 0, 1, -1)
test_data['Volatility'] = test_data['High Price']-test_data['Low Price']
test_data.index = pd.to_datetime(test_data['Date'])
test_data = test_data.drop(['Date', 'High Price', 'Low Price', 'Symbol', 'Series', 'Prev Close', 'Last Price', 'Average Price', 'Total Traded Quantity', 'Turnover', 'No. of Trades', 'Deliverable Qty'], axis=1)

In [16]:
# Rate of Change Indicator Code

test_data['ROC'] = rateOfChange(test_data)

In [17]:
# Relative Strength Index Code

test_data['RSI'] = relativeStrengthIndex(test_data)

In [18]:
# Divide Data into Features and Output
test_data_X = test_data.drop('Direction', axis=1).values
test_data_y = test_data['Direction'].values

In [19]:
# Scale the data
scaler = StandardScaler()
test_data_X = scaler.fit_transform(test_data_X)

In [20]:
prediction = clf.predict(test_data_X)
print("Prediction:", prediction)

Prediction: [ 1  1 -1 -1 -1  1  1 -1  1  1]


In [21]:
print("Outputs_y:", test_data_y)

Outputs_y: [-1 -1 -1 -1 -1 -1  1 -1  1  1]


In [22]:
# Accuracy
accuracy = clf.score(test_data_X, test_data_y)
accuracy

0.7