<a href="https://colab.research.google.com/github/queenakki/AI-ML/blob/project/Stockmarket.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Stock market data of Reliance companies**

In [46]:
import pandas as pd
import numpy as ny
import matplotlib.pyplot as mlt

# **importing the data**

In [47]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# **Loading the data**

In [48]:
stock=pd.read_csv("/content/drive/MyDrive/RELIANCE.NS.csv")
stock.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,1/1/1996,15.859429,16.783918,11.763055,13.673923,9.001143,3587399513
1,2/1/1996,13.538537,18.950085,13.499856,16.389366,10.788639,6910292355
2,3/1/1996,16.517015,18.412411,14.892391,16.00255,10.534009,5434431895
3,4/1/1996,16.072178,20.307806,15.936792,18.926874,12.459007,4130762758
4,5/1/1996,20.655939,20.880293,17.449242,18.439487,12.138176,5496793040


# **satestical information**

In [49]:
stock.shape

(325, 7)

In [50]:
stock.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 325 entries, 0 to 324
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Date       325 non-null    object 
 1   Open       325 non-null    float64
 2   High       325 non-null    float64
 3   Low        325 non-null    float64
 4   Close      325 non-null    float64
 5   Adj Close  325 non-null    float64
 6   Volume     325 non-null    int64  
dtypes: float64(5), int64(1), object(1)
memory usage: 17.9+ KB


pie chart

# **Binary conversion**

In [51]:
timefmt = "%d/%m/%y %S:%M:%H" 
stock['Date'] = pd.to_datetime(stock['Date'], format = timefmt, errors='coerce')

In [52]:
stock.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,NaT,15.859429,16.783918,11.763055,13.673923,9.001143,3587399513
1,NaT,13.538537,18.950085,13.499856,16.389366,10.788639,6910292355
2,NaT,16.517015,18.412411,14.892391,16.00255,10.534009,5434431895
3,NaT,16.072178,20.307806,15.936792,18.926874,12.459007,4130762758
4,NaT,20.655939,20.880293,17.449242,18.439487,12.138176,5496793040


# **Categorical conversion**

In [53]:
# Dropping the first column from status dataset
status = pd.get_dummies(stock['Date'], drop_first = True)

# Adding the status to the original housing dataframe
stock = pd.concat([stock, status], axis = 1)

# Dropping 'furnishingstatus' as we have created the dummies for it
stock.drop(['Date'], axis = 1, inplace = True)

stock

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
0,15.859429,16.783918,11.763055,13.673923,9.001143,3587399513
1,13.538537,18.950085,13.499856,16.389366,10.788639,6910292355
2,16.517015,18.412411,14.892391,16.002550,10.534009,5434431895
3,16.072178,20.307806,15.936792,18.926874,12.459007,4130762758
4,20.655939,20.880293,17.449242,18.439487,12.138176,5496793040
...,...,...,...,...,...,...
320,2582.649902,2629.699951,2311.000000,2377.750000,2377.750000,118852012
321,2391.500000,2560.949951,2343.100098,2549.600098,2549.600098,94498167
322,2600.000000,2745.449951,2502.000000,2731.350098,2731.350098,104414767
323,2741.800049,2755.000000,2700.600098,2722.149902,2722.149902,9726291


# **checking null value**

In [54]:
stock.isnull().sum()

Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64

# **outerline direction**

In [55]:
def detect_outliers_zscore(data):
    outliers = []
    thres = 3
    mean = ny.mean(data)
    std = ny.std(data)
    #print(mean, std)
    for i in data:
        #print(i)
        z_score = (i-mean)/std
        if (ny.abs(z_score) > thres):
            outliers.append(i)
    return outliers# Driver code

In [56]:
sample_outliers_Open = detect_outliers_zscore(stock['Open'])
sample_outliers_Volume = detect_outliers_zscore(stock['Volume'])
sample_outliers_Low = detect_outliers_zscore(stock['Low'])
print("Outliers in tata-volume column from Z-scores method: ", sample_outliers_Open)
print("Outliers in reliance-volume from Z-scores method: ", sample_outliers_Volume)
print("Outliers in amazon-volume column from Z-scores method: ", sample_outliers_Low)

Outliers in tata-volume column from Z-scores method:  [2636.0, 2762.0, 2634.300049, 2574.899902, 2582.649902, 2600.0, 2741.800049, 2730.0]
Outliers in reliance-volume from Z-scores method:  [3587399513, 6910292355, 5434431895, 4130762758, 5496793040, 6504446914, 4061157843, 3331175205]
Outliers in amazon-volume column from Z-scores method:  [2495.0, 2521.800049, 2445.0, 2507.600098, 2502.0, 2700.600098, 2700.600098]


In [57]:
from scipy.stats import zscore
z1 = ny.abs(zscore(stock['Low']))
z2 = ny.abs(zscore(stock['Close']))
z3 = ny.abs(zscore(stock['Volume']))
stock['Zscore_tata-volume'] = z1
stock['Zscore_Reliance-Volume'] = z2
stock['Zscore_Reliance-Volume.1'] = z3
outliers_tata_volume=len(stock[stock['Zscore_tata-volume']>3])
outliers_Reliance_Volume=len(stock[stock['Zscore_Reliance-Volume']>3])
outliers_Reliance_Volume_1=len(stock[stock['Zscore_Reliance-Volume.1']>3])
data_z = stock[stock['Zscore_tata-volume']<3]
data_z = stock[stock['Zscore_Reliance-Volume']<3]
data_z = stock[stock['Zscore_Reliance-Volume.1']<3]
stock=data_z.drop(['Zscore_tata-volume', 'Zscore_Reliance-Volume', 'Zscore_Reliance-Volume.1'], axis=1)
stock

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
5,18.799227,19.452944,16.064442,16.524752,11.329605,2949582293
7,16.861280,17.019875,15.020040,16.366158,11.220869,2675395277
8,16.408707,16.625324,14.343113,14.780215,10.133524,1704430739
9,14.853709,17.089502,13.964034,15.627340,10.714327,2351973533
10,15.739517,16.710423,13.441833,14.025925,9.616371,1750434601
...,...,...,...,...,...,...
320,2582.649902,2629.699951,2311.000000,2377.750000,2377.750000,118852012
321,2391.500000,2560.949951,2343.100098,2549.600098,2549.600098,94498167
322,2600.000000,2745.449951,2502.000000,2731.350098,2731.350098,104414767
323,2741.800049,2755.000000,2700.600098,2722.149902,2722.149902,9726291


# **Missing value treatement**

In [58]:
m1=stock['Open'].mean()
m2=stock['Volume'].mean()
m3=stock['Low'].mean()
stock['Open'].fillna(value=m1, inplace=True)
stock['Volume'].fillna(value=m2, inplace=True)
stock['Low'].fillna(value=m3, inplace=True)
stock

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
5,18.799227,19.452944,16.064442,16.524752,11.329605,2949582293
7,16.861280,17.019875,15.020040,16.366158,11.220869,2675395277
8,16.408707,16.625324,14.343113,14.780215,10.133524,1704430739
9,14.853709,17.089502,13.964034,15.627340,10.714327,2351973533
10,15.739517,16.710423,13.441833,14.025925,9.616371,1750434601
...,...,...,...,...,...,...
320,2582.649902,2629.699951,2311.000000,2377.750000,2377.750000,118852012
321,2391.500000,2560.949951,2343.100098,2549.600098,2549.600098,94498167
322,2600.000000,2745.449951,2502.000000,2731.350098,2731.350098,104414767
323,2741.800049,2755.000000,2700.600098,2722.149902,2722.149902,9726291


# **Data normalization**

In [59]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
stock_norm = scaler.fit_transform(stock)

stock_norm


array([[1.76653432e-03, 1.10194627e-03, 9.75978614e-04, 9.00081165e-04,
        6.17992233e-04, 9.12624120e-01],
       [1.06132762e-03, 2.45179433e-04, 5.87314495e-04, 8.42955373e-04,
        5.78769335e-04, 8.27634424e-01],
       [8.96639159e-04, 1.06244527e-04, 3.35402649e-04, 2.71696369e-04,
        1.86545759e-04, 5.26664737e-01],
       ...,
       [9.41049221e-01, 9.61018760e-01, 9.26092891e-01, 9.78784169e-01,
        9.81774997e-01, 3.07081009e-02],
       [9.92649365e-01, 9.64381659e-01, 1.00000000e+00, 9.75470244e-01,
        9.78456332e-01, 1.35753259e-03],
       [9.88355402e-01, 9.61916719e-01, 1.00000000e+00, 9.75470244e-01,
        9.78456332e-01, 0.00000000e+00]])

In [60]:
x1=stock.drop('High',axis=1)

y1=stock['High']

print(type(x1))
print(type(y1))
print(x1.shape)
print(y1.shape)

<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.series.Series'>
(317, 5)
(317,)


# **Data Split**

In [61]:
from sklearn.model_selection import train_test_split


In [62]:
x1_train,x1_test,y1_train,y1_test=train_test_split(x1,y1,test_size=0.2)
print(x1_train.shape)
print(x1_test.shape)
print(y1_train.shape)
print(y1_test.shape)

(253, 5)
(64, 5)
(253,)
(64,)


# **creating confussion matrix of class report**

In [63]:
from sklearn.metrics import confusion_matrix,classification_report,accuracy_score

In [64]:
def eval_model(y1_test,y1pred):
  cm=confusion_matrix(y1_test,y1pred)
  print(cm)
  print('accuracy score',accuracy_score(y1_test,y1pred))
  print(classification_report(y1_test,y1pred))
def mscore(model):
  print('training score',model.score(x1_train,y1_train))
  print('testing score',model.score(x1_test,y1_test))

# **modeling**

In [65]:
from sklearn.linear_model import LinearRegression

In [66]:
m1=LinearRegression()
m1.fit(x1_train,y1_train)

LinearRegression()

# **y=mx+c**

In [67]:
m=m1.coef_
c=m1.intercept_

In [68]:
mscore(m1)

training score 0.9985117824284477
testing score 0.9985142000793412


# **Prediction on Training and Testing Data**

In [69]:
y_pred_test =  m1.predict(x1_test)
y_pred_train =  m1.predict(x1_train)

# **Error Metrics Computation**

In [70]:
import math
from sklearn.metrics import mean_squared_error 
from sklearn.metrics import mean_absolute_error


#Training Accuracies
rmse = math.sqrt(mean_squared_error(y1_train, y_pred_train)) 
print('Root mean square error', rmse) 
mse = (mean_squared_error(y1_train, y_pred_train)) 
print('Mean square error', mse) 
mae=mean_absolute_error(y1_train, y_pred_train)
print('Mean absolute error', mae)

Root mean square error 27.353607739959198
Mean square error 748.2198563915557
Mean absolute error 16.088556500946737


In [71]:
#Training Accuracies
rmse = math.sqrt(mean_squared_error(y1_test, y_pred_test)) 
print('Root mean square error', rmse) 
mse = (mean_squared_error(y1_test, y_pred_test)) 
print('Mean square error', mse) 
mae=mean_absolute_error(y1_test, y_pred_test)
print('Mean absolute error', mae)

Root mean square error 28.407964164149135
Mean square error 807.0124279515815
Mean absolute error 15.090118660717334


# **model saving**

In [72]:
import pickle
# Save the model
filename = 'model.pkl'
pickle.dump(m1, open(filename, 'wb'))

# **Deployment**

In [73]:
list_of_columns = stock.columns
input_data=pd.DataFrame(columns=list_of_columns)
input_data.drop(['High'], axis='columns', inplace=True)



input_data.at[0, 'Low'] = input('enter Low value')
input_data.at[0, 'Open'] = input('enter Open value')
input_data.at[0, 'Close'] = input('enter Close value')
input_data.at[0, 'Aclose'] = input('enter Adj Close value')
input_data.at[0, 'Volume'] = input('enter Volume')

enter Low value13.34
enter Open value14.97
enter Close value13.89
enter Adj Close value14.98
enter Volume514367873


# **Testing the model**

In [92]:
def gen_model(model,x1_test,x1_train,y1_test,y1_train):
  model.fit(x1_train,y1_train)
  print('training score',model.score(x1_train,y1_train))
  print('testing score',model.score(x1_test,y1_test))
  ypred=model.predict(x1_test,y1_test)
  cm=confusion_matrix(y1_test,y_pred_test)
  print(cm)
  print(classification_report(y1_test,y_pred_test))

In [95]:
print(y_pred_test.shape)

(64,)


In [93]:
x1_test.columns

Index(['Open', 'Low', 'Close', 'Adj Close', 'Volume'], dtype='object')

In [94]:
submission_test=pd.DataFrame({'Test_Open':x1_test['Open'],'Test_Close':x1_test['Close'],'Test_Volume':x1_test['Volume'],'Test_Low':x1_test['Low'],'pred_High':y_pred_test})
submission_test.head(10)

Unnamed: 0,Test_Open,Test_Close,Test_Volume,Test_Low,pred_High
128,276.132538,290.186829,291030689,265.75589,307.732712
239,477.47403,502.536469,133941578,452.13916,524.634671
39,20.253653,20.988602,644725399,17.955969,22.093747
197,350.354004,365.460785,134825630,333.538391,384.141293
210,426.952911,431.930725,153575175,415.560913,453.572297
305,2166.0,2110.649902,232391973,2081.0,2224.720195
265,954.205078,945.586792,146310388,862.821289,1012.288466
135,335.569153,386.597931,189606796,323.248413,402.22131
90,50.44846,55.376488,874227491,50.015224,58.505375
289,1392.104248,1316.17395,185522260,1312.558228,1404.647488


# **Thank you**