<a href="https://colab.research.google.com/github/phoneix116/Stock-Prediction-model/blob/main/STOCK_PREDICTION_MODEL_USING_TENSORFLOW.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**SHORT TERM STOCK PRICE PREDICTION MODEL USING A NEURAL NETWORK**

In [594]:
import numpy as np
!pip install keras_tuner
import matplotlib.pyplot as plt
import pandas as pd

import plotly.graph_objects as go
import plotly.express as px

import tensorflow as tf
from tensorflow import keras
import keras_tuner as kt
from tensorflow.keras import regularizers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler


import logging
logging.getLogger("tensorflow").setLevel(logging.ERROR)
tf.autograph.set_verbosity(0)



In [595]:
data_1 = pd.read_csv('/content/NSE-Tata-Global-Beverages-Limited.csv')   #Loading
data_1.head()

Unnamed: 0,Date,Open,High,Low,Last,Close,Total Trade Quantity,Turnover (Lacs)
0,2018-10-08,208.0,222.25,206.85,216.0,215.15,4642146.0,10062.83
1,2018-10-05,217.0,218.6,205.9,210.25,209.2,3519515.0,7407.06
2,2018-10-04,223.5,227.8,216.15,217.25,218.2,1728786.0,3815.79
3,2018-10-03,230.0,237.5,225.75,226.45,227.6,1708590.0,3960.27
4,2018-10-01,234.55,234.6,221.05,230.3,230.9,1534749.0,3486.05


**HIGH STOCK PRICE OVER TIME GRAPH**

In [596]:
fig = px.scatter(data_1, x='Date', y='High', title='High Stock Price Over Time')
fig.show() #plotting the data in a interactive graph using plotly

# Creating a scatter plot for Low stock price over time
fig_low = px.scatter(data_1, x='Date', y='Low', title='Low Stock Price Over Time')
fig_low.show()  # Displaying the graph for Low stock price

In [597]:
print(data_1.isnull().sum())  # Check for missing values

data_1 = data_1.dropna()  # Drop or fill missing values if any
data_1 = data_1[:-1]  # Drop last row


Date                    0
Open                    0
High                    0
Low                     0
Last                    0
Close                   0
Total Trade Quantity    0
Turnover (Lacs)         0
dtype: int64


In [598]:
data_1['Date'] = pd.to_datetime(data_1['Date'])
data_1 = data_1.sort_values(by='Date')  # Ensure data is in chronological order(SORTING THE DATASET BASED ON DATE)
data_1.head()


Unnamed: 0,Date,Open,High,Low,Last,Close,Total Trade Quantity,Turnover (Lacs)
1233,2013-10-09,155.7,158.2,154.15,155.3,155.55,2049580.0,3204.49
1232,2013-10-10,156.0,160.8,155.85,160.3,160.15,3124853.0,4978.8
1231,2013-10-11,161.15,163.45,159.0,159.8,160.05,1880046.0,3030.76
1230,2013-10-14,160.85,161.45,157.7,159.3,159.45,1281419.0,2039.09
1229,2013-10-15,160.0,160.2,155.35,157.1,158.05,1145582.0,1805.49


**DEFINE TARGET**

In [599]:
# Define target: Next day's Close price
data_1['Target'] = data_1['Close'].shift(-1)


print("\n\nbefore dropping NaN values\n\n")

print(data_1.tail())  # Shift Close price by -1 day
print("\n\nafter dropping NaN values\n\n")
data_1.dropna(inplace=True)
print(data_1.tail())
#Why df['Target'] = df['Close'].shift(-1)?
#This line shifts the "Close" price by one day forward, meaning that the target value (Target) for each row is the next day's closing price



before dropping NaN values


        Date    Open    High     Low    Last   Close  Total Trade Quantity  \
4 2018-10-01  234.55  234.60  221.05  230.30  230.90             1534749.0   
3 2018-10-03  230.00  237.50  225.75  226.45  227.60             1708590.0   
2 2018-10-04  223.50  227.80  216.15  217.25  218.20             1728786.0   
1 2018-10-05  217.00  218.60  205.90  210.25  209.20             3519515.0   
0 2018-10-08  208.00  222.25  206.85  216.00  215.15             4642146.0   

   Turnover (Lacs)  Target  
4          3486.05  227.60  
3          3960.27  218.20  
2          3815.79  209.20  
1          7407.06  215.15  
0         10062.83     NaN  


after dropping NaN values


        Date    Open    High     Low    Last   Close  Total Trade Quantity  \
5 2018-09-28  234.05  235.95  230.20  233.50  233.75             3069914.0   
4 2018-10-01  234.55  234.60  221.05  230.30  230.90             1534749.0   
3 2018-10-03  230.00  237.50  225.75  226.45  227.60          

**SCALING**

In [600]:
# Features to use for prediction
features = ['Open', 'High', 'Low', 'Close']
X = data_1[features].values  # Independent variables
y = data_1['Target'].values   # Next day's Close price

print("\n\nBefore scaling values\n\n")

print("Features: \n",X,"\n\n")

print("Target values: \n",y,"\n\n")
# Scale features
scaler = MinMaxScaler(feature_range=(0, 1))
X = scaler.fit_transform(X)
y_scaler = MinMaxScaler(feature_range=(0, 1))
y = y_scaler.fit_transform(y.reshape(-1, 1)).flatten()  # Normalize y

print("\n\nAfter scaling values\n\n")

print("Features: \n",X,"\n\n")

print("Target values: \n",y,"\n\n")





Before scaling values


Features: 
 [[155.7  158.2  154.15 155.55]
 [156.   160.8  155.85 160.15]
 [161.15 163.45 159.   160.05]
 ...
 [230.   237.5  225.75 227.6 ]
 [223.5  227.8  216.15 218.2 ]
 [217.   218.6  205.9  209.2 ]] 


Target values: 
 [160.15 160.05 159.45 ... 218.2  209.2  215.15] 




After scaling values


Features: 
 [[0.23453494 0.23912559 0.24430408 0.2371134 ]
 [0.23587005 0.25072496 0.25197383 0.25773196]
 [0.2587895  0.2625474  0.26618543 0.25728373]
 ...
 [0.56519804 0.59290654 0.56733589 0.56006275]
 [0.53627058 0.54963194 0.52402436 0.51792918]
 [0.50734312 0.508588   0.47778028 0.47758853]] 


Target values: 
 [0.25773196 0.25728373 0.25459435 ... 0.51792918 0.47758853 0.50425818] 




**SPLIT DATA INTO TRAIN SETS AND TEST SETS**

In [601]:
# Split data into training (80%), validation (10%), and test (10%)
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.2, shuffle=False)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, shuffle=False)



**DEFINING A MODEL CHOOSING THE BEST HYPERPARAMETER'S USING KERAS TUNER**

In [602]:
# Function to build the model with tunable hyperparameters
def build_model(hp):
    model = keras.Sequential()

    # Input layer
    model.add(keras.layers.Input(shape=(X_train.shape[1],)))

    # Tune number of layers and units per layer
    for i in range(hp.Int('num_layers',2,4)):  # Vary between 1 to 4 layers
        model.add(keras.layers.Dense(
            units=hp.Int(f'units_{i}', min_value=16, max_value=256, step=16),
            activation='relu',kernel_regularizer=regularizers.l2(0.001)#to comabat high variance (overfitting)
        ))

     # Output layer
    model.add(keras.layers.Dense(1, activation='linear'))

    # Tune learning rate
    model.compile(
        optimizer=keras.optimizers.Adam(
            hp.Choice('learning_rate', values=[0.001, 0.0005, 0.0001])  # Try different LRs
        ),
        loss='mse',
        metrics=['mae']
    )

    return model

# Initialize tuner
tuner = kt.Hyperband(
    build_model,
    objective='val_loss',
    max_epochs=50,
    factor=3,
    directory='test_4',
    project_name='price_prediction'
)

# Search for best hyperparameters
tuner.search(X_train, y_train,
             validation_data=(X_val, y_val),
             epochs=50,
             batch_size=32,
             callbacks=[keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)])

# Get the best model
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
best_model = tuner.get_best_models(num_models=1)[0]

# Print the best hyperparameters
print(f"Best number of layers: {best_hps.get('num_layers')}")
for i in range(best_hps.get('num_layers')):
    print(f"  - Units in layer {i}: {best_hps.get(f'units_{i}')}")
print(f"Best learning rate: {best_hps.get('learning_rate')}")

# Train the best model
best_model.fit(X_train, y_train,
               validation_data=(X_val, y_val),
               epochs=50,
               batch_size=32,
               verbose=2)

Trial 90 Complete [00h 00m 16s]
val_loss: 0.02052915282547474

Best val_loss So Far: 0.0035034341271966696
Total elapsed time: 00h 19m 23s
Best number of layers: 2
  - Units in layer 0: 256
  - Units in layer 1: 112
Best learning rate: 0.001
Epoch 1/50
31/31 - 2s - 67ms/step - loss: 0.0012 - mae: 0.0127 - val_loss: 0.0077 - val_mae: 0.0754
Epoch 2/50
31/31 - 0s - 9ms/step - loss: 0.0011 - mae: 0.0107 - val_loss: 0.0073 - val_mae: 0.0736
Epoch 3/50
31/31 - 0s - 9ms/step - loss: 0.0010 - mae: 0.0106 - val_loss: 0.0057 - val_mae: 0.0621
Epoch 4/50
31/31 - 0s - 10ms/step - loss: 9.8698e-04 - mae: 0.0114 - val_loss: 0.0081 - val_mae: 0.0784
Epoch 5/50
31/31 - 0s - 9ms/step - loss: 9.9927e-04 - mae: 0.0125 - val_loss: 0.0068 - val_mae: 0.0698
Epoch 6/50
31/31 - 0s - 8ms/step - loss: 9.4013e-04 - mae: 0.0116 - val_loss: 0.0072 - val_mae: 0.0729
Epoch 7/50
31/31 - 0s - 8ms/step - loss: 9.0211e-04 - mae: 0.0108 - val_loss: 0.0096 - val_mae: 0.0869
Epoch 8/50
31/31 - 0s - 9ms/step - loss: 8.7783

<keras.src.callbacks.history.History at 0x7fbf9060fc50>

**EVALUATION -->  PREDICTION OF NEXT DAY'S CLOSEING RANGE**

In [605]:
print(X_test.shape,"\n\nFirst five values of X: \n")


print(X_test[:5],"\n\n")


results = model.evaluate(X_test, y_test)
print(f"Test Loss (MSE): {results[0]:.4f}")
print(f"Test MAE: {results[1]:.4f}")


(124, 4) 

First five values of X: 

[[0.76546506 0.78987285 0.78050981 0.78507396]
 [0.78860703 0.78563462 0.78547259 0.77476468]
 [0.77036048 0.76622797 0.77374239 0.76736889]
 [0.76056965 0.77269685 0.77441913 0.76736889]
 [0.76101469 0.77068927 0.77487029 0.77476468]] 


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - loss: 0.0013 - mae: 0.0243
Test Loss (MSE): 0.0012
Test MAE: 0.0245


**Model Predictions vs  Data**

In [606]:


#Extract original unscaled data
original_dates = data_1['Date'].values
original_prices = data_1['Close'].values  # Original closing prices

#Get model predictions (Inverse transform for proper comparison)
y_test_unscaled = y_scaler.inverse_transform(y_test.reshape(-1, 1)).flatten()

# Ensure test set dates match predictions
test_dates = data_1.iloc[-len(y_test):]['Date'].values

# Scaled Training Data for reference
train_dates = data_1.iloc[:len(y_train)]['Date'].values
train_prices = y_scaler.inverse_transform(y_train.reshape(-1, 1)).flatten()  # Inverse scaling

# Create interactive plot
fig = go.Figure()

#Original Data (Unscaled)
fig.add_trace(go.Scatter(x=original_dates, y=original_prices,
                         mode='lines', name='Original Data (Unscaled)',
                         line=dict(color='black')))


#Actual Test Data (After Scaling & Inverse Transform)
fig.add_trace(go.Scatter(x=test_dates, y=y_test_unscaled,
                         mode='lines', name='Actual Test Closing Price',
                         line=dict(color='blue')))

# Scaled Training Data (Inverse Transformed)
fig.add_trace(go.Scatter(x=train_dates, y=train_prices,
                         mode='lines', name='Training Data (Scaled & Inversed)',
                         line=dict(color='orange')))



y_pred = model.predict(X_test)
y_pred = y_scaler.inverse_transform(y_pred.reshape(-1, 1)).flatten()

#Predicted Data (After Scaling & Inverse Transform)
fig.add_trace(go.Scatter(x=test_dates, y=y_pred,
                         mode='lines', name='Predicted Closing Price',
                         line=dict(color='red', dash='dot')))
# Customize layout
fig.update_layout(title="Original Data vs. Model Predictions vs. Scaled Training Data",
                  xaxis_title="Date",
                  yaxis_title="Closing Price",
                  legend_title="Legend",
                  hovermode="x")

# Show plot
fig.show()


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 
