In [None]:
import numpy as np
import pandas as pd

import os
import matplotlib.pyplot as plt
import pandas_datareader as web
import datetime as dt

from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

In [None]:
# Load the price history for one ticker from a local CSV file.
# The other tickers are kept as commented-out alternatives; enable exactly one
# read_csv line.
# NOTE(review): the COMPANY label used for plot titles is hard-coded in a later
# cell and is not derived from this filename - keep the two in sync.
# data = pd.read_csv('RELIANCE.csv')
data = pd.read_csv('TCS.csv')
# data = pd.read_csv('ITC.csv')
# Preview the first rows to confirm the file was read as expected.
data.head()

In [None]:
# (rows, columns) of the frame as loaded from the CSV
data.shape

In [None]:
# Column dtypes straight from read_csv, before the Date column is converted
data.dtypes

In [None]:
# Convert the Date column from text to datetime using an explicit ISO layout
# (YYYY-MM-DD). The commented-out lines are alternatives for files that store
# dates differently (inferred format, or DD-MM-YYYY).
# data['Date']=pd.to_datetime(data['Date'],infer_datetime_format=True)
data['Date'] = pd.to_datetime(data['Date'], format='%Y-%m-%d')
# data['Date'] = pd.to_datetime(data['Date'], format='%d-%m-%Y')

In [None]:
# Re-inspect dtypes after the to_datetime conversion of Date
data.dtypes

In [None]:
# Preview the first rows again now that Date has been converted
data.head()

In [None]:
# Summary statistics produced by DataFrame.describe()
data.describe()

In [None]:
# Closing price over the whole loaded history. Drawn on an explicit Axes with a
# title and axis labels so the figure is readable on its own, and finished with
# plt.show() so the cell does not echo the Line2D repr.
fig, ax = plt.subplots()
ax.plot(data['Date'], data['Close'])
ax.set_title("Closing price (full history)")
ax.set_xlabel("Date")
ax.set_ylabel("Close")
plt.show()

In [None]:
# Number of missing values in each column
data.isnull().sum()

In [None]:
# Flag every row that has at least one missing value in any column
missing_mask = data.isna().any(axis=1)

# Index labels of the flagged rows
rows_with_missing = data.loc[missing_mask].index

rows_with_missing

In [None]:
# Drop every row that contains a missing value. The result is rebound to
# `data` rather than mutating the frame in place, so re-running the cell is
# harmless and the operation stays chainable.
data = data.dropna(axis=0)

In [None]:
# Re-count missing values per column after dropna; every count should now be 0
data.isnull().sum()

In [None]:
# Shape after the rows with missing values were dropped
data.shape

In [None]:
# Hold out the final 15 rows; everything before them is the training set
train_size = len(data) - 15
train_data = data.iloc[:train_size]

In [None]:
# Shape of the training slice (all rows except the last 15)
train_data.shape

In [None]:
# Only the date and the closing price are used from here on
train_data = train_data.loc[:, ['Date', 'Close']]
train_data.head()

In [None]:
# data['year'] = data['Date'].dt.month_name() 
# data['month'] = data['Date'].dt.year 
# data['Day'] = data['Date'].dt.day
# data.head()

In [None]:
# Closing price over the training period only. Drawn on an explicit Axes with a
# title and axis labels so the figure is readable on its own, and finished with
# plt.show() so the cell does not echo the Line2D repr.
fig, ax = plt.subplots()
ax.plot(train_data['Date'], train_data['Close'])
ax.set_title("Closing price (training period)")
ax.set_xlabel("Date")
ax.set_ylabel("Close")
plt.show()

In [None]:
# Normalize data
scaler = MinMaxScaler(feature_range=(0,1))
scaled_data = scaler.fit_transform(train_data['Close'].values.reshape(-1,1))

In [None]:
# Display the scaled closes: one column, scaled with feature_range=(0, 1)
scaled_data

In [None]:
# First three scaled values from the single column
scaled_data[0:3,0]

In [None]:
# Length of the look-back window: how many consecutive rows form one sample
prediction_days = 60

# Every position from `prediction_days` onward is a target; its features are
# the `prediction_days` scaled closes immediately before it.
target_positions = range(prediction_days, len(scaled_data))

x_train = [scaled_data[end - prediction_days:end, 0] for end in target_positions]
y_train = [scaled_data[end, 0] for end in target_positions]


In [None]:
# Stack the per-sample windows and their targets into ndarrays
x_train = np.array(x_train)
y_train = np.array(y_train)

# Peek at the first window
x_train[0]

In [None]:
# (number of samples, prediction_days) before the 3-D reshape
x_train.shape

In [None]:
# The LSTM expects (samples, time steps, features); add a trailing
# single-feature axis to the 2-D window matrix.
n_samples, n_steps = x_train.shape[0], x_train.shape[1]
x_train = x_train.reshape(n_samples, n_steps, 1)

In [None]:
# After the reshape: (samples, time steps, 1)
x_train.shape

In [None]:
def LSTM_model(input_length=None, units=50, dropout_rate=0.2):
    """
    Create an (uncompiled) stacked-LSTM model for stock price prediction.

    The network is three LSTM layers, each followed by dropout, feeding a
    single-unit dense output layer.

    :param input_length: Number of time steps in each input sample. When None
        (the default) it falls back to the notebook-level ``x_train.shape[1]``,
        so existing zero-argument calls behave exactly as before.
    :param units: Number of units in each LSTM layer.
    :param dropout_rate: Dropout fraction applied after each LSTM layer.
    :return: The configured LSTM model (keras.Sequential); the caller compiles it
    """
    if input_length is None:
        # Backward-compatible default: read the window length from the
        # training array defined earlier in the notebook.
        input_length = x_train.shape[1]

    # Initialize a sequential model
    model = Sequential()

    # First LSTM layer: declares the input shape (time steps, 1 feature) and
    # returns the full sequence so the next LSTM layer can consume it
    model.add(LSTM(units=units, return_sequences=True, input_shape=(input_length, 1)))
    # Dropout to reduce overfitting
    model.add(Dropout(dropout_rate))

    # Second LSTM layer, still returning the full sequence
    model.add(LSTM(units=units, return_sequences=True))
    model.add(Dropout(dropout_rate))

    # Third LSTM layer returns only its final output, feeding the dense head
    model.add(LSTM(units=units))
    model.add(Dropout(dropout_rate))

    # Dense output layer with one unit: the single predicted (scaled) value
    model.add(Dense(units=1))

    return model

In [None]:
# Build the network, print its layer summary, then attach the optimizer and
# the loss used for training.
model = LSTM_model()
model.summary()
model.compile(optimizer='adam', loss='mean_squared_error')

In [None]:
# Define callbacks

# Checkpoint the model each time the monitored metric improves.
# ModelCheckpoint monitors 'val_loss' by default, but fit() below is given no
# validation data, so with save_best_only=True the callback would never find
# its metric and would never write a file. Monitoring the training 'loss'
# makes the checkpoint actually fire.
# NOTE(review): newer Keras releases may require the filepath to end in
# '.keras' or '.weights.h5' - confirm against the installed version.
checkpointer = ModelCheckpoint(
    filepath = 'weights_best.hdf5',
    monitor = 'loss',
    verbose = 2,
    save_best_only = True
)

# Train on the windowed, scaled training closes
model.fit(
    x_train,
    y_train,
    epochs=25,
    batch_size = 32,
    callbacks = [checkpointer]
)

In [None]:
# x_test = train_data['Close'][-60:]
# print(x_test.shape,'Series-Type')
# x_test = x_test.values.reshape(1,60)
# print(x_test.shape)
# x_test = scaler.fit_transform(x_test)
# print(x_test.shape)
# x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))
# x_test.shape

In [None]:
# Recursive multi-step forecast: predict one step, append the prediction to the
# price history, then predict the next step from the extended history.
forcast_days = 15
predicted_prices = []

# Unscaled closes that seed the first input window. A plain Python list is used
# so that appending is purely positional: the previous `series.loc[len(series)]`
# write was label-based on a slice of train_data, which overwrites a real price
# whenever that integer label already exists in the slice's index, and triggers
# SettingWithCopy warnings.
price_history = train_data['Close'].to_numpy()[-prediction_days:].tolist()

if len(price_history) < prediction_days:
    raise ValueError(
        f"Need at least {prediction_days} training rows to forecast, "
        f"got {len(price_history)}"
    )

for _ in range(forcast_days):
    # Most recent window, scaled with the scaler fitted on the training closes
    x_test = np.array(price_history[-prediction_days:]).reshape(-1, 1)
    x_test = scaler.transform(x_test)

    # Reshape to (samples, time steps, features) as expected by the LSTM
    x_test = x_test.reshape(1, prediction_days, 1)

    # Generate the next-step prediction (still in scaled units)
    predicted_price = model.predict(x_test)

    # Invert the scaling to get the prediction back in price units
    predicted_price = scaler.inverse_transform(predicted_price)

    predicted_prices.append(predicted_price[0, 0])
    # Feed the prediction back in as the newest observation
    price_history.append(float(predicted_price[0, 0]))

In [None]:
# Forecast values as an ndarray
predicted_prices = np.asarray(predicted_prices)

# Held-out rows: everything from train_size to the end of the cleaned frame
test_data = data.iloc[train_size:]

# Actual closing prices for the held-out rows
actual_prices = test_data['Close'].to_numpy()

In [None]:
# Display name interpolated into the plot title, axis label and legend below.
# NOTE(review): hard-coded separately from the CSV filename chosen in the
# loading cell - update both together when switching tickers.
COMPANY = 'TCS'

In [None]:
# Actual (black) versus forecast (green) closes for the held-out rows, drawn
# on an explicit Axes.
fig, ax = plt.subplots()

ax.plot(actual_prices, color='black', label=f"Actual {COMPANY} price")
ax.plot(predicted_prices, color='green', label=f"Predicted {COMPANY} price")

# Title and axis labels
ax.set_title(f"{COMPANY} share price")
ax.set_xlabel("time")
ax.set_ylabel(f"{COMPANY} share price")

# Legend distinguishes the two series
ax.legend()

# Render the figure
plt.show()


In [None]:
# Error metrics between the held-out closes and the forecast: MSE, RMSE and MAE
from sklearn.metrics import mean_absolute_error, mean_squared_error

mse = mean_squared_error(actual_prices, predicted_prices)
mae = mean_absolute_error(actual_prices, predicted_prices)
# RMSE is the square root of MSE, which puts it back in price units
rmse = np.sqrt(mse)

print(f"Mean Squared Error: {mse}")
print(f"Root Mean Squared Error: {rmse}")
print(f"Mean Absolute Error: {mae}")


In [None]:
# Coefficient of determination (R^2) of the forecast against the held-out closes.
# numpy is already imported in the notebook's first cell and is not used here,
# so the redundant re-import has been dropped.
from sklearn.metrics import r2_score

r2 = r2_score(actual_prices, predicted_prices)

print("R2 Score:", r2)

In [None]:
# Display the held-out actual closing prices
actual_prices

In [None]:
# Display the forecast values, one per forecast step
predicted_prices