##First

1. Reading the data from CSV file
2. Training the model
3. Saving the model into a file


In [5]:
import pandas as pd
import numpy as np
import os
print(os.getcwd())
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, InputLayer
import pickle

# loading the data from CSV file
file_path = 'GOOG.csv'
df = pd.read_csv(file_path)

# handling missing values
df = df.dropna()

# Select features (X) and target variable (y)
features = ['Open', 'High', 'Low', 'Close', 'Volume']


target = 'Close' #The target variable (y) is the Close price, which the model aims to predict. 

X = df[features]
y = df[target] 

# normalizing the features
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Splitting the data into training and testing sets, will use 20%, 80% split
X_train, X_test, y_train, y_test, date_train, date_test = train_test_split(X_scaled, y, df['Date'], test_size=0.2, random_state=42)

# building the LSTM model
model = Sequential()
model.add(InputLayer(shape=(X_train.shape[1], 1)))
model.add(LSTM(units=50, activation='relu'))
model.add(Dense(units=1))
model.compile(optimizer='adam', loss='mean_squared_error')

# reshaping the data for LSTM input
X_train_lstm = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test_lstm = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

# number of epochs
number_epochs = 100

# training the model
model.fit(X_train_lstm, y_train, epochs=number_epochs, batch_size=32, verbose=2)

# saving the model in native Keras format
model.save(f'stock_price_prediction_model_{number_epochs}_epochs.keras')

# Save the scaler
with open(f'scaler_{number_epochs}_epochs.pkl', 'wb') as f:
    pickle.dump(scaler, f)

print("Scaler saved successfully as scaler.pkl.")

print(f'\n\nModel saved successfully with {number_epochs} epochs!')

e:\Stock-Price-Prediction-Using-AI-and-Machine-Learning-main


ModuleNotFoundError: No module named 'tensorflow'

Second

1. Loading previously saved model
2. Loading data again
3. Predicting prices


In [None]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import load_model

# loading the data for prediction from CSV file
file_path = 'GOOG.csv'
df_new = pd.read_csv(file_path)

# handling missing values
df_new = df_new.dropna()

# selecting features (X) for prediction
X_new = df_new[features]

# normalizing the features using the same scaler from training
X_scaled_new = scaler.transform(X_new)

# reshaping the data for LSTM input
X_new_lstm = X_scaled_new.reshape((X_scaled_new.shape[0], X_scaled_new.shape[1], 1))

# setting epochs
number_epochs = 100

# loading the saved model with number_epochs
loaded_model = load_model(f'stock_price_prediction_model_{number_epochs}_epochs.keras')

# making predictions
predictions = loaded_model.predict(X_new_lstm)

# creating a DataFrame with the predictions and corresponding dates
result_df = pd.DataFrame({
    'Date': df_new['Date'],
    'Actual': df_new['Close'],
    'Predicted': predictions.flatten()
})

# print the Date, Actual Price, and Predicted Price
for date, actual, predicted in zip(result_df['Date'], result_df['Actual'], result_df['Predicted']):
   print(f'Date: {date} | Actual Closing Price: {actual:.2f} | Predicted Closing Price: {predicted:.2f}')

# calculating the Mean Squared Error (MSE)
mse = ((result_df['Actual'] - result_df['Predicted']) ** 2).mean()

# calculating the Mean Absolute Error (MAE)
mae = abs(result_df['Actual'] - result_df['Predicted']).mean()

# printing the metrics
print(f'Model Trained with: {number_epochs} epoch(s)')
print(f'Mean Squared Error (MSE): {mse:.4f}')
print(f'Mean Absolute Error (MAE): {mae:.4f}')

# saving actual price and predicted data into CSV file for later use
result_df.to_csv(f'prediction_results_with_{number_epochs}_epochs.csv', index=False)

[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Date: 2004-08-25 | Actual Closing Price: 2.64 | Predicted Closing Price: 3.93
Date: 2004-08-26 | Actual Closing Price: 2.69 | Predicted Closing Price: 3.92
Date: 2004-08-27 | Actual Closing Price: 2.64 | Predicted Closing Price: 3.95
Date: 2004-08-30 | Actual Closing Price: 2.54 | Predicted Closing Price: 3.89
Date: 2004-08-31 | Actual Closing Price: 2.55 | Predicted Closing Price: 3.85
Date: 2004-09-01 | Actual Closing Price: 2.50 | Predicted Closing Price: 3.88
Date: 2004-09-02 | Actual Closing Price: 2.53 | Predicted Closing Price: 3.88
Date: 2004-09-03 | Actual Closing Price: 2.49 | Predicted Closing Price: 3.83
Date: 2004-09-07 | Actual Closing Price: 2.53 | Predicted Closing Price: 3.84
Date: 2004-09-08 | Actual Closing Price: 2.55 | Predicted Closing Price: 3.83
Date: 2004-09-09 | Actual Closing Price: 2.55 | Predicted Closing Price: 3.84
Date: 2004-09-10 | Actual Closing Price: 2.62 | Predicted Closing P

##  Third

1. Getting predicted result from previously saved CSV file
2. Plotting the data into bokeh plotting (bokeh plotting giving better look than matlab)


In [None]:
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import HoverTool, ColumnDataSource

# setting epochs
number_epochs = 100

# loading the CSV file based on number of epochs into a DataFrame
df = pd.read_csv(f'prediction_results_with_{number_epochs}_epochs.csv')

# Create a Bokeh figure
p = figure(x_axis_label='Date', y_axis_label='Price', title='Actual vs Predicted Prices', x_range=df['Date'].astype(str).tolist(), height=500, width=900)

# Create a ColumnDataSource
source = ColumnDataSource(df)

# Plot actual prices as bars
p.vbar(x='Date', top='Actual', width=0.5, source=source, legend_label='Actual Price', color='green')

# Plot predicted prices as a line
p.line(x='Date', y='Predicted', source=source, legend_label='Predicted Price', line_width=2, line_color='blue')

# Add hover tool
hover = HoverTool()
hover.tooltips = [('Date', '@Date{%F}'), ('Actual Price', '@Actual'), ('Predicted Price', '@Predicted')]
hover.formatters = {'@Date': 'datetime'}
p.add_tools(hover)

# Customize the plot layout
p.legend.location = 'top_left'
p.legend.click_policy = 'hide'

# Output to notebook
output_notebook()

# Display the plot in the notebook
show(p, notebook_handle=True)

## Fourth
1. Loading historical data again
2. Loading saved model
3. Based on last available data recursively predicting closing price until I can predict the closing price for selected date


In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import load_model
import pickle

def generate_data_for_date(date, scaler, historical_data, num_days=10):
    # Create a DataFrame with zeros and the specified date
    new_data = pd.DataFrame(index=[0], columns=['Open', 'High', 'Low', 'Close', 'Volume', 'Date'])
    new_data['Date'] = date

    try:
        # Find the rows corresponding to the specified date in the historical data
        selected_data = historical_data[historical_data['Date'] == pd.to_datetime(date)]
        
        if not selected_data.empty:
            # Use the selected data as input features
            selected_data = selected_data.iloc[0]
            new_data[['Open', 'High', 'Low', 'Close', 'Volume']] = selected_data[['Open', 'High', 'Low', 'Close', 'Volume']].values
        else:
            raise ValueError("No data found for the specified date.")
    except ValueError as e:
        # error occurred becasue no date found in the csv for the date
        # selecting the last num_days rows before the specified date
        selected_data = historical_data.iloc[-num_days:]

        # calculating the average of the selected data
        average_data = selected_data.mean()

        # using the calculated average as input features
        new_data[['Open', 'High', 'Low', 'Close', 'Volume']] = average_data[['Open', 'High', 'Low', 'Close', 'Volume']].values

    # scaling the features using the pre-trained scaler
    scaled_features = scaler.transform(new_data[['Open', 'High', 'Low', 'Close', 'Volume']])

    # assigning scaled features back to new_data
    new_data[['Open', 'High', 'Low', 'Close', 'Volume']] = scaled_features

    return new_data

# number of epochs
number_epochs = 100

# loading the pre-trained model
model = load_model(f'stock_price_prediction_model_5000_epochs.keras')

# loading the pre-trained scaler
with open(f'scaler_{number_epochs}_epochs.pkl', 'rb') as f:
    scaler = pickle.load(f)

# loading historical data
historical_data = pd.read_csv('GOOG.csv')
historical_data['Date'] = pd.to_datetime(historical_data['Date'])

# ensuring data is sorted by date
historical_data = historical_data.sort_values(by='Date')

# specifying the date for which I want to generate predictions
specific_date = '2024-02-10'

# generating data for the specific date using the average of the last 10 days
input_data = generate_data_for_date(specific_date, scaler, historical_data, num_days=10)

# printing the generated input data
# print(f"Input Data for {specific_date}:")
# print(input_data)

# reshaping the input data for LSTM input and cast to float32
input_data_lstm = input_data[['Open', 'High', 'Low', 'Close', 'Volume']].values.reshape((1, 5, 1)).astype('float32')

# making predictions using the pre-trained model
predicted_close = model.predict(input_data_lstm)[0][0]

# printing the predicted close price
print(f'Predicted Close Price for {specific_date}: {predicted_close}')


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 188ms/step
Predicted Close Price for 2024-02-10: 145.25819396972656
