# In [None]:
# Import necessary libraries for data processing, modeling, and visualization
import pandas as pd  # Data manipulation and analysis
import numpy as np  # Numerical operations and array manipulation
import plotly.express as plx  # Interactive data visualization
from imblearn.over_sampling import SMOTE  # Handling imbalanced datasets through oversampling
from sklearn.preprocessing import OneHotEncoder  # Encoding categorical features
from sklearn.preprocessing import MinMaxScaler  # Scaling features to a specific range
from tensorflow.keras.models import Sequential  # Building a sequential model in Keras
from tensorflow.keras.layers import GRU, Dense  # GRU (Gated Recurrent Unit) and Dense layers for neural networks
from tensorflow.keras.metrics import MeanAbsolutePercentageError  # MAPE metric for model evaluation
import tensorflow as tf  # Deep learning and neural network tasks
import matplotlib.pyplot as plt  # Data visualization and plotting
from google.colab import drive  # Mounting Google Drive in Colab to access files

# Mount Google Drive so the CSV stored there is reachable from the Colab runtime
drive.mount('/content/drive')

# Load the stock prices dataset from Google Drive
data = pd.read_csv("/content/drive/MyDrive/praveena stock/prices.csv")
print(data.shape)  # (rows, columns) — quick confirmation that the load succeeded
# A bare `data.head()` is only rendered when it is the last expression of a
# notebook cell; print it so the preview is visible wherever this code runs.
print(data.head())

# Analyze the distribution of stock symbols in the dataset
unique_symbols = data['symbol'].value_counts()
print(unique_symbols)  # Row count per stock symbol

# Display column dtypes and non-null counts
data.info()

# Filter the dataset for Google stocks using the stock symbol 'GOOG'
google = data[data['symbol'] == 'GOOG']
print(google.head())  # Preview of the Google rows (printed for the same reason as above)
print(google.shape)  # Shape of the filtered Google stock data

# Data Visualization for Google.
# Each figure is shown explicitly: a figure object that is created but neither
# displayed nor assigned is silently discarded outside the last line of a cell.
plx.line(google, x="date", y=["open", "close"], title="Difference between open and close prices of Google stocks").show()
plx.line(google, x="date", y=["high", "low"], title="Difference between high and low prices of Google stocks").show()
plx.line(google, x="date", y=["volume"], title="Volume of stock traded").show()

# Repeat the same visualizations for Facebook stocks
facebook = data[data['symbol'] == 'FB']  # Filter the dataset for Facebook stocks
plx.line(facebook, x="date", y=["open", "close"], title="Difference between open and close prices of FB stocks").show()
plx.line(facebook, x="date", y=["high", "low"], title="Difference between high and low prices of Facebook stocks").show()
plx.line(facebook, x="date", y=["volume"], title="Volume of stock traded").show()

# Data Preprocessing: Handle imbalanced data using SMOTE (Synthetic Minority Over-sampling Technique)
# NOTE(review): X_fb / y_fb were used here without being defined anywhere
# earlier in this file (NameError on a fresh run). They are reconstructed from
# how the resampled result is consumed below: 'symbol' is re-attached as the
# label, 'date' is re-attached afterwards, and both 'GOOG' and 'FB' rows are
# later selected from the resampled frame. Confirm this matches the intent.
fb_goog = data[data['symbol'].isin(['GOOG', 'FB'])]
# Assumes every column other than 'symbol' and 'date' is numeric — TODO confirm
X_fb = fb_goog.drop(columns=['symbol', 'date'])
y_fb = fb_goog['symbol']

smote = SMOTE(sampling_strategy='auto', random_state=42)
X_fb_resampled, y_fb_resampled = smote.fit_resample(X_fb, y_fb)

# Convert the resampled data into a DataFrame
upsampled_fb = pd.DataFrame(X_fb_resampled, columns=X_fb.columns)
upsampled_fb['symbol'] = y_fb_resampled  # Add the 'symbol' column back to the resampled data
upsampled_fb.reset_index(drop=True, inplace=True)  # Reset the index of the DataFrame

# Randomly assign dates from the original Facebook data to the resampled data.
# NOTE(review): dates are drawn at random (with replacement, unseeded) from the
# Facebook rows for *every* resampled row, so the 'date' ordering used below
# does not correspond to the rows' original dates and differs between runs.
upsampled_fb['date'] = np.random.choice(facebook['date'], size=len(upsampled_fb), replace=True)
print(upsampled_fb['symbol'].value_counts())  # Print the distribution of symbols after resampling
print(upsampled_fb.shape)  # Print the shape of the upsampled data

# Prepare Google stock data for time series modeling
google = upsampled_fb[upsampled_fb["symbol"] == 'GOOG']
google = google.sort_values(by='date')  # Sort the data by the (randomly assigned) date
google.reset_index(drop=True, inplace=True)  # Reset the index of the sorted DataFrame

# Normalize the 'close' prices using MinMaxScaler
close_prices = google['close'].values.reshape(-1, 1)  # Column vector, as the scaler expects 2-D input
scaler = MinMaxScaler(feature_range=(0, 1))  # Initialize scaler with range 0-1
close_prices_normalized = scaler.fit_transform(close_prices)  # Normalize the close prices

# Define a function to create the dataset for time series forecasting
def create_dataset(data, win_size):
    """Slice a series into sliding windows and their next-step targets.

    Args:
        data: 2-D array indexed as ``data[row, 0]``; only column 0 is read.
        win_size: Number of consecutive rows that make up one input window.

    Returns:
        Tuple ``(X, Y)`` of NumPy arrays where ``X[i]`` is
        ``data[i:i + win_size, 0]`` and ``Y[i]`` is ``data[i + win_size, 0]``.
        When ``data`` has no more than ``win_size`` rows, ``X`` has shape
        ``(0, win_size)`` and ``Y`` has shape ``(0,)``.
    """
    # Every start index i with i + win_size < len(data) has a valid target.
    # (The previous bound subtracted an extra 1 and dropped the last pair.)
    n_samples = len(data) - win_size
    if n_samples <= 0:
        # Keep X two-dimensional so callers can still read X.shape[1].
        return np.empty((0, win_size)), np.empty((0,))

    X = np.array([data[i:i + win_size, 0] for i in range(n_samples)])
    Y = np.array([data[i + win_size, 0] for i in range(n_samples)])
    return X, Y

# Number of consecutive normalized closes that form one input sample
win_size = 20
X, Y = create_dataset(close_prices_normalized, win_size)
# Shape (samples, 1, win_size), matching the model's input_shape below
X = X.reshape((X.shape[0], 1, X.shape[1]))

# 80/20 split in row order: the first 80% of windows train, the remainder test
train_size = int(0.8 * len(X))
train_X, train_Y = X[:train_size], Y[:train_size]
test_X, test_Y = X[train_size:], Y[train_size:]
print(train_Y)  # Inspect the training targets

# Persist the test features so they can be reloaded later
import pickle
with open('google.pkl', 'wb') as pkl_file:
    pickle.dump(test_X, pkl_file)

# Three stacked GRU layers followed by a single-unit Dense output.
# The first two GRUs return full sequences so the next GRU receives 3-D input;
# the last GRU returns only its final state for the Dense layer.
model = Sequential([
    GRU(units=50, return_sequences=True, input_shape=(1, win_size)),
    GRU(units=50, return_sequences=True),
    GRU(units=50),
    Dense(units=1),
])

# Define custom metrics for model evaluation
def rmae(y_true, y_pred):
    """Return the square root of the mean absolute error (Root Mean Absolute Error).

    Args:
        y_true: Target values.
        y_pred: Predicted values.
    """
    absolute_errors = tf.abs(y_pred - y_true)
    mean_absolute_error = tf.reduce_mean(absolute_errors)
    return tf.sqrt(mean_absolute_error)

def rmse(y_true, y_pred):
    """Return the square root of the mean squared error (Root Mean Squared Error).

    Args:
        y_true: Target values.
        y_pred: Predicted values.
    """
    squared_errors = tf.square(y_pred - y_true)
    mean_squared_error = tf.reduce_mean(squared_errors)
    return tf.sqrt(mean_squared_error)

# Optimize mean squared error with Adam; report the custom RMSE / RMAE metrics
model.compile(
    loss='mean_squared_error',
    optimizer='adam',
    metrics=[rmse, rmae],
)

# Print the layer-by-layer architecture
model.summary()

# Fit for 50 epochs in batches of 32, evaluating on the test split each epoch;
# the value returned by fit() is kept in `trainmodel`
trainmodel = model.fit(
    x=train_X,
    y=train_Y,
    validation_data=(test_X, test_Y),
    batch_size=32,
    epochs=50,
)

# Write the trained model to disk
model.save("google_model.h5")

# Define a function to predict the next few days of stock prices using the trained model
def predict_next_days(model, X_test_scaled, scaler, num_days):
    """Forecast ``num_days`` future values by feeding predictions back in.

    The last sample of ``X_test_scaled`` seeds a rolling window. After each
    prediction the oldest value is dropped from the window and the new
    prediction is appended, so later forecasts build on earlier ones.

    Args:
        model: Object whose ``predict`` takes an array of shape
            ``(1, 1, window)`` and returns an array readable as ``[0, 0]``
            and reshapeable to ``(1, 1, 1)`` (i.e. a single value).
        X_test_scaled: Array of scaled samples; only the last one is used.
        scaler: Object whose ``inverse_transform`` maps a ``(n, 1)`` array of
            scaled values back to the original scale.
        num_days: Number of steps to forecast.

    Returns:
        1-D NumPy array of ``num_days`` forecasts in the original scale;
        an empty array when ``num_days`` is zero or negative.
    """
    if num_days <= 0:
        # Nothing to forecast. Returning early also avoids passing a zero-row
        # array to the scaler, which scikit-learn scalers reject.
        return np.empty(0)

    # Rolling input window, shape (1, 1, window), seeded with the last sample
    input_sequence = X_test_scaled[-1].reshape(1, 1, -1)
    predicted = []
    for _ in range(num_days):
        next_day_pred = model.predict(input_sequence)
        predicted.append(next_day_pred[0, 0])
        # Slide the window: drop the oldest value, append the new prediction
        input_sequence = np.concatenate(
            (input_sequence[:, :, 1:], next_day_pred.reshape(1, 1, 1)),
            axis=2,
        )

    # Undo the scaling so the forecasts are in the original units
    pred_price = scaler.inverse_transform(np.array(predicted).reshape(-1, 1))
    return pred_price.flatten()

# Number of future steps to forecast for Google
num_days_to_predict = 3
pred_price = predict_next_days(model, test_X, scaler, num_days_to_predict)
for day, price in enumerate(pred_price[:num_days_to_predict], start=1):
    print(f"Predicted close price for day {day}: ${price:.2f}")

# Same preparation for Facebook: select its rows from the resampled frame,
# order them by date and renumber the index from zero
facebook = (
    upsampled_fb[upsampled_fb['symbol'] == 'FB']
    .sort_values(by='date')
    .reset_index(drop=True)
)

# Scale the Facebook close prices to [0, 1]; note `scaler` is rebound here,
# so from this point on it refers to the Facebook scaler
close_prices = facebook['close'].values.reshape(-1, 1)
scaler = MinMaxScaler(feature_range=(0, 1))
normalized_close_prices = scaler.fit_transform(close_prices)

# Create dataset for Facebook stock
def make_dataset(data, win_size):
    """Build sliding-window samples and next-step targets from column 0 of ``data``.

    Args:
        data: 2-D array indexed as ``data[row, 0]``.
        win_size: Number of consecutive rows in each window.

    Returns:
        Tuple ``(X, Y)`` of NumPy arrays: ``X[i]`` is ``data[i:i + win_size, 0]``
        and ``Y[i]`` is the value that immediately follows that window.
    """
    starts = range(len(data) - win_size)
    windows = [data[start:start + win_size, 0] for start in starts]
    targets = [data[start + win_size, 0] for start in starts]
    return np.array(windows), np.array(targets)

# Number of consecutive normalized closes that form one input sample
win_size = 20
X, Y = make_dataset(normalized_close_prices, win_size)
# Shape (samples, 1, win_size), matching the model's input_shape below
X = X.reshape((X.shape[0], 1, X.shape[1]))

# 80/20 split in row order: the first 80% of windows train, the remainder test
train_size = int(0.80 * len(X))
test_size = len(X) - train_size
train_X, train_Y = X[:train_size], Y[:train_size]
test_X, test_Y = X[train_size:], Y[train_size:]

# Same architecture as the Google model: three stacked GRUs and a Dense output
model = Sequential([
    GRU(units=50, return_sequences=True, input_shape=(1, win_size)),
    GRU(units=50, return_sequences=True),
    GRU(units=50),
    Dense(units=1),
])

# Optimize mean squared error with Adam; report the custom RMSE / RMAE metrics
model.compile(
    loss='mean_squared_error',
    optimizer='adam',
    metrics=[rmse, rmae],
)

# Fit on the Facebook windows, evaluating on the test split each epoch
trainmodel = model.fit(
    x=train_X,
    y=train_Y,
    validation_data=(test_X, test_Y),
    batch_size=32,
    epochs=50,
)

# Write the trained model to disk
model.save("fb_model.h5")

# Forecast the next 3 steps for Facebook and print them
num_days_to_predict = 3
pred_price = predict_next_days(model, test_X, scaler, num_days_to_predict)
for day, price in enumerate(pred_price[:num_days_to_predict], start=1):
    print(f"Predicted close price for day {day}: ${price:.2f}")

# Persist the test features so they can be reloaded later
with open('facebook_X_test.pkl', 'wb') as pkl_file:
    pickle.dump(test_X, pkl_file)

# Visualization: compare evaluation metrics of the Google and Facebook models.
# The values below are hard-coded literals, not read from the training runs.
Google = {'RMSE': 0.1954, 'RMAE': 0.3946, 'LOSS': 0.0396}
Facebook = {'RMSE': 0.1909, 'RMAE': 0.3804, 'LOSS': 0.0401}

# Metric names (x-axis labels) and the bar heights for each model
metrics = [*Google.keys()]
google_values = [*Google.values()]
facebook_values = [*Facebook.values()]

# One figure with two side-by-side bar charts, one per model
plt.figure(figsize=(12, 6))

panels = (
    ('Evaluation Metrics for Google', google_values, 'b'),
    ('Evaluation Metrics for Facebook', facebook_values, 'r'),
)
for position, (panel_title, panel_values, panel_color) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    bars = plt.bar(metrics, panel_values, color=panel_color, alpha=0.7)
    plt.xlabel('Metrics')
    plt.ylabel('Metric Values')
    plt.title(panel_title)
    # Write each value just above its bar, centred horizontally
    for bar, value in zip(bars, panel_values):
        plt.text(bar.get_x() + bar.get_width() / 2, bar.get_height(), f'{value:.4f}', ha='center', va='bottom')

plt.tight_layout()  # Keep the two panels from overlapping
plt.show()
