# 1. Importing libraries

In [None]:
import pandas as pd
import numpy as np
import plotly.express as plx
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense
from tensorflow.keras.metrics import MeanAbsolutePercentageError
import tensorflow as tf
import matplotlib.pyplot as plt
from google.colab import drive
import pickle

# 2. Mount Google Drive and Load Data

In [None]:
# Mount Google Drive at /content/drive so the dataset can be read by path.
# This relies on google.colab, so the cell only works inside a Colab runtime.
drive.mount('/content/drive')

In [None]:
# Read the price table (CSV on the mounted Google Drive) into a DataFrame
prices_csv_path = "/content/drive/MyDrive/praveena stock/prices.csv"
data = pd.read_csv(prices_csv_path)

In [None]:
# Report the (rows, columns) size of the loaded frame
dataset_shape = data.shape
print("Dataset Shape:", dataset_shape)

In [None]:

# Preview the top of the dataset (head() with no argument yields 5 rows)
first_rows = data.head()
print("First 5 rows of the dataset:\n", first_rows)

In [None]:
# Count how many rows each ticker symbol contributes
symbol_column = data['symbol']
unique_symbols = symbol_column.value_counts()
print("Unique Symbols Count:\n", unique_symbols)

In [None]:
# Print a labelled structural summary of the dataset.
# DataFrame.info() writes its report itself, so it is called
# as a statement rather than wrapped in print().
print("Dataset Info:")
data.info()

# 2. Filtering and Visualizing Stock Data

In [None]:
# Keep only the rows whose symbol is GOOG, then preview them and report their size
is_google = data['symbol'] == 'GOOG'
google = data[is_google]
print("Google Stock Data:\n", google.head())
print("Google Stock Data Shape:", google.shape)

In [None]:
# Overlay Google's open and close prices against date on one line chart
plx.line(
    data_frame=google,
    x="date",
    y=["open", "close"],
    title="Difference between open and close prices of Google stocks",
)

In [None]:
# Overlay Google's high and low prices against date on one line chart
plx.line(
    data_frame=google,
    x="date",
    y=["high", "low"],
    title="Difference between high and low prices of Google stocks",
)



In [None]:
# Plot Google stock data: volume traded
plx.line(
    data_frame=google,
    x="date",
    y=["volume"],
    title="Volume of stock traded",
)



In [None]:
# Keep only the rows whose symbol is FB
is_facebook = data['symbol'] == 'FB'
facebook = data[is_facebook]



In [None]:
# Overlay Facebook's open and close prices against date on one line chart
plx.line(
    data_frame=facebook,
    x="date",
    y=["open", "close"],
    title="Difference between open and close prices of FB stocks",
)




In [None]:
# Overlay Facebook's high and low prices against date on one line chart
plx.line(
    data_frame=facebook,
    x="date",
    y=["high", "low"],
    title="Difference between high and low prices of Facebook stocks",
)



In [None]:
# Chart Facebook's traded volume against date
plx.line(
    data_frame=facebook,
    x="date",
    y=["volume"],
    title="Volume of stock traded",
)



# 3. Handling Imbalanced Data with SMOTE

In [None]:

# Build the inputs SMOTE needs. X_fb / y_fb were never defined anywhere in the
# notebook, so this cell raised NameError on a fresh kernel.
# Later cells filter the resampled frame by 'GOOG' and 'FB' and read its 'close'
# column, so the two symbols form the classes and the numeric price/volume
# columns form the features.
# NOTE(review): reconstructed from how the result is used downstream — confirm
# this matches the feature set originally intended.
fb_goog = data[data['symbol'].isin(['GOOG', 'FB'])]
X_fb = fb_goog[['open', 'close', 'low', 'high', 'volume']]
y_fb = fb_goog['symbol']

# Apply SMOTE so both symbols end up with the same number of rows;
# random_state pins the synthetic samples for reproducibility.
smote = SMOTE(sampling_strategy='auto', random_state=42)
X_fb_resampled, y_fb_resampled = smote.fit_resample(X_fb, y_fb)



In [None]:
# Rebuild a labelled frame from the resampled features, re-attach the class
# column, and give the rows a fresh 0..n-1 index
upsampled_fb = (
    pd.DataFrame(X_fb_resampled, columns=X_fb.columns)
    .assign(symbol=y_fb_resampled)
    .reset_index(drop=True)
)



In [None]:
# Give every resampled row a date drawn, with replacement, from the Facebook dates.
# A dedicated seeded generator makes the draw repeatable across kernel restarts
# (the previous np.random.choice call used the unseeded global state).
# NOTE(review): the dates are assigned at random, so sorting by 'date' later
# does not restore any chronological order among the rows.
date_rng = np.random.default_rng(42)
upsampled_fb['date'] = date_rng.choice(facebook['date'].to_numpy(), size=len(upsampled_fb), replace=True)

In [None]:
# Show the per-symbol row counts after resampling, then the frame's size
symbol_counts = upsampled_fb['symbol'].value_counts()
print("Upsampled Symbols Count:\n", symbol_counts)
print("Upsampled Data Shape:", upsampled_fb.shape)

# 4. Preparing Data for Google Stock Prediction

In [None]:
# Pull the GOOG rows out of the resampled frame, order them by 'date',
# and renumber the index. This rebinds `google`, replacing the raw-data frame.
is_goog_row = upsampled_fb["symbol"] == 'GOOG'
google = upsampled_fb[is_goog_row].sort_values(by='date').reset_index(drop=True)

In [None]:
# Rescale the close column into [0, 1] as a single-feature column vector.
# NOTE(review): the scaler is fitted on the whole series, including rows that
# later land in the test split.
scaler = MinMaxScaler(feature_range=(0, 1))
close_prices = google['close'].to_numpy().reshape(-1, 1)
close_prices_normalized = scaler.fit(close_prices).transform(close_prices)

# 5. Creating Datasets for Training and Testing

In [None]:
def create_dataset(data, win_size):
    """
    Build sliding-window samples for one-step-ahead forecasting.

    Each run of `win_size` consecutive values from column 0 of `data` becomes
    one input row; the value immediately following that run is its target.

    Parameters:
    - data (numpy.ndarray): 2-D array; only column 0 is read.
    - win_size (int): Number of consecutive values in each input window.

    Returns:
    - tuple: (X, Y). X holds one window per row, shape (n, win_size), and
      Y holds the matching targets, shape (n,), where n = len(data) - win_size.
      Both arrays are empty when `data` has no more than `win_size` rows.
    """
    # The last usable window starts at len(data) - win_size - 1, so the range
    # must stop at len(data) - win_size. The earlier bound carried an extra
    # "- 1" and silently discarded the most recent sample.
    n_windows = len(data) - win_size
    X = [data[start:start + win_size, 0] for start in range(n_windows)]
    Y = [data[start + win_size, 0] for start in range(n_windows)]
    return np.array(X), np.array(Y)

In [None]:
# Number of consecutive normalized close prices that make up one input sample.
# The same value is passed to create_dataset and sets the last dimension of
# the model's input_shape=(1, win_size).
win_size = 20

In [None]:
# Window the normalized series into (input, target) pairs
X, Y = create_dataset(close_prices_normalized, win_size)
# Insert a length-1 middle axis: (samples, win_size) -> (samples, 1, win_size),
# matching the model's input_shape=(1, win_size)
X = X[:, np.newaxis, :]
# First 80% of the samples train the model; the remainder is held out
train_size = int(len(X) * 0.8)
train_X, train_Y = X[:train_size], Y[:train_size]
test_X, test_Y = X[train_size:], Y[train_size:]

In [None]:
# Persist the held-out input windows to disk with pickle
with open('google.pkl', mode='wb') as test_file:
    pickle.dump(test_X, test_file)

# 6. Building and Training the Model

In [None]:
# Define the GRU network (compilation happens in a later cell):
# three stacked 50-unit GRU layers followed by a single-unit dense output.
# The first two GRU layers return full sequences so the next GRU receives 3-D input.
model = Sequential([
    GRU(units=50, return_sequences=True, input_shape=(1, win_size)),
    GRU(units=50, return_sequences=True),
    GRU(units=50),
    Dense(units=1),
])

def rmae(y_true, y_pred):
    """
    Square root of the mean absolute error between predictions and targets.

    Parameters:
    - y_true (Tensor): Ground-truth values.
    - y_pred (Tensor): Model outputs.

    Returns:
    - Tensor: sqrt(mean(|y_pred - y_true|)), reduced over all elements.
    """
    abs_error = tf.abs(y_pred - y_true)
    mean_abs_error = tf.reduce_mean(abs_error)
    return tf.sqrt(mean_abs_error)

def rmse(y_true, y_pred):
    """
    Square root of the mean squared error between predictions and targets.

    Parameters:
    - y_true (Tensor): Ground-truth values.
    - y_pred (Tensor): Model outputs.

    Returns:
    - Tensor: sqrt(mean((y_pred - y_true)^2)), reduced over all elements.
    """
    squared_error = tf.square(y_pred - y_true)
    mean_squared_error = tf.reduce_mean(squared_error)
    return tf.sqrt(mean_squared_error)

In [None]:
# Configure training: Adam optimizer, MSE loss, and the two custom
# root-error metrics; then print the layer-by-layer summary
model.compile(
    optimizer='adam',
    loss='mean_squared_error',
    metrics=[rmse, rmae],
)
model.summary()

In [None]:
# Fit on the training windows; the held-out windows are scored after every
# epoch as validation data. The returned history object is kept in trainmodel.
trainmodel = model.fit(
    x=train_X,
    y=train_Y,
    epochs=50,
    batch_size=32,
    validation_data=(test_X, test_Y),
)

In [None]:
# Save the trained Google model to "google_model.h5" in the working directory.
# NOTE(review): the model was compiled with the custom metrics rmse/rmae;
# reloading it presumably requires passing them via custom_objects (or
# loading with compile=False) — confirm before relying on the saved file.
model.save("google_model.h5")


# 7. Predicting Future Prices

In [None]:
def predict_next_days(model, X_test_scaled, scaler, num_days):
    """
    Roll the model forward `num_days` steps, feeding each prediction back in.

    The last window of `X_test_scaled` seeds the forecast. After every step the
    oldest value is dropped from the window and the new prediction is appended,
    so later steps are conditioned on earlier predictions.

    Parameters:
    - model (tf.keras.Model): Trained model; called via `model.predict`.
    - X_test_scaled (numpy.ndarray): Scaled input windows; only the last one
      is used, reshaped to (1, 1, window_length).
    - scaler (MinMaxScaler): Scaler whose `inverse_transform` maps predictions
      back to price units.
    - num_days (int): Number of steps to predict.

    Returns:
    - numpy.ndarray: 1-D array of `num_days` predicted prices in original
      units; an empty array when `num_days` is zero or negative.
    """
    if num_days <= 0:
        # Nothing to forecast. Returning early also avoids handing a zero-row
        # array to scaler.inverse_transform, which scikit-learn rejects.
        return np.array([])

    input_sequence = np.asarray(X_test_scaled[-1]).reshape(1, 1, -1)
    predicted = []
    for _ in range(num_days):
        # verbose=0 stops Keras printing a progress bar for every one-sample call
        next_day_pred = np.asarray(model.predict(input_sequence, verbose=0))
        predicted.append(next_day_pred[0, 0])
        # Slide the window: drop the oldest value, append the newest prediction
        input_sequence = np.concatenate(
            [input_sequence[:, :, 1:], next_day_pred.reshape(1, 1, 1)], axis=2
        )
    pred_price = scaler.inverse_transform(np.array(predicted).reshape(-1, 1))
    return pred_price.flatten()


In [None]:
# Forecast horizon (in days) and the resulting Google close-price forecast
num_days_to_predict = 3
pred_price = predict_next_days(
    model=model,
    X_test_scaled=test_X,
    scaler=scaler,
    num_days=num_days_to_predict,
)

In [None]:
# Print one line per forecast day, numbering days from 1
day_indices = range(num_days_to_predict)
for i in day_indices:
    print(f"Predicted close price for day {i + 1}: ${pred_price[i]:.2f}")

# 8. Preparing Facebook Data and Training Model

In [None]:
# Pull the FB rows out of the resampled frame, order them by 'date',
# and renumber the index. This rebinds `facebook`.
is_fb_row = upsampled_fb['symbol'] == 'FB'
facebook = upsampled_fb[is_fb_row].sort_values(by='date').reset_index(drop=True)

# Rescale the close column into [0, 1]. This rebinds `scaler`, replacing the
# instance that was fitted on the Google prices.
scaler = MinMaxScaler(feature_range=(0, 1))
close_prices = facebook['close'].to_numpy().reshape(-1, 1)
normalized_close_prices = scaler.fit(close_prices).transform(close_prices)

def make_dataset(data, win_size):
    """
    Turn a single-column series into sliding-window (input, target) pairs.

    Parameters:
    - data (numpy.ndarray): 2-D array; only column 0 is read.
    - win_size (int): Number of consecutive values in each input window.

    Returns:
    - tuple: (X, Y) where row k of X is data[k:k + win_size, 0] and Y[k] is
      the value that immediately follows that window.
    """
    window_starts = range(len(data) - win_size)
    inputs = [data[start:start + win_size, 0] for start in window_starts]
    targets = [data[start + win_size, 0] for start in window_starts]
    return np.array(inputs), np.array(targets)

In [None]:

# Window the normalized Facebook series and split it 80/20 in order
win_size = 20
X, Y = make_dataset(normalized_close_prices, win_size)
# Insert a length-1 middle axis: (samples, win_size) -> (samples, 1, win_size)
X = X[:, np.newaxis, :]
train_size = int(len(X) * 0.80)
test_size = len(X) - train_size
train_X, train_Y = X[:train_size], Y[:train_size]
test_X, test_Y = X[train_size:], Y[train_size:]


# Persist the held-out Facebook input windows to disk with pickle
with open('facebook_X_test.pkl', mode='wb') as test_file:
    pickle.dump(test_X, test_file)


In [None]:
# Define a fresh GRU network for Facebook (compiled in a later cell), with the
# same architecture as the Google one. This rebinds `model`.
model = Sequential([
    GRU(units=50, return_sequences=True, input_shape=(1, win_size)),
    GRU(units=50, return_sequences=True),
    GRU(units=50),
    Dense(units=1),
])

def rmse(y_true, y_pred):
    """
    Square root of the mean squared error between predictions and targets.

    This repeats the `rmse` definition from the Google model section.

    Parameters:
    - y_true (Tensor): Ground-truth values.
    - y_pred (Tensor): Model outputs.

    Returns:
    - Tensor: sqrt(mean((y_pred - y_true)^2)), reduced over all elements.
    """
    squared_error = tf.square(y_pred - y_true)
    mean_squared_error = tf.reduce_mean(squared_error)
    return tf.sqrt(mean_squared_error)

In [None]:
# Configure training: Adam optimizer, MSE loss, and the two custom
# root-error metrics
model.compile(
    optimizer='adam',
    loss='mean_squared_error',
    metrics=[rmse, rmae],
)

# Fit on the Facebook training windows; the held-out windows are scored after
# every epoch as validation data
trainmodel = model.fit(
    x=train_X,
    y=train_Y,
    epochs=50,
    batch_size=32,
    validation_data=(test_X, test_Y),
)

# Write the trained Facebook model to disk
model.save("fb_model.h5")

# 9. Predicting Future Prices for Facebook

In [None]:
# Forecast Facebook close prices over the same horizon used for Google
pred_price = predict_next_days(
    model=model,
    X_test_scaled=test_X,
    scaler=scaler,
    num_days=num_days_to_predict,
)

# Print one line per forecast day, numbering days from 1
day_indices = range(num_days_to_predict)
for i in day_indices:
    print(f"Predicted close price for day {i + 1}: ${pred_price[i]:.2f}")

# 10. Plotting Evaluation Metrics

In [None]:
# Define evaluation metrics for Google and Facebook.
# NOTE(review): these numbers are hard-coded rather than read from the training
# history — update them (or derive them from the fit results) after retraining.
Google = {
    'RMSE': 0.1954,
    'RMAE': 0.3946,
    'LOSS': 0.0396
}
Facebook = {
    'RMSE': 0.1909,
    'RMAE': 0.3804,
    'LOSS': 0.0401
}

metrics = list(Google.keys())
model1_values = list(Google.values())
model2_values = list(Facebook.values())

# Build the whole figure in ONE cell. The notebook's inline backend renders and
# closes a figure at the end of each cell, so creating the figure in one cell
# and adding subplots in later cells never produces the side-by-side chart.
fig, axes = plt.subplots(1, 2, figsize=(12, 6))
panels = [
    (axes[0], model1_values, 'b', 'Evaluation Metrics for Google'),
    (axes[1], model2_values, 'r', 'Evaluation Metrics for Facebook'),
]
for ax, values, color, title in panels:
    bars = ax.bar(metrics, values, color=color, alpha=0.7)
    ax.set_xlabel('Metrics')
    ax.set_ylabel('Metric Values')
    ax.set_title(title)
    # Write each metric's value just above its bar
    for bar, value in zip(bars, values):
        ax.text(bar.get_x() + bar.get_width() / 2, bar.get_height(), f'{value:.4f}', ha='center', va='bottom')

fig.tight_layout()
plt.show()