## Yahoo Finace Libaray

In [28]:

# Define the stock symbol and date range
stock_symbol = "AAME"
start_date = "2013-01-01"
end_date = "2023-01-01"

# Fetch historical stock data from Yahoo Finance
stock_data = yf.download(stock_symbol, start=start_date, end=end_date)

# Reset index to make 'Date' a column again
stock_data.reset_index(inplace=True)

## Plot historical data

In [24]:
import time
from datetime import datetime
import plotly_express as px


def plot_stock_data(data,title):
    '''function for plotting stock data'''
    plot = px.line(data, 
                        x="Date", 
                        y=["Close"], 
                        hover_name="Date",
                        line_shape="linear",
                        title=title) 
    return plot

plot_stock_data(stock_data[-30:],'Airline') # for 30 days

In [25]:
stock_data[['Close']].plot()
plt.title("Closed Price Stock Market")
plt.show()

## Outlier checking

In [None]:
# Convert the "Date" column to a datetime format
df['Date'] = pd.to_datetime(df['Date'])
df

In [None]:
# Specify the column to analyze for outliers 
column_name = 'Open'

# Calculate the IQR (Interquartile Range)
Q1 = df[column_name].quantile(0.25)
Q3 = df[column_name].quantile(0.75)
IQR = Q3 - Q1

# Define the lower and upper bounds for outliers
lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR

# Identify and count outliers
outliers = df[(df[column_name] < lower_bound) | (df[column_name] > upper_bound)]
num_outliers = len(outliers)

# Display the number of outliers
print(f'Number of outliers in {column_name}: {num_outliers}')


In [None]:
# Specify the column to analyze for outliers 
column_name = 'Close'

# Calculate the IQR (Interquartile Range)
Q1 = df[column_name].quantile(0.25)
Q3 = df[column_name].quantile(0.75)
IQR = Q3 - Q1

# Define the lower and upper bounds for outliers
lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR

# Identify and count outliers
outliers = df[(df[column_name] < lower_bound) | (df[column_name] > upper_bound)]
num_outliers = len(outliers)

# Display the number of outliers
print(f'Number of outliers in {column_name}: {num_outliers}')


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns


# Specify the column to visualize outliers 
column_name = 'Close'

# Create a box plot to visualize outliers
plt.figure(figsize=(8, 6))
sns.boxplot(x=df[column_name], orient='v', width=0.3, palette='Set1')
plt.title(f'Box Plot for {column_name} (with Outliers)')
plt.ylabel(column_name)
plt.show()


## Hyperparameters

In [3]:
# Define hyperparameters
input_size = len(feature_columns)
hidden_size = 64
output_size = 1
num_layers = 2
num_attention_heads = 4
learning_rate = 0.001
num_epochs = 100
# Define the number of time steps to use as input features
num_time_steps = 10 

## LSTM

In [30]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import yfinance as yf
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np
import matplotlib.pyplot as plt

# Function to calculate Mean Absolute Percentage Error (MAPE)
def calculate_mape(y_true, y_pred):
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

# Define the stock symbol and date range
stock_symbol = "AAME"
start_date = "2013-01-01"
end_date = "2023-01-01"

# Fetch historical stock data from Yahoo Finance
stock_data = yf.download(stock_symbol, start=start_date, end=end_date)

# Reset index to make 'Date' a column again
stock_data.reset_index(inplace=True)

# Rename 'Adj Close' to 'Adjusted Close'
stock_data.rename(columns={'Adj Close': 'Adjusted Close'}, inplace=True)

# Use 'Date' as index
stock_data.set_index('Date', inplace=True)

# Normalize data
columns_to_normalize = ['Low', 'Open', 'Volume', 'High', 'Close', 'Adjusted Close']
data_to_normalize = stock_data[columns_to_normalize].values

# Convert the data to PyTorch tensor
tensor_data = torch.tensor(data_to_normalize, dtype=torch.float32)

# Calculate mean and standard deviation for each column
mean = tensor_data.mean(dim=0)
std = tensor_data.std(dim=0)

# Normalize the data
normalized_data = (tensor_data - mean) / std

# Convert the normalized data back to a DataFrame
df_normalized = pd.DataFrame(normalized_data.numpy(), columns=columns_to_normalize)

# Add 'Date' column back to the DataFrame
df_normalized['Date'] = stock_data.index
data = df_normalized

# Define the percentage of data for training, validation, and testing
train_percent = 0.7
val_percent = 0.15
test_percent = 0.15

# Calculate the sizes of the train, validation, and test sets
train_size = int(train_percent * len(data))
val_size = int(val_percent * len(data))
test_size = len(data) - train_size - val_size

# Split the data into train, validation, and test sets
train_data = data.iloc[:train_size]
val_data = data.iloc[train_size:train_size + val_size]
test_data = data.iloc[train_size + val_size:]

# Define the features(inputs) and target columns
feature_columns = ['Low', 'Open', 'High']
target_column = 'Close'

# Extract features and target for each dataset
train_features = train_data[feature_columns].values
train_target = train_data[target_column].values

val_features = val_data[feature_columns].values
val_target = val_data[target_column].values

test_features = test_data[feature_columns].values
test_target = test_data[target_column].values

# Convert data to PyTorch tensors
train_features = torch.tensor(train_features, dtype=torch.float32)
train_target = torch.tensor(train_target, dtype=torch.float32)
val_features = torch.tensor(val_features, dtype=torch.float32)
val_target = torch.tensor(val_target, dtype=torch.float32)
test_features = torch.tensor(test_features, dtype=torch.float32)
test_target = torch.tensor(test_target, dtype=torch.float32)

num_epochs= 100



class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc_output = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        _, (hn, cn) = self.lstm(x)  # Use both hidden state (hn) and cell state (cn)
        x = self.fc_output(hn[-1, :, :])
        return x



# Define hyperparameters for LSTM
hidden_size_lstm = 64
num_layers_lstm = 2

# Create the LSTM model instance
model_lstm = LSTMModel(input_size, hidden_size_lstm, output_size, num_layers_lstm)

# Define the loss function and optimizer for LSTM
criterion= nn.MSELoss()
optimizer_lstm = optim.Adam(model_lstm.parameters(), lr=learning_rate)

# Lists to store training, validation, and test losses for LSTM
train_losses_lstm = []
val_losses_lstm = []
test_losses_lstm = []  # This list will store test loss for each epoch



# Training loop for LSTM
for epoch in range(num_epochs):
    model_lstm.train()
    optimizer_lstm.zero_grad()

    # Forward pass for LSTM
    outputs_lstm = model_lstm(train_features.unsqueeze(1))
    
    # Calculate the loss for LSTM
    loss_lstm = criterion(outputs_lstm, train_target.unsqueeze(1))

    # Backpropagation and optimization for LSTM
    loss_lstm.backward()
    optimizer_lstm.step()

    # Store the training loss for LSTM
    train_losses_lstm.append(loss_lstm.item())

    print(f'Epoch [{epoch + 1}/{num_epochs}], Training Loss (LSTM): {loss_lstm.item():.4f}')

    # Validation loss for LSTM
    model_lstm.eval()
    with torch.no_grad():
        val_outputs_lstm = model_lstm(val_features.unsqueeze(1))
        val_loss_lstm = criterion(val_outputs_lstm, val_target.unsqueeze(1))
    
    # Store the validation loss for LSTM
    val_losses_lstm.append(val_loss_lstm.item())

    print(f'Epoch [{epoch + 1}/{num_epochs}], Validation Loss (LSTM): {val_loss_lstm.item():.4f}')

    # Test loss for LSTM
    with torch.no_grad():
        test_outputs_lstm = model_lstm(test_features.unsqueeze(1))
        test_loss_lstm = criterion(test_outputs_lstm, test_target.unsqueeze(1))

    # Store the test loss for each epoch for LSTM
    test_losses_lstm.append(test_loss_lstm.item())

    print(f'Epoch [{epoch + 1}/{num_epochs}], Test Loss (LSTM): {test_loss_lstm.item():.4f}')


# Plotting the training and validation losses for LSTM
plt.plot(range(1, num_epochs + 1), train_losses_lstm, label='Training Loss (LSTM)')
plt.plot(range(1, num_epochs + 1), val_losses_lstm, label='Validation Loss (LSTM)')
plt.plot(range(1, num_epochs + 1), test_losses_lstm, label='Test Loss (LSTM)')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training and Validation Losses Over Epochs (LSTM)')
plt.legend()
plt.show()


# Switch the LSTM model to evaluation mode
model_lstm.eval()

# Make predictions on the validation set using LSTM
with torch.no_grad():
    val_predictions_lstm = model_lstm(val_features.unsqueeze(1))

# Convert predictions and targets back to numpy arrays for LSTM
val_predictions_lstm = val_predictions_lstm.squeeze(1).numpy()

# Calculate evaluation metrics for validation set using LSTM
mae_val_lstm = mean_absolute_error(val_target.numpy(), val_predictions_lstm)
mse_val_lstm = mean_squared_error(val_target.numpy(), val_predictions_lstm)
rmse_val_lstm = np.sqrt(mse_val_lstm)
mape_val_lstm = calculate_mape(val_target.numpy(), val_predictions_lstm)

# Print the evaluation metrics for validation set using LSTM
print('Validation Set Metrics (LSTM):')
print(f'Mean Absolute Error (MAE): {mae_val_lstm:.4f}')
print(f'Mean Squared Error (MSE): {mse_val_lstm:.4f}')
print(f'Root Mean Squared Error (RMSE): {rmse_val_lstm:.4f}')
print(f'Mean Absolute Percentage Error (MAPE): {mape_val_lstm:.4f}')
print()

# Make predictions on the test set using LSTM
with torch.no_grad():
    test_predictions_lstm = model_lstm(test_features.unsqueeze(1))

# Convert predictions and targets back to numpy arrays for LSTM
test_predictions_lstm = test_predictions_lstm.squeeze(1).numpy()

# Calculate evaluation metrics for test set using LSTM
mae_test_lstm = mean_absolute_error(test_target.numpy(), test_predictions_lstm)
mse_test_lstm = mean_squared_error(test_target.numpy(), test_predictions_lstm)
rmse_test_lstm = np.sqrt(mse_test_lstm)
mape_test_lstm = calculate_mape(test_target.numpy(), test_predictions_lstm)

# Print the evaluation metrics for test set using LSTM
print('Test Set Metrics (LSTM):')
print(f'Mean Absolute Error (MAE): {mae_test_lstm:.4f}')
print(f'Mean Squared Error (MSE): {mse_test_lstm:.4f}')
print(f'Root Mean Squared Error (RMSE): {rmse_test_lstm:.4f}')
print(f'Mean Absolute Percentage Error (MAPE): {mape_test_lstm:.4f}')

# Plotting the predicted values against true values for the test set using LSTM
plt.plot(test_target.numpy(), label='True Values')
plt.plot(test_predictions_lstm, label='Predicted Values (LSTM)', linestyle='dashed')
plt.xlabel('Time Steps')
plt.ylabel('Normalized Close Price')
plt.title('True vs Predicted Values for Test Set (LSTM)')
plt.legend()
plt.show()




In [11]:
# Calculate R2
r2 = r2_score(test_target, test_predictions_lstm)
print(f'R-squared: {r2}')

R-squared: 0.9842929076853941


## Prophet 

In [14]:
import matplotlib.pyplot as plt
import yfinance as yf
stock_ticker = 'AAPL'
yfin = yf.Ticker(stock_ticker)
data = yfin.history(period="max")
data = data[['Close']]
print(data.tail())

import yfinance as yf
stock_ticker = 'AAPL'
yfin = yf.Ticker(stock_ticker)
data = yfin.history(period="max")
data = data[['Close']]
data.reset_index(level=0, inplace=True)
data = data.rename({'Date': 'ds', 'Close': 'y'}, axis='columns')
print(data.tail())


data['ds'] = data['ds'].dt.tz_localize(None)
data



In [31]:
# Import libraries
from prophet import Prophet
import pandas as pd

# Fetch historical stock data
stock_symbol = "AAL"
start_date = "2013-01-01"
end_date = "2023-01-01"

# Download stock data from Yahoo Finance
stock_data = yf.download(stock_symbol, start=start_date, end=end_date)

# Prepare data for Prophet
prophet_data = stock_data.reset_index()[['Date', 'Close']]
prophet_data.rename(columns={'Date': 'ds', 'Close': 'y'}, inplace=True)

# Create and fit the Prophet model
model = Prophet()
model.fit(prophet_data)

# Create a dataframe for future dates (1 year)
future = model.make_future_dataframe(periods=365)

# Make predictions
forecast = model.predict(future)

# Plot the forecast
fig = model.plot(forecast)


In [None]:
figure1 = m.plot(forecast)

In [None]:
print(forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail())

In [None]:
figure1 = m.plot(forecast)
figure2 = m.plot_components(forecast)

In [None]:
from prophet.plot import plot_plotly, plot_components_plotly

plot_plotly(m, forecast)


## Transformer

In [16]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import yfinance as yf
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np
import matplotlib.pyplot as plt

# Function to calculate Mean Absolute Percentage Error (MAPE)
def calculate_mape(y_true, y_pred):
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

# Define the stock symbol and date range
stock_symbol = "AAME"
start_date = "2013-01-01"
end_date = "2023-01-01"

# Fetch historical stock data from Yahoo Finance
stock_data = yf.download(stock_symbol, start=start_date, end=end_date)

# Reset index to make 'Date' a column again
stock_data.reset_index(inplace=True)

# Rename 'Adj Close' to 'Adjusted Close'
stock_data.rename(columns={'Adj Close': 'Adjusted Close'}, inplace=True)

# Use 'Date' as index
stock_data.set_index('Date', inplace=True)

# Normalize data
columns_to_normalize = ['Low', 'Open', 'Volume', 'High', 'Close', 'Adjusted Close']
data_to_normalize = stock_data[columns_to_normalize].values

# Convert the data to PyTorch tensor
tensor_data = torch.tensor(data_to_normalize, dtype=torch.float32)

# Calculate mean and standard deviation for each column
mean = tensor_data.mean(dim=0)
std = tensor_data.std(dim=0)

# Normalize the data
normalized_data = (tensor_data - mean) / std

# Convert the normalized data back to a DataFrame
df_normalized = pd.DataFrame(normalized_data.numpy(), columns=columns_to_normalize)

# Add 'Date' column back to the DataFrame
df_normalized['Date'] = stock_data.index
data = df_normalized

# Define the percentage of data for training, validation, and testing
train_percent = 0.7
val_percent = 0.15
test_percent = 0.15

# Calculate the sizes of the train, validation, and test sets
train_size = int(train_percent * len(data))
val_size = int(val_percent * len(data))
test_size = len(data) - train_size - val_size

# Split the data into train, validation, and test sets
train_data = data.iloc[:train_size]
val_data = data.iloc[train_size:train_size + val_size]
test_data = data.iloc[train_size + val_size:]

# Define the features and target columns
feature_columns = ['Low', 'Open', 'High']
target_column = 'Close'

# Extract features and target for each dataset
train_features = train_data[feature_columns].values
train_target = train_data[target_column].values

val_features = val_data[feature_columns].values
val_target = val_data[target_column].values

test_features = test_data[feature_columns].values
test_target = test_data[target_column].values

# Convert data to PyTorch tensors
train_features = torch.tensor(train_features, dtype=torch.float32)
train_target = torch.tensor(train_target, dtype=torch.float32)
val_features = torch.tensor(val_features, dtype=torch.float32)
val_target = torch.tensor(val_target, dtype=torch.float32)
test_features = torch.tensor(test_features, dtype=torch.float32)
test_target = torch.tensor(test_target, dtype=torch.float32)

# Define a simple Transformer model
class TransformerModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers, num_attention_heads):
        super(TransformerModel, self).__init__()
        self.embedding = nn.Linear(input_size, hidden_size)
        self.transformer = nn.Transformer(
            d_model=hidden_size,
            nhead=num_attention_heads,
            num_encoder_layers=num_layers,
            num_decoder_layers=num_layers
        )
        self.fc_output = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = self.embedding(x)
        x = x.permute(1, 0, 2)  # Adjust input shape for the transformer
        x = self.transformer(x, x)  # Set source and target as the same data
        x = x.permute(1, 0, 2)  # Restore the original shape
        x = self.fc_output(x[:, -1, :])  # Use the last layer's output for prediction
        return x

# Define hyperparameters
input_size = len(feature_columns)
hidden_size = 64
output_size = 1
num_layers = 2
num_attention_heads = 4
learning_rate = 0.001
num_epochs = 100

# Create the model instance
model = TransformerModel(input_size, hidden_size, output_size, num_layers, num_attention_heads)

# Define the loss function and optimizer
criterion = nn.MSELoss()  # Mean Squared Error loss
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Lists to store training, validation, and test losses
train_losses = []
val_losses = []
test_losses = [] 

# Training loop
for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()

    # Forward pass
    outputs = model(train_features.unsqueeze(1))
    
    # Calculate the loss
    loss = criterion(outputs, train_target.unsqueeze(1))

    # Backpropagation and optimization
    loss.backward()
    optimizer.step()

    # Store the training loss
    train_losses.append(loss.item())

    print(f'Epoch [{epoch + 1}/{num_epochs}], Training Loss: {loss.item():.4f}')

    # Validation loss
    model.eval()
    with torch.no_grad():
        val_outputs = model(val_features.unsqueeze(1))
        val_loss = criterion(val_outputs, val_target.unsqueeze(1))
    
    # Store the validation loss
    val_losses.append(val_loss.item())

    print(f'Epoch [{epoch + 1}/{num_epochs}], Validation Loss: {val_loss.item():.4f}')

    # Test loss
    with torch.no_grad():
        test_outputs = model(test_features.unsqueeze(1))
        test_loss = criterion(test_outputs, test_target.unsqueeze(1))

    # Store the test loss for each epoch
    test_losses.append(test_loss.item())

    print(f'Epoch [{epoch + 1}/{num_epochs}], Test Loss: {test_loss.item():.4f}')

# Plotting the training and validation losses
plt.plot(range(1, num_epochs + 1), train_losses, label='Training Loss')
plt.plot(range(1, num_epochs + 1), val_losses, label='Validation Loss')
plt.plot(range(1, num_epochs + 1), test_losses, label='Test Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training and Validation Losses Over Epochs')
plt.legend()
plt.show()


# Print the test loss for the last epoch
print(f'Final Test Loss: {test_losses[-1]:.4f}')
print()

# Switch the model to evaluation mode
model.eval()

# Make predictions on the validation set
with torch.no_grad():
    val_predictions = model(val_features.unsqueeze(1))

# Convert predictions and targets back to numpy arrays
val_predictions = val_predictions.squeeze(1).numpy()
val_target_numpy = val_target.numpy()

# Calculate evaluation metrics for validation set
mae_val = mean_absolute_error(val_target_numpy, val_predictions)
mse_val = mean_squared_error(val_target_numpy, val_predictions)
rmse_val = np.sqrt(mse_val)
mape_val = calculate_mape(val_target_numpy, val_predictions)

# Print the evaluation metrics for validation set
print('Validation Set Metrics:')
print(f'Mean Absolute Error (MAE): {mae_val:.4f}')
print(f'Mean Squared Error (MSE): {mse_val:.4f}')
print(f'Root Mean Squared Error (RMSE): {rmse_val:.4f}')
print(f'Mean Absolute Percentage Error (MAPE): {mape_val:.4f}')
print()

# Make predictions on the test set
with torch.no_grad():
    test_predictions = model(test_features.unsqueeze(1))

# Convert predictions and targets back to numpy arrays
test_predictions = test_predictions.squeeze(1).numpy()
test_target_numpy = test_target.numpy()

# Calculate evaluation metrics for test set
mae_test = mean_absolute_error(test_target_numpy, test_predictions)
mse_test = mean_squared_error(test_target_numpy, test_predictions)
rmse_test = np.sqrt(mse_test)
mape_test = calculate_mape(test_target_numpy, test_predictions)

# Print the evaluation metrics for test set
print('Test Set Metrics:')
print(f'Mean Absolute Error (MAE): {mae_test:.4f}')
print(f'Mean Squared Error (MSE): {mse_test:.4f}')
print(f'Root Mean Squared Error (RMSE): {rmse_test:.4f}')
print(f'Mean Absolute Percentage Error (MAPE): {mape_test:.4f}')

# Plotting the predicted values against true values for the test set
plt.plot(test_target_numpy, label='True Values')
plt.plot(test_predictions, label='Predicted Values', linestyle='dashed')
plt.xlabel('Time Steps')
plt.ylabel('Normalized Close Price')
plt.title('True vs Predicted Values for Test Set')
plt.legend()
plt.show()
