In [None]:
import pandas as pd
import numpy as np
import math
import yfinance as yf
from statsmodels.regression.linear_model import OLS
from sklearn.metrics import *
import matplotlib.pyplot as plt

In [None]:
# Ticker symbols to analyse; the results loop calls model_func once per entry.
stock_symbols = [
    'AMZN',
    'MSFT',
    'AAPL',
]

# Prepare data

In [None]:
def create_seq_data(data, num_days, include_last=False):
    """Build sliding-window samples for one-step-ahead prediction.

    Sample ``i`` pairs the window ``data[i : i + num_days]`` (features) with
    the observation that immediately follows it, ``data[i + num_days]``
    (target).

    Parameters
    ----------
    data : pandas.Series, pandas.DataFrame, numpy.ndarray or sequence
        Observations in chronological order; windows are taken along the
        first axis.
    num_days : int
        Window length. Must be a positive integer.
    include_last : bool, default False
        By default the function stops one sample early, so the final
        observation is never used as a target. That historical behaviour is
        kept as the default because callers may size other arrays from the
        returned sample count. Pass ``True`` to also emit the last valid
        sample.

    Returns
    -------
    x : numpy.ndarray
        Windows, shape ``(n_samples, num_days, ...)``.
    y : numpy.ndarray
        Targets, shape ``(n_samples, ...)``.

        ``n_samples`` is ``max(0, len(data) - num_days - 1)`` by default and
        ``max(0, len(data) - num_days)`` when ``include_last`` is true. When
        there is not enough data, empty arrays with the shapes above are
        returned.

    Raises
    ------
    ValueError
        If ``num_days`` is smaller than 1.
    """
    if num_days < 1:
        raise ValueError(f"num_days must be a positive integer, got {num_days!r}")

    # Work on the raw values so Series, DataFrames, arrays and lists all behave alike.
    values = np.asarray(data)
    n_samples = max(0, len(values) - num_days - (0 if include_last else 1))

    starts = np.arange(n_samples)
    # Row i of window_idx holds positions i .. i + num_days - 1, so a single
    # fancy-indexing call extracts every window at once (and copies the data).
    window_idx = starts[:, None] + np.arange(num_days)

    x = values[window_idx]
    y = values[starts + num_days]
    return x, y


# Create and evaluate model

In [None]:
def _plot_fit(stock_symbol, stock_data, train_dates, ytrain, train_pred,
              test_dates, ytest, test_pred):
    """Show actual-vs-predicted prices (left) and residuals (right) for one symbol."""
    fig, (ax_price, ax_resid) = plt.subplots(1, 2, figsize=(14, 5))

    ax_price.plot(stock_data['Date'], stock_data['Open'], label='Actuals', color='red')
    ax_price.plot(train_dates, train_pred, label='Train predictions', color='blue')
    ax_price.plot(test_dates, test_pred, label='Test predictions', color='green')
    ax_price.set_title(f"{stock_symbol} Actual vs Predictions")
    ax_price.set_xlabel("Date")
    ax_price.set_ylabel("Price")
    ax_price.legend()

    ax_resid.scatter(train_dates, ytrain - train_pred, label='Train residuals', alpha=0.5)
    ax_resid.scatter(test_dates, ytest - test_pred, label='Test residuals', alpha=0.5)
    ax_resid.set_title(f"{stock_symbol} Residuals")
    ax_resid.set_xlabel("Date")
    ax_resid.set_ylabel("Residuals")
    ax_resid.legend()

    fig.tight_layout()
    plt.show()


def model_func(stock_symbol, num_prev_days=30, train_fraction=0.8):
    """Fit an OLS model predicting a day's open from the preceding opens.

    Downloads daily prices for ``stock_symbol`` from 2010-01-01 until now,
    splits them chronologically into train and test parts, builds
    sliding-window samples from the 'Open' column with ``create_seq_data``,
    fits ``OLS`` on the training samples, displays the model summary and
    diagnostic plots, and returns error metrics for both parts.

    Parameters
    ----------
    stock_symbol : str
        Ticker symbol passed to ``yf.download``.
    num_prev_days : int, default 30
        Number of preceding opens used as regressors for each sample.
    train_fraction : float, default 0.8
        Share of the rows (earliest first) assigned to the training part.

    Returns
    -------
    tuple
        ``(train_mse, test_mse, train_mae, test_mae, train_r2, test_r2)``.

    Raises
    ------
    ValueError
        If no data was downloaded, or if either part is too short to yield
        at least one sample.
    """
    stock_data = yf.download(stock_symbol, start="2010-01-01", end=pd.Timestamp.now())
    if stock_data.empty:
        raise ValueError(f"No price data downloaded for {stock_symbol!r}")

    # NOTE(review): some yfinance releases appear to return (field, ticker)
    # MultiIndex columns even for a single symbol, which would turn
    # stock_data['Open'] into a DataFrame - confirm against the installed
    # version. Flattening is a no-op for plain columns.
    if isinstance(stock_data.columns, pd.MultiIndex):
        stock_data.columns = stock_data.columns.get_level_values(0)

    # Keep the dates as a regular column and switch to a positional index,
    # building a new frame instead of mutating the downloaded one in place.
    stock_data = stock_data.assign(Date=stock_data.index).reset_index(drop=True)

    # Chronological split: earliest rows train, latest rows test.
    train_size = int(train_fraction * len(stock_data))
    train_data = stock_data.iloc[:train_size]
    test_data = stock_data.iloc[train_size:]

    xtrain, ytrain = create_seq_data(train_data['Open'], num_prev_days)
    xtest, ytest = create_seq_data(test_data['Open'], num_prev_days)
    if len(ytrain) == 0 or len(ytest) == 0:
        raise ValueError(
            f"Not enough history for {stock_symbol!r}: {len(stock_data)} rows cannot "
            f"provide train and test samples with num_prev_days={num_prev_days}"
        )

    # NOTE(review): OLS does not add an intercept on its own, so this fits a
    # regression through the origin - confirm that is intended.
    model = OLS(ytrain, xtrain).fit()
    train_pred = model.predict(xtrain)
    test_pred = model.predict(xtest)

    train_mse = mean_squared_error(ytrain, train_pred)
    test_mse = mean_squared_error(ytest, test_pred)

    train_mae = mean_absolute_error(ytrain, train_pred)
    test_mae = mean_absolute_error(ytest, test_pred)

    train_r2 = r2_score(ytrain, train_pred)
    test_r2 = r2_score(ytest, test_pred)

    print(f"Summary for {stock_symbol}: ")
    display(model.summary())

    # The target of sample i is row i + num_prev_days of its split, so the
    # matching dates start at num_prev_days (not num_prev_days + 1). Slicing
    # by the number of targets keeps dates and predictions the same length.
    train_dates = train_data['Date'].iloc[num_prev_days:num_prev_days + len(ytrain)]
    test_dates = test_data['Date'].iloc[num_prev_days:num_prev_days + len(ytest)]

    _plot_fit(stock_symbol, stock_data, train_dates, ytrain, train_pred,
              test_dates, ytest, test_pred)

    return train_mse, test_mse, train_mae, test_mae, train_r2, test_r2


# Results

In [None]:
# Fit, plot and report the error metrics for every symbol in stock_symbols.
for val in stock_symbols:
    train_mse, test_mse, train_mae, test_mae, train_r2, test_r2 = model_func(val)
    # model_func returns the squared error only; derive RMSE from it here.
    train_rmse, test_rmse = math.sqrt(train_mse), math.sqrt(test_mse)

    print(
        f"{val} Train MSE: {train_mse}, Test MSE: {test_mse}",
        f"{val} Train MAE: {train_mae}, Test MAE: {test_mae}",
        f"{val} Train RMSE: {train_rmse}, Test RMSE: {test_rmse}",
        f"{val} Train R^2: {train_r2}, Test R^2 : {test_r2}",
        sep="\n",
    )