In [22]:
import yfinance as yf
import matplotlib.pyplot as plt
import statistics
import numpy as np
from pandas.tseries.offsets import BDay
import pandas as pd
import calendar

# Load data for AMZ
amzn_data = pd.read_csv('stock_data_AMEN.csv')
amzn_data['Date'] = pd.to_datetime(amzn_data['Date'])
amzn_data.set_index('Date', inplace=True)

# Load data for NCLH
nclh_data = pd.read_csv('stock_data.csv')
nclh_data['Date'] = pd.to_datetime(nclh_data['Date'])
nclh_data.set_index('Date', inplace=True)

In [23]:
def median_forecast(stock_data, date, N):
    # Filter the stock data up to the specified date
    date = pd.to_datetime(date)
    stock_data_before_date = stock_data[stock_data.index <= date]

    # Calculate the start index
    start_index = len(stock_data_before_date) - N

    # Extract the closing prices within the specified window
    closing_prices = stock_data_before_date['Volume'].iloc[start_index:]
    
    # Calculate the median of volumes
    median = statistics.median(closing_prices)
    
    return median

def get_real_volume(stock_data, date):
    date = pd.to_datetime(date)
    # Calculate real volume
    real_volume = stock_data.loc[stock_data.index == date]['Volume']
    # Check if real_volume is empty
    if real_volume.empty:
        return 0  # No trading volume available for this date, return 0
    else:
        return real_volume.iloc[0]  # Return the actual traded volume if available


def average_forecast(stock_data, date, N):
    # Filter the stock data up to the specified date
    date = pd.to_datetime(date)
    stock_data_before_date = stock_data[stock_data.index <= date]

    # Calculate the start index
    start_index = len(stock_data_before_date) - N

    # Extract the closing prices within the specified window
    closing_prices = stock_data_before_date['Volume'].iloc[start_index:]
    
    # Calculate the mean of volumes
    mean = statistics.mean(closing_prices)
    
    return mean

get_real_volume(amzn_data, '2019-01-01 00:00:00')

0

In [24]:
def get_monthly_MSE(year, stock_data, N, forecast_type):
    monthly_MSE_list = []

    for month in range(1, 13):  # Iterate through each month
        # Calculate the first day of the month
        start_date = pd.to_datetime(f"{year}-{month:02d}-01")
        end_date = start_date + pd.DateOffset(days=calendar.monthrange(year, month)[1] - 1)

        # Initialize the lists
        forecast_list = []
        real_volume_list = []

        # We forecast each day of the month using the 'forecast_type' method:
        for day in pd.date_range(start_date, end_date):
            if forecast_type == 1:
                forecast = average_forecast(stock_data, day, N)
            elif forecast_type == 2:
                forecast = median_forecast(stock_data, day, N)

            real_volume = get_real_volume(stock_data, day)
            
            # Skip the day if real volume is 0
            if real_volume == 0:
                continue

            forecast_list.append(forecast)
            real_volume_list.append(real_volume)

        # Calculate mean squared error
        if len(real_volume_list) > 0:  # Check if there are days with real volume
            MSE = sum((x - y) ** 2 for x, y in zip(forecast_list, real_volume_list)) / len(real_volume_list)
            monthly_MSE_list.append(MSE)

    return monthly_MSE_list

year = 2019  
forecast_type = 2  
stock_data = amzn_data 
N = 10

monthly_MSE = get_monthly_MSE(year, stock_data, N, forecast_type)
print(monthly_MSE)


[1235094576380952.5, 921223521842105.2, 982921446285714.2, 758467514761904.8, 364745198454545.44, 839783786450000.0, 370176663454545.44, 382110040909090.94, 321945098550000.0, 1003537883434782.6, 145986338550000.0, 598993113714285.8]


In [25]:
def get_yearly_MSE(year, stock_data, N, forecast_type):
    print(f'For the year {year} and the stock : ')
    monthly_MSE = get_monthly_MSE(year, stock_data, N, forecast_type)
    return np.mean(monthly_MSE)


In [26]:
# Define a list of holidays for 2019
holidays_2019 = [
    "2019-10-14",  # Columbus Day
    "2019-11-11",  # Veteran's Day
    "2019-11-29",  # Friday after Thanksgiving
    "2019-12-31",  # December 31
]


# Define a function to calculate the MSE
def calculate_mse(stock_data, date, N, forecast_type):
    # Calculate forecast and get the real price for the given date
    forecast = 0
    real_price = 0  # Replace with the actual real price for the date

    if forecast_type == 1:
        # Calculate the average forecast
        forecast = average_forecast(stock_data, date, N)
    elif forecast_type == 2:
        # Calculate the median forecast
        forecast = median_forecast(stock_data, date, N)

    # Calculate the Mean Squared Error
    mse = (forecast - real_price) ** 2
    return mse

# Function to calculate the MSE for a given forecast type
def calculate_mse_for_holidays(stock_data, holidays, N, forecast_type):
    mse_values = []
    for holiday_date in holidays:
        mse = calculate_mse(stock_data, holiday_date, N, forecast_type)
        mse_values.append(mse)
    return mse_values

stock_data = amzn_data 

# Calculate MSE for Average and Median forecasts for N=10
mse_average = calculate_mse_for_holidays(stock_data, holidays_2019, N=10, forecast_type=1)
mse_median = calculate_mse_for_holidays(stock_data, holidays_2019, N=10, forecast_type=2)

# Print or store the MSE values as needed
print("MSE for Average Forecast (N=10):", mse_average)
print("MSE for Median Forecast (N=10):", mse_median)





MSE for Average Forecast (N=10): [2874310878760000, 2247802921000000, 3268706190760000, 5238951256360000]
MSE for Median Forecast (N=10): [2618573584000000.0, 2233791169000000.0, 3169127025000000.0, 4894121764000000.0]


In [27]:
# Function to fill the table for Amazon (AMZN)
def fill_amzn_table(years, n_values):
    amzn_table = []

    for year in years:
        year_row = [year]

        for n in n_values:
            mse_lag1 = get_yearly_MSE(year, amzn_data, n, forecast_type=1)
            mse_lag2 = get_yearly_MSE(year, amzn_data, n, forecast_type=2)
            year_row.extend([f"{mse_lag1:.4e}", f"{mse_lag2:.4e}"])

        amzn_table.append(year_row)

    return amzn_table

# Function to fill the table for NCLH
def fill_nclh_table(years, n_values):
    nclh_table = []

    for year in years:
        year_row = [year]

        for n in n_values:
            mse_average = get_yearly_MSE(year, nclh_data, n, forecast_type=1)
            mse_median = get_yearly_MSE(year, nclh_data, n, forecast_type=2)
            year_row.extend([f"{mse_average:.4e}", f"{mse_median:.4e}"])

        nclh_table.append(year_row)

    return nclh_table

# Define the years and N values
years = [2017, 2018, 2019, 2020]
n_values = [10, 30, 60]

# Fill the tables
amzn_table = fill_amzn_table(years, n_values)
nclh_table = fill_nclh_table(years, n_values)

# Print the tables
print("Table 1: Average Mean Square Error (MSE) for Amazon")
print(amzn_table)

print("\nTable 2: Average Mean Square Error (MSE) for NCLH")
print(nclh_table)


For the year 2017 and the stock : 


For the year 2017 and the stock : 
For the year 2017 and the stock : 
For the year 2017 and the stock : 
For the year 2017 and the stock : 
For the year 2017 and the stock : 
For the year 2018 and the stock : 
For the year 2018 and the stock : 
For the year 2018 and the stock : 
For the year 2018 and the stock : 
For the year 2018 and the stock : 
For the year 2018 and the stock : 
For the year 2019 and the stock : 
For the year 2019 and the stock : 
For the year 2019 and the stock : 
For the year 2019 and the stock : 
For the year 2019 and the stock : 
For the year 2019 and the stock : 
For the year 2020 and the stock : 
For the year 2020 and the stock : 
For the year 2020 and the stock : 
For the year 2020 and the stock : 
For the year 2020 and the stock : 
For the year 2020 and the stock : 
For the year 2017 and the stock : 
For the year 2017 and the stock : 
For the year 2017 and the stock : 
For the year 2017 and the stock : 
For the year 2017 and the stock : 
For the year 2017 an

In [28]:
# Define the table structure
table_data = {
    "Month": [
        "January",
        "February",
        "March",
        "April",
        "May",
        "June",
        "July",
        "August",
        "September",
        "October",
        "November",
        "December",
        "Average",
    ],
    "MSE for Average": [""] * 13,  # Initialize with empty strings
    "MSE for Median": [""] * 13,
}

# Loop through the months and calculate MSE for both methods
for month in range(1, 13):
    # Calculate the MSE for Average forecast
    mse_average = np.mean(get_monthly_MSE(2019, amzn_data, N=10, forecast_type=1)[month - 1])

    # Calculate the MSE for Median forecast
    mse_median = np.mean(get_monthly_MSE(2019, amzn_data, N=10, forecast_type=2)[month - 1])

    # Fill the table data with MSE values
    table_data["MSE for Average"][month - 1] = f"{mse_average:.4e}"
    table_data["MSE for Median"][month - 1] = f"{mse_median:.4e}"

# Print the filled table
filled_table = pd.DataFrame(table_data)
print(filled_table)

        Month MSE for Average MSE for Median
0     January      1.1299e+15     1.2351e+15
1    February      1.1526e+15     9.2122e+14
2       March      7.6900e+14     9.8292e+14
3       April      6.0308e+14     7.5847e+14
4         May      2.9683e+14     3.6475e+14
5        June      8.2447e+14     8.3978e+14
6        July      3.1315e+14     3.7018e+14
7      August      3.9135e+14     3.8211e+14
8   September      2.6635e+14     3.2195e+14
9     October      8.6798e+14     1.0035e+15
10   November      2.1504e+14     1.4599e+14
11   December      5.3531e+14     5.9899e+14
12    Average                               


In [29]:
stock_data = amzn_data
mean_list =[]
median_list = []
real_list= []

mean_list.append(average_forecast(stock_data, '2019-01-16', 10))
mean_list.append(average_forecast(stock_data, '2019-03-01', 30))
mean_list.append(average_forecast(stock_data, '2019-04-12', 60))

median_list.append(median_forecast(stock_data, '2019-01-16', 10))
median_list.append(median_forecast(stock_data, '2019-03-01', 30))
median_list.append(median_forecast(stock_data, '2019-04-12', 60))

real_list.append(get_real_volume(stock_data, '2019-01-16'))
real_list.append(get_real_volume(stock_data, '2019-03-01'))
real_list.append(get_real_volume(stock_data, '2019-04-12'))

print(mean_list)
print(median_list)
print(real_list)

[69094800, 82852866.66666667, 110621633.33333333]
[69489000.0, 70365000.0, 96963000.0]
[57932000, 52124000, 122522000]
