# 1. Calculate MAPE for ARIMA model

## MAPE for ARIMA predictions on HOSE

In [2]:
import pandas as pd
import numpy as np

# Observed 2023 data: read, parse the 'date' column, then order rows
# chronologically within each ticker.
hose_df_2023 = pd.read_csv('./data/hose_df_2023.csv')
hose_df_2023['date'] = pd.to_datetime(hose_df_2023['date'])
hose_df_2023 = hose_df_2023.sort_values(by=['ticker', 'date'])

# ARIMA forecasts: read, parse the 'date' column, then order rows
# chronologically.
hose_arima_pred = pd.read_csv('./arima/hose_arima_pred.csv')
hose_arima_pred['date'] = pd.to_datetime(hose_arima_pred['date'])
hose_arima_pred = hose_arima_pred.sort_values(by='date')

# Define function to calculate MAPE
def calculate_mape(actual, predicted):
    """Return the mean absolute percentage error of `predicted` against `actual`.

    The error is ``mean(|actual - predicted| / |actual|) * 100``. When both
    arguments are scalars the result is simply that single pair's absolute
    percentage error. NaN inputs propagate to a NaN result for scalars.
    """
    absolute_ratio = np.abs((actual - predicted) / actual)
    return 100 * np.mean(absolute_ratio)

# Forecast horizons, expressed as 0-based row offsets (approximate business
# days) into each ticker's date-sorted series.
time_marks = {
    '1_week': 5,
    '2_week': 10,
    '3_week': 15,
    '1_month': 21,
    '2_month': 42,
    '6_month': 120,
    '12_month': 248
}

# Get unique tickers
tickers = hose_df_2023['ticker'].unique()

# One row per ticker, one column per horizon; cells stay NaN until filled.
arima_mape_hose = pd.DataFrame(index=tickers, columns=time_marks.keys())

# NOTE(review): actual and forecast values are paired by row position after
# sorting, not by calendar date. This assumes both series start on the same
# date and share the same trading calendar -- confirm against the data.
for ticker in tickers:
    # Both series depend only on the ticker, so build them once here instead
    # of once per timeframe.
    actual_close = (
        hose_df_2023.loc[hose_df_2023['ticker'] == ticker, ['date', 'close']]
        .reset_index(drop=True)['close']
    )
    arima_forecast = (
        hose_arima_pred[['date', ticker]]
        .dropna()
        .reset_index(drop=True)[ticker]
    )

    for timeframe, days in time_marks.items():
        # After reset_index the labels are 0..len-1, so row `days` exists only
        # when the series has more than `days` rows. Record NaN otherwise
        # instead of letting .loc raise KeyError.
        actual_value = actual_close.loc[days] if days < len(actual_close) else np.nan
        predicted_value = arima_forecast.loc[days] if days < len(arima_forecast) else np.nan

        # With a single actual/forecast pair this is that pair's absolute
        # percentage error, rounded to 2 decimal places.
        mape_value = round(calculate_mape(actual_value, predicted_value), 2)
        arima_mape_hose.loc[ticker, timeframe] = mape_value

arima_mape_hose = arima_mape_hose.sort_index()
# Save the MAPE table to a CSV file
arima_mape_hose.to_csv('./arima/arima_mape_hose.csv')

arima_mape_hose.head()

Unnamed: 0,1_week,2_week,3_week,1_month,2_month,6_month,12_month
ACB,6.63,13.46,15.95,11.18,13.63,0.71,8.39
BCM,1.66,3.42,4.9,4.56,4.0,1.82,28.21
BID,6.32,13.93,14.22,13.06,18.81,10.75,10.86
BVH,2.92,4.12,7.55,5.39,5.77,5.66,17.97
CTG,4.59,10.98,10.69,8.9,7.35,7.66,0.52


In [6]:
import pandas as pd
import numpy as np

# Observed 2023 data: read, parse the 'date' column, then order rows
# chronologically within each ticker.
dse_df_2023 = pd.read_csv('./data/dse_df_2023.csv')
dse_df_2023['date'] = pd.to_datetime(dse_df_2023['date'])
dse_df_2023 = dse_df_2023.sort_values(by=['ticker', 'date'])

# ARIMA forecasts: read, parse the 'date' column, then order rows
# chronologically.
dse_arima_pred = pd.read_csv('./arima/dse_arima_pred.csv')
dse_arima_pred['date'] = pd.to_datetime(dse_arima_pred['date'])
dse_arima_pred = dse_arima_pred.sort_values(by='date')

# Define function to calculate MAPE
def calculate_mape(actual, predicted):
    """Compute the mean absolute percentage error, in percent.

    Evaluates ``mean(|(actual - predicted) / actual|) * 100``. For scalar
    inputs this reduces to the absolute percentage error of that one pair;
    a NaN in either scalar yields NaN.
    """
    pct_error = (actual - predicted) / actual
    return 100 * np.mean(np.abs(pct_error))

# Forecast horizons, expressed as 0-based row offsets (approximate business
# days) into each ticker's date-sorted series.
time_marks = {
    '1_week': 5,
    '2_week': 9,
    '3_week': 15,
    '1_month': 20,
    '2_month': 40,
    '6_month': 120,
    '12_month': 230
}

# Get unique tickers
tickers = dse_df_2023['ticker'].unique()

# One row per ticker, one column per horizon; cells stay NaN until filled.
arima_mape_dse = pd.DataFrame(index=tickers, columns=time_marks.keys())

# NOTE(review): actual and forecast values are paired by row position after
# sorting, not by calendar date. This assumes both series start on the same
# date and share the same trading calendar -- confirm against the data.
for ticker in tickers:
    # Both series depend only on the ticker, so build them once here instead
    # of once per timeframe.
    actual_close = (
        dse_df_2023.loc[dse_df_2023['ticker'] == ticker, ['date', 'close']]
        .reset_index(drop=True)['close']
    )
    arima_forecast = (
        dse_arima_pred[['date', ticker]]
        .dropna()
        .reset_index(drop=True)[ticker]
    )

    for timeframe, days in time_marks.items():
        # After reset_index the labels are 0..len-1, so row `days` exists only
        # when len > days. The guard must therefore be `days >= len` (not
        # `days > len`): a series with exactly `days` rows has no row `days`
        # and .loc would raise KeyError.
        actual_value = np.nan if days >= len(actual_close) else actual_close.loc[days]
        predicted_value = np.nan if days >= len(arima_forecast) else arima_forecast.loc[days]

        # With a single actual/forecast pair this is that pair's absolute
        # percentage error, rounded to 2 decimal places; NaN inputs give NaN.
        mape_value = round(calculate_mape(actual_value, predicted_value), 2)
        arima_mape_dse.loc[ticker, timeframe] = mape_value

arima_mape_dse = arima_mape_dse.sort_index()
# Save the MAPE table to a CSV file
arima_mape_dse.to_csv('./arima/arima_mape_dse.csv')

arima_mape_dse.head()

Unnamed: 0,1_week,2_week,3_week,1_month,2_month,6_month,12_month
00DS30,0.18,0.16,0.4,1.6,1.05,0.12,4.19
00DSEX,0.27,0.1,0.75,1.26,0.16,2.01,0.92
BATBC,0.0,0.0,0.0,0.0,0.0,0.0,0.0
BBSCABLES,0.0,0.0,0.0,0.0,0.0,0.0,
BEACONPHAR,2.33,1.27,2.25,11.15,4.73,16.78,


# 2. Compare MAPE of GBM and ARIMA

In [4]:
from pathlib import Path

import pandas as pd

# Builds one comparison CSV per forecast horizon. For every ticker it lists
# the actual close, the GBM and ARIMA forecasts at that horizon, and the
# previously computed GBM / ARIMA MAPE values.

# Load the datasets
hose_df_2023 = pd.read_csv('./data/hose_df_2023.csv')
average_paths_hose = pd.read_csv('./average_paths_hose.csv')
hose_arima_pred = pd.read_csv('./arima/hose_arima_pred.csv')
hose_mape = pd.read_csv('./mape/hose_mape.csv', index_col=0)
arima_mape_hose = pd.read_csv('./arima/arima_mape_hose.csv', index_col=0)

# Convert 'date' columns to datetime and sort by date (then by ticker for the
# long-format price table)
hose_df_2023['date'] = pd.to_datetime(hose_df_2023['date'])
hose_df_2023 = hose_df_2023.sort_values(by=['date', 'ticker'])

average_paths_hose['date'] = pd.to_datetime(average_paths_hose['date'])
average_paths_hose = average_paths_hose.sort_values(by='date')

hose_arima_pred['date'] = pd.to_datetime(hose_arima_pred['date'])
hose_arima_pred = hose_arima_pred.sort_values(by='date')

# Forecast horizons, expressed as 0-based row offsets (approximate business
# days) into each date-sorted series.
time_marks = {
    '1_week': 5,
    '2_week': 10,
    '3_week': 15,
    '1_month': 21,
    '2_month': 42,
    '6_month': 120,
    '12_month': 248
}

# Get unique tickers
tickers = hose_df_2023['ticker'].unique()

comparison_columns = ['Actual Price', 'Forecast by GBM', 'GBM MAPE', 'Forecast by ARIMA', 'ARIMA MAPE']

# The per-ticker series do not depend on the timeframe, so extract them once
# here rather than once per (timeframe, ticker) pair. Keys are the comparison
# columns each series feeds.
# NOTE(review): values are later picked by row position, not by calendar
# date; this assumes all three series start on the same date -- confirm.
series_by_ticker = {}
for ticker in tickers:
    series_by_ticker[ticker] = {
        'Actual Price': (
            hose_df_2023.loc[hose_df_2023['ticker'] == ticker, ['date', 'close']]
            .reset_index(drop=True)['close']
        ),
        'Forecast by GBM': (
            average_paths_hose[['date', ticker]].dropna().reset_index(drop=True)[ticker]
        ),
        'Forecast by ARIMA': (
            hose_arima_pred[['date', ticker]].dropna().reset_index(drop=True)[ticker]
        ),
    }

# to_csv does not create missing directories; make sure the target exists so
# the cell also runs on a fresh checkout.
Path('./arima/hose').mkdir(parents=True, exist_ok=True)

# Loop through each timeframe
for timeframe, days in time_marks.items():
    # Cells stay NaN unless a value is available for this ticker/horizon
    comparison_table = pd.DataFrame(index=tickers, columns=comparison_columns)

    # Loop through each ticker
    for ticker in tickers:
        # Actual price and both forecasts: only filled when row `days` exists
        for column, series in series_by_ticker[ticker].items():
            if len(series) > days:
                comparison_table.loc[ticker, column] = series.loc[days]

        # Extract GBM MAPE
        if ticker in hose_mape.index and timeframe in hose_mape.columns:
            comparison_table.loc[ticker, 'GBM MAPE'] = hose_mape.loc[ticker, timeframe]

        # Extract ARIMA MAPE
        if ticker in arima_mape_hose.index and timeframe in arima_mape_hose.columns:
            comparison_table.loc[ticker, 'ARIMA MAPE'] = arima_mape_hose.loc[ticker, timeframe]

    # Save the comparison table to a CSV file for each timeframe
    comparison_table.to_csv(f'./arima/hose/comparison_{timeframe}.csv')

In [8]:
from pathlib import Path

import pandas as pd

# Builds one comparison CSV per forecast horizon. For every ticker it lists
# the actual close, the GBM and ARIMA forecasts at that horizon, and the
# previously computed GBM / ARIMA MAPE values.

# Load the datasets
dse_df_2023 = pd.read_csv('./data/dse_df_2023.csv')
average_paths_dse = pd.read_csv('./average_paths_dse.csv')
dse_arima_pred = pd.read_csv('./arima/dse_arima_pred.csv')
dse_mape = pd.read_csv('./mape/dse_mape.csv', index_col=0)
arima_mape_dse = pd.read_csv('./arima/arima_mape_dse.csv', index_col=0)

# Convert 'date' columns to datetime and sort by date (then by ticker for the
# long-format price table)
dse_df_2023['date'] = pd.to_datetime(dse_df_2023['date'])
dse_df_2023 = dse_df_2023.sort_values(by=['date', 'ticker'])

average_paths_dse['date'] = pd.to_datetime(average_paths_dse['date'])
average_paths_dse = average_paths_dse.sort_values(by='date')

dse_arima_pred['date'] = pd.to_datetime(dse_arima_pred['date'])
dse_arima_pred = dse_arima_pred.sort_values(by='date')

# Forecast horizons, expressed as 0-based row offsets (approximate business
# days) into each date-sorted series.
time_marks = {
    '1_week': 5,
    '2_week': 9,
    '3_week': 15,
    '1_month': 20,
    '2_month': 40,
    '6_month': 120,
    '12_month': 230
}

# Get unique tickers
tickers = dse_df_2023['ticker'].unique()

comparison_columns = ['Actual Price', 'Forecast by GBM', 'GBM MAPE', 'Forecast by ARIMA', 'ARIMA MAPE']

# The per-ticker series do not depend on the timeframe, so extract them once
# here rather than once per (timeframe, ticker) pair. Keys are the comparison
# columns each series feeds.
# NOTE(review): values are later picked by row position, not by calendar
# date; this assumes all three series start on the same date -- confirm.
series_by_ticker = {}
for ticker in tickers:
    series_by_ticker[ticker] = {
        'Actual Price': (
            dse_df_2023.loc[dse_df_2023['ticker'] == ticker, ['date', 'close']]
            .reset_index(drop=True)['close']
        ),
        'Forecast by GBM': (
            average_paths_dse[['date', ticker]].dropna().reset_index(drop=True)[ticker]
        ),
        'Forecast by ARIMA': (
            dse_arima_pred[['date', ticker]].dropna().reset_index(drop=True)[ticker]
        ),
    }

# to_csv does not create missing directories; make sure the target exists so
# the cell also runs on a fresh checkout.
Path('./arima/dse').mkdir(parents=True, exist_ok=True)

# Loop through each timeframe
for timeframe, days in time_marks.items():
    # Cells stay NaN unless a value is available for this ticker/horizon
    comparison_table = pd.DataFrame(index=tickers, columns=comparison_columns)

    # Loop through each ticker
    for ticker in tickers:
        # Actual price and both forecasts: only filled when row `days` exists
        for column, series in series_by_ticker[ticker].items():
            if len(series) > days:
                comparison_table.loc[ticker, column] = series.loc[days]

        # Extract GBM MAPE
        if ticker in dse_mape.index and timeframe in dse_mape.columns:
            comparison_table.loc[ticker, 'GBM MAPE'] = dse_mape.loc[ticker, timeframe]

        # Extract ARIMA MAPE
        if ticker in arima_mape_dse.index and timeframe in arima_mape_dse.columns:
            comparison_table.loc[ticker, 'ARIMA MAPE'] = arima_mape_dse.loc[ticker, timeframe]

    # Save the comparison table to a CSV file for each timeframe
    comparison_table.to_csv(f'./arima/dse/comparison_{timeframe}.csv')