In [1]:
import importlib.util
import subprocess
import sys

def install_and_import(module_name):
    if importlib.util.find_spec(module_name) is None:
        print(f"{module_name} not found. Installing...")
        subprocess.check_call([sys.executable, "-m", "pip", "install", module_name])
    else:
        print(f"{module_name} is already installed.")

# Use the function for the libraries
install_and_import('pandas')
install_and_import('numpy')
install_and_import('matplotlib')

pandas is already installed.
numpy is already installed.
matplotlib is already installed.


# 1. Extract HOSE actual price and gbm paths

In [10]:
import pandas as pd
import numpy as np
import glob
import os

# Load actual prices for HOSE
hose_df_2023 = pd.read_csv('./data/hose_df_2023.csv')
hose_df_2023['date'] = pd.to_datetime(hose_df_2023['date'])

# Get the list of HOSE tickers
hose_tickers = hose_df_2023['ticker'].unique()

# Get the list of all simulated path files
path_files = glob.glob('./gbm_price/*.csv')

# Filter the path files to include only HOSE tickers
hose_path_files = [file for file in path_files if os.path.basename(file).replace('.csv', '') in hose_tickers]

# Initialize an empty DataFrame to store the average paths
hose_average_gbm = pd.DataFrame()
hose_average_gbm['date'] = sorted(hose_df_2023['date'].unique())
hose_average_gbm['date'] = pd.to_datetime(hose_average_gbm['date'])

# Load each file, calculate the average path, and add to the DataFrame
for file in hose_path_files:
    ticker = os.path.basename(file).replace('.csv', '')
    sim_df = pd.read_csv(file)
    sim_df['date'] = pd.to_datetime(sim_df['date'])
    
    # Calculate the average path
    sim_df['average_path'] = sim_df.iloc[:, 1:].mean(axis=1)
    
    # Merge the average path into the main DataFrame
    hose_average_gbm = hose_average_gbm.merge(sim_df[['date', 'average_path']], on='date', how='left')
    hose_average_gbm = hose_average_gbm.rename(columns={'average_path': ticker})

# Ensure the columns are in the correct order
cols = ['date'] + [os.path.basename(file).replace('.csv', '') for file in hose_path_files]
hose_average_gbm = hose_average_gbm[cols]

# Save the result to a CSV file
hose_average_gbm.to_csv('average_paths_hose.csv', index=False)

# Display the first few rows of the DataFrame to verify
hose_average_gbm.head()

Unnamed: 0,date,ACB,BCM,BID,BVH,CTG,FPT,GAS,GVR,HDB,...,VCB,VHM,VIB,VIC,VJC,VN30INDEX,VNINDEX,VNM,VPB,VRE
0,2023-01-03,21.953213,80.025451,39.301068,46.634277,27.327769,76.892024,101.989223,13.740464,16.053928,...,80.201098,47.886835,18.901593,53.674086,109.418791,1008.701753,1009.656838,76.431358,17.968025,26.992686
1,2023-01-04,21.904796,80.098034,39.290642,46.571368,27.305737,76.876245,102.020581,13.679972,16.00753,...,80.217336,47.733062,18.830319,53.535336,109.354238,1007.088965,1007.763634,76.402449,17.917365,26.985897
2,2023-01-05,21.857003,80.190791,39.302727,46.532818,27.286698,76.779536,102.022701,13.61925,15.96542,...,80.229581,47.607799,18.766577,53.402458,109.28809,1005.391368,1006.095537,76.37619,17.874119,26.973237
3,2023-01-06,21.822504,80.237711,39.321967,46.506483,27.269593,76.721922,102.010618,13.567718,15.914317,...,80.224476,47.492454,18.710708,53.270702,109.221507,1003.556605,1004.711712,76.3179,17.826057,26.960437
4,2023-01-09,21.767886,80.316616,39.341731,46.449118,27.25026,76.635813,102.053766,13.517154,15.87149,...,80.233128,47.375691,18.656564,53.12996,109.183221,1001.541615,1003.108914,76.286104,17.771338,26.931107


# 2. Calculate MAPE for HOSE stocks

In [12]:
import pandas as pd
import numpy as np

# Function to calculate MAPE
def calculate_mape(actual, predicted):
    return np.mean(np.abs((actual - predicted) / actual)) * 100

# Load actual prices for HOSE
hose_df_2023 = pd.read_csv('./data/hose_df_2023.csv')
hose_df_2023['date'] = pd.to_datetime(hose_df_2023['date'])

# Sort the actual prices by date
hose_df_2023 = hose_df_2023.sort_values(by='date')

# Load the average GBM paths
average_paths_df = pd.read_csv('average_paths_hose.csv')
average_paths_df['date'] = pd.to_datetime(average_paths_df['date'])

# Initialize the hose_mape DataFrame
columns = ['1_week', '2_week', '3_week', '1_month', '2_month', '6_month', '12_month']
hose_mape = pd.DataFrame(index=hose_df_2023['ticker'].unique(), columns=columns)

# Time marks in business days (approximation)
time_marks = {
    '1_week': 5,
    '2_week': 10,
    '3_week': 15,
    '1_month': 21,
    '2_month': 42,
    '6_month': 120,
    '12_month': 248
}

# Calculate MAPE for each ticker
for ticker in hose_df_2023['ticker'].unique():
    actual_prices = hose_df_2023[hose_df_2023['ticker'] == ticker].set_index('date')['close']
    predicted_prices = average_paths_df.set_index('date')[ticker]

    for period, days in time_marks.items():
        if len(actual_prices) > days:
            actual_price = actual_prices.iloc[days]
            predicted_price = predicted_prices.iloc[days]

            mape = calculate_mape(actual_price, predicted_price)
            hose_mape.at[ticker, period] = mape
# Sort the hose_mape DataFrame by ticker
hose_mape_sorted = hose_mape.sort_index()

# Save the hose_mape DataFrame to a CSV file
hose_mape.to_csv('hose_mape.csv')

# Display the first few rows of the hose_mape DataFrame to verify
print(hose_mape.head())

        1_week     2_week     3_week    1_month    2_month    6_month  \
PLX  14.612816  16.650529  16.425363  18.321186  25.656034  34.605144   
BID   4.743284  12.505548  12.744901  11.259949  16.927973   8.012674   
BVH   3.311404   4.949126   8.771065   7.010418    8.54407   3.054115   
CTG   4.631757  11.390235  11.480851  10.388973  10.679559  17.232775   
FPT   4.802112   6.953766   9.008997   6.911511   7.687709  18.655248   

      12_month  
PLX  46.360213  
BID   7.315655  
BVH   1.235722  
CTG  20.516486  
FPT  34.070984  


# 3. Extract DSE actual_price and DSE gbm paths

In [13]:
import pandas as pd
import numpy as np
import glob
import os

# Load actual prices for DSE
dse_df_2023 = pd.read_csv('./data/dse_df_2023.csv')
dse_df_2023['date'] = pd.to_datetime(dse_df_2023['date'])

# Get the list of DSE tickers
dse_tickers = dse_df_2023['ticker'].unique()

# Get the list of all simulated path files
path_files = glob.glob('./gbm_price/*.csv')

# Filter the path files to include only DSE tickers
dse_path_files = [file for file in path_files if os.path.basename(file).replace('.csv', '') in dse_tickers]

# Initialize an empty DataFrame to store the average paths
dse_average_gbm = pd.DataFrame()
dse_average_gbm['date'] = sorted(dse_df_2023['date'].unique())
dse_average_gbm['date'] = pd.to_datetime(dse_average_gbm['date'])

# Load each file, calculate the average path, and add to the DataFrame
for file in dse_path_files:
    ticker = os.path.basename(file).replace('.csv', '')
    sim_df = pd.read_csv(file)
    sim_df['date'] = pd.to_datetime(sim_df['date'])
    
    # Calculate the average path
    sim_df['average_path'] = sim_df.iloc[:, 1:].mean(axis=1)
    
    # Merge the average path into the main DataFrame
    dse_average_gbm = dse_average_gbm.merge(sim_df[['date', 'average_path']], on='date', how='left')
    dse_average_gbm = dse_average_gbm.rename(columns={'average_path': ticker})

# Ensure the columns are in the correct order
cols = ['date'] + [os.path.basename(file).replace('.csv', '') for file in dse_path_files]
dse_average_gbm = dse_average_gbm[cols]

# Save the result to a CSV file
dse_average_gbm.to_csv('average_paths_dse.csv', index=False)

# Display the first few rows of the DataFrame to verify
dse_average_gbm.head()

Unnamed: 0,date,00DS30,00DSEX,BATBC,BBSCABLES,BEACONPHAR,BEXIMCO,BRACBANK,BSC,BSCCL,...,ORIONPHARM,POWERGRID,RENATA,ROBI,SEAPEARL,SOUTHEASTB,SQURPHARMA,TITASGAS,UNIQUEHRL,UPGDCL
0,2023-01-01,2193.819464,6204.63707,518.306043,49.866365,304.775145,115.441056,38.444144,116.243415,218.951186,...,82.808104,52.359071,1217.639744,29.979342,192.048357,13.793122,209.764833,40.939024,56.026624,233.644405
1,2023-01-02,2192.591833,6202.44757,517.88823,49.853301,304.97168,115.317636,38.388564,116.558866,219.039573,...,82.72588,,1217.216318,29.972946,193.262261,13.7866,209.726659,40.955688,56.108491,233.570472
2,2023-01-03,2191.515471,6198.181774,517.370572,49.816966,305.021867,115.17456,38.320222,116.888605,219.13893,...,82.67817,52.313781,1216.769623,29.963443,194.385148,13.780588,209.701867,40.97191,56.124813,233.483812
3,2023-01-04,2190.202905,6196.356243,516.980176,49.78545,305.306788,115.030059,38.253408,117.114397,219.156209,...,82.62345,52.288058,1216.516704,29.944015,195.754521,13.773501,209.70172,40.996652,56.19376,233.388495
4,2023-01-05,2188.967792,6193.251144,516.566055,49.757176,305.664077,114.895551,38.190672,117.371366,219.191483,...,82.532473,52.256933,1216.259146,29.935415,196.826532,13.763269,209.680595,,56.168586,


# 4. Calculate MAPE for DSE stock

In [32]:
import pandas as pd
import numpy as np

# Function to calculate MAPE
def calculate_mape(actual, predicted):
    return np.mean(np.abs((actual - predicted) / actual)) * 100

# Load actual prices for DSE
dse_df_2023 = pd.read_csv('./data/dse_df_2023.csv')
dse_df_2023['date'] = pd.to_datetime(dse_df_2023['date'])

# Sort the actual prices by date
dse_df_2023 = dse_df_2023.sort_values(by='date')

# Load the average GBM paths
average_paths_df = pd.read_csv('average_paths_dse.csv')
average_paths_df['date'] = pd.to_datetime(average_paths_df['date'])

# Initialize the dse_mape DataFrame
columns = ['1_week', '2_week', '3_week', '1_month', '2_month', '6_month', '12_month']
dse_mape = pd.DataFrame(index=dse_df_2023['ticker'].unique(), columns=columns)

# Time marks in business days (approximation)
time_marks = {
    '1_week': 5,
    '2_week': 9,
    '3_week': 15,
    '1_month': 20,
    '2_month': 40,
    '6_month': 120,
    '12_month': 230
}

# Calculate MAPE for each ticker
for ticker in dse_df_2023['ticker'].unique():
    actual_prices = dse_df_2023[dse_df_2023['ticker'] == ticker].set_index('date')['close']
    predicted_prices = average_paths_df.set_index('date')[ticker]

    for period, days in time_marks.items():
        if len(actual_prices) > days:
            actual_price = actual_prices.iloc[days]
            predicted_price = predicted_prices.iloc[days]

            mape = calculate_mape(actual_price, predicted_price)
            dse_mape.at[ticker, period] = mape

# Sort the dse_mape DataFrame by ticker
dse_mape_sorted = dse_mape.sort_index()

# Save the dse_mape DataFrame to a CSV file
dse_mape.to_csv('dse_mape.csv')

# Display the first few rows of the dse_mape DataFrame to verify
print(dse_mape)

              1_week    2_week     3_week    1_month    2_month    6_month  \
00DS30      0.180024  0.741582   1.348716   2.839238   3.518062   7.081283   
UPGDCL      0.151379  0.242343   0.363592   0.515228   1.019289   3.328555   
UNIQUEHRL    1.47271   6.99663   9.642564  19.373072   6.682733  15.896646   
TITASGAS    0.316922  0.642949   1.055506   1.341588   2.678294   7.170558   
SQURPHARMA  0.049658  0.123844   0.187845   0.291541   0.589207   1.612899   
SOUTHEASTB  0.330588  0.580954   0.946348   1.205419   2.382083    3.43103   
SEAPEARL    7.366164  8.844382  13.765773  23.045601  10.680321   84.33164   
ROBI        0.324952  0.514069   0.921467   1.280916   2.451009   7.443399   
RENATA       0.15687  0.315144   0.558228   0.743423   1.484492   4.021924   
ORIONPHARM  1.636332  4.162934   3.718109   3.427555   1.880937   2.826399   
OLYMPIC     0.102609  0.474112   0.212039   8.923272  18.591843  27.757176   
MPETROLEUM   0.02405   0.00091   0.013549    0.02864   0.104094 

In [33]:
# Sort the hose_mape DataFrame by ticker
hose_mape = hose_mape.sort_index()

# Save the hose_mape DataFrame to a CSV file
hose_mape.to_csv('hose_mape.csv')

# Display the first few rows of the hose_mape DataFrame to verify
print(hose_mape.head())

# Sort the dse_mape DataFrame by ticker
dse_mape = dse_mape.sort_index()

# Save the dse_mape DataFrame to a CSV file
dse_mape.to_csv('dse_mape.csv')

# Display the first few rows of the dse_mape DataFrame to verify
print(dse_mape.head())

       1_week     2_week     3_week    1_month    2_month    6_month  \
ACB  7.374319  15.006332  18.247182  14.724767  20.116096  20.685555   
BCM  1.911359   3.232342   4.309744   3.455058     1.0229  12.385007   
BID  4.743284  12.505548  12.744901  11.259949  16.927973   8.012674   
BVH  3.311404   4.949126   8.771065   7.010418    8.54407   3.054115   
CTG  4.631757  11.390235  11.480851  10.388973  10.679559  17.232775   

      12_month  
ACB   42.58275  
BCM  59.250516  
BID   7.315655  
BVH   1.235722  
CTG  20.516486  
              1_week    2_week    3_week   1_month   2_month    6_month  \
00DS30      0.180024  0.741582  1.348716  2.839238  3.518062   7.081283   
00DSEX      0.005692  0.513959  1.320802  2.030416  1.439993    6.30959   
BATBC       0.496937  0.811458  1.404692   1.81815  3.508989  10.156271   
BBSCABLES   0.336546  0.572393  0.848177  1.107268  2.210843   5.974102   
BEACONPHAR  9.383772  5.792963  5.231043  3.935789  4.887364   37.07693   

             1