# In [2]:
import pandas as pd
import numpy as np

# Daily 2023 records per exchange: parse the 'date' column to datetimes, then
# order rows by ticker and chronologically within each ticker.
hose_df_2023 = (
    pd.read_csv('./data/hose_df_2023.csv')
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .sort_values(by=['ticker', 'date'])
)
dse_df_2023 = (
    pd.read_csv('./data/dse_df_2023.csv')
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .sort_values(by=['ticker', 'date'])
)

# Per-ticker parameter tables (columns 'ticker', 'mu_annualized' and
# 'sigma_annualized' are read from them further down). Note that the HOSE
# parameters are stored in log_parameters.csv.
hose_parameter = pd.read_csv('./log_parameters.csv')
dse_parameter = pd.read_csv('./dse_parameters.csv')

# Distinct tickers per exchange, in order of first appearance in the sorted frames.
hose_tickers = hose_df_2023['ticker'].unique()
dse_tickers = dse_df_2023['ticker'].unique()

# Build the summary table: one row per ticker position, with HOSE values in
# hose_ticker / *_1 and DSE values in dse_ticker / *_2.
# The row count is taken from the data rather than fixed at 32, so the table
# still builds when an exchange has a different number of tickers. If the two
# exchanges have different ticker counts, the shorter side is padded with NaN.
eda_columns = [
    'hose_ticker', 'mean_volume_1', 'drift_1', 'volatility_1',
    'dse_ticker', 'mean_volume_2', 'drift_2', 'volatility_2',
]
eda_row_count = max(len(hose_tickers), len(dse_tickers))
# Statistic cells start as NaN (not 0) so a padding row is never mistaken for
# a ticker whose statistics are genuinely zero.
eda = pd.DataFrame(
    np.full((eda_row_count, len(eda_columns)), np.nan),
    columns=eda_columns,
)
# Assigning a Series aligns on the 0..n-1 index, which is what leaves NaN in
# the tail of the shorter ticker list instead of raising a length mismatch.
eda['hose_ticker'] = pd.Series(hose_tickers)
eda['dse_ticker'] = pd.Series(dse_tickers)

# Fill the HOSE columns of eda in a single vectorized pass instead of
# re-filtering the full price frame once per ticker.
# mean_volume_1: mean of 'volume' per ticker, rounded to the nearest whole number.
hose_mean_volume = hose_df_2023.groupby('ticker')['volume'].mean().round()
# One parameter row per ticker: rows without a ticker are ignored and, if a
# ticker appears more than once, the first row wins.
hose_params_by_ticker = (
    hose_parameter
    .dropna(subset=['ticker'])
    .drop_duplicates(subset='ticker')
    .set_index('ticker')
)
# Series.map looks each eda ticker up by index label; a ticker with no volume
# data or no parameter row yields NaN in that cell rather than aborting the
# whole run with an IndexError.
eda['mean_volume_1'] = eda['hose_ticker'].map(hose_mean_volume)
eda['drift_1'] = eda['hose_ticker'].map(hose_params_by_ticker['mu_annualized'])
eda['volatility_1'] = eda['hose_ticker'].map(hose_params_by_ticker['sigma_annualized'])

# Fill the DSE columns of eda in a single vectorized pass instead of
# re-filtering the full price frame once per ticker.
# mean_volume_2: mean of 'volume' per ticker, rounded to the nearest whole number.
dse_mean_volume = dse_df_2023.groupby('ticker')['volume'].mean().round()
# One parameter row per ticker: rows without a ticker are ignored and, if a
# ticker appears more than once, the first row wins.
dse_params_by_ticker = (
    dse_parameter
    .dropna(subset=['ticker'])
    .drop_duplicates(subset='ticker')
    .set_index('ticker')
)
# Series.map looks each eda ticker up by index label; a ticker with no volume
# data or no parameter row yields NaN in that cell rather than aborting the
# whole run with an IndexError.
eda['mean_volume_2'] = eda['dse_ticker'].map(dse_mean_volume)
eda['drift_2'] = eda['dse_ticker'].map(dse_params_by_ticker['mu_annualized'])
eda['volatility_2'] = eda['dse_ticker'].map(dse_params_by_ticker['sigma_annualized'])

# Save the combined HOSE/DSE summary table; index=False keeps the positional
# row index out of the CSV.
eda.to_csv(path_or_buf='./eda.csv', index=False)