In [4]:
import pandas as pd
import numpy as np

def remove_all_zeros(df, tolerance=1e-8):
    """Return a copy of ``df`` without numeric columns that are zero everywhere.

    A numeric column is dropped when every value satisfies
    ``abs(value) <= tolerance`` (checked with ``np.isclose`` against 0, where
    the relative tolerance contributes nothing). Non-numeric columns are never
    inspected. The name of each dropped column is printed.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to clean. It is not modified.
    tolerance : float, default 1e-8
        Absolute tolerance under which a value counts as zero.

    Returns
    -------
    pandas.DataFrame
        A new frame with the all-zero numeric columns removed.

    Notes
    -----
    ``np.isclose`` treats NaN as not close to 0, so a column that contains any
    NaN is kept even if its other values are all zero.
    """
    # With zero rows, `.all()` is vacuously True for every column, which would
    # wipe out the whole numeric schema of an empty frame (e.g. after an inner
    # join with no overlapping keys). There is nothing to judge, so keep it all.
    if len(df) == 0:
        return df.copy()

    # Select only numeric columns to check
    numeric_cols = df.select_dtypes(include=[np.number]).columns
    cols_to_drop = []
    for col in numeric_cols:
        if np.isclose(df[col], 0, atol=tolerance).all():
            print(f"All values in {col} are effectively zero")
            cols_to_drop.append(col)
    return df.drop(columns=cols_to_drop)

In [5]:
# 📂 File paths
vn_index_file = "../ready_data/cleaned_hose_historical_data.csv"
external_data_file = "../ready_data/cleaned_external_data.csv"    # Processed external data

# ✅ Load VN-Index Data and rename the index-level column in one step
# (reassignment instead of `inplace=True`, so re-running the cell is side-effect free)
vn_index_df = pd.read_csv(vn_index_file, parse_dates=["Date"]).rename(
    columns={"VN-INDEX": "VN_Index_Close"}
)

# ✅ Load External Data
external_df = pd.read_csv(external_data_file, parse_dates=["Date"])

# ✅ Merge on Date using an INNER join: only dates present in BOTH files are kept.
# `merged_df` carries every VN-Index column; `vn_index_external_df` keeps only
# the date and the renamed VN_Index_Close column next to the external columns.
merged_df = pd.merge(vn_index_df, external_df, on="Date", how="inner")
vn_index_external_df = pd.merge(
    vn_index_df[["Date", "VN_Index_Close"]], external_df, on="Date", how="inner"
)

# ✅ Drop numeric columns that are zero everywhere, then sort values by Date
merged_df = remove_all_zeros(merged_df).sort_values(by="Date")
vn_index_external_df = remove_all_zeros(vn_index_external_df).sort_values(by="Date")

# ✅ Save merged datasets (row index is not written)
merged_df.to_csv("../ready_data/merged_data.csv", index=False)
print("🎉 Merging complete! Data saved to `merged_data.csv`")

vn_index_external_df.to_csv("../ready_data/vn_index_external_data.csv", index=False)
print("🎉 Merging complete! Data saved to `vn_index_external_data.csv`")

All values in US_10Y_Treasury_Yield_Volume are effectively zero
All values in USDVND_Volume are effectively zero
All values in EURVND_Volume are effectively zero
All values in US_10Y_Treasury_Yield_Volume are effectively zero
All values in USDVND_Volume are effectively zero
All values in EURVND_Volume are effectively zero
🎉 Merging complete! Data saved to `merged_data.csv`
🎉 Merging complete! Data saved to `vn_index_external_data.csv`


In [6]:
# Show the merged frame as this cell's output to eyeball the join result
# (the captured output is truncated to the first and last rows/columns).
merged_df

Unnamed: 0,Index,Date,VN_Index_Close,Total Volume,Total Value,Total Foreigner Buy Volume,Total Foreigner Buy Value,Total Foreigner Sell Volume,Total Foreigner Sell Value,Copper_Close,...,DJIA_Close,DJIA_Volume,USDVND_Close,EURVND_Close,Gold_Close,Gold_Volume,FTSE_100_Close,FTSE_100_Volume,Shanghai_Close,Shanghai_Volume
0,4546,2007-01-02,741.27,3.825510e+06,3.708200e+11,24600.0,1.430000e+09,20000.0,2.100000e+09,2.8540,...,12463.150391,161560000.0,15423.0,20470.0,635.200012,17.0,6310.899902,1.074946e+09,2675.474121,8.680000e+04
1,4545,2007-01-03,757.71,4.485494e+06,4.007000e+11,8000.0,4.542500e+08,22000.0,2.550000e+09,2.6325,...,12474.519531,327200000.0,15476.0,20380.0,627.099976,38.0,6319.000000,1.606893e+09,2675.474121,8.680000e+04
2,4544,2007-01-04,788.82,5.998810e+06,6.520800e+11,123300.0,6.170000e+09,8000.0,1.280000e+09,2.5885,...,12480.690430,259060000.0,15419.0,20180.0,623.900024,4.0,6287.000000,1.902876e+09,2715.718994,1.202000e+05
3,4543,2007-01-05,818.51,8.119454e+06,9.722600e+11,663600.0,6.671000e+10,10800.0,4.705400e+08,2.5225,...,12398.009766,235220000.0,15507.0,20165.0,604.900024,531.0,6220.100098,1.622439e+09,2641.333984,1.062000e+05
4,4542,2007-01-08,825.11,8.911220e+06,1.088810e+12,525000.0,4.990000e+10,10300.0,1.550000e+09,2.5145,...,12423.490234,223500000.0,15403.0,20037.0,607.500000,1000.0,6194.200195,1.326701e+09,2707.198975,1.068000e+05
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4527,4,2025-03-28,1317.46,7.396520e+08,1.708416e+13,62460716.0,1.650070e+12,62678442.0,2.056680e+12,5.1125,...,41583.898438,532360000.0,25560.0,27188.0,3086.500000,31206.0,8658.900391,7.475458e+08,3351.306885,4.123000e+05
4528,3,2025-03-31,1306.86,8.678124e+08,2.120564e+13,53798688.0,1.684460e+12,89320381.0,2.966360e+12,5.0200,...,42001.761719,732220000.0,25550.0,27149.0,3122.800049,3438.0,8582.799805,9.250750e+08,3335.746094,4.726000e+05
4529,2,2025-04-01,1317.33,6.200336e+08,1.502564e+13,52789640.0,1.876930e+12,64539449.0,2.316520e+12,5.0190,...,41989.960938,514610000.0,25565.0,27279.0,3118.899902,1721.0,8634.799805,6.824101e+08,3348.435059,4.226000e+05
4530,1,2025-04-02,1317.83,8.493078e+08,1.929671e+13,66044032.0,1.957070e+12,81823582.0,2.665490e+12,5.0235,...,42225.320312,492050000.0,25620.0,27281.0,3139.899902,1721.0,8608.500000,6.014178e+08,3350.126953,3.501000e+05
