In [13]:
import pandas as pd
import numpy as np

def remove_all_zeros(df, tolerance=1e-8):
    """Return a new DataFrame without numeric columns that are entirely ~0.

    A numeric column is dropped when every one of its values is within
    ``tolerance`` (absolute) of zero. Non-numeric columns are never inspected.
    The name of each dropped column is printed.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; it is not modified.
    tolerance : float, default 1e-8
        Absolute tolerance passed to ``np.isclose`` as ``atol``.

    Returns
    -------
    pandas.DataFrame
        A new frame with the all-zero numeric columns removed.

    Notes
    -----
    ``np.isclose`` does not treat NaN as close to 0, so a column containing
    any NaN is kept.
    """
    # With zero rows, `.all()` is vacuously True for every numeric column,
    # which would wipe out the schema. There is nothing to judge, so keep
    # all columns.
    if df.empty:
        return df.copy()

    # Only numeric columns can be compared against zero.
    numeric_cols = df.select_dtypes(include=[np.number]).columns
    cols_to_drop = []
    for col in numeric_cols:
        if np.isclose(df[col], 0, atol=tolerance).all():
            print(f"All values in {col} are effectively zero")
            cols_to_drop.append(col)
    return df.drop(columns=cols_to_drop)

In [14]:
# 📂 Input locations
vn_index_file = "../ready_data/cleaned_vn_index_data.csv"
external_data_file = "../ready_data/cleaned_external_data.csv"

# ✅ Read VN-Index data ("Date" parsed as datetime) and rename the
#    "VN-INDEX" column on load instead of mutating the frame in place.
vn_index_df = pd.read_csv(vn_index_file, parse_dates=["Date"]).rename(
    columns={"VN-INDEX": "VN_Index_Close"}
)

# ✅ Read the processed external data
external_df = pd.read_csv(external_data_file, parse_dates=["Date"])

# ✅ Inner join on Date: only dates present in BOTH sources are kept.
#    `merged_df` carries every VN-Index column; `vn_index_external_df`
#    carries only Date + VN_Index_Close alongside the external columns.
merged_df = vn_index_df.merge(external_df, on="Date", how="inner")
vn_index_external_df = vn_index_df[["Date", "VN_Index_Close"]].merge(
    external_df, on="Date", how="inner"
)

# ✅ Drop numeric columns that are entirely ~0, then order rows by Date
merged_df = remove_all_zeros(merged_df).sort_values(by="Date")
vn_index_external_df = remove_all_zeros(vn_index_external_df).sort_values(by="Date")

# ✅ Persist both datasets (row index is not written)
merged_df.to_csv("../ready_data/vn_index_merged_data.csv", index=False)
print("🎉 Merging complete! Data saved to `vn_index_merged_data.csv`")

vn_index_external_df.to_csv("../ready_data/vn_index_external_data.csv", index=False)
print("🎉 Merging complete! Data saved to `vn_index_external_data.csv`")

All values in US_10Y_Treasury_Yield_Volume are effectively zero
All values in USDVND_Volume are effectively zero
All values in EURVND_Volume are effectively zero
All values in US_10Y_Treasury_Yield_Volume are effectively zero
All values in USDVND_Volume are effectively zero
All values in EURVND_Volume are effectively zero
🎉 Merging complete! Data saved to `vn_index_merged_data.csv`
🎉 Merging complete! Data saved to `vn_index_external_data.csv`


In [15]:
# Show the merged frame as the cell's rich output (pandas elides the middle
# rows/columns of a large frame, hence the "..." in the rendering).
merged_df

Unnamed: 0,Date,VN_Index_Close,Total Volume,Total Value,Total Foreigner Buy Volume,Total Foreigner Buy Value,Total Foreigner Sell Volume,Total Foreigner Sell Value,Copper_Close,Copper_Volume,...,DJIA_Close,DJIA_Volume,USDVND_Close,EURVND_Close,Gold_Close,Gold_Volume,FTSE_100_Close,FTSE_100_Volume,Shanghai_Close,Shanghai_Volume
0,2007-01-02,741.27,3.825510e+06,3.708200e+11,24600.0,1.430000e+09,20000.0,2.100000e+09,2.8540,466.0,...,12463.150391,161560000.0,15423.0,20470.0,635.200012,17.0,6310.899902,1.074946e+09,2675.474121,8.680000e+04
1,2007-01-03,757.71,4.485494e+06,4.007000e+11,8000.0,4.542500e+08,22000.0,2.550000e+09,2.6325,865.0,...,12474.519531,327200000.0,15476.0,20380.0,627.099976,38.0,6319.000000,1.606893e+09,2675.474121,8.680000e+04
2,2007-01-08,825.11,8.911220e+06,1.088810e+12,525000.0,4.990000e+10,10300.0,1.550000e+09,2.5145,341.0,...,12423.490234,223500000.0,15403.0,20037.0,607.500000,1000.0,6194.200195,1.326701e+09,2707.198975,1.068000e+05
3,2007-01-09,844.91,7.215300e+06,8.256600e+11,685600.0,7.956000e+10,9600.0,1.560000e+09,2.5420,314.0,...,12416.599609,225190000.0,15448.0,20074.0,613.099976,2.0,6196.100098,2.143700e+09,2807.803955,1.108000e+05
4,2007-01-10,865.71,6.498406e+06,7.349800e+11,272000.0,4.226000e+10,9200.0,4.127000e+08,2.6515,367.0,...,12442.160156,226570000.0,15451.0,19995.0,611.599976,2.0,6160.700195,1.881991e+09,2825.575928,1.118000e+05
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4299,2025-04-22,1197.13,1.702204e+09,3.411897e+13,147096796.0,3.842890e+12,128015545.0,3.322110e+12,4.8705,516.0,...,39186.980469,582330000.0,25871.0,29009.0,3400.800049,785.0,8328.599609,1.042190e+09,3299.757080,4.032000e+05
4300,2025-04-23,1211.00,8.552081e+08,1.899687e+13,69318424.0,2.110700e+12,71503319.0,2.203420e+12,4.8355,714.0,...,39606.570312,642550000.0,25932.0,29456.0,3276.300049,331.0,8403.200195,1.043916e+09,3296.354980,4.116000e+05
4301,2025-04-24,1223.35,7.986391e+08,1.766251e+13,79446013.0,2.428630e+12,56395317.0,1.849570e+12,4.8500,1626.0,...,40093.398438,558120000.0,25965.0,29247.0,3332.000000,560.0,8407.400391,1.126606e+09,3297.288086,3.928000e+05
4302,2025-04-25,1229.23,8.630187e+08,2.035038e+13,72203085.0,2.287750e+12,84913614.0,2.878810e+12,4.8355,1626.0,...,40113.500000,559040000.0,26026.0,29249.0,3282.399902,560.0,8415.299805,8.027340e+08,3295.060059,4.110000e+05
