In [1]:
import os
import pandas as pd
import numpy as np
from datetime import datetime

# Folder that is scanned for input ``.csv`` files (one frame is built per file).
# NOTE(review): the folder name suggests VN30 constituent price histories — confirm.
data_folder = "../vn_30_list"

# ‚úÖ Function to preprocess a single CSV file
def preprocess_file(file_path):
    """Load one price CSV and return its ``Date`` and renamed ``Close`` columns.

    The ``Close`` column is renamed to ``<name>_Close`` so that frames built
    from different files can be merged on ``Date`` without column clashes.
    ``<name>`` is the file's base name with the ``_historical_data.csv``
    suffix removed; if the file does not carry that suffix, only the file
    extension is removed.

    Args:
        file_path: Path to a CSV file containing ``Date`` and ``Close``
            columns, with the header on the first row.

    Returns:
        A two-column DataFrame (``Date`` parsed with ``pd.to_datetime``,
        ``<name>_Close`` cast to float), or ``None`` when the file cannot be
        read or converted. Failures are printed, not raised, so that one bad
        file does not abort a batch run.
    """
    suffix = "_historical_data.csv"
    try:
        # The header is taken from the first row; no rows are skipped.
        raw = pd.read_csv(file_path)

        base_name = os.path.basename(file_path)
        if base_name.endswith(suffix):
            file_name = base_name[:-len(suffix)]
        else:
            # Fall back to dropping just the extension so the column prefix
            # never ends up looking like "XYZ.csv".
            file_name = os.path.splitext(base_name)[0]

        # Build a fresh frame instead of mutating/renaming a slice in place.
        df = pd.DataFrame({
            'Date': pd.to_datetime(raw['Date']),
            f"{file_name}_Close": raw['Close'].astype(float),
        })

        print(f"‚úÖ Processed: {file_name}")
        return df
    except Exception as e:
        # Deliberate best-effort: report the problem and let the caller skip
        # this file.
        print(f"‚ö†Ô∏è Error processing {file_path}: {e}")
        return None

# Read every CSV file in the folder; files that fail preprocessing return
# None and are skipped.
all_dataframes = []
for file in os.listdir(data_folder):
    if file.endswith(".csv"):
        file_path = os.path.join(data_folder, file)
        df = preprocess_file(file_path)
        if df is not None:
            all_dataframes.append(df)

# Merge all frames on Date. The inner join keeps only dates that are present
# in every file.
if all_dataframes:
    merged_df = all_dataframes[0]
    for df in all_dataframes[1:]:
        merged_df = pd.merge(merged_df, df, on="Date", how="inner")

    # Sort chronologically BEFORE filling so that gaps are filled from the
    # neighbouring dates rather than from whatever row order the merge left.
    merged_df = merged_df.sort_values(by='Date')

    # Forward fill, then backward fill any leading gaps.
    # DataFrame.ffill()/bfill() replace the deprecated fillna(method=...).
    merged_df = merged_df.ffill().bfill()

    # Save preprocessed data, creating the output folder if it is missing.
    output_path = "../ready_data/cleaned_vn30_list_data.csv"
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    merged_df.to_csv(output_path, index=False)
    print("üéâ Preprocessing complete! Data saved to `cleaned_vn30_list_data.csv`")
else:
    print("‚ö†Ô∏è No valid CSV files found in the folder.")

‚úÖ Processed: VRE
‚úÖ Processed: VIB
‚úÖ Processed: STB
‚úÖ Processed: VIC
‚úÖ Processed: VCB
‚úÖ Processed: MSN
‚úÖ Processed: MWG
‚úÖ Processed: PLX
‚úÖ Processed: TCB
‚úÖ Processed: FPT
‚úÖ Processed: VHM
‚úÖ Processed: HDB
‚úÖ Processed: HPG
‚úÖ Processed: ACB
‚úÖ Processed: MBB
‚úÖ Processed: SAB
‚úÖ Processed: NVL
‚úÖ Processed: PNJ
‚úÖ Processed: SHB
‚úÖ Processed: VPB
‚úÖ Processed: POW
‚úÖ Processed: TPB
‚úÖ Processed: GAS
‚úÖ Processed: CTG
‚úÖ Processed: VNM
‚úÖ Processed: PDR
‚úÖ Processed: SSI
‚úÖ Processed: BVH
‚úÖ Processed: VJC
‚úÖ Processed: BID
üéâ Preprocessing complete! Data saved to `cleaned_vn30_list_data.csv`


  merged_df.fillna(method='ffill', inplace=True)  # Forward fill
  merged_df.fillna(method='bfill', inplace=True)  # Backward fill (if needed)
