In [7]:
import pandas as pd
import os

# Rename rules applied to a user-data frame before it is appended to a
# processed dataset. Outer keys are the processed CSV's base filename (no
# extension); each inner dict maps a user-data column name to the column
# name used in that processed CSV.
column_mappings = {
    "Anemia": {
        "target": "diseased",
        "PLT_mm3": "PLT/mm3",
    },
    "Cardiovascular": {"target": "cardio"},
    "cleaned_heart": {"target": "HeartDisease"},
    "Hepatitis_C": {"target": "Category"},
    "lung_cancer_processed": {"target": "LUNG_CANCER"},
    "thyroid_cleaned_data": {"target": "Diagnosis"},
    "processed_healthcare_data": {"target": "stroke"},
    "indian_liver_patient_processed": {"target": "Dataset"},
}

def update_csv(csv_1, csv_2):
    """Append the rows of a user-data CSV to a processed-data CSV, in place.

    The columns of ``csv_2`` are renamed using the ``column_mappings`` entry
    keyed by the base filename (without extension) of ``csv_1``; a filename
    with no entry gets no renaming. The renamed rows are then appended below
    the rows of ``csv_1`` and the result overwrites ``csv_1``.

    Args:
        csv_1: Path of the processed-data CSV. It is read, then overwritten
            with the merged rows.
        csv_2: Path of the user-data CSV whose rows are appended.

    Note:
        Calling this again with the same ``csv_2`` appends its rows again;
        no de-duplication is performed.
    """
    processed_df = pd.read_csv(csv_1)
    user_df = pd.read_csv(csv_2)

    # The mapping is looked up by the processed file's base name,
    # e.g. ".../Anemia.csv" -> "Anemia".
    disease = os.path.splitext(os.path.basename(csv_1))[0]
    mapping = column_mappings.get(disease, {})
    user_df = user_df.rename(columns=mapping)

    # pd.concat takes the union of the columns and fills the gaps with NaN
    # without complaint. Since the result overwrites the processed file,
    # report any mismatch instead of letting it pass unnoticed.
    missing_in_user = [
        col for col in processed_df.columns if col not in user_df.columns
    ]
    extra_in_user = [
        col for col in user_df.columns if col not in processed_df.columns
    ]
    if missing_in_user or extra_in_user:
        print(
            f"Warning: column mismatch while merging {csv_2} into {csv_1}: "
            f"missing from user data: {missing_in_user}; "
            f"not in processed data: {extra_in_user}. "
            "Unmatched cells will be NaN."
        )

    merged_df = pd.concat([processed_df, user_df], axis=0, ignore_index=True)

    # Save the merged dataframe back to the original processed data path
    merged_df.to_csv(csv_1, index=False)
    print(f"Successfully merged and saved to {csv_1}")

if __name__ == "__main__":
    # Destination files: the processed datasets that get extended in place.
    Anemia = "../data/processed/Anemia.csv"
    Cardiovascular = "../data/processed/Cardiovascular.csv"
    Heart = "../data/processed/cleaned_heart.csv"
    Hepatitis_C = "../data/processed/Hepatitis_C.csv"
    Lung_cancer = "../data/processed/lung_cancer_processed.csv"
    Thyroid = "../data/processed/thyroid_cleaned_data.csv"
    Stroke = "../data/processed/processed_healthcare_data.csv"
    Liver = "../data/processed/indian_liver_patient_processed.csv"

    # Source files: the user-submitted rows to append to each dataset.
    Anemia_ = "../backend/user_data/anemia.csv"
    Cardiovascular_ = "../backend/user_data/cardiovascular.csv"
    Heart_ = "../backend/user_data/heart.csv"
    Hepatitis_C_ = "../backend/user_data/hepatitis.csv"
    Lung_cancer_ = "../backend/user_data/lung_cancer.csv"
    Thyroid_ = "../backend/user_data/thyroid.csv"
    Stroke_ = "../backend/user_data/stroke.csv"
    Liver_ = "../backend/user_data/liver.csv"

    # Merge each (processed, user) pair, in the order listed.
    for processed_path, user_path in (
        (Anemia, Anemia_),
        (Cardiovascular, Cardiovascular_),
        (Heart, Heart_),
        (Hepatitis_C, Hepatitis_C_),
        (Lung_cancer, Lung_cancer_),
        (Thyroid, Thyroid_),
        (Stroke, Stroke_),
        (Liver, Liver_),
    ):
        update_csv(processed_path, user_path)

Successfully merged and saved to ../data/processed/Anemia.csv
Successfully merged and saved to ../data/processed/Cardiovascular.csv
Successfully merged and saved to ../data/processed/cleaned_heart.csv
Successfully merged and saved to ../data/processed/Hepatitis_C.csv
Successfully merged and saved to ../data/processed/lung_cancer_processed.csv
Successfully merged and saved to ../data/processed/thyroid_cleaned_data.csv
Successfully merged and saved to ../data/processed/processed_healthcare_data.csv
Successfully merged and saved to ../data/processed/indian_liver_patient_processed.csv
