In [15]:
import os
import pandas as pd

def concatenate_csv_files(root_dir, output_dir='combined', ignore_dirs={'Unbalanced', 'combined', 'bidar'}):
    """Merge same-named CSV files found in the immediate subfolders of ``root_dir``.

    Every direct subfolder of ``root_dir`` (except ignored ones and the output
    folder itself) is scanned, one level deep, for files ending in ``.csv``.
    Files that share a filename across subfolders are read with pandas,
    concatenated row-wise in sorted path order, and written to
    ``<root_dir>/<output_dir>/<filename>``.

    Args:
        root_dir: Directory whose subfolders hold the per-group CSV files.
        output_dir: Folder (joined onto ``root_dir``) that receives the merged
            files; created if missing. It is never used as an input source,
            so re-running the function does not duplicate rows.
        ignore_dirs: Container of subfolder names to skip. ``None`` means
            "skip nothing". The default set is only read, never mutated.

    Returns:
        None. Progress and problems are reported via ``print``.

    Notes:
        A file that cannot be read is reported and left out of its merged
        output; the remaining files of that group are still combined.
    """
    if ignore_dirs is None:
        ignore_dirs = ()

    combined_path = os.path.join(root_dir, output_dir)
    os.makedirs(combined_path, exist_ok=True)

    # filename -> list of paths of that file across the scanned subfolders
    file_groups = {}

    for subdir in os.listdir(root_dir):
        subdir_path = os.path.join(root_dir, subdir)

        if not os.path.isdir(subdir_path) or subdir in ignore_dirs:
            continue

        # Always exclude the output folder, even when the caller picked a
        # name that is not listed in ignore_dirs; otherwise a second run would
        # feed the previous merged files back in as inputs.
        if os.path.samefile(subdir_path, combined_path):
            continue

        for filename in os.listdir(subdir_path):
            if filename.endswith('.csv'):
                file_path = os.path.join(subdir_path, filename)
                file_groups.setdefault(filename, []).append(file_path)

    failed_paths = []

    # Sorted so the processing/log order does not depend on os.listdir order.
    for filename in sorted(file_groups):
        dfs = []
        for path in sorted(file_groups[filename]):
            try:
                dfs.append(pd.read_csv(path))
            except Exception as e:
                # Best effort: keep going, but remember the failure so the
                # final summary does not claim a clean run.
                failed_paths.append(path)
                print(f"⚠️ Error reading {path}: {e}")

        if not dfs:
            continue

        # pd.concat aligns on column names; columns absent from a file are
        # filled with NaN for that file's rows.
        combined_df = pd.concat(dfs, ignore_index=True)

        output_csv_path = os.path.join(combined_path, filename)
        combined_df.to_csv(output_csv_path, index=False, encoding='utf-8')

        print(f"✅ Saved: {output_csv_path}")

    if failed_paths:
        print(f"⚠️ Finished with {len(failed_paths)} unreadable file(s); "
              f"output in '{output_dir}/' does not include them.")
    else:
        print(f"🎉 All CSVs combined and saved correctly in '{output_dir}/' as plain CSV files.")

In [16]:
# Merge same-named CSVs from the subfolders of this directory; with the default
# arguments the results are written to its 'combined' subfolder.
concatenate_csv_files('Karnataka_Datasets/Across/Kharif/Cropland_Masked/Train/2020/')

✅ Saved: Karnataka_Datasets/Across/Kharif/Cropland_Masked/Train/2020/combined\balanced_on_Duration.csv
✅ Saved: Karnataka_Datasets/Across/Kharif/Cropland_Masked/Train/2020/combined\balanced_on_Height.csv
✅ Saved: Karnataka_Datasets/Across/Kharif/Cropland_Masked/Train/2020/combined\balanced_on_Structure.csv
🎉 All CSVs combined and saved correctly in 'combined/' as plain CSV files.
