In [4]:
import pandas as pd
import os

input_folder = "../data/cleaned/"
output_folder = "../data/long/"


def wide_to_long(df: pd.DataFrame) -> pd.DataFrame:
    """Reshape a wide table (one ID column, one column per year) to long form.

    The first column is treated as the identifier and renamed to
    ``country``; every remaining column is assumed to be a year and is
    unpivoted into ``year`` / ``value`` pairs.

    Args:
        df: Wide-format frame whose first column is the ID.

    Returns:
        A new frame with the columns ``country``, ``year`` (nullable
        ``Int64``) and ``value`` (numeric). Column headers or cells that
        cannot be parsed as numbers become missing values instead of
        raising.
    """
    # Structural assumption: first column is the ID, the rest are years.
    id_col = df.columns[0]
    year_cols = df.columns[1:]

    long_df = df.melt(
        id_vars=id_col,
        value_vars=year_cols,
        var_name="year",
        value_name="value",
    ).rename(columns={id_col: "country"})

    # errors="coerce" turns unparseable headers/cells into missing values;
    # the nullable Int64 dtype lets "year" hold those without becoming float.
    long_df["year"] = pd.to_numeric(long_df["year"], errors="coerce").astype("Int64")
    long_df["value"] = pd.to_numeric(long_df["value"], errors="coerce")
    return long_df


def convert_folder(src_dir: str = input_folder, dst_dir: str = output_folder) -> None:
    """Convert every ``.csv`` file in *src_dir* to long format in *dst_dir*.

    Each output file keeps the name of its input file. A file that cannot be
    read, reshaped or written is reported on stdout and skipped, so one bad
    file does not stop the rest of the batch.

    Args:
        src_dir: Folder containing the wide-format CSV files.
        dst_dir: Folder receiving the long-format CSV files; created if it
            does not exist yet.
    """
    os.makedirs(dst_dir, exist_ok=True)

    # os.listdir gives no ordering guarantee; sort so the processing order
    # (and therefore the log) is the same on every run.
    for filename in sorted(os.listdir(src_dir)):
        if not filename.endswith(".csv"):
            continue

        input_path = os.path.join(src_dir, filename)
        output_path = os.path.join(dst_dir, filename)

        try:
            long_df = wide_to_long(pd.read_csv(input_path))
            long_df.to_csv(output_path, index=False)
        except Exception as e:
            # Deliberate best-effort boundary: report the file and carry on
            # (e.g. an empty CSV makes read_csv raise "No columns to parse").
            print(f"❌ Failed on {filename}: {e}")
        else:
            print(f"✅ Melted and saved: {filename}")


# Runs when executed as a script or as a notebook cell; merely importing the
# definitions above does not touch the file system.
if __name__ == "__main__":
    convert_folder(input_folder, output_folder)

✅ Melted and saved: electricity_gen_fossil.csv
❌ Failed on state_total_energy_rankings.csv: No columns to parse from file
✅ Melted and saved: net_imports_electricity.csv
✅ Melted and saved: natural_gas_exports.csv
✅ Melted and saved: world_emissions.csv
✅ Melted and saved: total_energy_consumption.csv
✅ Melted and saved: state_comparison.csv
✅ Melted and saved: natural_gas_imports.csv
✅ Melted and saved: electricity_gen_total.csv
✅ Melted and saved: annual_petrol_production.csv
✅ Melted and saved: electricity_gen_renewables.csv
✅ Melted and saved: electricity_gen_nuclear.csv
✅ Melted and saved: total_energy_production.csv
