In [3]:
import os
import pandas as pd
import geopandas as gpd
from tqdm import tqdm

# Conversion settings: input/output roots, source and target CRS identifiers,
# and the suffix appended to each converted file's base name.
input_root = "datasets_leaflet"
output_root = "datasets_latlon"
crs_in = "EPSG:26914"
crs_out = "EPSG:4326"
suffix = "_epsg4326.csv"

# Gather every "dataset_*.csv" file beneath input_root, in os.walk order.
csv_paths = [
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk(input_root)
    for name in names
    if name.startswith("dataset_") and name.endswith(".csv")
]

print(f"Found {len(csv_paths)} CSV files to convert.")

# Make sure the top-level output directory exists before any file is written.
os.makedirs(output_root, exist_ok=True)

# Reproject every collected CSV from crs_in to crs_out and write a
# lon/lat/value CSV under output_root, mirroring the input directory layout.
#
# Processing is best-effort: a file that lacks the required columns or raises
# during conversion is reported and skipped so one bad file cannot abort the
# batch. Problem files are collected and summarized after the loop so partial
# failures are not lost among the progress output.
required_columns = {"X", "Y", "value"}
n_converted = 0
problem_paths = []  # paths that were skipped or raised an error

for path in tqdm(csv_paths, desc="Converting files"):
    try:
        df = pd.read_csv(path)
        if not required_columns.issubset(df.columns):
            # tqdm.write prints without corrupting the active progress bar.
            tqdm.write(f"❌ Skipping {path}: missing required columns")
            problem_paths.append(path)
            continue

        # Rows with a missing coordinate or value cannot be reprojected/exported.
        df = df.dropna(subset=["X", "Y", "value"])

        # Convert to GeoDataFrame and reproject
        gdf = gpd.GeoDataFrame(
            df,
            geometry=gpd.points_from_xy(df["X"], df["Y"]),
            crs=crs_in,
        )
        gdf = gdf.to_crs(crs_out)
        gdf["lat"] = gdf.geometry.y
        gdf["lon"] = gdf.geometry.x

        # Build output path: same sub-directory relative to input_root,
        # original base name with the configured suffix appended.
        relative_dir = os.path.relpath(os.path.dirname(path), input_root)
        base_name = os.path.splitext(os.path.basename(path))[0]
        output_dir = os.path.join(output_root, relative_dir)
        os.makedirs(output_dir, exist_ok=True)
        output_path = os.path.join(output_dir, base_name + suffix)

        # Save as CSV with lon/lat/value only
        gdf[["lon", "lat", "value"]].to_csv(output_path, index=False)
        n_converted += 1

    except Exception as e:
        # Deliberately broad: report the failure and continue with the next file.
        tqdm.write(f"⚠️ Error with {path}: {e}")
        problem_paths.append(path)

print(
    f"Converted {n_converted} of {len(csv_paths)} files; "
    f"{len(problem_paths)} skipped or failed."
)


Found 720 CSV files to convert.


Converting files: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 720/720 [00:10<00:00, 71.67it/s]
