In [2]:
# Cell 1: Install & import required libraries

!pip install geopandas pandas matplotlib shapely pyproj --quiet

import os
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
from shapely import wkt


In [None]:
import matplotlib as mpl
import numpy as np

# Wind speed colormap: five colour bands, five shades per band
def wspd_lc():
    """Build the wind-speed level list and its discrete colormap.

    Every band below pairs six level values with five shades running from
    muted to vibrant. The bands are flattened in order, so the result holds
    30 level values and a 25-colour ``ListedColormap``.

    Note that consecutive bands do not share an edge (one band ends at 61,
    the next starts at 62, and so on), so the level list has more entries
    than ``number of colours + 1``.

    Returns:
        tuple: ``(levels, cmap)`` where ``levels`` is a flat list of numbers
        in ascending order and ``cmap`` is a ``matplotlib`` ``ListedColormap``.
    """
    bands = (
        # Blue: 0–61
        ((0, 12.2, 24.4, 36.6, 48.8, 61),
         ('#b3e6fb', '#80d4fa', '#4dc2f9', '#1ab0f8', '#0da9f3')),
        # Yellow: 62–88
        ((62, 66.4, 70.8, 75.2, 79.6, 88),
         ('#fff8b3', '#fff380', '#ffef4d', '#ffeb1a', '#feea3d')),
        # Orange: 89–117
        ((89, 93.6, 98.2, 102.8, 107.4, 117),
         ('#fdd4b3', '#fdba80', '#fd9f4d', '#fd852a', '#fd960b')),
        # Red: 118–184
        ((118, 128.6, 139.2, 149.8, 160.4, 184),
         ('#fcb3b3', '#fa8080', '#f94d4d', '#f71a1a', '#f54337')),
        # Violet: 185–300 (the final value can be adjusted if needed)
        ((185, 208, 231, 254, 277, 300),
         ('#e0b3e6', '#c680d4', '#ad4dc2', '#991ab0', '#9e28b0')),
    )

    # Flatten band by band so the ordering matches the table above.
    levels = [edge for edges, _ in bands for edge in edges]
    colors = [shade for _, shades in bands for shade in shades]

    return levels, mpl.colors.ListedColormap(colors)

# Anomaly colormap, -80 to +80 with 5-unit intervals
def anom_lc():
    """Build the anomaly level list and a matching diverging colormap.

    The levels run from -80 to 80 in steps of 5 (33 edges, 32 bins). One
    colour per bin is sampled from the reversed ``RdBu`` ramp (blue for
    negative, red for positive) at the *midpoint* of each bin. Midpoint
    sampling keeps the palette symmetric about zero: the two bins on either
    side of 0 get mirror-image near-white shades, and the outermost bins get
    mirror-image saturated shades.

    Returns:
        tuple: ``(levels, cmap)`` where ``levels`` is a list of ints and
        ``cmap`` is a ``ListedColormap`` with ``len(levels) - 1`` colours.
    """
    levels = list(range(-80, 85, 5))  # -80, -75, ..., 75, 80
    n_bins = len(levels) - 1          # one colour per interval -> 32

    # ``mpl.colormaps`` is the registry lookup; the older
    # ``matplotlib.cm.get_cmap`` helper is not needed here and no longer
    # exists in recent Matplotlib releases, so it is not imported.
    base_cmap = mpl.colormaps["RdBu_r"]

    # Sample at bin centres, (i + 0.5) / n, rather than at i / n: the latter
    # never reaches the red end of the ramp and puts pure white on the
    # [0, 5) bin only, which biases the picture toward the negative side.
    colors = [base_cmap((i + 0.5) / n_bins) for i in range(n_bins)]
    cmap = mpl.colors.ListedColormap(colors)
    return levels, cmap

# Build both colormaps once at module level; each call returns (levels, cmap).
lev, wspd_cmap = wspd_lc()        # wind-speed level list and its ListedColormap
anom_lev, anom_cmap = anom_lc()   # anomaly levels (-80..80, step 5) and its ListedColormap


In [None]:
import os


# --- Input locations -------------------------------------------------------
# Province boundary shapefile and the root folder that is searched for CSVs.
shp_path = r"C:\Users\jo_ht\tcrm-3.1.16\swiftph\data\shp\ph_admin_prov_boundaries\PHL_adm4_PSA_pn_2016Junprj_prov.shp"
csv_path_input = r"C:\Users\jo_ht\OneDrive\Documents\neu\sept 22 report\tcrm vs ecmwf\anomalies\EMONG_2025"

# --- Columns to map --------------------------------------------------------
# Columns are identified by letter; every letter that is not an anomaly
# column is treated as a wind-speed column.
columns_to_process = ["F", "G", "P", "Q", "R", "S", "T", "U"]
anomaly_columns = ["R", "S", "T", "U"]
wspd_columns = [letter for letter in columns_to_process if letter not in anomaly_columns]

# --- Colormaps -------------------------------------------------------------
# Each factory returns a (levels, colormap) pair.
lev, wspd_cmap = wspd_lc()
anom_lev, anom_cmap = anom_lc()

# --- Per-column settings ---------------------------------------------------
# One independent settings dict per column letter. "norm" is stored as None
# for every column at this point.
column_settings = {}
column_settings.update(
    (letter, {"thresholds": lev, "cmap": wspd_cmap, "norm": None})
    for letter in wspd_columns
)
column_settings.update(
    (letter, {"thresholds": anom_lev, "cmap": anom_cmap, "norm": None})
    for letter in anomaly_columns
)

# --- Output root -----------------------------------------------------------
output_root = r"C:\Users\jo_ht\OneDrive\Documents\neu\sept 22 report\tcrm vs ecmwf\vizualization (anomalies)"
os.makedirs(output_root, exist_ok=True)

# --- CSV discovery ---------------------------------------------------------
# Walk the whole input tree and keep every file whose name ends in ".csv"
# (case-insensitive), in the order os.walk yields them.
csv_files = [
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk(csv_path_input)
    for name in names
    if name.lower().endswith(".csv")
]

print(f"Found {len(csv_files)} CSV files")

Found 0 CSV files


In [5]:
# Cell 2a: Define titles for each column
#
# Maps a column letter to the title drawn on that column's map. The same
# title is also used (after sanitising) in the output PNG filename, so
# editing a title here changes the name of the file written for that column.

column_titles = {
    # Wind-speed columns
    "F": "ECMWF Control Mean Wind Speed (kph)",
    "G": "ECMWF Mean Wind Speed (kph)",
    "P": "TCRM Control Mean Wind Speed (kph)",
    "Q": "TCRM Weighted Mean Wind Speed (kph)",
    # Anomaly columns: "<TCRM series> and <ECMWF series> Anomaly"
    "R": "TCRM Control Mean Wind Speed and ECMWF Control Mean Wind Speed Anomaly (kph)",
    "S": "TCRM Control Mean Wind Speed and ECMWF Mean Wind Speed Anomaly (kph)",
    "T": "TCRM Weighted Mean Wind Speed and ECMWF Control Mean Wind Speed Anomaly (kph)",
    # Fixed: the previous text had a stray duplicated word ("Wind Speed Mean and").
    "U": "TCRM Weighted Mean Wind Speed and ECMWF Mean Wind Speed Anomaly (kph)"
}


In [5]:
# Cell 3: Load shapefile and discover all CSV files (recursive)
#
# Defines `gdf` (province polygons) and `csv_files` (every CSV under
# `csv_path_input`), then walks the files once to create the mirrored output
# folders. Requires `shp_path`, `csv_path_input` and `output_root` to be set.

import glob

# Load shapefile
gdf = gpd.read_file(shp_path)
print("Shapefile loaded:", gdf.shape)

# Find all CSV files (including nested folders).
# glob returns matches in arbitrary, filesystem-dependent order; sorting makes
# the processing order — and therefore which file is "last" — reproducible.
csv_files = sorted(
    glob.glob(os.path.join(csv_path_input, "**", "*.csv"), recursive=True)
)

if not csv_files:
    raise FileNotFoundError(f"No CSV files found in: {csv_path_input}")

print(f"Found {len(csv_files)} CSV files:")
for csv_file in csv_files:
    print(" -", os.path.relpath(csv_file, csv_path_input))

# Loop through each CSV file.
# NOTE: `df`, `rel_path`, `output_dir_for_this_csv`, `base_name` and
# `merged_base` are rebound on every pass, so after the loop they describe the
# LAST file in `csv_files` only.
for csv_file in csv_files:
    print(f"\nProcessing CSV: {os.path.basename(csv_file)}")

    # Load CSV
    df = pd.read_csv(csv_file)

    # Compute relative path to preserve directory tree in output
    rel_path = os.path.relpath(os.path.dirname(csv_file), csv_path_input)
    output_dir_for_this_csv = os.path.join(output_root, rel_path)
    os.makedirs(output_dir_for_this_csv, exist_ok=True)

    # Extract base filename (without .csv) for later use in Cell 5
    base_name = os.path.splitext(os.path.basename(csv_file))[0]

    # Left-join the CSV rows onto the province polygons so that every
    # province is kept even when the CSV has no row for it.
    merged_base = gdf.merge(
        df,
        left_on="Pro_Name",   # column in your shapefile
        right_on="prov",      # column in your CSV
        how="left"
    )


Shapefile loaded: (87, 9)
Found 12 CSV files:
 - EMONG_072325_0000\EMONG_072325_0000_Jul24_TCRM and ECMWF.csv
 - EMONG_072325_0000\EMONG_072325_0000_Jul25_TCRM and ECMWF.csv
 - EMONG_072325_0600\EMONG_072325_0600_Jul24_TCRM and ECMWF.csv
 - EMONG_072325_0600\EMONG_072325_0600_Jul25_TCRM and ECMWF.csv
 - EMONG_072325_1200\EMONG_072325_1200_Jul24_TCRM and ECMWF.csv
 - EMONG_072325_1200\EMONG_072325_1200_Jul25_TCRM and ECMWF.csv
 - EMONG_072325_1800\EMONG_072325_1800_Jul24_TCRM and ECMWF.csv
 - EMONG_072425_0000\EMONG_072425_0000_Jul24_TCRM and ECMWF.csv
 - EMONG_072425_0000\EMONG_072425_0000_Jul25_TCRM and ECMWF.csv
 - EMONG_072425_0600\EMONG_072425_0600_Jul25_TCRM and ECMWF.csv
 - EMONG_072425_1200\EMONG_072425_1200_Jul25_TCRM and ECMWF.csv
 - EMONG_072525_0000\EMONG_072525_0000_Jul25_TCRM and ECMWF.csv

Processing CSV: EMONG_072325_0000_Jul24_TCRM and ECMWF.csv

Processing CSV: EMONG_072325_0000_Jul25_TCRM and ECMWF.csv

Processing CSV: EMONG_072325_0600_Jul24_TCRM and ECMWF.csv

Proce

In [6]:
# Cell 4: Merge CSV data with shapefile
#
# Sanity check of the province join. Relies on `gdf` and `df` already being
# defined by an earlier cell, so run the shapefile/CSV loading cell first.
# `df` here is whichever CSV was read most recently.

gdf_join_col = "Pro_Name"      # shapefile column
csv_join_col = "prov"          # CSV column

# Fail early, with a readable message, if either join key is absent.
if gdf_join_col not in gdf.columns:
    raise ValueError(f"Missing: {gdf_join_col} in shapefile")
if csv_join_col not in df.columns:
    raise ValueError(f"Missing: {csv_join_col} in CSV")

merged_base = gdf.merge(df, left_on=gdf_join_col, right_on=csv_join_col, how="left")
print("Merge complete:", merged_base.shape)

# A province has "no data" when its name has no matching row in the CSV.
# Testing for a NaN in *any* merged column would also flag provinces whose
# shapefile attributes happen to contain nulls, so compare the join keys
# directly instead.
has_csv_row = gdf[gdf_join_col].isin(df[csv_join_col])
missing = gdf.loc[~has_csv_row, gdf_join_col]
if not missing.empty:
    print("Provinces with no data:", missing.tolist())


NameError: name 'gdf' is not defined

In [7]:
# Cell 5: Generate maps per column for ALL CSVs, preserving input folder structure
#
# For every CSV in `csv_files` and every letter in `columns_to_process`, joins
# the CSV onto the province polygons, draws a choropleth of that column and
# saves it as a PNG under
#   <output_root>/<CSV folder relative to csv_path_input>/<column folder>/
# Requires `gdf`, `csv_files`, `csv_path_input`, `output_root`,
# `columns_to_process`, `column_settings` and `column_titles` to be defined.

import matplotlib.colors as mcolors
import matplotlib.pyplot as plt
import os
import numpy as np

# Shorthand folder names for output
column_folders = {
    "F": "ECMWF_CTRL",
    "G": "ECMWF_MEAN",
    "P": "TCRM_CTRL",
    "Q": "TCRM_WEIGHTED",
    "R": "ctrl_ctrl",   # anomaly shorthand
    "S": "ctrl_mean",   # anomaly shorthand
    "T": "wtd_ctrl",    # anomaly shorthand
    "U": "wtd_mean"     # anomaly shorthand
}


def _contiguous_edges(levels, n_colors):
    """Return ascending bin edges with at most ``n_colors + 1`` entries.

    ``BoundaryNorm`` needs one bin per colour. When ``levels`` has more
    entries than that (e.g. one category ends at 61 and the next starts at
    62, leaving a sliver bin in between), the narrowest bin is repeatedly
    absorbed into its lower neighbour by dropping that bin's lower edge.
    The first and last edges are never dropped. A list that already has
    ``n_colors + 1`` entries or fewer is returned sorted but otherwise
    unchanged.
    """
    edges = sorted(levels)
    while len(edges) > n_colors + 1:
        widths = np.diff(edges)                      # widths[j] = edges[j+1] - edges[j]
        narrowest = 1 + int(np.argmin(widths[1:]))   # skip bin 0 so edges[0] is kept
        del edges[narrowest]
    return edges


def _norm_for(settings):
    """Return the column's configured norm, or a BoundaryNorm built from its thresholds."""
    norm = settings.get("norm")
    if norm is not None:
        return norm
    n_colors = settings["cmap"].N
    edges = _contiguous_edges(settings["thresholds"], n_colors)
    return mcolors.BoundaryNorm(edges, ncolors=n_colors, clip=False)


# The norm depends only on the column's settings, so build it once per column
# instead of once per CSV. Using the configured thresholds (rather than evenly
# spaced bins, or no norm at all) keeps each colour tied to its intended value
# range and makes maps from different CSVs directly comparable.
column_norms = {
    letter: _norm_for(column_settings[letter]) for letter in columns_to_process
}

for csv_file in csv_files:
    df = pd.read_csv(csv_file)
    merged_base = gdf.merge(df, left_on="Pro_Name", right_on="prov", how="left")

    # Mirror the CSV's folder (relative to the input root) under the output root.
    rel_path = os.path.relpath(os.path.dirname(csv_file), csv_path_input)
    csv_output_root = os.path.join(output_root, rel_path)
    os.makedirs(csv_output_root, exist_ok=True)

    base_name = os.path.splitext(os.path.basename(csv_file))[0]

    for col_letter in columns_to_process:
        # Column letters are positional: "A" -> 0, "B" -> 1, ... within the CSV.
        col_index = ord(col_letter.upper()) - 65
        col_name = df.columns[col_index]

        # Non-numeric cells become NaN; skip the column if nothing is left to plot.
        merged_base[col_name] = pd.to_numeric(merged_base[col_name], errors="coerce")
        if merged_base[col_name].isna().all():
            continue

        cmap = column_settings[col_letter]["cmap"]
        norm = column_norms[col_letter]

        # Use shorthand folder names for the per-column output folder
        folder_name = column_folders[col_letter]
        col_folder = os.path.join(csv_output_root, folder_name)
        os.makedirs(col_folder, exist_ok=True)

        fig, ax = plt.subplots(figsize=(10, 10))
        merged_base.plot(
            column=col_name,
            cmap=cmap,
            norm=norm,
            linewidth=0.3,
            edgecolor="black",
            legend=True,
            ax=ax
        )

        title = column_titles.get(col_letter, col_name)
        ax.set_title(title, fontsize=16)
        ax.axis("off")

        # Safe output filename
        safe_title = title.replace(" ", "_").replace("(", "").replace(")", "")
        output_file = os.path.join(col_folder, f"{base_name}_{safe_title}_map.png")
        fig.savefig(output_file, dpi=300, bbox_inches="tight")
        plt.close(fig)  # release the figure; many are created in this loop

        print(f"Saved: {output_file}")
print("sir, tapos na po")

Saved: C:\Users\jo_ht\OneDrive\Documents\neu\sept 22 report\tcrm vs ecmwf\vizualization (anomalies)\EMONG_072325_0000\ECMWF_CTRL\EMONG_072325_0000_Jul24_TCRM and ECMWF_ECMWF_Control_Mean_Wind_Speed_kph_map.png
Saved: C:\Users\jo_ht\OneDrive\Documents\neu\sept 22 report\tcrm vs ecmwf\vizualization (anomalies)\EMONG_072325_0000\ECMWF_MEAN\EMONG_072325_0000_Jul24_TCRM and ECMWF_ECMWF_Mean_Wind_Speed_kph_map.png
Saved: C:\Users\jo_ht\OneDrive\Documents\neu\sept 22 report\tcrm vs ecmwf\vizualization (anomalies)\EMONG_072325_0000\TCRM_CTRL\EMONG_072325_0000_Jul24_TCRM and ECMWF_TCRM_Control_Mean_Wind_Speed_kph_map.png
Saved: C:\Users\jo_ht\OneDrive\Documents\neu\sept 22 report\tcrm vs ecmwf\vizualization (anomalies)\EMONG_072325_0000\TCRM_WEIGHTED\EMONG_072325_0000_Jul24_TCRM and ECMWF_TCRM_Weighted_Mean_Wind_Speed_kph_map.png
Saved: C:\Users\jo_ht\OneDrive\Documents\neu\sept 22 report\tcrm vs ecmwf\vizualization (anomalies)\EMONG_072325_0000\ctrl_ctrl\EMONG_072325_0000_Jul24_TCRM and ECMWF_