In [2]:
import numpy as np
import pandas as pd
import time
from keras.utils import to_categorical
from keras.models import load_model
from model_function import model_function

regions = ["CA", "NE", "SE"]

# Train one DK model per region. For each region this reads the train/test
# projection matrices and the precomputed embedding matrices, fits a model via
# `model_function`, predicts on the test embeddings, and writes the
# predictions, the training time and the fitted model to region-tagged files.
for reg in regions:
    print("\n==============================")
    print(f"Training DK model for region: {reg}")
    print("==============================\n")

    # === Load data ===
    df_train = pd.read_csv(f"projection_matrix_{reg}_train.csv")
    df_test = pd.read_csv(f"projection_matrix_{reg}_test.csv")

    # class index shift: to_categorical builds one column per integer label
    # starting at 0, so labels are moved down by one first
    # (assumes the "class" column is 1-based -- TODO confirm).
    df_train["class"] = df_train["class"] - 1
    dummy_y = to_categorical(df_train["class"])
    n_class = dummy_y.shape[1]
    print(f"{reg}: Number of classes = {n_class}")

    # === Load embeddings ===
    phi_train = np.load(f"embeddings_{reg}/phi_train_{reg}.npy")
    phi_test = np.load(f"embeddings_{reg}/phi_test_{reg}.npy")

    print(f"{reg} Training φ shape:", phi_train.shape)
    print(f"{reg} Test φ shape:", phi_test.shape)

    # The predictions are glued onto df_test column-wise further down.
    # pd.concat(axis=1) would silently pad with NaN on a row-count mismatch,
    # so fail loudly here, before spending time on training.
    if phi_test.shape[0] != len(df_test):
        raise ValueError(
            f"[{reg}] phi_test has {phi_test.shape[0]} rows but the test "
            f"projection matrix has {len(df_test)} rows"
        )

    # === Train the model ===
    # perf_counter is monotonic, so the elapsed time cannot be distorted by
    # system clock adjustments during training.
    time_start = time.perf_counter()
    model_reg = model_function(df_train, phi_train, dummy_y, n_class)
    time_end = time.perf_counter()
    train_time = time_end - time_start

    # === Predict on test ===
    pred = model_reg.predict(phi_test)
    pred_df = pd.DataFrame(pred)

    # Prediction columns are named 0..n-1 and appended to the right of the
    # test table; the index is reset so both frames align row by row.
    df_test_preds = pd.concat([df_test.reset_index(drop=True), pred_df], axis=1)

    # === Save results with region prefix ===
    df_test_preds.to_csv(f"predictions_results_{reg}.csv", index=False)
    pd.DataFrame([{"train_time_sec": train_time}]).to_csv(
        f"DNN_time_records_{reg}.csv", index=False
    )

    # Save model with region name
    model_reg.save(f"model_{reg}.h5")

    print(f"✔ Saved DK model as model_{reg}.h5")
    print(f"✔ Saved DK test preds: predictions_results_{reg}.csv")
    print(f"✔ Training time saved: DNN_time_records_{reg}.csv")
    print(f"⏱ Training time: {train_time:.2f} seconds")

print("\n🎯 All region DK models trained and saved!")



Training DK model for region: CA

CA: Number of classes = 51
CA Training φ shape: (2773, 541)
CA Test φ shape: (22, 541)


0epoch [00:00, ?epoch/s]

0batch [00:00, ?batch/s]

<<<<<<<<<<<<<<<< Fitting DNN-model >>>>>>>>>>>>>>>>>
✔ Saved DK model as model_CA.h5
✔ Saved DK test preds: predictions_results_CA.csv
✔ Training time saved: DNN_time_records_CA.csv
⏱ Training time: 24.20 seconds

Training DK model for region: NE

NE: Number of classes = 36
NE Training φ shape: (1950, 633)
NE Test φ shape: (19, 633)


0epoch [00:00, ?epoch/s]

0batch [00:00, ?batch/s]

<<<<<<<<<<<<<<<< Fitting DNN-model >>>>>>>>>>>>>>>>>
✔ Saved DK model as model_NE.h5
✔ Saved DK test preds: predictions_results_NE.csv
✔ Training time saved: DNN_time_records_NE.csv
⏱ Training time: 29.74 seconds

Training DK model for region: SE

SE: Number of classes = 65
SE Training φ shape: (3560, 607)
SE Test φ shape: (16, 607)


0epoch [00:00, ?epoch/s]

0batch [00:00, ?batch/s]

<<<<<<<<<<<<<<<< Fitting DNN-model >>>>>>>>>>>>>>>>>
✔ Saved DK model as model_SE.h5
✔ Saved DK test preds: predictions_results_SE.csv
✔ Training time saved: DNN_time_records_SE.csv
⏱ Training time: 30.77 seconds

🎯 All region DK models trained and saved!


In [3]:
import numpy as np
import pandas as pd
from keras.models import load_model
from tqdm import tqdm
import os

# =====================================================
# Helper functions
# =====================================================
def minmax_normalize(coords, min_vals=None, max_vals=None):
    """Rescale coordinates column-wise with min-max normalisation.

    Computes ``(coords - min_vals) / (max_vals - min_vals)``. Pass the
    ``min_vals`` / ``max_vals`` returned from a previous call to apply the
    same scaling to new points.

    Parameters
    ----------
    coords : array-like
        Coordinates to normalise; statistics are taken along axis 0.
    min_vals, max_vals : array-like, optional
        Per-column bounds. Each bound that is omitted is computed from
        ``coords``; a bound that is supplied is always honoured.

    Returns
    -------
    norm_coords, min_vals, max_vals
        The normalised coordinates and the bounds that were used.

    Notes
    -----
    A column whose bounds coincide (zero span) is divided by 1 instead of 0,
    so it yields finite values (0 when the bounds come from ``coords``)
    rather than NaN/inf.
    """
    # Fill in each missing bound independently so a caller-supplied bound
    # is never silently replaced.
    if min_vals is None:
        min_vals = coords.min(axis=0)
    if max_vals is None:
        max_vals = coords.max(axis=0)

    span = max_vals - min_vals
    # Guard against division by zero on degenerate (constant) columns.
    safe_span = np.where(span == 0, 1, span)

    norm_coords = (coords - min_vals) / safe_span
    return norm_coords, min_vals, max_vals


def compute_phi_matrix(coords, num_basis_list):
    """Evaluate multi-resolution compactly supported radial basis functions.

    For every resolution ``n`` in ``num_basis_list`` a ``sqrt(n) x sqrt(n)``
    grid of knots is laid over the unit square, and each point is scored
    against every knot with

        ((1 - d)**6) * (35*d**2 + 18*d + 3) / 3,   d = distance / theta,

    where ``theta = 2.5 / sqrt(n)``. Values are zero wherever ``d > 1``.

    Parameters
    ----------
    coords : array-like of shape (N, 2)
        Point coordinates. The knots span [0, 1] on both axes, so the
        coordinates are presumably expected to be min-max normalised first.
    num_basis_list : sequence of int
        Number of basis functions per resolution; each must be a perfect
        square.

    Returns
    -------
    numpy.ndarray of shape (N, sum(num_basis_list))
        Basis values, with the columns of each resolution stored side by
        side in the order given by ``num_basis_list``.

    Raises
    ------
    ValueError
        If an entry of ``num_basis_list`` is not a perfect square.
    """
    coords = np.asarray(coords, dtype=float)

    # Validate up front: a non-square count would otherwise produce fewer
    # knots than columns and fail later with an opaque IndexError.
    grid_sides = []
    for n_basis in num_basis_list:
        side = int(round(np.sqrt(n_basis)))
        if side * side != n_basis:
            raise ValueError(
                f"num_basis_list entries must be perfect squares, got {n_basis}"
            )
        grid_sides.append(side)

    N = coords.shape[0]
    phi = np.zeros((N, sum(num_basis_list)))
    K = 0  # column offset of the current resolution inside phi

    for n_basis, side in zip(num_basis_list, grid_sides):
        # Support radius shrinks as the knot grid gets denser.
        theta = 1 / np.sqrt(n_basis) * 2.5
        axis_knots = np.linspace(0, 1, side)
        knots_s1, knots_s2 = np.meshgrid(axis_knots, axis_knots)
        knots = np.column_stack((knots_s1.ravel(), knots_s2.ravel()))

        for i in range(n_basis):
            d = np.linalg.norm(coords - knots[i, :], axis=1) / theta
            # d is a scaled norm and therefore never negative, so only the
            # upper bound of the support needs testing.
            mask = d <= 1
            phi[mask, i + K] = ((1 - d[mask]) ** 6) * \
                              (35 * d[mask] ** 2 + 18 * d[mask] + 3) / 3
        K += n_basis
    return phi


# =====================================================
# 🚀  Loop over regions
# =====================================================
# For each region: rebuild the basis-function features on the CMAQ grid with
# the scaling derived from the training coordinates, run the saved model, and
# write per-class probabilities to a region-tagged CSV.
regions = ["CA", "NE", "SE"]
# Number of basis functions per resolution (each a perfect square).
num_basis = [5**2, 9**2, 15**2, 21**2]

for region in regions:
    print("\n==============================")
    print(f"🔷 Predicting region: {region}")
    print("==============================")

    # Step 1: load the training projection matrix; only its coordinate
    # range is needed here, to reproduce the training-time scaling.
    df_train = pd.read_csv(f"projection_matrix_{region}_train.csv")
    coords_train = df_train[["uj_lat", "uj_lon"]].values
    _, coord_min, coord_max = minmax_normalize(coords_train)

    # Step 2: load the prediction subset (CMAQ)
    df_cmaq = pd.read_csv(f"CMAQ_{region}_subset.csv")
    coords_cmaq = df_cmaq[["lat", "lon"]].values

    # Normalize using training stats only (same helper as for the training
    # coordinates, so both are scaled by one code path).
    coords_cmaq_norm, _, _ = minmax_normalize(coords_cmaq, coord_min, coord_max)

    grid_coords = coords_cmaq
    grid_coords_norm = coords_cmaq_norm

    # Step 3: basis-function features on the grid
    print(f"Computing phi_grid for {region} ...")
    phi_grid = compute_phi_matrix(grid_coords_norm, num_basis)

    # Drop the columns listed in the region's saved index file so the grid
    # features have the same layout as the stored training embeddings
    # (phi_train is loaded only to report its shape for comparison).
    phi_train = np.load(f"embeddings_{region}/phi_train_{region}.npy")
    idx_zero = np.load(f"embeddings_{region}/idx_zero_{region}.npy")
    phi_grid = np.delete(phi_grid, idx_zero, axis=1)

    print("phi_train shape:", phi_train.shape)
    print("phi_grid shape:", phi_grid.shape)

    # Step 4: load model
    model = load_model(f"model_{region}.h5")
    X_grid = phi_grid

    assert model.input_shape[-1] == X_grid.shape[1], \
        f"[{region}] Feature dimension mismatch!"

    # Step 5: prediction
    print(f"Predicting class probabilities for {region} ...")
    pred_probs = model.predict(X_grid, batch_size=4096, verbose=1)
    n_classes = pred_probs.shape[1]

    # Step 6: save
    # NOTE(review): "pred_class" is the 0-based argmax index, whereas the
    # probability columns below are numbered from 1 -- pred_class k refers to
    # column prob_class_{k+1}. Left unchanged in case downstream consumers
    # already account for it; confirm the intended convention.
    output_df = pd.DataFrame({
        "latitude": grid_coords[:, 0],
        "longitude": grid_coords[:, 1],
        "Z2": df_cmaq["PM25"].values,
        "pred_class": np.argmax(pred_probs, axis=1)
    })

    # Attach all probability columns in one concat instead of inserting
    # them one at a time.
    prob_df = pd.DataFrame(
        pred_probs,
        columns=[f"prob_class_{i+1}" for i in range(n_classes)],
    )
    output_df = pd.concat([output_df, prob_df], axis=1)

    output_path = f"DK_grid_probabilities_{region}.csv"
    output_df.to_csv(output_path, index=False)

    print(f"✔ {region} grid preds saved -> {output_path}")

print("\n🎯 All region predictions done successfully! 🎯")



🔷 Predicting region: CA
Computing phi_grid for CA ...
phi_train shape: (2773, 541)
phi_grid shape: (2787, 541)
Predicting class probabilities for CA ...
✔ CA grid preds saved -> DK_grid_probabilities_CA.csv

🔷 Predicting region: NE
Computing phi_grid for NE ...
phi_train shape: (1950, 633)
phi_grid shape: (1953, 633)
Predicting class probabilities for NE ...
✔ NE grid preds saved -> DK_grid_probabilities_NE.csv

🔷 Predicting region: SE
Computing phi_grid for SE ...
phi_train shape: (3560, 607)
phi_grid shape: (3573, 607)
Predicting class probabilities for SE ...
✔ SE grid preds saved -> DK_grid_probabilities_SE.csv

🎯 All region predictions done successfully! 🎯
