In [2]:
import numpy as np
from numpy import exp
import tensorflow as tf
from scipy.stats import norm
import pandas as pd

# Sampling configuration.
num_samples = 100  # number of Z1 draws generated for every grid point
epsilon = 1e-12    # floor for the weight normalisation and inside the log
h = 3              # kernel bandwidth on Z2; also the std. dev. of each Z1 component
regions = ["CA", "NE", "SE"]

# For each region: build a per-grid-point Gaussian mixture over Z1 whose
# component means are the per-class mean Z1 of the training nodes, whose
# mixing weights are the predicted class probabilities re-weighted by a
# Gaussian kernel on the Z2 distance to each node, and whose common standard
# deviation is h. Then draw num_samples values per grid point and write them
# next to latitude / longitude / Z2 in one CSV per region.
for region in regions:
    print("\n==============================")
    print(f"Sampling Z1 for region: {region}")
    print("==============================")

    # Load prediction results + training nodes
    df_pred = pd.read_csv(f"DK_grid_probabilities_{region}.csv")
    df_train = pd.read_csv(f"projection_matrix_{region}_train.csv")

    N = len(df_pred)

    # Mixture component locations (per region): mean Z1 and Z2 of the training
    # rows of each class. groupby sorts the class labels, so entry k belongs
    # to the k-th smallest label. One aggregation serves both columns.
    node_means = df_train.groupby("class")[["Z1", "Z2"]].mean()
    Z1_node = node_means["Z1"].values
    Z2_node = node_means["Z2"].values
    num_class = len(Z1_node)

    print(f"Classes: {num_class} | Grid points: {N}")

    # Class probability matrix, shape (N, K): every column after the first
    # four of the prediction file.
    # NOTE(review): assumes these columns are ordered like the sorted class
    # labels above -- confirm against the code that writes the file.
    pi = df_pred.iloc[:, 4:].values
    if pi.shape[1] != num_class:
        # Without this check a (N, 1) or (N, K) vs (1,) pairing would
        # broadcast silently and produce meaningless weights.
        raise ValueError(
            f"{region}: prediction file has {pi.shape[1]} probability "
            f"columns but the training data has {num_class} classes"
        )
    Z2_val = df_pred["Z2"].values.reshape(-1, 1)  # (N, 1), broadcasts against (K,)

    # Kernel weights: class probability times a Gaussian kernel on the scaled
    # Z2 distance to every node, normalised so each row sums to 1. The clip
    # keeps the division finite when a whole row of weights is zero.
    phi_vals = norm.pdf((Z2_val - Z2_node) / h)
    weights = pi * phi_vals
    weights_sum = np.sum(weights, axis=1, keepdims=True)
    weights = weights / np.clip(weights_sum, epsilon, None)

    # Categorical sampling of the component index, shape (N, num_samples).
    # Adding epsilon keeps the log finite; a row whose weights are all zero
    # therefore gets equal logits, i.e. a uniform choice among the classes.
    logits = tf.math.log(tf.constant(weights, dtype=tf.float32) + epsilon)
    cat_samples = tf.random.categorical(logits, num_samples)

    # Mean of the chosen component for every draw. Every grid point shares the
    # same K means, so a plain gather on the (K,) vector is enough; there is
    # no need to tile it to (N, K) first.
    component_mu = tf.constant(Z1_node, dtype=tf.float32)
    chosen_mu = tf.gather(component_mu, cat_samples)

    # Z1 sampling: mean + h * standard normal noise (all components share the
    # standard deviation h, so it is applied as a scalar).
    eps = tf.random.normal(shape=(N, num_samples))
    samples = chosen_mu + tf.constant(h, dtype=tf.float32) * eps

    samples_df = pd.DataFrame(
        samples.numpy(),
        columns=[f"sample_{i+1}" for i in range(num_samples)],
    )

    # Coordinates and Z2 first, then the sample columns; the index is reset so
    # the two frames line up row by row.
    df_test = pd.concat(
        [
            df_pred[['latitude', 'longitude', 'Z2']].reset_index(drop=True),
            samples_df,
        ],
        axis=1,
    )

    out_file = f"predicted_results_heatmap_{region}.csv"
    df_test.to_csv(out_file, index=False)

    print(f"📌 Saved regional heatmap results → {out_file}")
    print(f"▶ Shape: {df_test.shape}")


print("\n🎯 All region sampling done!")



Sampling Z1 for region: CA
Classes: 51 | Grid points: 2787
📌 Saved regional heatmap results → predicted_results_heatmap_CA.csv
▶ Shape: (2787, 103)

Sampling Z1 for region: NE
Classes: 36 | Grid points: 1953
📌 Saved regional heatmap results → predicted_results_heatmap_NE.csv
▶ Shape: (1953, 103)

Sampling Z1 for region: SE
Classes: 65 | Grid points: 3573
📌 Saved regional heatmap results → predicted_results_heatmap_SE.csv
▶ Shape: (3573, 103)

🎯 All region sampling done!
