In [3]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, r2_score
from keras.models import Sequential
from keras.layers import Dense, Input
from keras.callbacks import EarlyStopping
from tcn import TCN
import matplotlib.pyplot as plt

In [4]:
df = pd.read_csv("Tony_data/merged_data.csv", parse_dates=["month"])
df = df.dropna(subset=["Unemployment Rate"])  # ensure target exists

# Standardize column names
df = df.rename(columns={
    "Unemployment Rate": "unemployment_rate",
    "median_income": "median_income",
    "lfp_rate": "lfp_rate",
    "initial_claims": "initial_claims",
    "population": "population"
})

feature_cols = ["median_income", "lfp_rate", "initial_claims", "population", "unemployment_rate"]

In [5]:
def create_sequences(data, window):
    X, y = [], []
    for i in range(len(data) - window):
        X.append(data[i:i+window, :-1])
        y.append(data[i+window, -1])
    return np.array(X), np.array(y)

def TCN_model(X_shape:tuple, patience=None):
    model = Sequential()
    model.add(Input(shape=(X_shape[1], X_shape[2])))
    model.add(TCN(nb_filters=64,
                  kernel_size=3,
                  nb_stacks=1,
                  dilations=[1, 2, 4, 8],
                  use_layer_norm=True,
                  dropout_rate=0.02,
                  kernel_initializer='glorot_uniform'))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mse')
    if patience:
        early_stop = EarlyStopping(monitor='val_loss', patience=patience, restore_best_weights=True)
        return model, early_stop
    else:
        return model, None



In [None]:

window = 12
results = []

for state in df['state'].unique():
    try:
        df_state = df[df['state'] == state].sort_values('month')
        if df_state[feature_cols].isnull().any().any():
            continue  # skip if still has NaNs

        scaler = MinMaxScaler()
        scaled = scaler.fit_transform(df_state[feature_cols])
        X, y = create_sequences(scaled, window)

        split1 = int(len(X) * 0.9)
        X_train, y_train = X[:split1], y[:split1]
        X_test, y_test = X[split1:], y[split1:]

        model, early_stop = TCN_model(X_train.shape, patience=3)
        model.fit(X_train, y_train, epochs=30, batch_size=16,
                  validation_data=(X_test, y_test),
                  callbacks=[early_stop] if early_stop else None,
                  verbose=0)

        y_pred = model.predict(X_test)
        rmse = np.sqrt(mean_squared_error(y_test, y_pred))
        r2 = r2_score(y_test, y_pred)

        results.append({
            'state': state,
            'RMSE': rmse,
            'R2': r2
        })

        print(f"✓ Done: {state} | RMSE: {rmse:.4f} | R²: {r2:.4f}")

    except Exception as e:
        print(f"✗ Error in {state}: {e}")

df_results = pd.DataFrame(results).sort_values("RMSE", ascending=True)




2025-07-29 16:23:47.999498: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1
2025-07-29 16:23:47.999630: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 8.00 GB
2025-07-29 16:23:47.999638: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 2.67 GB
I0000 00:00:1753820627.999805 4931034 pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
I0000 00:00:1753820628.000014 4931034 pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
2025-07-29 16:23:49.632077: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 763ms/step
✓ Done: Alabama | RMSE: 0.0375 | R²: -0.3014
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 960ms/step
✓ Done: Alaska | RMSE: 0.0830 | R²: 0.3826
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1s/step  
✓ Done: Arizona | RMSE: 0.1027 | R²: -3.0890
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1s/step
✓ Done: Arkansas | RMSE: 0.0846 | R²: -1.8899
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1s/step
✓ Done: California | RMSE: 0.0733 | R²: 0.2931
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1s/step
✓ Done: Colorado | RMSE: 0.1481 | R²: -1.2543
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2s/step
✓ Done: Connecticut | RMSE: 0.1405 | R²: -0.2711


In [None]:

# RMSE Plot
plt.figure(figsize=(14, 5))
plt.bar(df_results['state'], df_results['RMSE'])
plt.xticks(rotation=90)
plt.title("TCN RMSE per State")
plt.ylabel("RMSE")
plt.tight_layout()
plt.show()

# R² Plot
plt.figure(figsize=(14, 5))
plt.bar(df_results['state'], df_results['R2'])
plt.xticks(rotation=90)
plt.title("TCN R² Score per State")
plt.ylabel("R²")
plt.tight_layout()
plt.show()



NameError: name 'df_results' is not defined

<Figure size 1400x500 with 0 Axes>