In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, r2_score
from keras.models import Sequential
from keras.layers import Dense, Input
from keras.callbacks import EarlyStopping
from tcn import TCN
import matplotlib.pyplot as plt

In [None]:
# Read the merged panel (parsing "month" as dates), drop rows without a target
# value, then apply the column-name mapping. Only "Unemployment Rate" actually
# changes name; the remaining entries map each column to itself.
df = (
    pd.read_csv("Tony_data/merged_data.csv", parse_dates=["month"])
    .dropna(subset=["Unemployment Rate"])  # ensure target exists
    .rename(
        columns={
            "Unemployment Rate": "unemployment_rate",
            "median_income": "median_income",
            "lfp_rate": "lfp_rate",
            "initial_claims": "initial_claims",
            "population": "population",
        }
    )
)

# Columns fed to the model. The target is kept last on purpose:
# create_sequences() reads the final column as y and all other columns as inputs.
feature_cols = [
    "median_income",
    "lfp_rate",
    "initial_claims",
    "population",
    "unemployment_rate",
]

In [None]:
def create_sequences(data, window):
    """Slice a 2-D table into sliding-window samples for one-step-ahead prediction.

    The last column of ``data`` is treated as the target and every other
    column as an input feature. Sample ``i`` pairs the feature rows
    ``i .. i + window - 1`` with the target value found in row ``i + window``.

    Parameters
    ----------
    data : array-like, 2-D
        Rows are consecutive time steps; converted with ``np.asarray``.
    window : int
        Number of consecutive rows per input sample. Must be >= 0.

    Returns
    -------
    X : np.ndarray, shape (n_samples, window, n_columns - 1)
    y : np.ndarray, shape (n_samples,)
        ``n_samples`` is ``max(len(data) - window, 0)``. When it is zero the
        arrays are empty but still carry the shapes above, so callers can
        read ``X.shape[1]`` / ``X.shape[2]`` without an ``IndexError``.

    Raises
    ------
    ValueError
        If ``window`` is negative.
    """
    data = np.asarray(data)
    if window < 0:
        raise ValueError(f"window must be non-negative, got {window}")

    n_samples = len(data) - window
    if n_samples <= 0:
        # Not enough rows for even one (window, next-row) pair: return
        # correctly-ranked empty arrays instead of a rank-1 empty array.
        n_inputs = data.shape[1] - 1
        return (
            np.empty((0, window, n_inputs), dtype=data.dtype),
            np.empty((0,), dtype=data.dtype),
        )

    X = np.stack([data[i:i + window, :-1] for i in range(n_samples)])
    # Targets are the last column of every row from index `window` onwards.
    y = data[window:, -1].copy()
    return X, y

def TCN_model(X_shape:tuple, patience=None):
    """Build and compile a single-output TCN regressor.

    Parameters
    ----------
    X_shape : tuple
        Shape of the training inputs; only ``X_shape[1]`` and ``X_shape[2]``
        are read and used as the per-sample input shape.
    patience : int or None
        When truthy, an ``EarlyStopping`` callback watching ``val_loss`` with
        this patience (and ``restore_best_weights=True``) is created.
        ``None`` or ``0`` disables it.

    Returns
    -------
    (model, callback)
        The compiled model (Adam optimizer, MSE loss) and either the
        ``EarlyStopping`` instance or ``None``.
    """
    steps_per_sample = X_shape[1]
    inputs_per_step = X_shape[2]

    tcn_options = dict(
        nb_filters=64,
        kernel_size=3,
        nb_stacks=1,
        dilations=[1, 2, 4, 8],
        use_layer_norm=True,
        dropout_rate=0.02,
        kernel_initializer='glorot_uniform',
    )

    net = Sequential()
    net.add(Input(shape=(steps_per_sample, inputs_per_step)))
    net.add(TCN(**tcn_options))
    net.add(Dense(1))  # single regression output
    net.compile(optimizer='adam', loss='mse')

    stopper = None
    if patience:
        stopper = EarlyStopping(
            monitor='val_loss',
            patience=patience,
            restore_best_weights=True,
        )
    return net, stopper



In [None]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np
import pandas as pd

# --- Initialize ---
window = 12        # number of past months in each input sequence
test_frac = 0.1    # final share of sequences held out for scoring (90/10 split)
val_frac = 0.1     # tail share of the training sequences used only for early stopping
rmse_dict = {}
r2_dict = {}

# --- Loop through states ---
# Each state gets its own scaler and its own freshly built model. Errors are
# caught per state so that one failure does not abort the whole sweep.
for state in df['state'].unique():
    try:
        # --- 1. Filter state and restrict to pre-2020 ---
        df_state = df[df['state'] == state].sort_values('month')
        df_state = df_state[df_state['month'] < '2020-01-01']

        # --- 2. Skip if NaNs or too little data for a meaningful split ---
        if df_state[feature_cols].isnull().any().any():
            print(f"- Skipped {state}: missing feature values")
            continue

        # create_sequences yields len(df_state) - window samples. Work out the
        # chronological split sizes first so short series are skipped cleanly
        # instead of failing inside the model code.
        n_sequences = len(df_state) - window
        n_trainval = int(n_sequences * (1 - test_frac))
        n_train = int(n_trainval * (1 - val_frac))
        n_val = n_trainval - n_train
        n_test = n_sequences - n_trainval
        # R² is undefined for fewer than two test points.
        if n_train < 1 or n_val < 1 or n_test < 2:
            print(f"- Skipped {state}: only {len(df_state)} rows")
            continue

        # --- 3. Scale and sequence ---
        # Fit the scaler only on rows that the train/validation sequences can
        # see (inputs and targets reach up to row n_trainval + window - 1), so
        # the held-out period does not leak its min/max into training.
        values = df_state[feature_cols].to_numpy(dtype=float)
        scaler = MinMaxScaler()
        scaler.fit(values[:n_trainval + window])
        scaled = scaler.transform(values)
        X, y = create_sequences(scaled, window)

        # --- 4. Chronological train / validation / test split ---
        X_train, y_train = X[:n_train], y[:n_train]
        X_val, y_val = X[n_train:n_trainval], y[n_train:n_trainval]
        X_test, y_test = X[n_trainval:], y[n_trainval:]

        # --- 5. Train model ---
        # Early stopping monitors the validation slice, not the test set, so
        # the restored "best" weights are not selected on the data we score.
        model, early_stop = TCN_model(X_train.shape, patience=3)
        model.fit(
            X_train, y_train,
            epochs=30, batch_size=16,
            validation_data=(X_val, y_val),
            callbacks=[early_stop] if early_stop else None,
            verbose=0
        )

        # --- 6. Predict and score ---
        # Metrics are computed on the scaled target (same units as the loss).
        y_pred = model.predict(X_test, verbose=0).flatten()
        rmse = np.sqrt(mean_squared_error(y_test, y_pred))
        r2 = r2_score(y_test, y_pred)

        # --- 7. Save to dicts for plotting ---
        rmse_dict[state] = rmse
        r2_dict[state] = r2

        print(f"✓ Done: {state} | RMSE: {rmse:.4f} | R²: {r2:.4f}")

    except Exception as e:
        print(f"✗ Error in {state}: {e}")


In [None]:
import matplotlib.pyplot as plt

# Order the states from lowest to highest RMSE; both charts share this order
# so bars for the same state line up between the two figures.
states_sorted = sorted(rmse_dict, key=lambda name: rmse_dict[name])

# One bar chart per metric: (scores, bar colour, title, y-axis label).
for scores, bar_color, heading, axis_label in (
    (rmse_dict, 'skyblue', "TCN RMSE per State", "RMSE"),
    (r2_dict, 'mediumseagreen', "TCN R² Score per State", "R²"),
):
    plt.figure(figsize=(14, 5))
    bar_heights = [scores[name] for name in states_sorted]
    plt.bar(states_sorted, bar_heights, color=bar_color)
    plt.xticks(rotation=90)  # state names are long; rotate to avoid overlap
    plt.title(heading)
    plt.ylabel(axis_label)
    plt.tight_layout()
    plt.show()


In [None]:
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import numpy as np

# --- Data: one point per scored state, in rmse_dict insertion order ---
states = list(rmse_dict)
xs = np.arange(len(states))               # x: positional index of the state
ys = [r2_dict[name] for name in states]   # y: R² score
zs = [rmse_dict[name] for name in states] # z: RMSE (also drives the colour)

# --- Figure with a single 3D axes ---
fig = plt.figure(figsize=(14, 9))
ax = fig.add_subplot(111, projection='3d')

# --- Scatter, coloured by RMSE ---
sc = ax.scatter(xs, ys, zs, c=zs, cmap='coolwarm', s=80, depthshade=True)

# --- Ticks: state names on x, smaller fonts on every axis ---
ax.set_xticks(xs)
ax.set_xticklabels(states, rotation=90, fontsize=6, ha='center')
ax.tick_params(axis='x', labelsize=6, pad=2)
for axis_name in ('y', 'z'):
    ax.tick_params(axis=axis_name, labelsize=7)

# --- Axis labels (extra labelpad on x keeps it clear of the rotated ticks) ---
ax.set_xlabel('State Index', fontsize=9, labelpad=20)
ax.set_ylabel('R² Score', fontsize=9, labelpad=10)
ax.set_zlabel('RMSE', fontsize=9, labelpad=10)

# --- Title and colour bar ---
ax.set_title('3D Scatter Plot of Model Performance per State', fontsize=12, pad=20)
cbar = fig.colorbar(sc, ax=ax, shrink=0.6, pad=0.1)
cbar.set_label('RMSE', fontsize=9)

fig.tight_layout()
plt.show()
