In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.neural_network import MLPRegressor
import matplotlib.pyplot as plt

# --- Load the training data and put it in chronological order ---
df = pd.read_csv("train.csv")
df['date'] = pd.to_datetime(df['date'])
df = df.sort_values('date').reset_index(drop=True)

# --- Calendar components that feed the cyclical encoding ---
date_parts = df['date'].dt
df['month'] = date_parts.month
df['day_of_week'] = date_parts.dayofweek
df['week'] = date_parts.isocalendar().week

# --- Cyclical (sin/cos) features ---
# Each entry is (output prefix, calendar column, cycle length). Mapping a
# calendar value onto the unit circle keeps the end of a cycle numerically
# close to its start (e.g. December next to January).
for prefix, calendar_col, period in (
    ('month', 'month', 12),
    ('dow', 'day_of_week', 7),
    ('week', 'week', 52),
):
    angle = 2 * np.pi * df[calendar_col] / period
    df[f'{prefix}_sin'] = np.sin(angle)
    df[f'{prefix}_cos'] = np.cos(angle)

# The raw date, the row id and the intermediate calendar columns are removed
# so that only the encoded features (plus the remaining columns) are kept.
df = df.drop(columns=['date', 'id', 'month', 'day_of_week', 'week'])

# --- Tuned Neural Net ---
# Pipeline: standardise every feature, then fit an MLP regressor with three
# hidden layers (512 -> 256 -> 128 units).
# NOTE: scikit-learn documents `learning_rate` as only used when solver="sgd",
# so the 'adaptive' schedule has no effect with solver="adam".
mlp_params = dict(
    hidden_layer_sizes=(512, 256, 128),
    activation="relu",
    solver="adam",
    alpha=0.003,                 # L2 regularisation strength
    learning_rate_init=0.0005,
    learning_rate='adaptive',
    max_iter=3000,
    early_stopping=True,         # stop when the internal validation score stalls
    random_state=42,             # fixed seed for reproducible runs
)

nn = Pipeline(steps=[
    ("scaler", StandardScaler()),
    ("model", MLPRegressor(**mlp_params)),
])


# --- Hold-out split for validation ---
# "demand" is the target; every other remaining column is a feature.
y = df["demand"].astype(float)
X = df.drop(columns=["demand"]).astype(float)

# shuffle=False keeps the rows in their existing order, so the last 20% of the
# rows become the validation set (appropriate for time-ordered data).
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=False,
)

nn.fit(X_train, y_train)

# --- Performance metrics on the validation rows ---
y_val_pred = nn.predict(X_val)
val_r2 = r2_score(y_val, y_val_pred)
val_rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))
print("Validation R2:", val_r2)
print("Validation RMSE:", val_rmse)

# --- Predicted vs. actual scatter plot (validation set) ---
fig, ax = plt.subplots(figsize=(7, 7))
ax.scatter(y_val, y_val_pred, alpha=0.5, edgecolor="k")

# Dashed red diagonal = perfect prediction, spanning the range of actual values
actual_range = [y_val.min(), y_val.max()]
ax.plot(actual_range, actual_range, "r--", lw=2)

ax.set_xlabel("Actual Demand")
ax.set_ylabel("Predicted Demand")
ax.set_title("Neural Net Predictions vs Actuals (Validation Set)")
ax.grid(True)
plt.show()

# --- Residuals vs. predictions (validation set) ---
# Residual = actual - predicted; points above zero are under-predictions.
residuals = y_val - y_val_pred

fig, ax = plt.subplots(figsize=(7, 5))
ax.scatter(y_val_pred, residuals, alpha=0.5, edgecolor="k")
ax.axhline(0, color="red", linestyle="--", lw=2)  # zero-error reference line

ax.set_xlabel("Predicted Demand")
ax.set_ylabel("Residual (Actual - Predicted)")
ax.set_title("Residuals vs Predictions (Validation Set)")
ax.grid(True)
plt.show()
