In [None]:
# --- 1. EARLY STOPPING CLASS ---
class EarlyStopping:
    """Track a validation loss and flag when it has stopped improving.

    Call the instance once per epoch with the latest validation loss. A loss
    counts as an improvement when it is at most ``best_loss - min_delta``
    (or when no loss has been recorded yet). Any other value, including NaN,
    increments ``counter``. Once ``counter`` reaches ``patience``,
    ``early_stop`` is set to True.

    Args:
        patience: Number of consecutive non-improving calls tolerated before
            ``early_stop`` is set.
        min_delta: Amount by which a loss must undercut ``best_loss`` to
            count as an improvement.
    """

    def __init__(self, patience=15, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0          # calls since the last improvement
        self.best_loss = None     # lowest accepted loss; None until one is recorded
        self.early_stop = False   # set once `counter` reaches `patience`

    def __call__(self, val_loss):
        """Record ``val_loss`` and update ``counter`` and ``early_stop``."""
        # NaN is the only value that compares unequal to itself. Without this
        # guard a NaN loss would fail the "is worse" comparison, be stored as
        # the best loss, and make every later comparison False, so the stopper
        # could never trigger after a diverged epoch.
        is_nan = val_loss != val_loss

        if not is_nan and (
            self.best_loss is None
            or val_loss <= self.best_loss - self.min_delta
        ):
            self.best_loss = val_loss
            self.counter = 0
            return

        self.counter += 1
        if self.counter >= self.patience:
            self.early_stop = True

# --- 2. LSTM MODEL ---
class LSTMModel(nn.Module):
    """LSTM followed by a linear head that emits one value per sequence.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout between LSTM layers. It is forced to 0 when
            ``num_layers`` is 1, since there is no inter-layer connection to
            apply it to.
    """

    def __init__(self, input_dim, hidden_dim, num_layers, dropout=0.2):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers

        # LSTM Layer (batch_first=True: inputs are (batch, seq, feature))
        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout if num_layers > 1 else 0
        )

        # Fully Connected Output Layer
        self.fc = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        """Return a ``(batch, 1)`` prediction for a ``(batch, seq, input_dim)`` input."""
        # No explicit (h0, c0): nn.LSTM starts from an all-zero state when none
        # is given, created to match the input's device and dtype. Building
        # the zeros by hand used the default dtype, which fails for non-float32
        # models, and allocated on the CPU before moving to the device.
        out, _ = self.lstm(x)
        # Keep only the last time step's hidden output for the regression head.
        out = out[:, -1, :]
        return self.fc(out)

# --- 3. TRAIN FUNCTION (FIXED) ---
def train_model(model, X_t, y_t, X_v, y_v, lr=0.01, epochs=300, batch_size=32, weight_decay=1e-4, verbose=True):
    """Train ``model`` with Adam on an MSE objective and return RMSE histories.

    Each epoch runs shuffled mini-batches over ``(X_t, y_t)``, then evaluates
    the whole of ``(X_v, y_v)`` in one forward pass. The validation RMSE
    drives both a ``ReduceLROnPlateau`` scheduler (factor 0.5, patience 5)
    and an ``EarlyStopping`` instance (patience 15).

    Args:
        model: Module to optimise in place.
        X_t, y_t: Training inputs and targets (tensors).
        X_v, y_v: Validation inputs and targets (tensors).
        lr: Adam learning rate.
        epochs: Maximum number of epochs.
        batch_size: Mini-batch size for the training loader.
        weight_decay: Weight decay passed to Adam.
        verbose: Print progress every 20 epochs, on the final epoch, and when
            early stopping triggers.

    Returns:
        ``(train_losses, val_losses)``: per-epoch RMSE lists, one entry for
        every epoch that ran.
    """
    loss_fn = nn.MSELoss()
    opt = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    # No `verbose` kwarg is passed to the scheduler; a note left in an earlier
    # revision says supplying it raised a TypeError.
    lr_sched = ReduceLROnPlateau(opt, mode='min', factor=0.5, patience=5)
    stopper = EarlyStopping(patience=15)
    batches = DataLoader(TensorDataset(X_t, y_t), batch_size=batch_size, shuffle=True)

    train_losses, val_losses = [], []

    for epoch in range(epochs):
        # ---- training pass ----
        model.train()
        weighted_loss_sum = 0
        for xb, yb in batches:
            opt.zero_grad()
            batch_loss = criterion_out = loss_fn(model(xb), yb)
            batch_loss.backward()
            opt.step()
            # Weight by batch size so a short final batch is averaged correctly.
            weighted_loss_sum += criterion_out.item() * xb.size(0)

        train_rmse = np.sqrt(weighted_loss_sum / len(X_t))

        # ---- validation pass (single full-batch forward) ----
        model.eval()
        with torch.no_grad():
            val_mse = loss_fn(model(X_v), y_v)
            val_rmse = np.sqrt(val_mse.item())

        train_losses.append(train_rmse)
        val_losses.append(val_rmse)

        # Scheduler and early stopping both watch the validation RMSE.
        lr_sched.step(val_rmse)
        stopper(val_rmse)

        is_report_epoch = epoch % 20 == 0 or epoch == epochs-1
        if verbose and is_report_epoch:
            print(f"Epoch {epoch}/{epochs} | Train RMSE: {train_rmse:.2f} | Val RMSE: {val_rmse:.2f}")

        if not stopper.early_stop:
            continue
        if verbose:
            print(f"Early stopping triggered at epoch {epoch}")
        break

    return train_losses, val_losses

# --- INITIAL TEST (SMALLER MODEL) ---
# Feature count is read from the last axis of X_train.
input_dim = X_train.shape[2]
# hidden_dim is 32 (reduced from 64) to prevent overfitting immediately.
model_init = LSTMModel(input_dim=input_dim, hidden_dim=32, num_layers=1)
model_init = model_init.to(device)

# Train with train_model's default hyperparameters.
train_hist, val_hist = train_model(
    model_init,
    X_train_tensor,
    y_train_tensor,
    X_val_tensor,
    y_val_tensor,
)

# Learning curves: per-epoch training vs. validation RMSE.
plt.figure(figsize=(10, 6))
plt.plot(train_hist, label='Training RMSE')
plt.plot(val_hist, label='Validation RMSE')
plt.title('LSTM Learning Curve (Early Stopping & Reg)', fontsize=15)
plt.xlabel('Epochs')
plt.ylabel('RMSE')
plt.legend()
plt.show()

# Evaluate on the test tensors with gradients disabled.
model_init.eval()
with torch.no_grad():
    y_pred_init_test = model_init(X_test_tensor).cpu().numpy().flatten()
init_test_rmse = np.sqrt(mean_squared_error(y_test, y_pred_init_test))
print(f"Initial LSTM Test RMSE: {init_test_rmse:.2f}")