In [None]:
# --- 1. EARLY STOPPING CLASS (Relaxed) ---
class EarlyStopping:
    """Signal when a monitored validation loss has stopped improving.

    Call the instance once per epoch with the current validation loss. A loss
    counts as an improvement when it is at most ``best_loss - min_delta``
    (the first valid loss is always accepted). Any other call increments
    ``counter``; once ``counter`` reaches ``patience`` the ``early_stop`` flag
    is set. The flag is never cleared by this class.

    Attributes:
        patience: Consecutive non-improving calls tolerated before stopping.
        min_delta: Margin by which a loss must undercut ``best_loss``.
        counter: Non-improving calls seen since the last improvement.
        best_loss: Lowest loss accepted so far, ``None`` until one is accepted.
        early_stop: ``True`` once ``counter`` has reached ``patience``.
    """

    def __init__(self, patience=30, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.best_loss = None
        self.early_stop = False

    def __call__(self, val_loss):
        """Record one validation loss and update the stopping state.

        A NaN loss is treated as a non-improving call: it never replaces
        ``best_loss`` and never resets ``counter``. Without this guard a NaN
        would pass as an improvement (all comparisons with NaN are False),
        poison ``best_loss`` and prevent early stopping from ever firing.
        """
        # NaN is the only value that compares unequal to itself.
        is_nan = val_loss != val_loss
        improved = not is_nan and (
            self.best_loss is None
            or val_loss <= self.best_loss - self.min_delta
        )

        if improved:
            self.best_loss = val_loss
            self.counter = 0
            return

        self.counter += 1
        if self.counter >= self.patience:
            self.early_stop = True

# --- 2. LSTM MODEL (Fixed Dropout) ---
class LSTMModel(nn.Module):
    """LSTM regressor producing one scalar per input sequence.

    The network is an ``nn.LSTM`` (``batch_first=True``), followed by dropout
    on the last time step's hidden output and a linear layer with a single
    output unit.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout probability. It is used between stacked LSTM layers
            (only when ``num_layers > 1``) and by the explicit dropout layer
            applied before the output layer.
    """

    def __init__(self, input_dim, hidden_dim, num_layers, dropout=0.2):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers

        # nn.LSTM only applies its own dropout between stacked layers, so a
        # single-layer LSTM is given 0 here.
        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout if num_layers > 1 else 0,
        )

        # Explicit dropout so that a 1-layer model is still regularised.
        self.dropout_layer = nn.Dropout(dropout)

        # Fully connected output layer: hidden state -> single value.
        self.fc = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        """Return predictions of shape ``(batch, 1)``.

        Args:
            x: Input tensor laid out as ``(batch, seq_len, input_dim)``.
        """
        # No explicit (h0, c0): nn.LSTM defaults both to zeros, created to
        # match the input's device and dtype. This avoids building the states
        # on CPU in the default dtype and copying them over on every call.
        out, _ = self.lstm(x)
        last_step = out[:, -1, :]  # hidden output at the final time step
        return self.fc(self.dropout_layer(last_step))

# --- 3. TRAIN FUNCTION (With More Patience) ---
def train_model(model, X_t, y_t, X_v, y_v, lr=0.01, epochs=300, batch_size=32,
                weight_decay=1e-4, verbose=True, scheduler_patience=10,
                early_stop_patience=30):
    """Train ``model`` with Adam on MSE loss and record per-epoch RMSE.

    Each epoch runs one shuffled pass over the training data in mini-batches,
    then evaluates the whole validation set in a single forward pass. The
    validation RMSE drives both a ``ReduceLROnPlateau`` scheduler (factor 0.5)
    and an ``EarlyStopping`` monitor.

    NOTE: no checkpoint is kept, so on return ``model`` holds the weights of
    the last epoch that ran, not those of the best validation epoch.

    Args:
        model: Network to train; updated in place.
        X_t, y_t: Training inputs and targets (tensors).
        X_v, y_v: Validation inputs and targets (tensors).
        lr: Initial Adam learning rate.
        epochs: Maximum number of epochs.
        batch_size: Mini-batch size for the training loader.
        weight_decay: Adam weight decay (L2 penalty).
        verbose: Print progress every 20 epochs, on the last epoch, and when
            early stopping triggers.
        scheduler_patience: ``patience`` passed to ``ReduceLROnPlateau``.
        early_stop_patience: ``patience`` passed to ``EarlyStopping``.

    Returns:
        Tuple ``(train_losses, val_losses)``: lists with one RMSE value per
        completed epoch.
    """
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

    scheduler = ReduceLROnPlateau(
        optimizer, mode='min', factor=0.5, patience=scheduler_patience
    )
    early_stopper = EarlyStopping(patience=early_stop_patience)

    train_loader = DataLoader(
        TensorDataset(X_t, y_t), batch_size=batch_size, shuffle=True
    )

    train_losses = []
    val_losses = []

    for epoch in range(epochs):
        # --- training pass ---
        model.train()
        epoch_loss = 0.0
        for batch_X, batch_y in train_loader:
            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()
            # Weight by batch size so a smaller final batch does not skew
            # the epoch mean.
            epoch_loss += loss.item() * batch_X.size(0)

        train_mse = epoch_loss / len(X_t)
        train_rmse = np.sqrt(train_mse)

        # --- validation pass (dropout disabled, no gradients) ---
        model.eval()
        with torch.no_grad():
            val_outputs = model(X_v)
            val_loss = criterion(val_outputs, y_v)
            val_rmse = np.sqrt(val_loss.item())

        train_losses.append(train_rmse)
        val_losses.append(val_rmse)

        scheduler.step(val_rmse)
        early_stopper(val_rmse)

        if verbose and (epoch % 20 == 0 or epoch == epochs - 1):
            print(f"Epoch {epoch}/{epochs} | Train RMSE: {train_rmse:.2f} | Val RMSE: {val_rmse:.2f}")

        if early_stopper.early_stop:
            if verbose:
                print(f"Early stopping triggered at epoch {epoch}")
            break

    return train_losses, val_losses

# --- INITIAL TEST ---
# Number of input features is the size of the third axis of X_train.
input_dim = X_train.shape[2]

# Baseline model: one LSTM layer with a deliberately high dropout (0.3) to see
# whether it helps against overfitting.
model_init = LSTMModel(
    input_dim=input_dim,
    hidden_dim=32,
    num_layers=1,
    dropout=0.3,
).to(device)

# Train with train_model's default settings and keep the RMSE histories.
train_hist, val_hist = train_model(
    model_init,
    X_train_tensor,
    y_train_tensor,
    X_val_tensor,
    y_val_tensor,
)

# Learning curves: training vs. validation RMSE per epoch.
plt.figure(figsize=(10, 6))
plt.plot(train_hist, label='Training RMSE')
plt.plot(val_hist, label='Validation RMSE')
plt.title('LSTM Learning Curve (Fixed Dropout & Higher Patience)', fontsize=15)
plt.xlabel('Epochs')
plt.ylabel('RMSE')
plt.legend()
plt.show()

# Score the trained baseline on the test set (eval mode, no gradients).
model_init.eval()
with torch.no_grad():
    y_pred_init_test = model_init(X_test_tensor).cpu().numpy().flatten()

init_test_rmse = np.sqrt(mean_squared_error(y_test, y_pred_init_test))
print(f"Initial LSTM Test RMSE: {init_test_rmse:.2f}")

In [None]:
# --- OPTUNA OBJECTIVE FUNCTION (High Regularization) ---
def objective(trial):
    """Optuna objective: train one sampled LSTM configuration.

    Samples the hyperparameters from ``trial``, trains an ``LSTMModel`` on the
    notebook-level training tensors for at most 150 epochs, and evaluates on
    the validation tensors after every epoch. The validation RMSE feeds the LR
    scheduler, the early stopper and Optuna's pruner.

    Returns:
        Validation RMSE of the last epoch that was evaluated (not the best
        one seen during the run).

    Raises:
        optuna.exceptions.TrialPruned: When the pruner asks to stop the trial.
    """
    # --- 1. Search space ---
    # Small hidden sizes, dropout of at least 0.3 and a wide weight-decay
    # range keep the search biased towards strongly regularised models.
    hidden_dim = trial.suggest_int("hidden_dim", 16, 64)
    num_layers = trial.suggest_int("num_layers", 1, 2)
    dropout = trial.suggest_float("dropout", 0.3, 0.6)
    lr = trial.suggest_float("lr", 1e-4, 0.01, log=True)
    batch_size = trial.suggest_categorical("batch_size", [32, 64, 128])
    weight_decay = trial.suggest_float("weight_decay", 1e-5, 1e-2, log=True)

    # --- 2. Model and training components ---
    net = LSTMModel(input_dim, hidden_dim, num_layers, dropout).to(device)
    loss_fn = nn.MSELoss()
    adam = optim.Adam(net.parameters(), lr=lr, weight_decay=weight_decay)
    lr_scheduler = ReduceLROnPlateau(adam, mode='min', factor=0.5, patience=10)
    stopper = EarlyStopping(patience=30)

    batches = DataLoader(
        TensorDataset(X_train_tensor, y_train_tensor),
        batch_size=batch_size,
        shuffle=True,
    )

    # --- 3. Training loop ---
    max_epochs = 150  # capped to keep the search fast
    for epoch in range(max_epochs):
        net.train()
        for features, targets in batches:
            adam.zero_grad()
            batch_loss = loss_fn(net(features), targets)
            batch_loss.backward()
            adam.step()

        # Validation RMSE on the full validation set.
        net.eval()
        with torch.no_grad():
            val_mse = loss_fn(net(X_val_tensor), y_val_tensor).item()
        val_rmse = np.sqrt(val_mse)

        lr_scheduler.step(val_rmse)

        # Early stopping is checked before reporting, so the epoch that
        # triggers it is not reported to Optuna.
        stopper(val_rmse)
        if stopper.early_stop:
            break

        # Let Optuna's pruner decide whether the trial is worth continuing.
        trial.report(val_rmse, epoch)
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return val_rmse

# --- RUN OPTIMIZATION ---
# Study is named after the selected crop, capitalised, plus a fixed suffix.
study_name = CHOSEN_CROP.capitalize() + '_Yield_LSTM_v2'

# Minimise the value returned by `objective` over 30 trials.
study = optuna.create_study(study_name=study_name, direction='minimize')
study.optimize(objective, n_trials=30)

print("\nBest Parameters found:")
print(study.best_params)