In [6]:

# Precipitation Forecasting in Brazzaville - K-Fold Simple MLP Ensemble
# -------------------------------------------------------------
# Script: loads data, preprocesses, trains K simple MLPs (5-fold CV),
# averages predictions, and outputs submission.csv.

# 1. Setup and Imports
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

# 2. GPU Check
print("TensorFlow version:", tf.__version__)
gpus = tf.config.list_physical_devices('GPU')
print("GPUs detected:" if gpus else "No GPU detected, using CPU.", gpus)

# 3. Load Data
train_df = pd.read_csv('/content/Train_data.csv')
test_df  = pd.read_csv('/content/Test_data.csv')

# 4. Preprocessing: datetime features
for df in [train_df, test_df]:
    df['date'] = pd.to_datetime(df['DATE'], format='%Y-%m-%d')
    df['month_sin'] = np.sin(2 * np.pi * df['date'].dt.month / 12)
    df['month_cos'] = np.cos(2 * np.pi * df['date'].dt.month / 12)
    df['day_sin']   = np.sin(2 * np.pi * df['date'].dt.day / 31)
    df['day_cos']   = np.cos(2 * np.pi * df['date'].dt.day / 31)
    df.drop(['date'], axis=1, inplace=True)

# 5. Feature / Target Setup
exclude = ['ID', 'DATE', 'Target']
features = [c for c in train_df.columns if c not in exclude]
X = train_df[features].values
y = train_df['Target'].values
X_test = test_df[features].values

# 6. Scaling
scaler_X = StandardScaler().fit(X)
X_scaled = scaler_X.transform(X)
X_test_scaled = scaler_X.transform(X_test)

scaler_y = StandardScaler().fit(y.reshape(-1,1))
y_scaled = scaler_y.transform(y.reshape(-1,1)).flatten()

# 7. Simple MLP Builder
def build_mlp(input_dim):
    model = keras.Sequential([
        keras.layers.Input(shape=(input_dim,)),
        keras.layers.Dense(64, activation='relu'),
        keras.layers.Dropout(0.2),
        keras.layers.Dense(32, activation='relu'),
        keras.layers.Dropout(0.1),
        keras.layers.Dense(1)
    ])
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=5e-4),
        loss='mse',
        metrics=[keras.metrics.RootMeanSquaredError(name='rmse')]
    )
    return model

# 8. K-Fold Training & Prediction
kf = KFold(n_splits=5, shuffle=True, random_state=1)
preds_test = np.zeros(len(X_test))
fold_scores = []

for fold, (train_idx, val_idx) in enumerate(kf.split(X_scaled)):
    print(f"\n--- Fold {fold+1} ---")
    X_tr, X_val = X_scaled[train_idx], X_scaled[val_idx]
    y_tr, y_val = y_scaled[train_idx], y_scaled[val_idx]

    # Build & train model
    model = build_mlp(X_tr.shape[1])
    callbacks = [
        keras.callbacks.EarlyStopping('val_rmse', patience=5, restore_best_weights=True),
        keras.callbacks.ReduceLROnPlateau('val_rmse', factor=0.5, patience=3)
    ]
    model.fit(
        X_tr, y_tr,
        validation_data=(X_val, y_val),
        epochs=70,
        batch_size=32,
        callbacks=callbacks,
        verbose=0
    )

    # Validation
    val_pred = scaler_y.inverse_transform(
        model.predict(X_val).reshape(-1,1)
    ).flatten()
    rmse = np.sqrt(mean_squared_error(y[val_idx], val_pred))
    print(f"Fold {fold+1} RMSE: {rmse:.4f}")
    fold_scores.append(rmse)

    # Test predictions
    test_pred = scaler_y.inverse_transform(
        model.predict(X_test_scaled).reshape(-1,1)
    ).flatten()
    preds_test += test_pred

# 9. Aggregate Results
print(f"\nAverage CV RMSE: {np.mean(fold_scores):.4f}")
# Average over folds
preds_test /= kf.n_splits
preds_test = np.clip(preds_test, 0, None)

# 10. Save Submission
submission = pd.DataFrame({'ID': test_df['ID'], 'Target': preds_test})
submission.to_csv('submission.csv', index=False)
print("Submission saved to submission.csv")


TensorFlow version: 2.19.0
No GPU detected, using CPU. []

--- Fold 1 ---
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step
Fold 1 RMSE: 8.1081
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 

--- Fold 2 ---
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
Fold 2 RMSE: 5.8126
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step

--- Fold 3 ---
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
Fold 3 RMSE: 7.3461
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step

--- Fold 4 ---
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
Fold 4 RMSE: 6.2612
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step

--- Fold 5 ---
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
Fold 5 RMSE: 7.8961
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step

Average CV RMSE: 7.0848
Submis