## Objective: Final Prediction Computation

This notebook generates the final model predictions and formats them for submission on Codabench. 

The evaluation dataset comprises data from 39 stations included in the training set and 13 stations exclusive to the evaluation set.

<img src="../images/notebook-4.png" alt="Experiment Diagram" style="width:75%; text-align:center;" />


### 1. Imports
Start by importing the necessary libraries, configuring the environment paths, and loading the custom utility functions.


In [None]:
import sys
import pandas as pd
import os
import zipfile
import joblib
import pandas as pd
# Add the directory three levels above the current working directory to the
# import path. NOTE(review): presumably this is the project root that holds
# the `src` package imported below — confirm for your checkout location.
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..','..','..')))

from src.utils.model import load_models_auto
from src.utils.analysis import create_predict_function, create_quantile_function
from src.utils.model import load_models_auto

In [None]:
# --- Prediction settings ---------------------------------------------------
# Passed to create_quantile_function when building the prediction bounds.
ALPHA = 0.125
# Number of weekly horizons to predict (models are indexed week_0 .. week_{N-1}).
NUMBER_OF_WEEK = 4
# True  -> load the models through load_models_auto (automatic scan of the model folder).
# False -> load a specific, explicitly named set of model files instead.
USE_AUTO_SCAN = True
# Model family used for the final predictions.
FINAL_MODEL = "qrf"

# --- Paths -------------------------------------------------------------------
# Replace PROJECT_DIR with your own project directory (keep the trailing slash);
# every other path below is derived from it.
PROJECT_DIR = "/N/lustre/project/proj-212/Ramtelpp/PersonalProject/Coda/"
MODEL_DIR = f"{PROJECT_DIR}model/"
EVAL_DIR = f"{PROJECT_DIR}data/evaluation/"
EVAL_DIR_MINI = f"{PROJECT_DIR}data/evaluation_mini/"

# Set to True to also append the mini-challenge evaluation data to the inference set.
COMPUTE_MINICHALLENGE = False

### 2. Data and Model Loading

Load the inference dataset.

In [None]:
# Read the baseline evaluation dataset and index it by the "ObsDate" column.
inference_data = pd.read_csv(f"{EVAL_DIR}dataset_baseline.csv").set_index("ObsDate")

# Optionally stack the mini-challenge dataset underneath the main one.
if COMPUTE_MINICHALLENGE:
    inference_data_mini = pd.read_csv(f"{EVAL_DIR_MINI}dataset_baseline.csv").set_index("ObsDate")
    inference_data = pd.concat([inference_data, inference_data_mini], axis=0)

In [None]:
# Sanity check: display the resolved path of the dataset file read above.
f"{EVAL_DIR}dataset_baseline.csv"

In [None]:
# Columns kept for inference: the station identifier followed by the
# features fed to the models.
feature_columns = [
    "station_code",
    "water_flow_lag_1w",
    "water_flow_lag_2w",
    "soil_moisture_region",
    "precipitation_region_lag_1w",
    "catchment",
    "soil_moisture",
    "temperatures",
    "precipitation_sector_lag_1w",
    "soil_moisture_sub_sector",
    "precipitation_sub_sector_lag_1w",
    "precipitation_zone",
    "precipitation_sub_sector",
    "evaporation_sub_sector_lag_1w",
    "temperature_region",
]

# Restrict the inference data to exactly those columns, in the order listed.
inference_data = inference_data.loc[:, feature_columns]

Load the final models.

In [None]:
# Load the final models (one per forecast week) into `final_models`.
if FINAL_MODEL != "qrf":
    # Fail here with a clear message instead of continuing with an empty
    # model list and hitting an opaque error in the prediction step.
    raise ValueError(
        f"Unsupported FINAL_MODEL {FINAL_MODEL!r}: this notebook only handles 'qrf'."
    )

if USE_AUTO_SCAN:
    # Let the project helper pick the "qrf_quantile" models found in the final model folder.
    final_models = load_models_auto("qrf_quantile", f"{MODEL_DIR}final/")
else:
    # Manual mode: load one specific training run, one file per week.
    # joblib.load unpickles the file, so only load model files you trust.
    manual_run_id = "2025-04-11_20-24-55"
    final_models = [
        joblib.load(f"{MODEL_DIR}final/qrf_quantile_{manual_run_id}_week_{week}.pkl")
        for week in range(NUMBER_OF_WEEK)
    ]



### 3. Prediction Computation

The evaluation data includes a spatio-temporal split and a temporal-only split.

<img src="../images/eval.png" alt="Experiment Diagram" style="width:50%;" />


In [None]:
# numpy is not used directly in this cell; the import is kept in case other cells rely on `np`.
import numpy as np

# Fail early with a clear message: without this guard an unsupported model
# type would skip every prediction and surface later as `KeyError: 0`.
if FINAL_MODEL != "qrf":
    raise ValueError(
        f"Unsupported FINAL_MODEL {FINAL_MODEL!r}: this notebook only handles 'qrf'."
    )

# Keep the station identifiers for the output table; they are not passed to the models.
predictions = inference_data[['station_code']].copy()
X_test = inference_data.drop(columns=['station_code'])
# These two columns are removed only if present (errors='ignore').
X_test = X_test.drop(columns=['altitude (m)', 'catchment_area (km2)'], errors='ignore')

# Point predictions and quantile bounds, keyed by week index.
y_pred_test = {}
y_pred_test_quantile = {}
for i in range(NUMBER_OF_WEEK):
    # Build the per-week prediction callables from the loaded models.
    predict_adjusted = create_predict_function(final_models, i, FINAL_MODEL)
    quantile_adjusted = create_quantile_function(final_models, i, FINAL_MODEL, ALPHA)

    y_pred_test[i] = predict_adjusted(X_test)
    y_pred_test_quantile[i] = quantile_adjusted(X_test)

# Assemble the output columns only after every week has been predicted.
for i in range(NUMBER_OF_WEEK):
    week_quantiles = y_pred_test_quantile[i]
    predictions[f"week_{i}_pred"] = y_pred_test[i]
    # Column 1 is stored as the upper bound ("sup"), column 0 as the lower bound ("inf").
    predictions[f"week_{i}_sup"] = week_quantiles[:, 1]
    predictions[f"week_{i}_inf"] = week_quantiles[:, 0]


### 4. Saving the Predictions

In [None]:
# save the predictions to a csv file
predictions["ObsDate"] = X_test.index
predictions.to_csv(f"{EVAL_DIR}predictions.csv", index=False)

In [None]:
# Package predictions.csv into a compressed ZIP archive; the file is stored
# at the archive root under the name "predictions.csv".
csv_to_archive = f"{EVAL_DIR}predictions.csv"
archive_path = f"{EVAL_DIR}predictions.zip"
with zipfile.ZipFile(archive_path, mode='w', compression=zipfile.ZIP_DEFLATED) as archive:
    archive.write(csv_to_archive, arcname="predictions.csv")
