In [10]:
import os
import gc
from glob import glob
import sys
import math
import time
import random
import shutil
from pathlib import Path
from typing import Dict, List
from scipy.stats import entropy
from scipy.signal import butter, lfilter, freqz
from contextlib import contextmanager
from collections import defaultdict, Counter

import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, log_loss
from tqdm.auto import tqdm
from functools import partial
import cv2
from PIL import Image
import torch
import torch.nn as nn
import pytorch_lightning as pl
import torch.nn.functional as F
from torch.optim import Adam, SGD, AdamW
import torchvision.models as models
from torch.nn.parameter import Parameter
from torch.utils.data import DataLoader, Dataset
from torch.optim.lr_scheduler import ReduceLROnPlateau, OneCycleLR, CosineAnnealingLR, CosineAnnealingWarmRestarts
from sklearn.preprocessing import LabelEncoder
from torchvision.transforms import v2
from sklearn.model_selection import GroupKFold
from sklearn.model_selection import train_test_split
import albumentations as A
from albumentations import (Compose, Normalize, Resize, RandomResizedCrop, HorizontalFlip, VerticalFlip, ShiftScaleRotate, Transpose)
from albumentations.pytorch import ToTensorV2
from albumentations import ImageOnlyTransform
import timm
from scipy import optimize
import warnings 
warnings.filterwarnings('ignore')
os.environ["CUDA_VISIBLE_DEVICES"]="0,1"
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
from matplotlib import pyplot as plt
import joblib
VERSION=16
BASE_PATH = "/kaggle/input/final-deepmind-comp-dataset/final_deepmind_comp_dataset/zindi_data/"

In [2]:
# ====================================================
# CFG
# ====================================================

class CFG:
    wandb = True
    debug = False
    train=True
    apex=True
    t4_gpu=False
    scheduler='OneCycleLR' # ['ReduceLROnPlateau', 'CosineAnnealingLR', 'CosineAnnealingWarmRestarts','OneCycleLR']
    # CosineAnnealingLR params
    cosanneal_params={
        'T_max':6,
        'eta_min':1e-5,
        'last_epoch':-1
    }
    #ReduceLROnPlateau params
    reduce_params={
        'mode':'min',
        'factor':0.2,
        'patience':4,
        'eps':1e-6,
        'verbose':True
    }
    # CosineAnnealingWarmRestarts params
    cosanneal_res_params={
        'T_0':20,
        'eta_min':1e-6,
        'T_mult':1,
        'last_epoch':-1
    }
    print_freq=5
    num_workers = 1
    cnn_model_name = 'resnet50d'
    model_name = 'resnet50d_resnet1d_multimodal'
    optimizer='Adan'
    epochs = 25
    factor = 0.9
    patience = 2
    eps = 1e-6
    lr = 1e-3
    min_lr = 1e-6
    batch_size = 32
    weight_decay = 1e-2
    batch_scheduler=True
    gradient_accumulation_steps = 1
    max_grad_norm = 1e6
    seed = 2025
    tta = 5
    target_cols = "label"
    target_size = 1
    in_channels = 1
    n_fold = 5
    trn_fold = [0, 1, 2, 3, 4]

In [3]:
def get_score(preds, targets):
    
    return log_loss(targets, preds)

def seed_torch(seed=CFG.seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    torch.backends.cuda.matmul.allow_tf32 = True
    torch.backends.cudnn.enabled = True
    
seed_torch(seed=CFG.seed)

In [4]:
##paths to the oofs from the subsequent notebooks
resnet1d_oof_path = "/kaggle/input/final-deepmind-comp-dataset/final_deepmind_comp_dataset/resnet_1d_subs_and_oof/resnet1d_final_oof.csv"
lgbm_oof_path = "/kaggle/input/final-deepmind-comp-dataset/final_deepmind_comp_dataset/lightgbm_subs_and_oof/lgb_train_with_oof.csv"
fastai_tabular_oof_path = "/kaggle/input/final-deepmind-comp-dataset/final_deepmind_comp_dataset/fastai_subs_and_oof/fastai_train_with_oof.csv"
xgb_oof_path = "/kaggle/input/final-deepmind-comp-dataset/final_deepmind_comp_dataset/xgb_subs_and_oof/xgb_train_with_oof.csv"
tabconv_oof_path = "/kaggle/input/final-deepmind-comp-dataset/final_deepmind_comp_dataset/tabconv_subs_and_oof/tab_conv_oof.csv"
tabtrans_oof_path = "/kaggle/input/final-deepmind-comp-dataset/final_deepmind_comp_dataset/tabtransformer_subs_and_oof/transformer_tab_oof.csv"
fastai_gated_oof_path = "/kaggle/input/final-deepmind-comp-dataset/final_deepmind_comp_dataset/fastai_gated_cnn_subs_and_oof/fastai_Gated_CNN_train_with_oof.csv"
wavenet_gru_oof_path = "/kaggle/input/final-deepmind-comp-dataset/final_deepmind_comp_dataset/wavenet_subs_and_oof/wavenet_gru_transformer_oof_df_version9.csv"
tabnet_oof_path = "/kaggle/input/final-deepmind-comp-dataset/final_deepmind_comp_dataset/tabnet_subs_and_oof/tabnet_oof_koleshjr_version2.csv"

##paths to the subs from the subsequent notebooks
resnet1d_sub_path = "/kaggle/input/final-deepmind-comp-dataset/final_deepmind_comp_dataset/resnet_1d_subs_and_oof/resnet1d_final_test_subs.csv"
fastai_tabular_sub_path = "/kaggle/input/final-deepmind-comp-dataset/final_deepmind_comp_dataset/fastai_subs_and_oof/fastai_test_with_oof.csv"
lgbm_tabular_sub_path ="/kaggle/input/final-deepmind-comp-dataset/final_deepmind_comp_dataset/lightgbm_subs_and_oof/lgb_test_with_oof.csv"
xgb_tabular_sub_path = "/kaggle/input/final-deepmind-comp-dataset/final_deepmind_comp_dataset/xgb_subs_and_oof/xgb_test_with_oof.csv"
tabconv_sub_path = "/kaggle/input/final-deepmind-comp-dataset/final_deepmind_comp_dataset/tabconv_subs_and_oof/tab_conv_test.csv"
tabtrans_sub_path = "/kaggle/input/final-deepmind-comp-dataset/final_deepmind_comp_dataset/tabtransformer_subs_and_oof/transformer_tab_test.csv"
fastai_gated_sub_path = "/kaggle/input/final-deepmind-comp-dataset/final_deepmind_comp_dataset/fastai_gated_cnn_subs_and_oof/fastai_Gated_CNN_test_with_oof.csv"
wavenet_gru_sub_path ="/kaggle/input/final-deepmind-comp-dataset/final_deepmind_comp_dataset/wavenet_subs_and_oof/wavenet_gru_transformer_submission_version9.csv"
tabnet_sub_path = "//kaggle/input/final-deepmind-comp-dataset/final_deepmind_comp_dataset/tabnet_subs_and_oof/tabnet_version2.csv"


#load the oof dfs
lgbm_oof = pd.read_csv(lgbm_oof_path).sort_values(by=['event_t', 'location_id'])
xgb_oof = pd.read_csv(xgb_oof_path).sort_values(by=['event_t', 'location_id'])
tabconv_oof = pd.read_csv(tabconv_oof_path).sort_values(by=['event_t', 'location_id'])
tabtrans_oof = pd.read_csv(tabtrans_oof_path).sort_values(by=['event_t', 'location_id'])
fastai_gated_oof = pd.read_csv(fastai_gated_oof_path).sort_values(by=['event_t', 'location_id'])
wavenet_gru_oof = pd.read_csv(wavenet_gru_oof_path).sort_values(by=['event_t', 'location_id'])
tabnet_oof = pd.read_csv(tabnet_oof_path).sort_values(by=['event_t', 'location_id'])
fastai_tabular_oof = pd.read_csv(fastai_tabular_oof_path).sort_values(by=['event_t', 'location_id'])
resnet1d_oof = pd.read_csv(resnet1d_oof_path).sort_values(by=['event_t', 'location_id'])

#load the sub dfs
lgbm_sub= pd.read_csv(lgbm_tabular_sub_path).sort_values('event_id')
xgb_sub = pd.read_csv(xgb_tabular_sub_path).sort_values('event_id')
tabconv_sub = pd.read_csv(tabconv_sub_path).sort_values('event_id')
tabtrans_sub = pd.read_csv(tabtrans_sub_path).sort_values('event_id')
fastai_gated_sub = pd.read_csv(fastai_gated_sub_path).sort_values('event_id')
wavenet_gru_sub = pd.read_csv(wavenet_gru_sub_path).sort_values('event_id')
tabnet_sub = pd.read_csv(tabnet_sub_path).sort_values('event_id')
fastai_tabular_sub = pd.read_csv(fastai_tabular_sub_path).sort_values('event_id')
resnet1d_subs = pd.read_csv(resnet1d_sub_path).sort_values('event_id')



### Checking the individual oof of each model approach and the uniform ensemble

In [5]:
resnet1d_score = get_score(resnet1d_oof['resnet1d_oof_preds'].values, lgbm_oof['label'].values)
print(f"resnet1d score: {resnet1d_score}")

# wavenet_gru_transformer_score = get_score(wavenet_gru_transformer_oof['predictions'].values, wavenet_gru_transformer_oof['label'].values)
# print(f"wavenet_gru_transformer score: {wavenet_gru_transformer_score}")


fastai_score = get_score(fastai_tabular_oof['oof_fastai'].values, fastai_tabular_oof['label'].values)
print(f"fastai model score: {fastai_score}")


lgbm_score = get_score(lgbm_oof['oof_lgb'].values, lgbm_oof['label'].values)
print(f"lgbm score: {lgbm_score}")

xgb_score = get_score(xgb_oof['oof_xgb'].values, xgb_oof['label'].values)
print(f"xgb score: {xgb_score}")

tabconv_score = get_score(tabconv_oof['oof_preds'].values, tabconv_oof['label'].values)
print(f"tabconv score: {tabconv_score}")

tabtrans_score = get_score(tabtrans_oof['oof_preds'].values, tabtrans_oof['label'].values)
print(f"tabtrans score: {tabtrans_score}")

fastai_gated_score = get_score(fastai_gated_oof['oof_fastai'].values, fastai_gated_oof['label'].values)
print(f"fastai_gated score: {fastai_gated_score}")

wavenet_gru_score = get_score(wavenet_gru_oof['wavelt_oof_preds'].values, wavenet_gru_oof['label'].values)
print(f"wavenet_gru score: {wavenet_gru_score}")

tabnet_score = get_score(tabnet_oof['oof_tabnet_pred'].values, tabnet_oof['label'].values)
print(f"tabnet score: {tabnet_score}")



#ensemble_oof = (lgbm_oof['oof_lgb'].values + ['predictions'].values + wavenet_gru_transformer_oof['predictions'].values + fastai_tabular_oof['oof_preds'].values) / 4
ensemble_oof = (lgbm_oof['oof_lgb'].values + xgb_oof['oof_xgb'].values + fastai_tabular_oof['oof_fastai'].values + tabconv_oof['oof_preds'] + tabtrans_oof['oof_preds'] +
               fastai_gated_oof['oof_fastai'].values + wavenet_gru_oof['wavelt_oof_preds'].values + tabnet_oof['oof_tabnet_pred'].values + resnet1d_oof['resnet1d_oof_preds'].values)/9
lgbm_oof['ensemble_oof'] = ensemble_oof
uniform_ensemble_score = get_score(lgbm_oof['ensemble_oof'], lgbm_oof['label'].values)
print(f"Uniform Ensemble score: {uniform_ensemble_score}")

resnet1d score: 0.0024075282646260124
fastai model score: 0.0026564100747232173
lgbm score: 0.002392372256798666
xgb score: 0.002344927985875686
tabconv score: 0.002568253778283404
tabtrans score: 0.002845684083937957
fastai_gated score: 0.0026142450242668466
wavenet_gru score: 0.0026192319050095864
tabnet score: 0.002566036507378727
Uniform Ensemble score: 0.0020882354159607714


### Normalizing the Probabilities Based on Flood Probability
* Legitimacy of the Step

This normalization is a valid preprocessing step because we are leveraging the flood probability feature predicted by our image classifier in the second stage. Since this feature is model-derived and not manually adjusted or externally injected(like how the leak used row order), it maintains the integrity of the pipeline.

* Mathematical Justification

Normalizing probabilities is a mathematically sound approach that aligns with the objective of minimizing log loss. Unlike setting hard thresholds or rounding values—both of which introduce artificial discontinuities—normalization preserves the relative ordering of predictions while adjusting them to be more coherent with an additional signal (flood probability). This transformation does not arbitrarily manipulate predictions but rather refines their distribution in a way that maintains probabilistic consistency.

* Generalizability & Robustness

This method is inherently generalizable because it relies on a predictive feature (flood probability) rather than dataset-specific artifacts like row order(as seen in the discussion leak), which has been a concern in the discussions. By anchoring the adjustment to a meaningful feature rather than data leakage, we ensure that the approach remains effective across different splits and datasets.

* Empirical Evidence

The improved training and test out-of-fold (OOF) scores confirm that this normalization enhances predictive performance. A technique that boosts both training and test scores while maintaining conceptual soundness is likely to generalize well beyond the current dataset.

In [6]:
from sklearn.metrics import log_loss

print(f"logloss before normalizing: {log_loss(lgbm_oof['label'], lgbm_oof['ensemble_oof'])}")

locations_to_normalize = lgbm_oof[lgbm_oof['flood_probability'] >= 0.7]['location_id'].unique()
lgbm_oof['oof_sum_prob'] = lgbm_oof.groupby('location_id')['ensemble_oof'].transform('sum')

# Avoid division by zero
epsilon = 1e-8
lgbm_oof['ensemble_oof_norm'] = lgbm_oof['ensemble_oof']  # Copy original values

lgbm_oof.loc[lgbm_oof['location_id'].isin(locations_to_normalize), 'ensemble_oof_norm'] = (
    lgbm_oof.loc[lgbm_oof['location_id'].isin(locations_to_normalize), 'ensemble_oof'] /
    (lgbm_oof.loc[lgbm_oof['location_id'].isin(locations_to_normalize), 'oof_sum_prob'] + epsilon)
)

print(f"logloss after normalizing: {log_loss(lgbm_oof['label'], lgbm_oof['ensemble_oof_norm'])}")

logloss before normalizing: 0.0020882354159607714
logloss after normalizing: 0.0020165476689310603


### Optimizing the ensemble weights using nelder mead rather than using a uniform ensemble

In [7]:
# ens = [lgbm_oof['oof_pred'].values, wavenet_gru_transformer_oof['predictions'].values, resnet1d_oof['predictions'].values, fastai_tabular_oof['oof_preds'].values]
ens = [lgbm_oof['oof_lgb'].values,
       xgb_oof['oof_xgb'].values,
       fastai_tabular_oof['oof_fastai'].values,
       tabconv_oof['oof_preds'].values,
       tabtrans_oof['oof_preds'].values,
       fastai_gated_oof['oof_fastai'].values ,
       wavenet_gru_oof['wavelt_oof_preds'].values,
       tabnet_oof['oof_tabnet_pred'].values,
       resnet1d_oof['resnet1d_oof_preds'].values
      ]
labels = lgbm_oof['label'].values

n = len(ens)
initial_weights = [1/n]*n
# Constraint: weights sum to 1
constraints = ({'type': 'eq', 'fun': lambda w: sum(w) - 1})

def objective(weights):
    ensemble_preds = np.zeros(len(ens[0]))
    for k in range(len(weights)):
        ensemble_preds += weights[k]*ens[k]
    loss = log_loss(labels, ensemble_preds)
    return loss

def scipy_opt(ens,seed=42):  
        def scorer_sc(w):  

            labels = lgbm_oof['label'].values
                
            w = [ wi/(sum(w)) for wi in w ]  
            
            i=0
            
            wtd=None
            
            for _w in w: 
                
                if i==0:
                    
                    wtd = ens[i] *_w
                
                else:
                    
                    wtd+=ens[i]*_w
                
                i=i+1  
            
            scr= get_score(wtd, labels)
            
            return scr
        tol = 1e-10  
        
        total = len(ens)
        
        init_weights = [ 1/(total) for i in range(total) ] 
        
        #print(init_weights)
        
        result = optimize.minimize(scorer_sc,
                                        
                                   init_weights,
                                        
                                   #constraints=({'type': 'eq','fun': lambda w: 1-sum(w),'jac': lambda x: [1] * len(x)}),
                                        
                                   method= 'Nelder-Mead',#'Nelder-Mead', #'SLSQP',
                                        
                                   bounds=[(0.001, 1.0)] * len(ens),
                                   #options = {'ftol':1e-10},
                                   #tol=tol
                                   options={'disp': True} 
                                    
                                  ) 
        
        w = result['x']
        
        print(f'Optimum weights = {w} [{sum(w)}] \n') 
        weights = [ wi/(sum(w)) for wi in w ] 
        print('With CV =',  result['fun'], " to use ",weights ) 
        return  result['fun'],weights 
    
    
_,w=  scipy_opt(ens,seed=CFG.seed)     
result = optimize.minimize(objective, initial_weights, )
w2 = result.x
w2

Optimum weights = [0.00100071 0.75168319 0.50319245 0.33788598 0.2264627  0.19454701
 0.3107063  0.99978013 0.56944328] [3.894701754292776] 

With CV = 0.0020720855380638695  to use  [0.0002569420897732828, 0.19300147733824388, 0.12919922640441578, 0.08675528973463324, 0.05814635131465559, 0.049951709088699316, 0.079776660546064, 0.25670261549658385, 0.14620972798693113]


array([0.10040714, 0.14227863, 0.13246341, 0.11017624, 0.06199604,
       0.07317186, 0.11323705, 0.26546746, 0.16159635])

In [8]:
ens = [lgbm_oof['oof_lgb'].values,
       xgb_oof['oof_xgb'].values,
       fastai_tabular_oof['oof_fastai'].values,
       tabconv_oof['oof_preds'].values,
       tabtrans_oof['oof_preds'].values,
       fastai_gated_oof['oof_fastai'].values ,
       wavenet_gru_oof['wavelt_oof_preds'].values,
       tabnet_oof['oof_tabnet_pred'].values,
       resnet1d_oof['resnet1d_oof_preds'].values
      ]

In [11]:
sample_submission = pd.read_csv(BASE_PATH + 'SampleSubmission.csv').sort_values('event_id')
sample_submission['label'] = w2[0]*lgbm_sub['lgb_preds'].values + \
                             w2[1]*xgb_sub['xgb_preds'].values + \
                             w2[2]*fastai_tabular_sub['fastai_preds'].values + \
                             w2[3]*tabconv_sub['preds'].values + \
                             w2[4]*tabtrans_sub['preds'].values + \
                             w2[5]*fastai_gated_sub['fastai_preds'].values + \
                             w2[6]*wavenet_gru_sub['wavelt_preds'].values + \
                             w2[7]*tabnet_sub['tabnet_preds'].values + \
                             w2[8]*resnet1d_subs['resnet_sub_preds'].values




sample_submission.head()

Unnamed: 0,event_id,label
86140,id_066zz28m11mr_X_0,4.6e-05
86141,id_066zz28m11mr_X_1,3.5e-05
86150,id_066zz28m11mr_X_10,3.4e-05
86240,id_066zz28m11mr_X_100,4.8e-05
86241,id_066zz28m11mr_X_101,3.4e-05


In [12]:
sample_submission.to_csv('ensemble_all_models.csv', index = False)

### Normalizing the predictions based on the Flood probability
* Reasons explained above

In [13]:
print(sample_submission.shape, lgbm_sub.shape)
mod_sub = pd.merge(sample_submission,lgbm_sub[['event_id','location_id', 'flood_probability']], on='event_id', how='left' )
print(mod_sub.shape)

(163520, 2) (163520, 6)
(163520, 4)


In [14]:
locations_to_normalize = mod_sub[mod_sub['flood_probability'] >= 0.7]['location_id'].unique()
mod_sub['oof_sum_prob'] = mod_sub.groupby('location_id')['label'].transform('sum')

# Avoid division by zero
epsilon = 1e-8
mod_sub['pred_norm'] = mod_sub['label']  # Copy original values

mod_sub.loc[mod_sub['location_id'].isin(locations_to_normalize), 'pred_norm'] = (
    mod_sub.loc[mod_sub['location_id'].isin(locations_to_normalize), 'label'] /
    (mod_sub.loc[mod_sub['location_id'].isin(locations_to_normalize), 'oof_sum_prob'] + epsilon)
)

mod_sub.head()

Unnamed: 0,event_id,label,location_id,flood_probability,oof_sum_prob,pred_norm
0,id_066zz28m11mr_X_0,4.6e-05,id_066zz28m11mr,0.013285,0.050589,4.6e-05
1,id_066zz28m11mr_X_1,3.5e-05,id_066zz28m11mr,0.013285,0.050589,3.5e-05
2,id_066zz28m11mr_X_10,3.4e-05,id_066zz28m11mr,0.013285,0.050589,3.4e-05
3,id_066zz28m11mr_X_100,4.8e-05,id_066zz28m11mr,0.013285,0.050589,4.8e-05
4,id_066zz28m11mr_X_101,3.4e-05,id_066zz28m11mr,0.013285,0.050589,3.4e-05


In [15]:
mod_sub[['event_id', 'pred_norm']].to_csv('ensemble_all_models_post_processed.csv', index = False)