# Libraries

In [1]:
import os
import gc
from glob import glob
import sys
import math
import time
import random
import shutil
from pathlib import Path
from typing import Dict, List
from scipy.stats import entropy
from scipy.signal import butter, lfilter, freqz
from contextlib import contextmanager
from collections import defaultdict, Counter

from scipy.interpolate import interp1d
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.preprocessing import LabelEncoder, PowerTransformer
from sklearn.metrics import accuracy_score, log_loss
from tqdm.auto import tqdm
from functools import partial
import cv2
from PIL import Image
import torch
import torch.nn as nn
import pytorch_lightning as pl
import torch.nn.functional as F
from torch.optim import Adam, SGD, AdamW
import torchvision.models as models
from torch.nn.parameter import Parameter
from torch.utils.data import DataLoader, Dataset
from torch.optim.lr_scheduler import ReduceLROnPlateau, OneCycleLR, CosineAnnealingLR, CosineAnnealingWarmRestarts
from sklearn.preprocessing import LabelEncoder
from torchvision.transforms import v2
from sklearn.model_selection import GroupKFold, StratifiedGroupKFold
from sklearn.model_selection import train_test_split
import albumentations as A
from albumentations import (Compose, Normalize, Resize, RandomResizedCrop, HorizontalFlip, VerticalFlip, ShiftScaleRotate, Transpose)
from albumentations.pytorch import ToTensorV2
from albumentations import ImageOnlyTransform
import timm
import warnings 
warnings.filterwarnings('ignore')
os.environ["CUDA_VISIBLE_DEVICES"]="0,1"
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
from matplotlib import pyplot as plt
import joblib
VERSION=18
base_path = "/kaggle/input/final-deepmind-comp-dataset/final_deepmind_comp_dataset/zindi_data/"
additional_path = "/kaggle/input/final-deepmind-comp-dataset/final_deepmind_comp_dataset/image_classifier_results/"

  check_for_updates()


# Config

In [2]:
# ====================================================
# CFG
# ====================================================

class CFG:
    """Static configuration namespace for the notebook.

    Attributes are read directly as ``CFG.<name>``; the class is never
    instantiated in this notebook.
    """

    # --- run switches ---
    wandb = False
    debug = True
    train = True
    apex = True
    t4_gpu = False  # when True the inference loop wraps the model in nn.DataParallel

    # --- learning-rate scheduler ---
    # One of: 'ReduceLROnPlateau', 'CosineAnnealingLR',
    # 'CosineAnnealingWarmRestarts', 'OneCycleLR'
    scheduler = 'OneCycleLR'
    # CosineAnnealingLR params
    cosanneal_params = dict(T_max=6, eta_min=1e-5, last_epoch=-1)
    # ReduceLROnPlateau params
    reduce_params = dict(mode='min', factor=0.2, patience=4, eps=1e-6, verbose=True)
    # CosineAnnealingWarmRestarts params
    cosanneal_res_params = dict(T_0=20, eta_min=1e-6, T_mult=1, last_epoch=-1)

    # --- logging / loading ---
    print_freq = 15
    num_workers = 1

    # --- model / optimiser ---
    cnn_model_name = 'resnet1d'
    model_name = 'resnet1d'
    optimizer = 'Adan'
    epochs = 25
    factor = 0.9
    patience = 2
    eps = 1e-6
    lr = 1e-3
    min_lr = 1e-6
    batch_size = 16
    weight_decay = 1e-2
    batch_scheduler = True
    gradient_accumulation_steps = 1
    max_grad_norm = 1e6

    # --- reproducibility / task ---
    seed = 2025
    target_cols = "label"
    target_size = 1
    in_channels = 1

    # --- cross-validation ---
    n_fold = 10
    # Every fold by default; e.g. set trn_fold = [2] to restrict to one fold.
    trn_fold = list(range(n_fold))

# Utils

In [3]:
def get_score(preds, targets):
    """Return the log loss of ``preds`` measured against ``targets``.

    Note the argument order: predictions first, ground truth second; they
    are forwarded to ``log_loss`` as ``y_pred`` and ``y_true`` respectively.
    """
    return log_loss(y_true=targets, y_pred=preds)

def get_location(value):
    """Return the first two ``_``-separated fields of ``value`` joined by ``_``.

    Example: ``"id_abc_X_12"`` -> ``"id_abc"``. Raises ``IndexError`` when
    ``value`` contains no underscore.
    """
    fields = value.split("_")
    return f"{fields[0]}_{fields[1]}"


# def seed_torch(seed=42):
#     random.seed(seed)
#     os.environ['PYTHONHASHSEED'] = str(seed)
#     np.random.seed(seed)
#     torch.manual_seed(seed)
#     torch.cuda.manual_seed(seed)
#     torch.cuda.manual_seed_all(seed)  # If using multi-GPU
#     torch.backends.cudnn.deterministic = True
#     torch.backends.cudnn.benchmark = False

def seed_torch(seed=CFG.seed):
    """Seed Python, NumPy and PyTorch (CPU + CUDA) RNGs and set cuDNN flags.

    Args:
        seed: Integer seed applied to every generator; defaults to ``CFG.seed``.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    # Same seed for every random number generator used in the notebook.
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seeder in seeders:
        seeder(seed)

    # cuDNN stays enabled, restricted to deterministic kernels, no autotuning.
    cudnn = torch.backends.cudnn
    cudnn.enabled = True
    cudnn.deterministic = True
    cudnn.benchmark = False
    # TF32 matmuls remain allowed.
    torch.backends.cuda.matmul.allow_tf32 = True


seed_torch(seed=CFG.seed)

# Load data

## Load precipitation data

In [4]:
# Test rows (one per event_id) and the per-location flood probabilities.
cv_columns = ['location_id', 'flood_probability']
data_test = pd.read_csv(base_path + "Test.csv")
data_test_with_cv = pd.read_csv(additional_path + "test_with_cv_results.csv")[cv_columns]

# location_id = first two '_'-separated fields of event_id.
data_test['location_id'] = data_test['event_id'].map(
    lambda event_id: '_'.join(event_id.split('_')[0:2])
)
# Integer id per location, numbered in order of first appearance.
data_test['event_idx'] = data_test.groupby('location_id', sort=False).ngroup()
# Running row counter within each location (0, 1, 2, ...), in file order.
data_test['event_t'] = data_test.groupby('location_id').cumcount()

# Attach flood_probability to every row of its location.
data_test = data_test.merge(data_test_with_cv, on='location_id', how='left')

print(len(set(data_test['location_id'])))

224


In [5]:
def norm_feature_engineering(df, max_shift=181):
    """Build the per-location feature matrix fed to the model.

    The input frame is NOT modified. Features, in row order of the result:

    1. ``log(precipitation + 0.01)`` cast to float32,
    2. for every ``w`` in ``1..max_shift``: that series shifted by ``+w`` and
       by ``-w`` rows (interleaved: pos_1, neg_1, pos_2, neg_2, ...), with the
       vacated positions filled with 0,
    3. ``flood_probability``.

    Args:
        df: Frame with ``precipitation`` and ``flood_probability`` columns,
            already ordered along time (rows are shifted positionally).
        max_shift: Largest lag/lead to generate. The default of 181 yields
            ``1 + 2 * 181 + 1 = 364`` feature rows.

    Returns:
        np.ndarray of shape ``(2 * max_shift + 2, len(df))``; remaining NaNs
        are replaced by 0.
    """
    log_precipitation = np.log(df["precipitation"] + 0.01).astype("float32")

    # Collect every column first and concatenate once: avoids mutating the
    # caller's frame and the repeated re-allocation of column-by-column inserts.
    columns = {"precipitation": log_precipitation}
    for w in range(1, max_shift + 1):
        columns['precipitation_shift_pos_' + str(w)] = log_precipitation.shift(w).fillna(0)
        columns['precipitation_shift_neg_' + str(w)] = log_precipitation.shift(-w).fillna(0)
    columns["flood_probability"] = df["flood_probability"]

    features = pd.concat(columns, axis=1).fillna(0)

    # (time, feature) -> (feature, time)
    return features.to_numpy().transpose(1, 0)

def apply_boxcox_transformation(df, col):
    """
    Box-Cox transform one column of a copy of ``df``, treating exact zeros
    separately.

    Entries different from 0.0 are passed through a freshly fitted
    ``PowerTransformer(method='box-cox')``; entries equal to 0.0 are set to
    the smallest transformed value minus 0.001.

    Args:
        df (pd.DataFrame): Input DataFrame (not modified).
        col (str): Column name to transform.

    Returns:
        pd.DataFrame: Copy of ``df`` with ``col`` replaced by the transformed values.
    """
    result = df.copy()
    is_nonzero = result[col] != 0.0

    # Fit and apply the transform on the non-zero entries only.
    transformer = PowerTransformer(method='box-cox')
    nonzero_values = transformer.fit_transform(result.loc[is_nonzero, [col]]).flatten()

    new_values = np.zeros_like(result[col], dtype=float)
    new_values[is_nonzero] = nonzero_values
    # Zeros sit just below the smallest transformed value.
    new_values[~is_nonzero] = nonzero_values.min() - 0.001

    result[col] = new_values
    return result

def time_warp(features, sigma=0.2, knot=4):
    """
    Randomly warp the time axis of every feature row independently.

    For each row, ``knot`` scale factors are drawn from N(1, sigma) at evenly
    spaced positions over ``[0, n_steps]``, linearly interpolated to every
    step, and the row is resampled at ``step * scale``.

    Args:
        features: Array indexed as ``(feature, time)``.
        sigma: Standard deviation of the scale factors.
        knot: Number of control points.

    Returns:
        Array with the same shape and dtype as ``features``.
    """
    n_features, n_steps = features.shape[0], features.shape[1]
    steps = np.arange(n_steps)
    knot_positions = np.linspace(0, n_steps, num=knot)

    warped = np.zeros_like(features)
    for row in range(n_features):
        knot_scales = np.random.normal(loc=1.0, scale=sigma, size=(knot,))
        scale_at = interp1d(knot_positions, knot_scales, kind='linear', fill_value='extrapolate')
        # np.interp clamps sample positions that fall outside the original range.
        warped[row] = np.interp(steps * scale_at(steps), steps, features[row])

    return warped

def time_shift(features, max_shift=10):
    """Circularly shift ``features`` along axis 1 by a random offset.

    The offset is drawn uniformly from the closed interval
    ``[-max_shift, max_shift]``. ``np.random.randint`` excludes its upper
    bound, hence the ``+ 1``; this also makes ``max_shift=0`` a valid no-op.
    Values rolled off one end re-enter at the other (``np.roll``).

    Args:
        features: Array indexed as ``(feature, time)``.
        max_shift: Largest absolute shift, in time steps (non-negative int).

    Returns:
        Shifted array with the same shape as ``features``.
    """
    shift = np.random.randint(-max_shift, max_shift + 1)
    return np.roll(features, shift, axis=1)  # Shift along the time dimension


def add_gaussian_noise(features, std=0.1):
    """Return ``features`` plus i.i.d. zero-mean Gaussian noise with std ``std``."""
    return features + np.random.normal(loc=0, scale=std, size=features.shape)

def feature_dropout(features, drop_prob=0.2):
    """Zero out whole feature rows, each independently with probability ``drop_prob``.

    One Bernoulli keep/drop decision is drawn per row (axis 0) and broadcast
    over the remaining axis, so a dropped feature is zero at every time step.
    """
    keep_prob = 1 - drop_prob
    n_features = features.shape[0]
    keep_mask = np.random.binomial(1, keep_prob, (n_features, 1))
    return features * keep_mask

def scale_features(features, scale_range=(0.8, 1.2)):
    """Multiply every feature row by its own random factor.

    One factor per row (axis 0) is drawn uniformly from
    ``[scale_range[0], scale_range[1])`` and broadcast over the other axis.
    """
    low = scale_range[0]
    high = scale_range[1]
    per_feature_scale = np.random.uniform(low, high, size=(features.shape[0], 1))
    return features * per_feature_scale

def freq_perturbation(features, alpha=0.1):
    """Perturb ``features`` in the frequency domain along axis 1.

    Every FFT coefficient is multiplied by an independent real factor drawn
    from N(1, alpha); the real part of the inverse FFT is returned.
    """
    spectrum = np.fft.fft(features, axis=1)  # FFT along the time dimension
    gains = np.random.normal(1, alpha, size=spectrum.shape)
    perturbed = np.fft.ifft(spectrum * gains, axis=1)  # back to the time domain
    return np.real(perturbed)



class CustomDataset(Dataset):
    """Dataset yielding one item per unique ``location_id`` of ``df``.

    Each item is a dict with:
      * ``'precipitation'``: float32 tensor built by ``norm_feature_engineering``
        from that location's rows ordered by ``event_t``;
      * ``'label'``: float32 tensor with one value per time step -- the
        ``label`` column in train/validation mode, zeros in test mode.

    Args:
        df: Frame with at least ``location_id``, ``event_t``, ``precipitation``
            and ``flood_probability`` columns (plus ``label`` unless
            ``mode == 'test'``).
        augment: When True, ``time_shift``, ``add_gaussian_noise`` and
            ``feature_dropout`` are each applied with probability 0.3.
        mode: Pass the string ``'test'`` to skip label lookup; any other
            value (including the default ``True``) reads labels from ``df``.
    """

    def __init__(
        self, df: pd.DataFrame, augment: bool = False,
        mode: "bool | str" = True
    ):
        self.df = df
        self.augment = augment
        self.mode = mode
        # Item order == order of first appearance of each location in df.
        self.location_ids = df['location_id'].unique()

    def __len__(self):
        # Reuse the ids computed once in __init__ rather than rescanning df.
        return len(self.location_ids)

    def __getitem__(self, index):

        precipitation, label = self.__data_generation(index)

        if self.augment:
            if np.random.rand() < 0.3:
                precipitation = time_shift(precipitation)

            if np.random.rand() < 0.3:
                precipitation = add_gaussian_noise(precipitation)
            if np.random.rand() < 0.3:
                precipitation = feature_dropout(precipitation)

        return {'precipitation': torch.tensor(precipitation, dtype=torch.float32), 'label': torch.tensor(label, dtype=torch.float32)}

    def __data_generation(self, index):
        """Return ``(features, label)`` arrays for the location at ``index``."""
        location_id = self.location_ids[index]
        # Select this location's rows once and reuse them for label and features.
        rows = self.df[self.df['location_id'] == location_id].sort_values('event_t')

        if self.mode != 'test':
            label = rows.pivot(index='location_id', columns='event_t', values='label').to_numpy()
            label = np.squeeze(label, axis=0)
        else:
            # Placeholder labels sized to the actual number of time steps.
            label = np.zeros(len(rows), dtype='float32')

        precipitation = norm_feature_engineering(rows)

        return precipitation, label

# Models

In [6]:
class SinusoidalPositionalEmbedding(torch.nn.Module):
    """Fixed (non-learned) sinusoidal positional embedding added to the input.

    The table is stored as a non-persistent buffer: it follows the module
    through ``.to()`` / ``.cuda()`` / ``.half()`` but is NOT written to
    ``state_dict``, so the module's ``state_dict`` keys are unchanged.
    """

    def __init__(self, seq_len, dim_model):
        """
        Initialize the sinusoidal positional embedding.

        Args:
        seq_len (int): The length of the sequence (e.g., 730 days).
        dim_model (int): The model's embedding size (e.g., 512). Odd sizes are supported.
        """
        super(SinusoidalPositionalEmbedding, self).__init__()
        self.seq_len = seq_len
        self.dim_model = dim_model
        self.register_buffer(
            "positional_embedding", self._create_positional_embedding(), persistent=False
        )

    def _create_positional_embedding(self):
        """
        Create the sinusoidal positional embedding tensor.

        Returns:
        torch.Tensor: Positional embeddings of shape (seq_len, dim_model).
        """
        position = torch.arange(self.seq_len, dtype=torch.float).unsqueeze(1)  # Shape: (seq_len, 1)
        div_term = torch.exp(
            torch.arange(0, self.dim_model, 2, dtype=torch.float) *
            (-math.log(10000.0) / self.dim_model)
        )  # Frequencies: Shape (ceil(dim_model / 2),)
        angles = position * div_term  # Shape: (seq_len, ceil(dim_model / 2))

        pos_embedding = torch.zeros(self.seq_len, self.dim_model)
        pos_embedding[:, 0::2] = torch.sin(angles)  # sin on even indices
        # With an odd dim_model there is one fewer odd index than even index,
        # so drop the last frequency for the cosine half.
        pos_embedding[:, 1::2] = torch.cos(angles[:, : self.dim_model // 2])  # cos on odd indices

        return pos_embedding

    def forward(self, x):
        """
        Add positional embedding to input tensor.

        Args:
        x (torch.Tensor): Input tensor of shape (batch_size, seq_len, dim_model).

        Returns:
        torch.Tensor: Input tensor with positional embeddings added.

        Raises:
        ValueError: If the sequence length or feature size of ``x`` differs
            from the values given at construction.
        """
        batch_size, seq_len, dim_model = x.size()
        if seq_len != self.seq_len or dim_model != self.dim_model:
            raise ValueError(
                f"Input shape mismatch: Expected (batch_size, {self.seq_len}, {self.dim_model}), got {x.size()}"
            )
        # .to() is a no-op when the buffer already lives on x's device.
        return x + self.positional_embedding.to(x.device).unsqueeze(0)  # Broadcast positional embeddings


class Wave_Block(nn.Module):
    """Residual stack of gated, dilated 1-D convolutions.

    A 1x1 convolution maps ``in_channels`` to ``out_channels``; then, for each
    dilation ``2**0 .. 2**(dilation_rates - 1)``, the signal passes through
    ``tanh(filter_conv) * sigmoid(gate_conv)`` followed by a 1x1 convolution,
    and every stage output is accumulated into the residual sum that is returned.

    Args:
        in_channels: Channels of the input tensor.
        out_channels: Channels used inside the block and returned.
        dilation_rates: Number of dilated stages.
        kernel_size: Kernel size of the dilated convolutions.
    """

    def __init__(self, in_channels, out_channels, dilation_rates, kernel_size):
        super().__init__()
        self.num_rates = dilation_rates
        self.convs = nn.ModuleList()
        self.filter_convs = nn.ModuleList()
        self.gate_convs = nn.ModuleList()

        def dilated_conv(rate):
            # Replicate padding of rate * (kernel_size - 1) / 2 on each side.
            return nn.Conv1d(
                out_channels, out_channels,
                kernel_size=kernel_size,
                padding=(rate * (kernel_size - 1)) // 2,
                dilation=rate,
                padding_mode='replicate',
            )

        # Entry 1x1 projection, then (filter, gate, 1x1) per dilation stage.
        self.convs.append(nn.Conv1d(in_channels, out_channels, kernel_size=1))
        for exponent in range(dilation_rates):
            rate = 2 ** exponent
            self.filter_convs.append(dilated_conv(rate))
            self.gate_convs.append(dilated_conv(rate))
            self.convs.append(nn.Conv1d(out_channels, out_channels, kernel_size=1))

    def forward(self, x):
        x = self.convs[0](x)
        res = x
        stages = zip(self.filter_convs, self.gate_convs)
        for stage, (filter_conv, gate_conv) in enumerate(stages, start=1):
            gated = torch.tanh(filter_conv(x)) * torch.sigmoid(gate_conv(x))
            x = self.convs[stage](gated)
            res = res + x
        return res

class SEModule(nn.Module):
    """Channel gating: scales each channel by a sigmoid gate computed from
    its average over the length axis.

    Args:
        in_channels: Number of channels of the input (and of the gate).
        reduction: Accepted for interface compatibility; currently unused
            (the gate is a single 1x1 convolution without a bottleneck).
    """

    def __init__(self, in_channels, reduction=2):
        super(SEModule, self).__init__()
        self.conv = nn.Conv1d(in_channels, in_channels, kernel_size=1, padding=0)

    def forward(self, x):
        """Return ``x`` scaled per channel; ``x`` itself is left unmodified."""
        s = F.adaptive_avg_pool1d(x, 1)  # (batch, channels, 1)
        s = self.conv(s)
        # Out-of-place multiply: an in-place ``x *= ...`` would overwrite the
        # caller's tensor and fails on leaf tensors that require grad.
        return x * torch.sigmoid(s)

class WAVELT(nn.Module):
    """WaveNet-style encoder + chained bidirectional GRUs + Transformer encoder
    producing one logit per time step.

    Input is a ``(batch, inch, seq_len)`` tensor. ``seq_len`` must be 730
    because the positional embedding is built for exactly 730 positions and
    raises on any other length. Four ``Wave_Block`` stages (each followed by
    BatchNorm, Dropout and ``SEModule``) are densely concatenated with the raw
    input, squeezed to 32 channels, run through two GRUs, summed with a
    position-encoded linear projection of the raw input, and passed through a
    2-layer Transformer encoder. ``forward`` returns raw logits (no sigmoid)
    of shape ``(batch, 1, seq_len)``.

    Args:
        inch: Number of input channels. The default 364 presumably matches the
            feature count produced by ``norm_feature_engineering``
            (1 + 2*181 + 1) -- confirm if that function changes.
        kernel_size: Kernel size of the dilated convolutions in every ``Wave_Block``.
    """

    def __init__(self, inch=364, kernel_size=3):
        super().__init__()
        dropout_rate = 0.1
    
        # 1x1 stem: inch -> 32 channels.
        self.conv1d_1 = nn.Conv1d(inch, 32, kernel_size=1, stride=1, dilation=1, padding=0, padding_mode='replicate')
        self.batch_norm_conv_1 = nn.BatchNorm1d(32)
        self.dropout_conv_1 = nn.Dropout(dropout_rate)
    
        # 1x1 squeeze over the full dense concatenation (raw input + all four stage outputs).
        self.conv1d_2 = nn.Conv1d(inch+16+32+64+128, 32, kernel_size=1, stride=1, dilation=1, padding=0, padding_mode='replicate')
        self.batch_norm_conv_2 = nn.BatchNorm1d(32)
        self.dropout_conv_2 = nn.Dropout(dropout_rate)
    
        # Stage k consumes the raw input concatenated with all previous stage outputs;
        # the third argument is the number of dilated layers (dilations 1, 2, 4, ...).
        self.wave_block1 = Wave_Block(32, 16, 12, kernel_size)
        self.wave_block2 = Wave_Block(inch+16, 32, 8, kernel_size)
        self.wave_block3 = Wave_Block(inch+16+32, 64, 4, kernel_size)
        self.wave_block4 = Wave_Block(inch+16+32+64, 128, 1, kernel_size)
    
        self.se_module1 = SEModule(16)
        self.se_module2 = SEModule(32)
        self.se_module3 = SEModule(64)
        self.se_module4 = SEModule(128)        
    
        self.batch_norm_1 = nn.BatchNorm1d(16)
        self.batch_norm_2 = nn.BatchNorm1d(32)
        self.batch_norm_3 = nn.BatchNorm1d(64)
        self.batch_norm_4 = nn.BatchNorm1d(128)
    
        self.dropout_1 = nn.Dropout(dropout_rate)
        self.dropout_2 = nn.Dropout(dropout_rate)
        self.dropout_3 = nn.Dropout(dropout_rate)
        self.dropout_4 = nn.Dropout(dropout_rate)
    
        # Bidirectional with hidden size 256 -> 512 output features per step,
        # matching the Transformer's d_model.
        self.gru1 = nn.GRU(32, 256, num_layers=1, batch_first=True, bidirectional=True)
        self.gru2 = nn.GRU(32, 256, num_layers=1, batch_first=True, bidirectional=True)

        # Per-time-step linear projection of the raw input channels to 512 features.
        self.project_embedding = nn.Linear(inch, 512)
        # Fixed to 730 positions x 512 features; other sequence lengths raise ValueError.
        self.pos_emb = SinusoidalPositionalEmbedding(730, 512)
        # Two pre-norm encoder layers, 16 heads (512 // 32), feed-forward width 1024.
        self.transformer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model=512, nhead=512//32, dim_feedforward=2*512,
                dropout=0.1, activation=nn.GELU(), batch_first=True, norm_first=True), 2)
        # Head: 512 features -> 1 logit per step; kernel 7 with padding 3 keeps the length.
        self.logit = nn.Conv1d(512, 1, kernel_size=7, padding=3, stride=1)

    def extract_features(self, x):
        """Encode ``x`` of shape (batch, inch, seq_len) into (batch, seq_len, 512)."""

        # (batch, inch, seq) -> (batch, seq, inch) -> (batch, seq, 512)
        project_precipitation = self.project_embedding(x.permute(0, 2, 1))
    
        x0 = self.conv1d_1(x)
        x0 = F.relu(x0)
        x0 = self.batch_norm_conv_1(x0)
        x0 = self.dropout_conv_1(x0)
    
        x1 = self.wave_block1(x0)
        x1 = self.batch_norm_1(x1)
        x1 = self.dropout_1(x1)
        x1 = self.se_module1(x1)
        # Dense connection: stage output concatenated with the raw input on the channel axis.
        x2_base = torch.cat([x1, x], dim=1)
    
        x2 = self.wave_block2(x2_base)
        x2 = self.batch_norm_2(x2)
        x2 = self.dropout_2(x2)
        x2 = self.se_module2(x2)
        x3_base = torch.cat([x2_base, x2], dim=1)
    
        x3 = self.wave_block3(x3_base)
        x3 = self.batch_norm_3(x3)
        x3 = self.dropout_3(x3)
        x3 = self.se_module3(x3)
        x4_base = torch.cat([x3_base, x3], dim=1)
    
        x4 = self.wave_block4(x4_base)
        x4 = self.batch_norm_4(x4)
        x4 = self.dropout_4(x4)
        x4 = self.se_module4(x4)
        
        x5_base = torch.cat([x4_base, x4], dim=1)
        x5 = self.conv1d_2(x5_base)
        x5 = F.relu(x5)
        x5 = self.batch_norm_conv_2(x5)
        x5 = self.dropout_conv_2(x5)
    
        # Both GRUs read the same sequence; gru1 contributes only its final hidden
        # state, which initialises gru2 (gru_out1 itself is not used).
        gru_out1, hidden1 = self.gru1(x5.permute(0, 2, 1))
        gru_out2, _ = self.gru2(x5.permute(0, 2, 1), hidden1)
        # pos_emb returns the projected input with the positional table already added.
        pos_embedding = self.pos_emb(project_precipitation)
        transformer_input = pos_embedding  + gru_out2
        transformer_fusion = self.transformer(transformer_input)
        
        return transformer_fusion


    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return per-time-step logits of shape (batch, 1, seq_len) for input (batch, inch, seq_len)."""
        output = self.extract_features(x)
        # Conv1d expects channels first: (batch, seq, 512) -> (batch, 512, seq).
        out = self.logit(output.permute(0, 2, 1))
        return out

## OOF Score

In [7]:
# Load the saved out-of-fold predictions (column `wavelt_oof_preds`) and preview them.
oof_df = pd.read_csv("/kaggle/input/final-deepmind-comp-dataset/final_deepmind_comp_dataset/wavenet_subs_and_oof/wavenet_gru_transformer_oof_df_version9.csv")
oof_df

Unnamed: 0,event_id,precipitation,label,location_id,event_idx,event_t,flood_probability,fold,wavelt_oof_preds
0,id_i1un4og9advo_X_0,0.000,0,id_i1un4og9advo,6,0,0.003559,0,3.431129e-05
1,id_i1un4og9advo_X_1,0.000,0,id_i1un4og9advo,6,1,0.003559,0,8.220568e-07
2,id_i1un4og9advo_X_2,0.000,0,id_i1un4og9advo,6,2,0.003559,0,4.033099e-07
3,id_i1un4og9advo_X_3,0.000,0,id_i1un4og9advo,6,3,0.003559,0,4.060359e-08
4,id_i1un4og9advo_X_4,0.000,0,id_i1un4og9advo,6,4,0.003559,0,1.184524e-07
...,...,...,...,...,...,...,...,...,...
492015,id_0cpm4w3t78ic_X_725,0.000,0,id_0cpm4w3t78ic,669,725,0.747077,9,1.480869e-07
492016,id_0cpm4w3t78ic_X_726,0.000,0,id_0cpm4w3t78ic,669,726,0.747077,9,4.567850e-08
492017,id_0cpm4w3t78ic_X_727,15.247,0,id_0cpm4w3t78ic,669,727,0.747077,9,2.755118e-07
492018,id_0cpm4w3t78ic_X_728,0.000,0,id_0cpm4w3t78ic,669,728,0.747077,9,2.535866e-07


In [8]:
# Reload from disk so this cell can be re-run on its own.
oof_df = pd.read_csv("/kaggle/input/final-deepmind-comp-dataset/final_deepmind_comp_dataset/wavenet_subs_and_oof/wavenet_gru_transformer_oof_df_version9.csv")

print(f"logloss before normalizing: {log_loss(oof_df['label'], oof_df['wavelt_oof_preds'])}")

# Locations with at least one row whose flood_probability is >= 0.5.
locations_to_normalize = oof_df.loc[oof_df['flood_probability'] >= 0.5, 'location_id'].unique()
normalize_mask = oof_df['location_id'].isin(locations_to_normalize)

# Per-location sum of the predicted probabilities, repeated on every row.
oof_df['oof_sum_prob'] = oof_df.groupby('location_id')['wavelt_oof_preds'].transform('sum')

# Avoid division by zero
epsilon = 1e-8

# Start from the raw predictions, then rescale the selected locations so
# that their predictions sum to (approximately) one.
oof_df['oof_wavelt_norm'] = oof_df['wavelt_oof_preds']
oof_df.loc[normalize_mask, 'oof_wavelt_norm'] = (
    oof_df.loc[normalize_mask, 'wavelt_oof_preds']
    / (oof_df.loc[normalize_mask, 'oof_sum_prob'] + epsilon)
)

print(f"logloss after normalizing: {log_loss(oof_df['label'], oof_df['oof_wavelt_norm'])}")
oof_df.to_csv('wavelt_final_oof.csv', index=False)

logloss before normalizing: 0.0026192319050095864
logloss after normalizing: 0.00244253734196034


# Inference

In [9]:
# Test-mode dataset (zero placeholder labels, no augmentation), iterated in a fixed order.
test_dataset = CustomDataset(df=data_test, augment=False, mode="test")
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=CFG.batch_size,
    shuffle=False,
    drop_last=False,
)

In [10]:
def inference(test_loader, model, device):
    """Run ``model`` over ``test_loader`` and return sigmoid probabilities.

    Args:
        test_loader: Iterable of batches; each batch is a dict holding a
            ``'precipitation'`` tensor.
        model: Module returning logits for a precipitation batch.
        device: Device the input batches are moved to.

    Returns:
        1-D ``np.ndarray`` with every batch's predictions flattened and
        concatenated in loader order; an empty array if the loader yields
        no batches.
    """
    model.eval()
    preds = []
    with torch.no_grad():
        for batch in tqdm(test_loader, total=len(test_loader)):
            precipitation = batch['precipitation'].to(device)
            logits = model(precipitation)
            preds.append(logits.sigmoid().flatten().cpu().numpy())

    if not preds:
        # np.concatenate raises on an empty list; return an empty result instead.
        return np.empty(0, dtype=np.float32)

    return np.concatenate(preds)

In [11]:
# One checkpoint per cross-validation fold; the final prediction is the
# element-wise mean of the per-fold probabilities.
predictions = []
states = [f"/kaggle/input/final-deepmind-comp-dataset/final_deepmind_comp_dataset/wavenet_gru_weights/wavenet_gru_transformer_fold{fold}_best_version9.pth" for fold in range(10)]


for state_path in states:
    model = WAVELT().to(device)
    # Each checkpoint is a dict; the weights live under the 'model' key.
    checkpoint = torch.load(state_path, map_location=device)
    model.load_state_dict(checkpoint['model'])
    if CFG.t4_gpu:
        # Wrap the model that was just loaded (previously an undefined name was used here).
        model = nn.DataParallel(model)

    predictions.append(inference(test_loader, model, device))
predictions = np.mean(predictions, axis=0)

  0%|          | 0/14 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

In [12]:
# Preview the test frame (event_id, precipitation, location_id, event_idx, event_t, flood_probability).
data_test

Unnamed: 0,event_id,precipitation,location_id,event_idx,event_t,flood_probability
0,id_j7b6sokflo4k_X_0,0.00000,id_j7b6sokflo4k,0,0,0.999170
1,id_j7b6sokflo4k_X_1,3.01864,id_j7b6sokflo4k,0,1,0.999170
2,id_j7b6sokflo4k_X_2,0.00000,id_j7b6sokflo4k,0,2,0.999170
3,id_j7b6sokflo4k_X_3,16.61520,id_j7b6sokflo4k,0,3,0.999170
4,id_j7b6sokflo4k_X_4,2.56706,id_j7b6sokflo4k,0,4,0.999170
...,...,...,...,...,...,...
163515,id_u7z6uy4xdc9w_X_725,0.00000,id_u7z6uy4xdc9w,223,725,0.363692
163516,id_u7z6uy4xdc9w_X_726,0.00000,id_u7z6uy4xdc9w,223,726,0.363692
163517,id_u7z6uy4xdc9w_X_727,0.00000,id_u7z6uy4xdc9w,223,727,0.363692
163518,id_u7z6uy4xdc9w_X_728,0.09807,id_u7z6uy4xdc9w,223,728,0.363692


In [13]:
# Copy the id column so the new column is written to an independent frame,
# not to a slice of data_test (avoids pandas' chained-assignment ambiguity).
sample_submission = data_test[["event_id"]].copy()
# NOTE(review): positional assignment -- assumes `predictions` follows data_test's
# row order (locations contiguous, rows in event_t order); confirm upstream.
sample_submission['wavelt_preds'] = predictions
sample_submission.head()

Unnamed: 0,event_id,wavelt_preds
0,id_j7b6sokflo4k_X_0,3.015392e-05
1,id_j7b6sokflo4k_X_1,2.603836e-06
2,id_j7b6sokflo4k_X_2,5.399252e-07
3,id_j7b6sokflo4k_X_3,3.030703e-06
4,id_j7b6sokflo4k_X_4,2.612513e-05


In [14]:
# Store the fold-averaged predictions next to the inputs and write the full
# test frame to disk (the DataFrame index is written as the first column).
data_test['wavelt_sub_preds'] = predictions
data_test.to_csv(path_or_buf='wavelt_final_test_subs.csv')

In [15]:
# Write the (event_id, wavelt_preds) submission without the index column.
sample_submission.to_csv('wavelt_final_subs.csv', index=False)