# https://www.kaggle.com/fffrrt/all-in-one-rfcx-baseline-for-beginners

# TODO

* [X] K-fold
* [ ] Ensembles
* [ ] More data
* [X] Input data preparation
* [ ] Data Augmentation
* [ ] LWRAP
* [ ] Logging to a file.

* I'm normalizing the samples globally, with respect to the entire training set.
* https://www.kaggle.com/c/rfcx-species-audio-detection/discussion/207624
* Check if there are any other labels in the current range (t_min-t_max)

In [None]:
# Integer flag; presumably toggles saving artifacts to disk (0 = disabled).
# NOTE(review): not read by any visible cell — confirm it is still needed.
save_to_disk = 0

In [None]:
import os
import csv
import time
import pickle
from pathlib import Path
from tqdm import tqdm
import librosa

from PIL import Image

import numpy as np
import pandas as pd
import cv2
from skimage.transform import resize

import matplotlib.pyplot as plt

from IPython.display import FileLink # , Image

In [None]:
!mkdir ../working/bmps

In [None]:
# Directories holding the competition's .flac recordings.
BASE_TRAIN_PATH = "../input/rfcx-species-audio-detection/train/"
BASE_TEST_PATH = "../input/rfcx-species-audio-detection/test/"

# Output directory for the spectrogram images written by the preprocessing loop.
BASE_BMP_DIR = "../working/bmps/"
# NOTE(review): fft and hop are only referenced from commented-out code in the visible cells.
fft = 2048
hop = 512
sr = 48000  # sample rate; rebound by librosa.load(..., sr=None) in the preprocessing loop
length = 10 * sr  # window length in samples (10 seconds at `sr`)

# Placeholder bounds; both are overwritten from the train_tp.csv table in a later cell.
fmin = 24000
fmax = 0

In [None]:
# Load the annotation table; the cells below read its recording_id, species_id,
# t_min, t_max, f_min and f_max columns.
train_tp_fn = "../input/rfcx-species-audio-detection/train_tp.csv"
df_tr_tp = pd.read_csv(train_tp_fn)

In [None]:
# Lowest f_min and highest f_max over all annotation rows.
fmin, fmax = df_tr_tp.f_min.min(), df_tr_tp.f_max.max()
fmin, fmax  # notebook display

In [None]:
# Widen the band by 10% on each side (truncated to int); these values are
# later passed as fmin/fmax to librosa.feature.melspectrogram.
fmin = int(fmin * 0.9)
fmax = int(fmax * 1.1)
fmin, fmax  # notebook display

In [None]:
# Number of annotation rows; used in the progress message of the preprocessing loop.
total_rows = len(df_tr_tp)
total_rows  # notebook display

In [None]:
def normalize_melspec(X: np.ndarray):
    """Standardize ``X`` and rescale it to an 8-bit image.

    The array is shifted to zero mean, divided by its standard deviation
    (plus a small epsilon) and then min-max scaled to the range 0..255.

    Args:
        X: Array of numeric values (a spectrogram in this notebook).

    Returns:
        ``np.uint8`` array with the same shape as ``X``. When the
        standardized values span no more than ``1e-6`` (for example a
        constant input) the result is all zeros.
    """
    eps = 1e-6
    centered = X - X.mean()
    scaled = centered / (centered.std() + eps)
    lo, hi = scaled.min(), scaled.max()
    span = hi - lo
    if not span > eps:
        # Flat input: nothing to stretch, return an all-zero image.
        return np.zeros_like(scaled, dtype=np.uint8)
    # lo/hi are the array's own extremes, so no clipping is required before
    # mapping lo -> 0 and hi -> 255.
    return (255 * (scaled - lo) / span).astype(np.uint8)


In [None]:

# melspec = librosa.feature.melspectrogram(
#     y_batch, sr=sr, **self.melspectrogram_parameters)
# pcen = librosa.pcen(melspec, sr=sr, **self.pcen_parameters)
# clean_mel = librosa.power_to_db(melspec ** 1.5)
# melspec = librosa.power_to_db(melspec)

# norm_melspec = normalize_melspec(melspec)
# norm_pcen = normalize_melspec(pcen)
# norm_clean_mel = normalize_melspec(clean_mel)
# image = np.stack([norm_melspec, norm_pcen, norm_clean_mel], axis=-1)
# height, width, _ = image.shape
# image = cv2.resize(image, (int(width * 224 / height), 224))
# image = np.moveaxis(image, 2, 0)
# image = (image / 255.0).astype(np.float32)

# images.append(image)
# images = np.asarray(images).astype(np.float32)

In [None]:
# Keyword arguments forwarded to librosa.pcen when building the PCEN channel.
pcen_parameters = dict(
    gain=0.98,
    bias=2,
    power=0.5,
    time_constant=0.4,
    eps=0.000001,
)

In [None]:
# Build one 3-channel spectrogram image per annotation row and save it as
# "<recording_id>_<species_id>_<center sample>.bmp" inside BASE_BMP_DIR.
for row in df_tr_tp.itertuples():

    # sr=None keeps the file's native sample rate (this rebinds the global `sr`).
    wav, sr = librosa.load(f"{BASE_TRAIN_PATH}{row.recording_id}.flac", sr=None)

    # Annotation bounds converted from seconds to sample positions.
    t_min = float(row.t_min) * sr
    t_max = float(row.t_max) * sr

    # Take a `length`-sample window centred on the annotation, shifted inwards
    # when it would extend past either end of the recording.
    center = np.round((t_min + t_max) / 2)
    beginning = max(center - length / 2, 0)
    ending = beginning + length
    if ending > len(wav):
        ending = len(wav)
        # Clamp at 0: for a recording shorter than `length` a negative start
        # would be read as an offset from the end and drop leading samples.
        beginning = max(ending - length, 0)

    slic = wav[int(beginning): int(ending)]
    # The audio is passed as `y=`: librosa >= 0.10 only accepts it by keyword.
    mel_spec = librosa.feature.melspectrogram(y=slic, sr=sr, fmin=fmin, fmax=fmax, n_mels=128)
    pcen = librosa.pcen(mel_spec, sr=sr, **pcen_parameters)
    clean_mel = librosa.power_to_db(mel_spec ** 1.5)

    # Channels: power mel spectrogram (no dB conversion), PCEN, and the dB of
    # mel ** 1.5 — each scaled to uint8 independently.
    image = np.stack(
        [normalize_melspec(mel_spec), normalize_melspec(pcen), normalize_melspec(clean_mel)],
        axis=-1,
    )
    # Resize to a height of 224 pixels, keeping the aspect ratio.
    height, width, _ = image.shape
    image = cv2.resize(image, (int(width * 224 / height), 224))

    bmp = Image.fromarray(image)
    bmp.save(f"{BASE_BMP_DIR}{row.recording_id}_{row.species_id}_{str(int(center))}.bmp")
    if row.Index % 100 == 0 and row.Index > 0:
        print(f"Processed {str(row.Index)} train examples from {total_rows}")
    
    

In [None]:
import os
import random
import torch

num_birds = 24
batch_size = 16
SEED = 42


def set_seed(seed):
    """Seed the Python, NumPy and PyTorch random generators.

    Also exports ``PYTHONHASHSEED`` and sets the cuDNN flags
    ``deterministic=True`` / ``benchmark=False``.

    Args:
        seed: Integer seed applied to every generator.
    """
    os.environ["PYTHONHASHSEED"] = str(seed)
    for seeder in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        seeder(seed)
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False


set_seed(SEED)

In [None]:
from torch.utils.data import Dataset, DataLoader


class RainforestDataset(Dataset):
    """Dataset of saved spectrogram images with one-hot species labels.

    Every file is expected to be named
    ``<recording_id>_<species_id>_<center>.bmp``; the species id is parsed
    from the file name.
    """

    def __init__(self, filelist):
        """Store the image paths and pre-compute their one-hot label vectors.

        Args:
            filelist: Sequence of image paths.
        """
        self.filelist = filelist
        self.labels = []
        for file in filelist:
            # Parse the file name only, so underscores in a directory name
            # cannot shift the species field.
            label = int(Path(file).name.split('_')[1])
            label_array = np.zeros(num_birds, dtype=np.single)
            label_array[label] = 1.0
            self.labels.append(label_array)

    def __len__(self):
        """Return the number of images."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for item ``idx``.

        The image is loaded from disk, its last axis is moved to the front
        and the values are divided by 255 and cast to float32.
        """
        # The context manager closes the file even if decoding fails.
        with Image.open(self.filelist[idx]) as img:
            mel_spec = np.array(img)

        mel_spec = np.moveaxis(mel_spec, 2, 0)
        mel_spec = (mel_spec / 255.0).astype(np.float32)
        return mel_spec, self.labels[idx]



In [None]:
# Collect the saved spectrogram images together with the species label encoded
# in each file name ("<recording_id>_<species_id>_<center>.bmp").
file_list = []
label_list = []
# iterdir() yields entries in arbitrary order; sorting keeps the seeded fold
# split reproducible between runs.
for file in sorted(Path(BASE_BMP_DIR).iterdir()):

    file_list.append(file.as_posix())
    # Parse the file name only, so underscores in a parent directory cannot
    # shift the species field.
    label_list.append(file.name.split('_')[1])

In [None]:
# {
#     "0": {
#         "train_idx": [1, 2, 3],
#         "valid_idx": [4, 5, 6]
#     },
#     "1": {
#         "train_idx": [2, 3, 5],
#         "valid_idx": [1, 4, 6]
#     },
#     ...
# }

In [None]:
from sklearn.model_selection import StratifiedKFold

# TODO: define n_splits as a constant at the beginning of the notebook.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=SEED)

# fold id -> file paths of that fold's training / validation split
# (the keys say "idx", but the stored values are paths taken from file_list).
fold_indices = {}
for fold_id, (train_idx, valid_idx) in enumerate(skf.split(file_list, label_list)):
    fold_indices[fold_id] = {
        "train_idx": np.take(file_list, train_idx),
        "valid_idx": np.take(file_list, valid_idx),
    }
    
    # if fold_id == 0:
    #     train_files = np.take(file_list, train_idx)
    #     valid_files = np.take(file_list, valid_idx)

In [None]:
# dataset = RainforestDataset(fold_indices[0]["train_idx"])
# data_loader = DataLoader(dataset, batch_size=2, shuffle=False)

# for data, target in data_loader:
#     print()
#     break

In [None]:
# Install the resnest package; stdout is redirected to /dev/null.
!pip install resnest > /dev/null

In [None]:
import torch.nn as nn
from resnest.torch import resnest50

In [None]:
# Number of epochs train_fold runs for each fold.
N_EPOCHS = 60

# Use the GPU when one is available, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
DEVICE  # notebook display

In [None]:
def save_losses_plot(experiment_dir, fold_id, train_losses, valid_losses):
    """Plot a fold's train/validation loss curves and save them as a PNG.

    Args:
        experiment_dir: ``Path`` of the directory receiving the figure.
        fold_id: Fold identifier used in the legend labels and the file name.
        train_losses: Sequence of training losses to plot.
        valid_losses: Sequence of validation losses to plot.
    """
    curves = {
        f"fold_{fold_id}_train_loss": train_losses,
        f"fold_{fold_id}_valid_loss": valid_losses,
    }
    for legend_label, history in curves.items():
        plt.plot(history, label=legend_label)
    plt.legend(loc="best")
    target = experiment_dir / f"fold_{fold_id}_losses.png"
    plt.savefig(target)
    # Close the current figure so the next call starts from an empty plot.
    plt.close()

In [None]:
def train_fold(experiment_dir, fold_id, model, loss_fn, optimizer, scheduler, train_data_loader, valid_data_loader):
    """Train ``model`` on one fold for ``N_EPOCHS`` epochs and checkpoint the best epochs.

    Two checkpoints are written to ``experiment_dir`` with ``torch.save``:
    ``best_model_fold_<fold_id>.pth`` whenever the mean validation loss
    improves, and ``correct_best_model_fold_<fold_id>.pth`` whenever the
    number of correct top-1 validation predictions improves.

    Args:
        experiment_dir: ``Path`` of the directory receiving the checkpoints.
        fold_id: Fold identifier used in file names and log messages.
        model: Network to train; batches are moved to ``DEVICE`` before the
            forward pass, so the model is expected to live there already.
        loss_fn: Callable ``loss_fn(output, target)`` returning a scalar loss.
        optimizer: Optimizer updating the model's parameters.
        scheduler: Learning-rate scheduler, stepped once per epoch.
        train_data_loader: Iterable of ``(data, target)`` training batches.
        valid_data_loader: Iterable of ``(data, target)`` validation batches.

    Returns:
        Tuple ``(train_losses, valid_losses)`` holding one mean loss per epoch.
    """
    best_corrects = 0
    best_val_loss = float("inf")
    train_losses = []
    valid_losses = []

    train_start_time = time.time()

    for epoch in range(N_EPOCHS):

        train_loss = []
        valid_loss = []

        single_train_epoch_start = time.time()

        # Training mode: dropout is active and batch-norm uses batch statistics.
        model.train()
        for data, target in train_data_loader:

            data = data.float().to(DEVICE)
            target = target.to(DEVICE)

            optimizer.zero_grad()
            output = model(data)
            loss = loss_fn(output, target)
            loss.backward()
            optimizer.step()

            train_loss.append(loss.item())

        current_train_loss = sum(train_loss) / len(train_loss)
        train_losses.append(current_train_loss)

        # Evaluation mode: without it, validation would run with dropout
        # enabled and batch-norm would keep updating its running statistics.
        model.eval()
        epoch_corrects = 0
        total_valid_ = 0
        with torch.no_grad():

            for data, target in valid_data_loader:

                data = data.float().to(DEVICE)
                target = target.to(DEVICE)

                output = model(data)
                loss = loss_fn(output, target)
                valid_loss.append(loss.item())

                # Top-1 accuracy: predicted class vs. the one-hot target's class.
                _, answers = torch.max(output, 1)
                _, targets = torch.max(target, 1)
                epoch_corrects += (answers == targets).sum().item()
                total_valid_ += len(answers)

        current_valid_loss = sum(valid_loss) / len(valid_loss)
        valid_losses.append(current_valid_loss)

        # Covers both the training and the validation pass of this epoch.
        took_single_epoch_train = time.time() - single_train_epoch_start

        # Learning rate of the last parameter group, for logging only.
        lr = optimizer.param_groups[-1]["lr"]

        print(f"{epoch+1}/{N_EPOCHS}. Train_loss: {current_train_loss:.5f} Valid_loss: {current_valid_loss:.5f} LR: {str(lr)} Took: {took_single_epoch_train:.3f} secs.")

        if current_valid_loss < best_val_loss:

            print(f"Loss improved from {best_val_loss:.5f} to {current_valid_loss:.5f}. Saving the model...")
            torch.save(model, experiment_dir / f"best_model_fold_{fold_id}.pth")
            best_val_loss = current_valid_loss

        if epoch_corrects > best_corrects:
            print(f"Correct count increased from {best_corrects} to {epoch_corrects} out of {total_valid_}. Saving the model...")
            torch.save(model, experiment_dir / f"correct_best_model_fold_{fold_id}.pth")
            best_corrects = epoch_corrects

        # Scheduler update.
        scheduler.step()

    total_train_time_in_sc = time.time() - train_start_time
    print(f"Training fold {fold_id} took {total_train_time_in_sc:.3f} secs.")

    return train_losses, valid_losses


In [None]:
def generate_resnest_50(pretrained):
    """Build a ResNeSt-50 whose classifier head outputs ``num_birds`` values.

    Args:
        pretrained: Forwarded to ``resnest50`` as its ``pretrained`` argument.

    Returns:
        The model with ``fc`` replaced by three linear layers
        (2048 -> 1024 -> 1024 -> ``num_birds``) with ReLU and dropout
        (p=0.3) after the first two; no activation follows the last layer.
    """
    # Honour the caller's choice instead of always requesting pretrained weights.
    model = resnest50(pretrained=pretrained)

    model.fc = nn.Sequential(
        nn.Linear(2048, 1024),
        nn.ReLU(),
        nn.Dropout(p=0.3),
        nn.Linear(1024, 1024),
        nn.ReLU(),
        nn.Dropout(p=0.3),
        nn.Linear(1024, num_birds)
    )
    return model

In [None]:
def train():
    """Train one ResNeSt-50 per cross-validation fold and store the artifacts.

    For every entry of ``fold_indices`` a fresh pretrained model is trained
    with Adam (lr=0.001), a cosine-annealed learning rate (T_max=10) and
    ``BCEWithLogitsLoss``. The loss histories returned by ``train_fold`` are
    pickled and plotted into the experiment directory.

    Raises:
        FileExistsError: If the experiment directory already exists
            (``Path.mkdir`` is called without ``exist_ok``).
    """
    EXPERIMENT_DIR = Path("resnest50_v2_image_features_updated_adam_e_60_d_0.3")
    EXPERIMENT_DIR.mkdir()
    set_seed(SEED)

    # Walk the folds that were actually generated instead of repeating the
    # fold count as a second hard-coded literal.
    for fold_id, fold_files in fold_indices.items():

        print(f"{'=' * 20} Training fold: {fold_id} {'=' * 20}")

        train_dataset = RainforestDataset(fold_files["train_idx"])
        valid_dataset = RainforestDataset(fold_files["valid_idx"])

        train_data_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        valid_data_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)

        model = generate_resnest_50(pretrained=True)

        optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)

        # Unweighted loss; the model's outputs are passed to it directly.
        loss_fn = nn.BCEWithLogitsLoss()

        model = model.to(DEVICE)
        loss_fn = loss_fn.to(DEVICE)
        fold_train_losses, fold_valid_losses = train_fold(EXPERIMENT_DIR, fold_id, model, loss_fn, optimizer, scheduler, train_data_loader, valid_data_loader)

        # Saving the loss history to file.
        with open(EXPERIMENT_DIR / f"fold_{fold_id}_train_losses.pkl", "wb") as f:
            pickle.dump(fold_train_losses, f)

        with open(EXPERIMENT_DIR / f"fold_{fold_id}_valid_losses.pkl", "wb") as f:
            pickle.dump(fold_valid_losses, f)

        save_losses_plot(EXPERIMENT_DIR, fold_id, fold_train_losses, fold_valid_losses)

        print(f"{'=' * 20} DONE Training fold: {fold_id} {'=' * 20}")

In [None]:
train()

In [None]:
from IPython.display import Image as ImageDisplay

In [None]:
ImageDisplay("resnest50_v2_image_features_updated_adam_e_60_d_0.3/fold_0_losses.png")

In [None]:
ImageDisplay("resnest50_v2_image_features_updated_adam_e_60_d_0.3/fold_1_losses.png")

In [None]:
ImageDisplay("resnest50_v2_image_features_updated_adam_e_60_d_0.3/fold_2_losses.png")

In [None]:
ImageDisplay("resnest50_v2_image_features_updated_adam_e_60_d_0.3/fold_3_losses.png")

In [None]:
ImageDisplay("resnest50_v2_image_features_updated_adam_e_60_d_0.3/fold_4_losses.png")

In [None]:
!ls resnest50_v2_image_features_updated_adam_e_60_d_0.3/

In [None]:
!zip -r resnest50_v2_image_features_updated_adam_e_60_d_0.3.zip resnest50_v2_image_features_updated_adam_e_60_d_0.3

In [None]:
FileLink("resnest50_v2_image_features_updated_adam_e_60_d_0.3.zip")

In [None]:
# # TODO: Calculate the correct counts.
# best_corrects = 0
# best_val_loss = float("inf")
# train_losses = []
# valid_losses = []

# train_start_time = time.time()
# print(f"{'='*10} Training started {'='*10}")
# for epoch in range(N_EPOCHS):
    
#     train_loss = []
#     valid_loss = []
#     # train_corr = []
    
#     single_train_epoch_start = time.time()
#     # Single epoch train.
#     for idx, (data, target) in enumerate(train_data_loader):

#         data = data.float() # float64 to float32
#         data = data.to(DEVICE)
#         target = target.to(DEVICE)
        
#         optimizer.zero_grad()
#         output = model(data)
#         loss = loss_fn(output, target)
#         loss.backward()
#         optimizer.step()
#         # _, answers = torch.max(output, 1)
#         # _, targets = torch.max(target, 1)
#         train_loss.append(loss.item())
    
#     current_train_loss = sum(train_loss) / len(train_loss)
#     train_losses.append(current_train_loss)
#     took_single_epoch_train = time.time() - single_train_epoch_start
    
#     for g in optimizer.param_groups:
#         lr = g["lr"]
#         print(f"Epoch: {epoch} training done. LR: {str(lr)} Loss: {current_train_loss:.5f} Took: {took_single_epoch_train:.3f} secs.")
    
#     single_valid_start = time.time()
#     # Validation.
#     with torch.no_grad():
        
#         for idx, (data, target) in enumerate(valid_data_loader):
            
#             data = data.float()
#             data = data.to(DEVICE)
#             target = target.to(DEVICE)
            
#             output = model(data)
#             loss = loss_fn(output, target)
#             valid_loss.append(loss.item())
            
#     current_valid_loss = sum(valid_loss) / len(valid_loss)
#     valid_losses.append(current_valid_loss)
#     took_single_epoch_valid = time.time() - single_valid_start
#     print(f"Epoch: {epoch} validation done. LR: {str(lr)} Valid Loss: {current_valid_loss:.5f} Took: {took_single_epoch_valid:.3f} secs.")
    
#     if current_valid_loss < best_val_loss:
        
#         print(f"Loss improved from {best_val_loss:.5f} to {current_valid_loss:.5f}. Saving the model...")
#         torch.save(model, "best_model.pth")
#         best_val_loss = current_valid_loss
            
#     # Scheduler update.
#     scheduler.step()
    
#     print("=" * 50)
    
# total_train_time_in_sc = time.time() - train_start_time
# print(f"{'='*10} Training ended {'='*10}")
# print(f"Training took {total_train_time_in_sc} secs.")

In [None]:
import matplotlib.pyplot as plt

In [None]:
def load_test_file(filename):
    """Split a recording into ``length``-sample windows and build one model input per window.

    Each window goes through the same steps as the training images: mel
    spectrogram, PCEN and dB of ``mel ** 1.5`` are scaled to uint8, stacked
    as three channels and resized to a height of 224 pixels.

    Args:
        filename: Path of the audio file to load.

    Returns:
        List with one float32 array per window; the channel axis is moved
        to the front and the values are divided by 255.
    """
    # sr=None keeps the file's native sample rate.
    wav, sr = librosa.load(filename, sr=None)
    segments = int(np.ceil(len(wav) / length))

    mel_array = []

    for i in range(segments):

        start = i * length
        if start + length > len(wav):
            # Last, partial window: use the final `length` samples instead.
            # Clamp at 0 so a recording shorter than `length` is not sliced
            # with a negative start (which would drop its leading samples).
            start = max(len(wav) - length, 0)
        slic = wav[start: start + length]

        # The audio is passed as `y=`: librosa >= 0.10 only accepts it by keyword.
        mel_spec = librosa.feature.melspectrogram(y=slic, sr=sr, fmin=fmin, fmax=fmax, n_mels=128)
        pcen = librosa.pcen(mel_spec, sr=sr, **pcen_parameters)
        clean_mel = librosa.power_to_db(mel_spec ** 1.5)

        image = np.stack(
            [normalize_melspec(mel_spec), normalize_melspec(pcen), normalize_melspec(clean_mel)],
            axis=-1,
        )
        height, width, _ = image.shape
        image = cv2.resize(image, (int(width * 224 / height), 224))

        image = np.moveaxis(image, 2, 0)
        image = (image / 255.0).astype(np.float32)
        mel_array.append(image)

    return mel_array
        

In [None]:
# torch.save(model, "last_epoch.pth")

# model = resnest50(pretrained=True)

# model.fc = nn.Sequential(
#     nn.Linear(2048, 1024),
#     nn.ReLU(),
#     nn.Dropout(p=0.2),
#     nn.Linear(1024, 1024),
#     nn.ReLU(),
#     nn.Dropout(p=0.2),
#     nn.Linear(1024, num_birds)
# )

# model = torch.load("../working/best_model.pth")
# model.eval()
# model.cuda()

In [None]:
# with open("submission.csv", "w", newline="") as f:
#     pass

# Submission header: the recording id followed by the score columns s0..s23.
columns = ['recording_id'] + [f's{i}' for i in range(24)]

In [None]:
# Load every fold's lowest-validation-loss checkpoint for ensembling.
# NOTE: torch.load unpickles the whole model object — only load trusted files.
k_fold_models = []

for fold_id in range(5):

    # map_location lets checkpoints saved on a GPU load on a CPU-only session too.
    model = torch.load(
        f"../working/resnest50_v2_image_features_updated_adam_e_60_d_0.3/best_model_fold_{fold_id}.pth",
        map_location=DEVICE,
    )
    model.eval()
    model.to(DEVICE)
    k_fold_models.append(model)

In [None]:
# Load every fold's checkpoint with the highest count of correct validation predictions.
# NOTE: torch.load unpickles the whole model object — only load trusted files.
k_fold_models_correct = []

for fold_id in range(5):

    # map_location lets checkpoints saved on a GPU load on a CPU-only session too.
    model = torch.load(
        f"../working/resnest50_v2_image_features_updated_adam_e_60_d_0.3/correct_best_model_fold_{fold_id}.pth",
        map_location=DEVICE,
    )
    model.eval()
    model.to(DEVICE)
    k_fold_models_correct.append(model)

In [None]:
def _ensemble_scores(models, batch):
    """Sum, over ``models``, each model's per-class maximum output across the windows in ``batch``."""
    total = None
    for model in models:
        window_max = torch.max(model(batch), dim=0)[0].cpu()
        total = window_max if total is None else total + window_max
    return total


test_files = list(Path(BASE_TEST_PATH).iterdir())
submission_rows = []
total_test_files = len(test_files)
submission_rows2 = []

# Inference only: no_grad() skips autograd bookkeeping, saving memory and time.
with torch.no_grad():
    for test_file in tqdm(test_files):

        # Stack the per-window arrays first; torch.tensor() on a list of
        # ndarrays is very slow.
        data = torch.from_numpy(np.stack(load_test_file(test_file.as_posix())))
        data = data.float().to(DEVICE)

        file_id = test_file.name.split(".")[0]

        # One row per recording for each of the two checkpoint ensembles.
        submission_rows.append([file_id] + _ensemble_scores(k_fold_models, data).tolist())
        submission_rows2.append([file_id] + _ensemble_scores(k_fold_models_correct, data).tolist())

In [None]:
# Write the predictions of the k_fold_models ensemble and show a download link.
pd.DataFrame(submission_rows, columns=columns).to_csv("submission.csv", index=False)
FileLink("submission.csv")

In [None]:
from IPython.display import FileLink

In [None]:
FileLink("submission.csv")

In [None]:
# Write the predictions of the k_fold_models_correct ensemble.
# NOTE(review): this reuses the name "submission.csv" and therefore overwrites the
# file written from submission_rows — confirm which one should be submitted.
pd.DataFrame(submission_rows2, columns=columns).to_csv("submission.csv", index=False)
FileLink("submission.csv")

In [None]:
# Test audio directory and a directory of .npy inputs (presumably pre-computed MFCC features — confirm).
TEST_AUDIO_ROOT = Path("../input/rfcx-species-audio-detection/test")
TEST_MFCC_ROOT = "../input/kkiller-rfcx-test-mfcc-1-0400/test_mfcc_d10_s10_sr32000"

# One row per test .flac file; note that this rebinds `data`, which the
# earlier inference loop used for its input tensor.
data = pd.DataFrame({
    "recording_id": [path.stem for path in Path(TEST_AUDIO_ROOT).glob("*.flac")],
})
data["mfcc_root"] = TEST_MFCC_ROOT
print(data.shape)
data.head()

In [None]:
# Directories that are searched for *.npy files by the next cell
# (the names suggest five shards of pre-computed test MFCCs — confirm).
TEST_MFCC_ROOTs = [
    "../input/kkiller-rfcx-test-mfcc-0000-0400/test_mfcc_d10_s2_sr32000_0000_0400",
    "../input/kkiller-rfcx-test-mfcc-0400-0800/test_mfcc_d10_s2_sr32000_0400_0800",
    "../input/kkiller-rfcx-test-mfcc-0800-1200/test_mfcc_d10_s2_sr32000_0800_1200",
    "../input/kkiller-rfcx-test-mfcc-1200-1600/test_mfcc_d10_s2_sr32000_1200_1600",
    "../input/kkiller-rfcx-test-mfcc-1600-2000/test_mfcc_d10_s2_sr32000_1600_2000",
]

In [None]:
# Index every .npy file found under the shard directories as
# (recording id, containing directory).
mfccs = []
for mfcc_root in TEST_MFCC_ROOTs:
    mfccs.extend(
        (npy_path.stem, npy_path.parent.as_posix())
        for npy_path in Path(mfcc_root).glob("*.npy")
    )
mfccs = pd.DataFrame(mfccs, columns=["recording_id", "mfcc_root"])

# Keep only the recordings for which a .npy file exists (inner join),
# replacing the previous mfcc_root column.
recording_ids = data[["recording_id"]]
data = recording_ids.merge(mfccs, on="recording_id")
print(data.shape)
data.head()

In [None]:
# Settings for the inference cells that read the pre-computed .npy inputs.
# NOTE(review): NUM_CLASSES, DURATION and STRIDE are not read by any visible cell — confirm they are still needed.
NUM_CLASSES = 24
SR = 32_000
DURATION =  10
STRIDE = 5


# Neural Net
TEST_BATCH_SIZE = 5  # batch size passed to the test DataLoader
TEST_NUM_WORKERS = 2  # worker count passed to the test DataLoader

# When truthy (together with TEST_MFCC_ROOT), the test set is read through SimpleRFCXDataset.
USE_PRE_COMPUTED_MFCC = True

In [None]:
class SimpleRFCXDataset(Dataset):
    """Dataset that serves arrays stored as ``.npy`` files.

    ``data`` is a DataFrame with ``mfcc_root`` (directory) and
    ``recording_id`` columns; item ``idx`` is the array loaded from
    ``<mfcc_root>/<recording_id>.npy``.
    """

    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        record = self.data.iloc[idx]
        npy_path = (Path(record.mfcc_root) / record.recording_id).with_suffix(".npy")
        return np.load(npy_path)

In [None]:
# NOTE(review): RFCXDataset is not defined in any visible cell; the fallback branch
# would raise NameError if USE_PRE_COMPUTED_MFCC or TEST_MFCC_ROOT were falsy.
test_data = SimpleRFCXDataset(data) if (USE_PRE_COMPUTED_MFCC and TEST_MFCC_ROOT) else RFCXDataset(data=data, sr=SR)
test_loader = DataLoader(test_data, batch_size=TEST_BATCH_SIZE, num_workers=TEST_NUM_WORKERS)

In [None]:
import gc
gc.collect()

In [None]:
# Average, over the fold models, the per-recording maximum sigmoid score
# across frames.
preds = []
with torch.no_grad():
    for xb in tqdm(test_loader):
        bsize, nframes = xb.shape[:2]
        # Fold the frame axis into the batch axis for the forward pass.
        xb = xb.to(DEVICE).view(bsize * nframes, *xb.shape[2:])

        pred = 0.
        for net in k_fold_models:
            probs = torch.sigmoid(net(xb))
            # Restore the (recording, frame) axes and keep the best frame per class.
            per_recording = probs.view(bsize, nframes, *probs.shape[1:]).max(1).values
            pred += per_recording.detach().cpu().numpy()

        pred /= len(k_fold_models)
        preds.append(pred)

preds = np.vstack(preds)
preds.shape

In [None]:
# Assemble the submission frame: recording_id first, then the scores s0..s23.
score_columns = [f"s{i}" for i in range(24)]
sub = pd.DataFrame(preds, columns=score_columns)
sub.insert(0, "recording_id", data["recording_id"].values[:len(sub)])
print(sub.shape)
sub.head()

In [None]:
# NOTE(review): appears to repeat the recording_id assignment and column ordering
# already done when `sub` was created — confirm whether this cell is still needed.
sub["recording_id"] = data["recording_id"].values[:len(sub)]
sub = sub[["recording_id"] + [f"s{i}" for i in range(24)]]
print(sub.shape)
sub.head()

In [None]:
sub.to_csv("submission.csv", index=False)

In [None]:
FileLink("submission.csv")