In [1]:
import sys
# Put the project's source directory on the import path before the imports below.
# NOTE(review): presumably this is where the local modules imported further down
# (datasets, preprocessing, models, pipeline_utils) live — confirm.
sys.path.append("/media/paniquex/samsung_2tb/IDAO_2021_oski/src")

from collections import defaultdict
import os
import random

import numpy as np
import pandas as pd
import yaml
import shutil
from sklearn.model_selection import train_test_split

from torch.utils.data import DataLoader
from torch import nn
import torch


import audiomentations
from torchlibrosa.stft import Spectrogram, LogmelFilterBank
from torchlibrosa.augmentation import SpecAugmentation

from transformers import get_linear_schedule_with_warmup

from datasets import SimpleDataset
from preprocessing import CMVN, MelSpecComputer, MFCCComputer, MelSpecComputer3D
from models import Wrapper
from pipeline_utils import evaluate_test
from models import ENCODER_PARAMS


# Switch the working directory; relative paths used later in the notebook
# (e.g. the "predictions_*.csv" outputs) are resolved against this directory.
os.chdir("/media/paniquex/samsung_2tb/")

In [2]:
# Experiment configuration that drives every later cell (paths, task type,
# dataloader settings, model name, ...).
PATH_TO_CFG = "/media/paniquex/samsung_2tb/IDAO_2021_oski/experiments/check_experiment/config.yaml"
with open(PATH_TO_CFG, "r") as file:
    # yaml.load() without an explicit Loader is deprecated since PyYAML 5.1 and
    # raises TypeError in PyYAML 6. safe_load only builds plain Python data
    # (dicts, lists, scalars). If the config ever needs Python-specific YAML
    # tags, switch to yaml.load(file, Loader=yaml.FullLoader) instead.
    config = yaml.safe_load(file)

# Root directory of the dataset; the CSV manifests are read relative to it.
DATA_ROOT = config["general"]["data_root"]

def fix_seed(seed):
    """Seed every RNG used by the notebook and put cuDNN in deterministic mode.

    Args:
        seed: Integer seed applied to ``random``, NumPy and PyTorch (CPU and
            every CUDA device), and exported as ``PYTHONHASHSEED``.
    """
    random.seed(seed)
    np.random.seed(seed)
    # Hash randomisation of the running interpreter is fixed at start-up, so
    # this only influences Python processes launched after this call.
    os.environ["PYTHONHASHSEED"] = str(seed)
    torch.manual_seed(seed)
    # Seed all GPUs, not just the current one; silently ignored without CUDA.
    torch.cuda.manual_seed_all(seed)  # type: ignore
    torch.backends.cudnn.deterministic = True  # type: ignore
    # The cuDNN autotuner may pick different kernels from run to run, which
    # undermines the determinism requested above, so it has to stay off.
    torch.backends.cudnn.benchmark = False  # type: ignore

fix_seed(config["general"]["seed"])

# Configured GPU ordinals as strings; `ids` keeps the comma-joined form that
# this cell has always exposed.
device_ids = [str(device_id) for device_id in config["general"]["device_ids"]]
ids = ",".join(device_ids)

# torch.device accepts exactly one ordinal ("cuda:0"); a joined string such as
# "cuda:0,1" is rejected. Inference runs on the first configured GPU and falls
# back to the CPU when no GPU is configured or CUDA is unavailable.
if device_ids and torch.cuda.is_available():
    DEVICE = torch.device(f"cuda:{device_ids[0]}")
else:
    DEVICE = torch.device("cpu")

  This is separate from the ipykernel package so we can avoid doing imports until


In [3]:
# File names in the output directory that contain "best_model_fold"; each one
# is later loaded as a checkpoint for the ensemble.
model_names = [
    entry
    for entry in os.listdir(config['general']['out_path'])
    if "best_model_fold" in entry
]

In [4]:
import albumentations
from albumentations import *
from albumentations.pytorch import ToTensorV2

# Test-time preprocessing: resize to 384x384, normalise each channel, then
# convert to a torch tensor.
# NOTE(review): the mean/std values look like the usual ImageNet RGB
# statistics — confirm they match what the checkpoints were trained with.
_resize_step = albumentations.Resize(384, 384)
_normalize_step = albumentations.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)
transforms_test = albumentations.Compose(
    [_resize_step, _normalize_step, ToTensorV2()]
)

In [5]:
# Test manifest -> dataset -> dataloader. The dataloader keyword arguments
# come straight from the "testing.dataloader" section of the config.
test = pd.read_csv(os.path.join(DATA_ROOT, "test.csv"))
test_dataset = SimpleDataset(df=test, mode="test", transform=transforms_test)
test_dataloader = DataLoader(dataset=test_dataset, **config["testing"]["dataloader"])

In [6]:
# spectrogram_extractor = Spectrogram(**config["preprocessing"]["spectrogram"])
# logmel_extractor = LogmelFilterBank(sr=config["preprocessing"]["sr"],
#                                     **config["preprocessing"]["logmel"])

# if config["preprocessing"]["features_type"] == "logmel":
#     spectrogram_extractor = Spectrogram(**config["preprocessing"]["spectrogram"])
#     logmel_extractor = LogmelFilterBank(sr=config["preprocessing"]["sr"],
#                                         **config["preprocessing"]["logmel"])
# elif config["preprocessing"]["features_type"] == "melspec":
#     melspec_extractor = MelSpecComputer(config=config)
# elif config["preprocessing"]["features_type"] == "mfcc":
#     mfcc_extractor = MFCCComputer(config=config)
# elif config["preprocessing"]["features_type"] == "3D":
#     melspec_extractor = MelSpecComputer3D(config=config)

# # Spec augmenter
# if "SpecAug" in config["testing"]["augmentations"]:
#     spec_augmenter = SpecAugmentation(**config["training"]["augmentations"]["SpecAug"])
# else:
#     spec_augmenter = None

# if config["preprocessing"]["use_cmvn"]:
#     cmvn = CMVN(2)
# else:
#     cmvn = None


In [7]:
# Regression runs override the configured number of classes with 1
# (presumably a single scalar output — confirm against the Wrapper model).
general_cfg = config["general"]
if general_cfg["task_type"] == "regression":
    general_cfg["classes_num"] = 1

In [8]:
# Build the encoder registered under the configured model name.
model_name = config["general"]["model_name"]
encoder = ENCODER_PARAMS[model_name]["init_op"]()

# The audio feature-extraction branches (logmel / melspec / mfcc / 3D, with
# optional CMVN) that used to assemble `feat_module` are disabled in this
# notebook, so the wrapper below receives feat_module=None.

# NOTE(review): AngularPenaltySMLoss is not imported in any cell visible here,
# so a config with training.loss == "AAM" would raise NameError — confirm where
# it should be imported from.
criterion_aam = AngularPenaltySMLoss if config["training"]["loss"] == "AAM" else None

# Wrap the encoder for inference: no spectrogram augmentation and no mixup.
wrapper_kwargs = dict(
    feat_module=None,
    classes_num=config["general"]["classes_num"],
    model_name=model_name,
    spec_augmenter=None,
    mixup_module=None,
    task_type=config["general"]["task_type"],
    SED=config["general"]["SED"],
    activation_func=config["training"]["activation_func"],
    criterion_aam=criterion_aam,
)
model = Wrapper(encoder, **wrapper_kwargs)

# Weights are not loaded here (the config["testing"]["state_dict"] path is
# disabled); the per-fold checkpoints are loaded in the inference cell.
# The trailing semicolon suppresses the module repr in the cell output.
model.to(DEVICE);

In [9]:
# Make sure the output directory exists. exist_ok=True tolerates an existing
# directory without hiding unrelated failures (permissions, bad path) the way
# the previous bare `except: pass` did.
os.makedirs(config["general"]["out_path"], exist_ok=True)

if not model_names:
    # Fail here with a clear message instead of leaving sample2preds as None
    # and crashing later when the predictions are aggregated.
    raise FileNotFoundError(
        f"No 'best_model_fold' checkpoints found in {config['general']['out_path']}"
    )

# Accumulated predictions of all fold checkpoints, keyed by sample.
# NOTE(review): assumes evaluate_test returns a mapping sample -> list of
# predictions, so `+=` below concatenates the per-fold results — confirm.
sample2preds = None

with torch.no_grad():
    # The loop variable is deliberately not called `model_name`, so the
    # architecture name read from the config is not overwritten.
    for checkpoint_name in model_names:
        checkpoint_path = os.path.join(config["general"]["out_path"], checkpoint_name)
        checkpoint = torch.load(checkpoint_path, map_location=DEVICE)
        model.load_state_dict(checkpoint["model_state_dict"])
        # Inference mode for dropout / batch-norm layers. Idempotent, so it is
        # harmless if evaluate_test switches the mode itself.
        model.eval()

        fold_preds = evaluate_test(model=model, dataloader=test_dataloader,
                                   DEVICE=DEVICE, config=config)
        if sample2preds is None:
            sample2preds = fold_preds
        else:
            for sample in sample2preds:
                sample2preds[sample] += fold_preds[sample]

            


100%|██████████| 518/518 [02:34<00:00,  3.35it/s]
100%|██████████| 518/518 [02:31<00:00,  3.41it/s]
100%|██████████| 518/518 [02:29<00:00,  3.47it/s]
100%|██████████| 518/518 [02:30<00:00,  3.45it/s]
100%|██████████| 518/518 [02:31<00:00,  3.41it/s]


In [13]:
# Start from the example submission so the row order and columns are kept.
preds = pd.read_csv('/media/paniquex/samsung_2tb/IDAO_2021_oski/data/track1_predictions_example.csv')


def _majority_vote(labels):
    """Return the most frequent non-negative integer label in ``labels``."""
    return np.argmax(np.bincount(labels))


# Resolve the target column and the ensemble aggregation once, instead of
# re-reading the config for every sample.
task_type = config["general"]["task_type"]
if task_type == "regression":
    target_column = "regression_predictions"
    aggregate = np.mean
elif task_type == "classification":
    target_column = "classification_predictions"
    aggregate = _majority_vote
else:
    # Previously an unknown task type silently left the example values in place.
    raise ValueError(f"Unsupported task_type: {task_type!r}")

for sample in sample2preds:
    # Submission ids are the file name without its extension. splitext copes
    # with any extension length, unlike slicing off the last four characters.
    sample_short = os.path.splitext(os.path.basename(sample))[0]
    preds.loc[preds["id"] == sample_short, target_column] = aggregate(sample2preds[sample])

In [14]:
# One file per task type, written relative to the current working directory.
predictions_path = f"predictions_{config['general']['task_type']}.csv"
preds.to_csv(path_or_buf=predictions_path, index=False)

In [34]:
# Both per-task prediction files must already exist, i.e. the cells above have
# been run once per task type.
# Class labels below 3 become 1, everything else 0. The training-folds check
# further down shows targets 3, 4 and 5 on the rows whose "0" column is "NR".
classification_labels = pd.read_csv("predictions_classification.csv")["classification_predictions"].values
preds_classif = (classification_labels < 3).astype(int)

regression_frame = pd.read_csv("predictions_regression.csv")
preds_regr = regression_frame["regression_predictions"].values

In [35]:
# Combine the binarised classification output and the regression output into
# the final submission file.
preds = preds.assign(
    classification_predictions=preds_classif,
    regression_predictions=preds_regr,
)
preds.to_csv("predictions.csv", index=False)

In [36]:
# Training folds table. The path is built with os.path.join, like the test
# manifest path, so a trailing slash in DATA_ROOT does not matter.
train = pd.read_csv(os.path.join(DATA_ROOT, "train", "train_folds_classification.csv"))

In [29]:
# Distinct target values among the rows whose "0" column equals "NR".
train.loc[train["0"] == "NR", "target"].unique()

array([5, 4, 3])

In [None]:
# preds_csv = pd.read_csv(os.path.join(config["general"]["data_root"], 'sample_submission.csv'), index_col=0)
# for sample in sample2preds:
#     preds = np.vstack(sample2preds[sample])
#     if config["general"]["use_silence_class"]:
#         silence_mask = np.argmax(pred, axis=1) == 24 #preds[:, -1] > 0.2
#         preds[silence_mask, :] = 0
#     preds_csv.loc[sample] = np.max(preds, axis=0)[:24] # [:24] to exclude silence class
# preds_csv.to_csv(os.path.join(config["general"]["out_path"], 'submission_ensemble_max.csv'), index='recording_id')


# for sample in sample2preds:
#     preds = np.vstack(sample2preds[sample])
#     if config["general"]["use_silence_class"]:
#         silence_mask = np.argmax(preds, axis=1) == 24
#         pred[silence_mask, :] = 0
#     preds_csv.loc[sample] = np.mean(preds, axis=0)[:24] # [:24] to exclude silence class

# preds_csv.to_csv(os.path.join(config["general"]["out_path"], 'submission_ensemble_mean.csv'), index='recording_id')

In [None]:
# Shell escape: list one experiment's log directory (the path is relative to
# the current working directory).
!ls './rfcx_kaggle_git/RFCX_kaggle/experiments/logs1_features=melspec_mr_SR=32_CMVN=F_mean_model=tf_effnet_b0_Pretrained=T_SED=T_aggr=mean_act_func=Mish_criterion=FOCAL_Balanced=T_length=6_n_classes=24_optim=ranger_sched=cosine_lr=1e-3_SpecAug=F_MixUp=True_alpha=16_Gain=T_n_mels=256/'

In [None]:
# Shell escape: list the contents of this absolute data directory.
!ls /media/paniquex/samsung_2tb/rfcx_kaggle/rfcx-species-audio-detection

In [None]:
# Shell escape: list the current working directory.
!ls

In [None]:
# Shell escape: run nvidia-smi and show its report in the cell output.
!nvidia-smi

In [None]:
# Load train_tp.csv from the configured data root, using its first column as
# the index.
train_tp_path = os.path.join(config["general"]["data_root"], 'train_tp.csv')
data = pd.read_csv(train_tp_path, index_col=0)

In [None]:
# Per-row difference t_max - t_min, stored as a new "time_diff" column.
data["time_diff"] = data["t_max"].sub(data["t_min"])

In [None]:
# Summary statistics of the "time_diff" column.
data.loc[:, "time_diff"].describe()

In [None]:
# Histogram (20 bins) of the "time_diff" values below 0.6.
data.loc[data["time_diff"] < 0.6, "time_diff"].hist(bins=20)