# 0 Imports

In [None]:
"""
!pip install pretrainedmodels
!pip install albumentations
!pip install --upgrade efficientnet-pytorch
"""

In [None]:
import math, random, os
import matplotlib.pyplot as plt
import plotly.express as px
import numpy as np
import pandas as pd
import seaborn as sns
import librosa
import sklearn
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder
from sklearn.model_selection import train_test_split

import torch
from torch import nn, optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset, random_split
from torch.nn import init
import torchaudio
from torchaudio import transforms
from torchaudio.transforms import MelSpectrogram
from torchvision.models import resnet34, inception_v3, vgg16_bn
from torchvision.transforms import Resize

#from efficientnet_pytorch import EfficientNet

from ignite.engine import Events, create_supervised_evaluator, create_supervised_trainer
from ignite.metrics import Accuracy, Loss, RunningAverage, ConfusionMatrix
from ignite.handlers import ModelCheckpoint, EarlyStopping
from ignite.handlers.param_scheduler import LRScheduler
from ignite.contrib.handlers.tqdm_logger import ProgressBar
from ignite.contrib.metrics import ROC_AUC

plt.rcParams['figure.facecolor'] = 'white'

print(f"load completed")

In [None]:
# Stage the ResNet-34 checkpoint from the attached input dataset into both
# torch cache directories, under the file name `resnet34-b627a593.pth`.
# Presumably this lets torchvision find the weights without a download
# (offline kernel) -- TODO confirm; `resnet34` is imported but not used below.
!mkdir -p ~/.torch/models
!cp ../input/resnet34/resnet34.pth ~/.torch/models/resnet34-b627a593.pth
!mkdir -p /root/.cache/torch/hub/checkpoints
!cp ../input/resnet34/resnet34.pth /root/.cache/torch/hub/checkpoints/resnet34-b627a593.pth

In [None]:
# Stage the EfficientNet-B7 checkpoint from the attached input dataset into both
# torch cache directories, keeping its original file name. Target location:
# /root/.cache/torch/hub/checkpoints/efficientnet-b7-dcc49843.pth
# NOTE(review): the EfficientNet import is commented out in the import cell, so
# these weights look unused in this notebook -- confirm before removing.

!mkdir -p ~/.torch/models
!cp ../input/efficientnet-pytorch/efficientnet-b7-dcc49843.pth ~/.torch/models/efficientnet-b7-dcc49843.pth
!mkdir -p /root/.cache/torch/hub/checkpoints
!cp ../input/efficientnet-pytorch/efficientnet-b7-dcc49843.pth /root/.cache/torch/hub/checkpoints/efficientnet-b7-dcc49843.pth

In [None]:
# Stage the EfficientNet-B0 checkpoint into both torch cache directories. Target location:
# /root/.cache/torch/hub/checkpoints/efficientnet-b0-08094119.pth
#
# The destination keeps the B0 file name. The previous version of this cell
# copied the B0 weights to `efficientnet-b7-dcc49843.pth`, silently overwriting
# the B7 checkpoint staged by the cell above with weights for a different
# architecture.
# NOTE(review): the exact file name the installed efficientnet-pytorch version
# looks up for B0 should be confirmed against its URL map.

!mkdir -p ~/.torch/models
!cp ../input/efficientnet-pytorch/efficientnet-b0-08094119.pth ~/.torch/models/efficientnet-b0-08094119.pth
!mkdir -p /root/.cache/torch/hub/checkpoints
!cp ../input/efficientnet-pytorch/efficientnet-b0-08094119.pth /root/.cache/torch/hub/checkpoints/efficientnet-b0-08094119.pth

In [None]:
def visualize_history(history, figsize=(20, 10), metric="loss"):
    """Plot a metric and its ``val_`` counterpart against the history index.

    Args:
        history: DataFrame-like object containing the columns ``metric`` and
            ``"val_" + metric``; its index is used as the x axis (labelled
            "epochs").
        figsize: ``(width, height)`` in inches of the figure that is created.
        metric: Name of the column holding the training-side metric.
    """
    # `figsize` used to be accepted but ignored; open a figure of that size so
    # the parameter actually takes effect.
    plt.figure(figsize=figsize)
    plt.title(metric.capitalize())
    sns.lineplot(data=history, x=history.index, y=metric, label=metric)
    sns.lineplot(data=history, x=history.index, y="val_"+metric, label="val_"+metric)
    plt.xlabel("epochs")
    # Repeat the y tick labels on the right-hand side as well.
    plt.tick_params(labelright=True)
    plt.legend()
    plt.grid()

***
# 1 Data loading

In [None]:
KAGGLE_BASE_PATH = "/kaggle/input/birdclef-2022/"

In [None]:
BASE_PATH = KAGGLE_BASE_PATH

## 1.1 Train data

In [None]:
data_train = pd.read_csv(BASE_PATH + "train_metadata.csv")

In [None]:
data_train.info()

In [None]:
data_train.head()

In [None]:
# Drop metadata columns that are not used further down.
# Re-binding the name (instead of `inplace=True`) together with
# `errors="ignore"` keeps this cell re-runnable: executing it a second time no
# longer raises KeyError for columns that are already gone.
data_train = data_train.drop(
    columns=["type", "scientific_name", "common_name", "license", "url"],
    errors="ignore",
)

In [None]:
#data_train["time"] = pd.to_datetime(data_train["time"])

In [None]:
data_train.info()

In [None]:
data_train.head()

***
## 1.2 Scored birds data

In [None]:
scored_birds = pd.read_json(BASE_PATH + "scored_birds.json")

In [None]:
scored_birds.info()

In [None]:
scored_birds.head()

In [None]:
LABELS = scored_birds.iloc[:,0].to_list()
LABELS

***
# 2 Data exploration

In [None]:
data_train.info()

In [None]:
data_train.describe()

In [None]:
data_train["rating"].value_counts()

In [None]:
# Keep only recordings whose rating lies in the closed interval [1.0, 5.0].
rating_in_range = data_train["rating"].between(1.0, 5.0)
data_train = data_train[rating_in_range]

In [None]:
data_train.info()

In [None]:
data_train["rating"].value_counts()

In [None]:
# Keep rows whose primary label, or whose `secondary_labels` value, equals one
# of the scored bird codes (first column of `scored_birds`).
# NOTE(review): `isin` compares each `secondary_labels` cell as a whole; if that
# column stores a list (or its string form) per row, the second condition will
# never match -- confirm the column format.
scored_codes = scored_birds.iloc[:, 0].values
primary_is_scored = data_train["primary_label"].isin(scored_codes)
secondary_is_scored = data_train["secondary_labels"].isin(scored_codes)
data_train = data_train[primary_is_scored | secondary_is_scored]

In [None]:
data_train.info()

In [None]:
# World map of the remaining recordings, one colour per primary label.
geo_options = dict(
    lat="latitude",
    lon="longitude",
    color="primary_label",
    width=1000,
    height=500,
    title="Bird Distribution",
)
fig = px.scatter_geo(data_train, **geo_options)
fig.show()

***
# 3 Utilities setup

In [None]:
class AudioUtils():
    """Stateless helpers for loading, normalising and augmenting audio clips.

    In every method ``audio`` is a ``(signal, sample_rate)`` tuple whose
    ``signal`` is a 2-D tensor indexed as ``[channel, sample]``.
    """

    @staticmethod
    def open(audio_file):
        """Load ``audio_file`` with torchaudio and return ``(signal, sample_rate)``."""
        sig, sr = torchaudio.load(audio_file)
        return (sig, sr)

    @staticmethod
    def rechannel(audio, new_channel):
        """Convert ``audio`` to ``new_channel`` channels.

        Returns ``audio`` unchanged when it already has that many channels.
        For ``new_channel == 1`` only the first channel is kept; otherwise the
        signal is concatenated with itself along the channel axis.

        NOTE(review): the duplication branch only yields exactly
        ``new_channel`` channels for mono -> stereo; other combinations are
        left as in the original implementation.
        """
        sig, sr = audio
        if sig.shape[0] == new_channel:
            return audio
        if new_channel == 1:
            resig = sig[:1, :]
        else:
            resig = torch.cat([sig, sig])
        return resig, sr

    @staticmethod
    def resample(audio, newsr):
        """Resample ``audio`` to ``newsr`` Hz; a no-op if the rate already matches."""
        sig, sr = audio
        if sr == newsr:
            return audio
        # Resample works along the last (time) axis, so a single transform
        # covers all channels; no need to process channel 0 and the remaining
        # channels separately and concatenate them again.
        resig = torchaudio.transforms.Resample(sr, newsr)(sig)
        return resig, newsr

    @staticmethod
    def pad_trunc(audio, max_ms):
        """Force the signal to last exactly ``max_ms`` milliseconds.

        Longer signals are cut at the end. Shorter signals are zero-padded,
        with the padding split at a random point between beginning and end.
        """
        sig, sr = audio
        num_rows, sig_len = sig.shape
        # Multiply before dividing: `sr//1000 * max_ms` rounds the sample rate
        # down to whole kHz first and therefore yields too few samples for
        # rates such as 44100 or 22050 Hz.
        max_len = sr * max_ms // 1000
        if sig_len > max_len:
            sig = sig[:, :max_len]
        elif sig_len < max_len:
            pad_begin_len = random.randint(0, max_len - sig_len)
            pad_end_len = max_len - sig_len - pad_begin_len
            # new_zeros keeps the padding on the signal's dtype and device.
            pad_begin = sig.new_zeros((num_rows, pad_begin_len))
            pad_end = sig.new_zeros((num_rows, pad_end_len))
            sig = torch.cat((pad_begin, sig, pad_end), 1)
        return sig, sr

    @staticmethod
    def time_shift(audio, shift_limit):
        """Circularly shift the signal in time by a random amount.

        The shift is drawn uniformly from ``[0, shift_limit * signal_length)``
        samples; samples pushed past the end wrap around to the start.
        """
        sig, sr = audio
        _, sig_len = sig.shape
        shift_amt = int(random.random() * shift_limit * sig_len)
        # Roll along the time axis only. Without `dims` the tensor is flattened
        # before rolling, which moves samples from one channel into the next
        # for multi-channel input.
        return (sig.roll(shift_amt, dims=-1), sr)

    @staticmethod
    def spectrogram(audio, n_mels=64, n_fft=1024, hop_len=None):
        """Return the mel spectrogram of ``audio`` converted to decibels (top_db=80)."""
        sig, sr = audio
        top_db = 80
        spec = transforms.MelSpectrogram(sr, n_fft=n_fft, hop_length=hop_len, n_mels=n_mels)(sig)
        spec = transforms.AmplitudeToDB(top_db=top_db)(spec)
        return spec

    @staticmethod
    def spectrogram_augment(spectrogram, max_mask_pct=0.1, n_freq_maks=1, n_time_masks=1):
        """Apply frequency and time masking to a ``[channel, mel, step]`` spectrogram.

        Args:
            spectrogram: 3-D spectrogram tensor.
            max_mask_pct: Upper bound of each mask's extent, as a fraction of
                the number of mel bins / time steps.
            n_freq_maks: Number of frequency masks to apply (the misspelled
                name is kept so keyword callers keep working).
            n_time_masks: Number of time masks to apply.

        Masked regions are filled with the mean of the input spectrogram.
        """
        _, n_mels, n_steps = spectrogram.shape
        mask_value = spectrogram.mean()
        aug_spec = spectrogram
        freq_mask_param = max_mask_pct * n_mels
        for _ in range(n_freq_maks):
            aug_spec = transforms.FrequencyMasking(freq_mask_param)(aug_spec, mask_value)
        time_mask_param = max_mask_pct * n_steps
        for _ in range(n_time_masks):
            aug_spec = transforms.TimeMasking(time_mask_param)(aug_spec, mask_value)
        return aug_spec

***
# 4 Dataset and Dataloader

In [None]:
class TrainSoundDataset(Dataset):
    """Dataset yielding ``(augmented mel spectrogram, integer label)`` pairs.

    Args:
        df: Metadata frame with at least the columns ``filename`` and
            ``primary_label``.
        data_path: Prefix that is string-concatenated with ``filename`` to
            build the audio path (so it needs its trailing separator).
        label_encoder: Fitted encoder whose ``transform`` maps the
            ``primary_label`` column to numeric class codes.
    """

    def __init__(self, df, data_path, label_encoder):
        # Work on a private copy: adding the "label" column must not modify the
        # caller's frame (which is typically a slice of a larger frame and
        # would otherwise trigger pandas' SettingWithCopyWarning).
        self.df = df.copy()
        self.data_path = str(data_path)
        self.duration = 4000  # clip length handed to AudioUtils.pad_trunc, in ms
        self.sr = 32000       # target sample rate handed to AudioUtils.resample
        self.channel = 1      # target channel count handed to AudioUtils.rechannel
        self.shift_pct = 0.4  # shift limit handed to AudioUtils.time_shift
        self.label_encoder = label_encoder
        # The encoder returns one column per input column; flatten it and store
        # integer codes so the column is directly usable as class indices.
        encoded = np.asarray(self.label_encoder.transform(self.df[["primary_label"]]))
        self.df["label"] = encoded.ravel().astype("int64")

    def __len__(self):
        """Number of rows in the metadata frame."""
        return len(self.df)

    def __getitem__(self, index):
        """Load, normalise and augment the clip at positional ``index``.

        Returns CPU tensors. Moving batches to the training device is left to
        the consumer (the Ignite trainer/evaluator are created with
        ``device=...``), which also keeps the dataset usable on CPU-only
        machines and inside DataLoader worker processes.
        """
        audio_file = self.data_path + self.df["filename"].iloc[index]
        y = torch.tensor(self.df["label"].iloc[index], dtype=torch.long)

        audio = AudioUtils.open(audio_file)
        re_aud = AudioUtils.resample(audio, self.sr)
        re_chan = AudioUtils.rechannel(re_aud, self.channel)
        dur_aud = AudioUtils.pad_trunc(re_chan, self.duration)
        shift_aud = AudioUtils.time_shift(dur_aud, self.shift_pct)
        sgram = AudioUtils.spectrogram(shift_aud)
        aug_sgram = AudioUtils.spectrogram_augment(sgram)

        return aug_sgram, y

In [None]:
# Fit an ordinal encoder on the primary labels of the filtered training frame;
# the fitted encoder is the cell's displayed value.
label_encoder = OrdinalEncoder()
label_source = data_train[["primary_label"]]
label_encoder.fit(label_source)

In [None]:
# Mini-batch size shared by the data loaders.
BATCH_SIZE = 16
# Directory prefix of the training recordings (trailing slash required: file
# names are appended by plain string concatenation).
TRAIN_PATH = f"{BASE_PATH}train_audio/"
# Number of distinct rows in `scored_birds`, used as the class count.
LABELS_COUNT = scored_birds.value_counts().count()

In [None]:
data_train.info()

In [None]:
data_train.head()

In [None]:
# Debug switch: number of leading rows to keep for a fast smoke run.
# Set to None to train on the full filtered dataset.
N_SAMPLES = 50
data_train = data_train if N_SAMPLES is None else data_train[:N_SAMPLES]

In [None]:
# Hold out 20% of the rows for validation. A fixed seed makes the split (and
# therefore the reported validation metrics) reproducible across kernel restarts.
SPLIT_SEED = 42
X_train, X_val, y_train, y_val = train_test_split(
    data_train.loc[:, "secondary_labels":"filename"],
    data_train["primary_label"],
    test_size=0.2,
    random_state=SPLIT_SEED,
)
# Re-attach the target column so each split is a self-contained metadata frame.
data_train_split = X_train.join(y_train)
data_val_split = X_val.join(y_val)

In [None]:
train_ds = TrainSoundDataset(data_train_split, TRAIN_PATH, label_encoder)
val_ds = TrainSoundDataset(data_val_split, TRAIN_PATH, label_encoder)

In [None]:
# Shuffle only the training batches; validation metrics do not depend on the
# sample order, so the validation loader keeps a stable order.
train_dl = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
val_dl = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
train_ds[0]

In [None]:
# Print the spectrogram shape of (at most) the first 22 training samples.
# Indexing by position loads each clip once; iterating the dataset with
# `enumerate` and then indexing it again decoded every file twice.
N_SHAPES_TO_SHOW = 22
for i in range(min(len(train_ds), N_SHAPES_TO_SHOW)):
    print(train_ds[i][0][0].shape)

In [None]:
val_ds[0]

In [None]:
#test_ds[0]

***
# 5 Neural Network

In [None]:

class AudioClassifier(torch.nn.Module):
    """VGG16-BN adapted to single-channel spectrograms and ``len(LABELS)`` classes.

    The network is initialised from the checkpoint at
    ``../input/vgg16weight/vgg16_bn-6c64b313.pth``. Its loaded feature
    extractor is frozen, its first convolution is replaced by a 1-channel one,
    and the last classifier layer is replaced by a new linear layer with one
    output per entry of ``LABELS``.
    """

    def __init__(self):
        super().__init__()
        self.vgg16 = vgg16_bn()
        self.vgg16.load_state_dict(torch.load("../input/vgg16weight/vgg16_bn-6c64b313.pth"))
        print(self.vgg16.classifier[6].out_features)

        # Freeze the loaded feature extractor. The attribute is `requires_grad`;
        # the former `require_grad` merely created an unused attribute, so no
        # parameter was actually frozen.
        for param in self.vgg16.features.parameters():
            param.requires_grad = False

        # Swap in the single-channel input convolution AFTER freezing, so this
        # freshly initialised layer stays trainable.
        self.vgg16.features[0] = nn.Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))

        # Keep every classifier layer except the last and append a new output
        # layer sized for our label set.
        num_features = self.vgg16.classifier[6].in_features
        head_layers = list(self.vgg16.classifier.children())[:-1]
        head_layers.append(nn.Linear(num_features, len(LABELS)))
        self.vgg16.classifier = nn.Sequential(*head_layers)
        print(self.vgg16)

    def forward(self, x):
        """Run ``x`` through the adapted VGG16 and return its raw output (logits)."""
        return self.vgg16(x)

model = AudioClassifier()
# Pick the GPU when one is available and move the model to that device.
# `model.cuda()` ignored `device` and raised on CPU-only machines.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)
next(model.parameters()).device

#print(model.effnet)

***
# 6 Training

In [None]:
NUM_EPOCHS = 5

In [None]:
scored_birds.iloc[:,0].count()

In [None]:
# Optimiser, loss and the Ignite training engine.
optimizer = torch.optim.Adam(model.parameters(), lr=5e-5)
loss_fn = nn.CrossEntropyLoss()

trainer = create_supervised_trainer(model, optimizer, loss_fn, device=device)

# Validation evaluator (keeps the name `evaluator`): accuracy, loss and
# confusion matrix.
val_metrics = {
    "accuracy": Accuracy(),
    "loss": Loss(loss_fn),
    "conf_matrix": ConfusionMatrix(num_classes=LABELS_COUNT)
}
evaluator = create_supervised_evaluator(model, metrics=val_metrics, device=device)

# Dedicated evaluator for the training set. Previously one evaluator served
# both data sets, so the EarlyStopping handler attached to it was also fed the
# *training* loss after every epoch.
train_metrics = {
    "accuracy": Accuracy(),
    "loss": Loss(loss_fn),
}
train_evaluator = create_supervised_evaluator(model, metrics=train_metrics, device=device)

# Per-epoch metric histories, filled by the handlers below.
training_history = {"accuracy": [], "loss": []}
validation_history = {"accuracy": [], "loss": []}
last_epoch = []  # receives one 0 per completed epoch (kept from the original)

# Running average of the trainer's per-iteration output, exposed as 'loss'.
RunningAverage(output_transform=lambda x: x).attach(trainer, 'loss')

def score_function(engine):
    """Return the negated loss metric, so a lower loss is a higher score."""
    val_loss = engine.state.metrics["loss"]
    return -val_loss

# Early stopping is driven by validation runs only.
# NOTE(review): patience=10 cannot trigger with NUM_EPOCHS = 5.
early_stop_handler = EarlyStopping(patience=10, score_function=score_function, trainer=trainer)
evaluator.add_event_handler(Events.COMPLETED, early_stop_handler)

ProgressBar(persist=True).attach(trainer, ["loss"])
ProgressBar(persist=False).attach(train_evaluator, ["loss"])
ProgressBar(persist=False).attach(evaluator, ["loss"])

@trainer.on(Events.EPOCH_COMPLETED)
def log_training_results(trainer):
    """Evaluate on the training loader and record accuracy (in %) and loss."""
    train_evaluator.run(train_dl)
    metrics = train_evaluator.state.metrics
    accuracy = metrics['accuracy']*100
    loss = metrics['loss']
    last_epoch.append(0)
    training_history['accuracy'].append(accuracy)
    training_history['loss'].append(loss)
    print("Training Results - Epoch: {}  Avg accuracy: {:.2f} Avg loss: {:.2f}"
          .format(trainer.state.epoch, accuracy, loss))

def log_validation_results(trainer):
    """Evaluate on the validation loader and record accuracy (in %) and loss."""
    evaluator.run(val_dl)
    metrics = evaluator.state.metrics
    accuracy = metrics['accuracy']*100
    loss = metrics['loss']
    validation_history['accuracy'].append(accuracy)
    validation_history['loss'].append(loss)
    print("Validation Results - Epoch: {}  Avg accuracy: {:.2f} Avg loss: {:.2f}"
          .format(trainer.state.epoch, accuracy, loss))

trainer.add_event_handler(Events.EPOCH_COMPLETED, log_validation_results)

@trainer.on(Events.COMPLETED)
def log_confusion_matrix(trainer):
    """Plot the validation confusion matrix once training has finished."""
    evaluator.run(val_dl)
    metrics = evaluator.state.metrics
    cm = metrics['conf_matrix']
    cm = cm.numpy()
    cm = cm.astype(int)
    # NOTE(review): tick labels follow the order of `scored_birds`, while class
    # indices come from `label_encoder`; confirm both orders agree.
    classes = scored_birds.iloc[:,0].values
    # A single subplots() call is enough; the extra plt.subplot() was redundant.
    fig, ax = plt.subplots(figsize=(10,10))
    sns.heatmap(cm, annot=True, ax=ax, fmt="d")
    ax.set_xlabel('Predicted labels')
    ax.set_ylabel('True labels')
    ax.set_title('Confusion Matrix')
    ax.xaxis.set_ticklabels(classes, rotation=90)
    ax.yaxis.set_ticklabels(classes, rotation=0)

# Save the model's state dict after every epoch, keeping the two latest files.
checkpoint_handler = ModelCheckpoint("saved_models", "birdclef", n_saved=2, create_dir=True, save_as_state_dict=True, require_empty=False)
trainer.add_event_handler(Events.EPOCH_COMPLETED, checkpoint_handler, {"birdclef": model})

In [None]:
trainer.run(train_dl, max_epochs=NUM_EPOCHS)

In [None]:
# Training vs. validation loss per epoch.
loss_fig, loss_ax = plt.subplots()
loss_ax.plot(training_history['loss'], label="Training Loss")
loss_ax.plot(validation_history['loss'], label="Validation Loss")
loss_ax.set_xlabel('No. of Epochs')
loss_ax.set_ylabel('Loss')
loss_ax.legend(frameon=False)
plt.show()

In [None]:
# Training vs. validation accuracy per epoch.
acc_fig, acc_ax = plt.subplots()
acc_ax.plot(training_history['accuracy'], label="Training Accuracy")
acc_ax.plot(validation_history['accuracy'], label="Validation Accuracy")
acc_ax.set_xlabel('No. of Epochs')
acc_ax.set_ylabel('Accuracy')
acc_ax.legend(frameon=False)
plt.show()

In [None]:
def fetch_last_checkpoint_model_filename(model_save_path):
    """Return the path of the checkpoint with the highest iteration number.

    A checkpoint is a file in ``model_save_path`` whose name ends in
    ``_<integer>.pt`` (or ``.pth``), e.g. ``birdclef_birdclef_10.pt``; the
    integer is taken as the iteration number. Other files are ignored.

    Args:
        model_save_path: Directory containing the saved checkpoints.

    Returns:
        ``pathlib.Path`` of the checkpoint with the largest iteration number.

    Raises:
        FileNotFoundError: If the directory contains no checkpoint file.
    """
    import os
    import re
    from pathlib import Path

    # Anchor on the file-name suffix: the former `'.pt' in f` substring test and
    # `split('_')[2]` field lookup crashed on unrelated files such as
    # "notes.ptx" or names with a different number of underscores.
    checkpoint_pattern = re.compile(r"_(\d+)\.pth?$")
    candidates = []
    for file_name in os.listdir(model_save_path):
        match = checkpoint_pattern.search(file_name)
        if match:
            candidates.append((int(match.group(1)), file_name))

    if not candidates:
        raise FileNotFoundError(f"no checkpoint files found in {model_save_path!r}")

    _, latest_name = max(candidates, key=lambda candidate: candidate[0])
    return Path(model_save_path) / latest_name

#model.load_state_dict(torch.load(fetch_last_checkpoint_model_filename('./saved_models')))
#print("Model Loaded")

***
# 7 Inference

In [None]:
class TestDataset(Dataset):
    """Dataset yielding one 5-second soundscape segment per metadata row.

    Args:
        data_dir: Directory containing the ``<file_id>.ogg`` soundscapes.
        meta_df: Frame with the columns ``file_id`` and ``end_time``
            (segment end, in seconds).
        transform: Optional iterable of callables applied, in order, to the
            cut-out audio segment.
    """

    def __init__(self, data_dir, meta_df, transform = None):
        super(TestDataset, self).__init__()
        self.data_dir = data_dir
        self.meta_df = meta_df
        self.transform = transform

    def __len__(self):
        """Number of rows in the metadata frame."""
        return len(self.meta_df)

    def __getitem__(self, index):
        """Return the processed segment for positional ``index`` with a leading axis added."""
        # Positional access (`iloc`) matches what a DataLoader passes in and,
        # unlike the label-based `loc`, also works when `meta_df` does not
        # carry a default 0..n-1 index (e.g. after filtering).
        file_id = self.meta_df["file_id"].iloc[index]
        path = f"{os.path.join(self.data_dir, file_id)}.ogg"
        end_time = self.meta_df["end_time"].iloc[index]
        mono_audio = self.load_audio(path, end_time)
        return mono_audio.unsqueeze(dim=0)

    def load_audio(self, path, time):
        """Load ``path`` and return the first channel of the processed segment.

        The segment spans the 5 seconds that end at ``time`` seconds.
        """
        audio, sample_rate = torchaudio.load(path)
        # Slice bounds must be plain ints even if `time` arrives as a float or
        # a numpy scalar.
        start = int((time - 5) * sample_rate)
        stop = int(time * sample_rate)
        audio = audio[:, start:stop]
        if self.transform is not None:
            for aug in self.transform:
                audio = aug(audio)
        return audio[0]

In [None]:
# Inference-time preprocessing. It mirrors AudioUtils.spectrogram, which the
# training dataset uses: 64 mel bins, n_fft=1024, decibel scale with top_db=80.
# The former pipeline (128 mel bins at the transform's default sample rate and
# FFT size, no dB conversion, resized to 128x128) fed the model inputs unlike
# anything it was trained on.
# NOTE(review): assumes the test soundscapes are sampled at 32 kHz, the rate
# the training pipeline resamples to -- confirm, since TestDataset does not
# resample.
augm = [
    MelSpectrogram(sample_rate=32000, n_fft=1024, n_mels=64),
    transforms.AmplitudeToDB(top_db=80),
]
augm

In [None]:
# Locations of the test soundscapes and their metadata. Both are derived from
# BASE_PATH so that pointing the notebook at another data root needs one change.
TEST_PATH = BASE_PATH + "test_soundscapes"
CSV_TEST_PATH = BASE_PATH + "test.csv"

In [None]:
test_df = pd.read_csv(CSV_TEST_PATH)
test_df.head()

In [None]:
test_dataset = TestDataset(TEST_PATH, test_df, transform = augm)

In [None]:
# Write a placeholder submission in which every row's target is False.
imp_features = ["row_id", "target"]
test = test_df.copy()
test["target"] = [False] * len(test)
test = test[imp_features]
test.to_csv("submission.csv", index = False)

In [None]:
BATCH_SIZE = 16
test_dl = DataLoader(test_dataset, batch_size = BATCH_SIZE, shuffle = False)

# Switch dropout / batch-norm layers to inference behaviour before predicting.
model.eval()

prediction = []
with torch.no_grad():
    for patch in test_dl:
        output = model(patch.to(device))
        prediction += torch.argmax(output, dim=1).tolist()

# Map predicted class indices back to bird codes with the encoder that produced
# the training labels (the former `class_labels` lookup was never defined).
# NOTE(review): assumes the model has no more outputs than the encoder has
# categories; otherwise an index could fall outside `class_names` -- confirm.
class_names = label_encoder.categories_[0]
predicted_birds = np.array([class_names[class_idx] for class_idx in prediction], dtype=object)

# A row's target is True when the predicted bird equals the bird the row asks
# about. The result goes into a new frame so `test_df` stays intact and this
# cell can be re-run.
imp_features = ["row_id", "target"]
submission_df = pd.DataFrame({
    "row_id": test_df["row_id"].to_numpy(),
    "target": test_df["bird"].to_numpy() == predicted_birds,
})[imp_features]
submission_df.to_csv("submission.csv", index = False)

In [None]:
"""
file_list = [f.split('.')[0] for f in sorted(os.listdir(TEST_PATH))]
print('Number of test soundscapes:', len(file_list))
"""

In [None]:
"""
pred = {
  "row_id": [],
  "target": [],
  "true_label": [],
  "pred_label": []
}

files = [f.split('.')[0] for f in sorted(os.listdir(TEST_PATH))]
SCORED_BIRDS_PATH = BASE_PATH + "scored_birds.json"

with open(SCORED_BIRDS_PATH) as bf:
    birds = pd.read_json(bf)
    birds = birds.iloc[:, 0].to_list()

for file in files:
    path = TEST_PATH + file +".ogg"

    sig, sr = AudioUtils().open(path)
    duration = len(sig[0]) / sr
    chunks_nb = math.floor(duration / 5)
    segments = [[] for i in range(chunks_nb)]

    for i in range(len(segments)):
        segment_end = (i+1)*5
        audio_segment = sig[0][i*5*sr:(i+1)*5*sr].cpu().detach().numpy()
        audio_segment = np.array([audio_segment])
        audio_segment = torch.from_numpy(audio_segment)

        for bird in birds:
            re_aud = AudioUtils.resample((audio_segment, sr), sr)
            re_chan = AudioUtils.rechannel(re_aud, 1)
            dur_aud = AudioUtils.pad_trunc(re_chan, 4000)
            spectro = AudioUtils().spectrogram(dur_aud)

            spectro = spectro.cpu().detach().numpy()
            spectro = np.array([spectro])
            spectro = torch.from_numpy(spectro)

            bird_label = label_encoder.transform([[bird]])
            bird_label = torch.tensor(bird_label, dtype=torch.long)

            with torch.no_grad():
                output = model(spectro.cuda())

            output = torch.argmax(output, dim=1).cpu().tolist()

            pred_label = birds[output[0]]
            target = bird == pred_label

            row_id = file + '_' + bird + '_' + str(segment_end)

            pred["row_id"].append(row_id)
            pred["target"].append(target)
            pred["true_label"].append(bird)
            pred["pred_label"].append(pred_label)
"""

In [None]:
"""
submission_enhanced = pd.DataFrame(data=pred)
submission_enhanced[:40]
"""

In [None]:
"""
submission = submission_enhanced[["row_id", "target"]]
submission
"""

In [None]:
#submission.to_csv("submission.csv", index = False)

In [None]:
#submission.isna().any()

In [None]:
"""
sample_submission = pd.read_csv(BASE_PATH + "sample_submission.csv")
sample_submission.head()
"""

In [None]:
#submission.info()

In [None]:
#sample_submission.info()