In [None]:
# Copy the attached `mpg123` dataset directory into the current working
# directory; the .deb packages installed in the next cell are read from it.
!cp -R /kaggle/input/mpg123 .

In [None]:
# Install the mpg123 command-line decoder from the local .deb files copied
# into ./mpg123. The three library packages are installed before the mpg123
# package itself (presumably its dependencies — keep this order).
!dpkg -i mpg123/libaudio2_1.9.4-6_amd64.deb
!dpkg -i mpg123/libportaudio2_19.6.0-1_amd64.deb
!dpkg -i mpg123/libout123-0_1.25.10-1_amd64.deb 
!dpkg -i mpg123/mpg123_1.25.10-1_amd64.deb

In [None]:
# Copy the attached `efficientnetpytorch` dataset into the working directory so
# that `efficientnetpytorch.efficientnet_pytorch.model` can be imported below.
!cp -R /kaggle/input/efficientnetpytorch .

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data
import torchvision
import torchvision.models as models
import torchvision.transforms as T
import librosa
import scipy
import os
from efficientnetpytorch.efficientnet_pytorch.model import EfficientNet

In [None]:
import warnings

# Install a blanket "ignore" filter so no warning of any category is shown
# for the remainder of the session.
warnings.filterwarnings(action='ignore')

In [None]:
# Prefix shared by the absolute dataset paths used in this notebook.
input_path = "/kaggle/input/"

In [None]:
class MyModel(nn.Module):
    """EfficientNet-B2 classifier for single-channel inputs.

    The network is three stages applied in order: a 3x3 convolution that maps
    the 1-channel input to 3 channels, an ``efficientnet-b2`` backbone built
    with ``EfficientNet.from_name``, and a linear head that maps the
    backbone's output to one logit per class.

    The attribute names ``conv1``, ``base_model`` and ``fc2`` are part of the
    state-dict keys, so they must not be renamed if existing checkpoints are
    to keep loading.

    Args:
        num_classes: Number of output logits. Defaults to 264 (one per bird
            species), which is the value the head was originally hard-coded to.
    """

    def __init__(self, num_classes=264):
        super().__init__()
        # Convert 1 channel to 3 channels so the input can be sent to the
        # EfficientNet backbone.
        self.conv1 = nn.Conv2d(1, 3, kernel_size=3, padding=1)
        self.base_model = EfficientNet.from_name('efficientnet-b2')
        # The head expects 1000 features from the backbone.
        self.fc2 = nn.Linear(1000, num_classes)

    def forward(self, x):
        """Return raw logits for ``x``; no activation is applied here.

        Args:
            x: Input tensor accepted by ``conv1``, i.e. with a channel
                dimension of size 1.

        Returns:
            Tensor of logits with ``num_classes`` values per sample.
        """
        x = self.conv1(x)
        x = self.base_model(x)
        return self.fc2(x)
        


In [None]:
# Run on the first CUDA device when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = 'cuda:0'
else:
    device = 'cpu'

# Instantiate the network and move its parameters onto the selected device.
model = MyModel()
model = model.to(device)

In [None]:
# Load the trained weights into the model. `map_location=device` remaps the
# stored tensors onto the device chosen above, so a checkpoint saved from a GPU
# run still loads when this notebook falls back to CPU.
# NOTE: torch.load unpickles the file — only load checkpoints you trust.
model.load_state_dict(
    torch.load(
        '/kaggle/input/birdsongmodel2/birdsong_model2.pth',
        map_location=device,
    )
)

In [None]:
# Competition training metadata; the `ebird_code` column is used below to
# build the label vocabulary.
train_df = pd.read_csv(
    filepath_or_buffer='/kaggle/input/birdsong-recognition/train.csv',
)

In [None]:
# Display the training metadata frame (the notebook renders the cell's last
# expression).
train_df

In [None]:
# Drop four specific recordings by filename (the reason is not recorded in
# this notebook — presumably they were excluded during training as well).
train_df = train_df[
    ~train_df.filename.isin([
        'XC395021.mp3',
        'XC504005.mp3',
        'XC504006.mp3',
        'XC505006.mp3',
    ])
]

In [None]:
# Alphabetically sorted array of the distinct species codes; position i in
# this array is the label for output index i of the model.
ebird_codes = np.array(sorted(set(train_df['ebird_code'])))

# Reverse lookup: species code -> index in `ebird_codes`.
bird_to_idx = dict(zip(ebird_codes, range(len(ebird_codes))))

In [None]:
from pathlib import Path

# True when the competition's test audio directory is present.
TEST = Path("../input/birdsong-recognition/test_audio").exists()

# Use the competition data when the test audio exists; otherwise fall back to
# the stand-in dataset.
# dataset created by @shonenkov, thanks!
DATA_DIR = "../input/birdsong-recognition/" if TEST else "../input/birdcall-check/"
    

In [None]:
# Test metadata: one row per prediction to make.
test_df = pd.read_csv(f'{DATA_DIR}test.csv')

In [None]:
# Preview the first rows of the test metadata.
test_df.head()

In [None]:
# One row per audio file: every other column becomes the list of that file's
# values, and `audio_id` is restored as a regular column.
test_df_grouped = (
    test_df
    .groupby(by=['audio_id'])
    .agg(list)
    .reset_index()
)

In [None]:
# Display the per-audio-file grouping built above.
test_df_grouped

In [None]:
row_id_birds = {}
model.eval()
for i in range(len(test_df_grouped)):
    print(i)
    row = test_df_grouped.iloc[i]
    audio_id = row.audio_id
    ! mpg123 -q -w /kaggle/working/wav_file{i}.wav {DATA_DIR}test_audio/{audio_id}.mp3
    sr, audio = scipy.io.wavfile.read(f'/kaggle/working/wav_file{i}.wav')
    
    if len(audio.shape) == 2:
        audio = audio[:, 0]
    # Generate a melspectrogram with 256 mels.

    
    if row.site[0] != 'site3':
        audio_id_seconds = np.array(row.seconds)
        audio_id_row_ids = np.array(row.row_id)

        sorted_ids = np.argsort(audio_id_seconds)
        audio_id_seconds = audio_id_seconds[sorted_ids].astype('int')
        audio_id_row_ids = audio_id_row_ids[sorted_ids]
    
        start_second = 0
        for j in range(len(row.row_id)):
            if row.site[0] != 'site3':
                clip = audio[start_second * sr: audio_id_seconds[j] * sr]
                start_second = audio_id_seconds[j]
            else:
                clip = audio
            clip = np.pad(clip, (0, 576000 - len(clip))).astype('float32')
            mel = librosa.feature.melspectrogram(clip, sr=sr, n_mels=128)
            mel = (mel - mel.mean()) / (mel.std() + 1e-12)
            mel = mel[None, None, ...]
            y_pred = model(torch.tensor(mel).to(device)).detach().cpu().sigmoid().numpy().squeeze()
            if np.any(y_pred > 0.2):
                row_id_birds[row.row_id[j]] = ' '.join(ebird_codes[(y_pred > 0.2).astype('bool')].tolist())
            else: 
                row_id_birds[row.row_id[j]] = 'nocall'
    else:
        labels = []
        for j in range(0, len(audio), 576000):
            clip = audio[j: j + 576000]
            clip = np.pad(clip, (0, 576000 - len(clip))).astype('float32')
            mel = librosa.feature.melspectrogram(clip, sr=sr, n_mels=128)
            mel = (mel - mel.mean()) / (mel.std() + 1e-12)
            mel = mel[None, None, ...]
            y_pred = model(torch.tensor(mel).to(device)).detach().cpu().sigmoid().numpy().squeeze()
            if np.any(y_pred > 0.8):
                labels += ebird_codes[(y_pred > 0.8).astype('bool')].tolist()
        if len(labels) == 0:
            row_id_birds[row.row_id[j]] = 'nocall'
        else:
            row_id_birds[row.row_id[j]] = ' '.join(labels)
#     !rm wav_file{i}.wav

In [None]:
# Display the row_id -> predicted birds mapping produced by the inference loop.
row_id_birds

In [None]:
# Build the submission in the row order of test.csv. Every row_id must have a
# prediction in `row_id_birds`; a missing one raises KeyError.
submission = pd.DataFrame({'row_id': test_df.row_id.values}).assign(
    birds=lambda frame: [row_id_birds[rid] for rid in frame['row_id']]
)

In [None]:
# Display the submission frame before writing it to disk.
submission

In [None]:
# Write the submission file to the working directory without the index column.
submission.to_csv(path_or_buf='submission.csv', index=False)