In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))
        
import librosa
import torch
from torch.utils.data import Dataset
import time 
import os
import librosa
import numpy as np
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.optim as optim

# Kaggle paths used throughout the notebook. The trailing slashes matter:
# later cells build file names by plain string concatenation.
# Directory of .ogg test soundscapes that are cut into 5-second clips.
test_audio_dir = '/kaggle/input/birdclef-2023/test_soundscapes/'
# Training-audio directory; its entry names define the species list / class count.
train_audio_dir = '/kaggle/input/birdclef-2023/train_audio/'
# Output directory for the per-clip mel-spectrogram .npy files.
test_audio_processed_dir = '/kaggle/working/test_audio_processed/'

In [None]:
class TestDataset(Dataset):
    """In-memory dataset of pre-computed mel-spectrogram clips.

    ``folder`` must contain one sub-directory per recording, each holding
    ``.npy`` arrays. Iteration order is deterministic: recordings are visited
    in sorted name order and, inside a recording, files named
    ``<stem>_clip<N>.npy`` are ordered by the integer ``N`` (so ``clip2``
    comes before ``clip10``). ``os.listdir`` alone gives no ordering
    guarantee, which would make positional bookkeeping of predictions
    unreliable.

    Args:
        folder: Root directory with one sub-directory of ``.npy`` clips per
            recording.
        target_shape: ``(rows, cols)`` every spectrogram is zero-padded (or
            cropped) to. Defaults to ``(128, 216)``.

    Attributes:
        data: Loaded spectrogram arrays, in iteration order.
        labels: Placeholder label ``0`` for every clip (test data is unlabeled).
        paths: Source file path of every clip, parallel to ``data``.
    """

    def __init__(self, folder, target_shape=(128, 216)):
        self.target_shape = tuple(target_shape)
        self.data = []
        self.labels = []
        self.paths = []
        for bird_folder in sorted(os.listdir(folder)):
            clip_dir = os.path.join(folder, bird_folder)
            # Stray files next to the recording folders are not clips.
            if not os.path.isdir(clip_dir):
                continue
            clip_files = [name for name in os.listdir(clip_dir) if name.endswith('.npy')]
            for file in sorted(clip_files, key=self._clip_sort_key):
                clip_path = os.path.join(clip_dir, file)
                self.data.append(np.load(clip_path))
                self.labels.append(0)
                self.paths.append(clip_path)

    @staticmethod
    def _clip_sort_key(filename):
        """Sort key that orders ``*_clip<N>.npy`` names by the integer ``N``.

        Names that do not follow the pattern sort after all numbered clips,
        in plain lexicographic order.
        """
        _, marker, tail = filename.rpartition('_clip')
        index = tail.split('.')[0]
        if marker and index.isdigit():
            return (0, int(index), filename)
        return (1, 0, filename)

    def __len__(self):
        """Return the number of loaded clips."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(spectrogram, label)`` for clip ``idx``.

        The spectrogram is a float32 tensor of shape ``(1, rows, cols)`` where
        ``(rows, cols)`` is ``target_shape``; the label is a scalar tensor.
        """
        melspec = self.data[idx]
        label = self.labels[idx]

        # Zero-pad the mel-spectrogram to the target shape; anything larger
        # than the target is cropped instead of raising a broadcast error.
        rows, cols = self.target_shape
        padded_melspec = np.zeros(self.target_shape, dtype=np.float32)
        h = min(melspec.shape[0], rows)
        w = min(melspec.shape[1], cols)
        padded_melspec[:h, :w] = melspec[:h, :w]

        return torch.tensor(padded_melspec, dtype=torch.float32).unsqueeze(0), torch.tensor(label)

In [None]:
class BirdClassifier(nn.Module):
    """Small CNN mapping a 1-channel spectrogram to per-class logits.

    Three conv -> ReLU -> 2x2 max-pool stages are followed by two fully
    connected layers with dropout. ``fc1`` expects 64 * 16 * 27 features,
    i.e. a 128 x 216 input halved three times by the pooling stages.

    Args:
        num_classes: Number of output logits.
    """

    def __init__(self, num_classes):
        super().__init__()
        # Attribute names are the state-dict keys of saved checkpoints.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(in_features=64 * 16 * 27, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=num_classes)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout = nn.Dropout(p=0.35)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return raw logits of shape ``(batch, num_classes)``."""
        for conv in (self.conv1, self.conv2, self.conv3):
            x = self.pool(self.relu(conv(x)))
        # Collapse channel and spatial axes into one feature vector per sample.
        features = self.dropout(x.flatten(start_dim=1))
        hidden = self.dropout(self.relu(self.fc1(features)))
        return self.fc2(hidden)

# One output class per entry in the training-audio directory.
num_classes = len(os.listdir(train_audio_dir))
model = BirdClassifier(num_classes=num_classes)

In [None]:
# Restore the trained weights and prepare the model for inference.
model.eval()  # disable dropout
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
state_dict_name = "bird_classifier035full.pth"
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source (consider weights_only=True if the installed torch supports it).
model_state_dict = torch.load("/kaggle/input/models/" + state_dict_name, map_location=device)
model.load_state_dict(model_state_dict)
# The inference loop moves its inputs to `device`; the model has to live on the
# same device or the forward pass fails with a device-mismatch error on GPU.
model = model.to(device)

In [None]:
# Stems (file names without the extension) of all .ogg test soundscapes.
# splitext keeps dots inside a name intact, and sorting the stems themselves
# gives the same order as sorting the per-stem output folders created below.
file_list = sorted(
    os.path.splitext(name)[0]
    for name in os.listdir(test_audio_dir)
    if name.endswith('.ogg')
)

# `pred` collects the submission columns: row ids now, one probability list
# per species once inference runs.
pred = {'row_id': []}
species_list = sorted(os.listdir(train_audio_dir))
print(len(species_list))

for species_code in species_list:
    pred[species_code] = []

os.makedirs(test_audio_processed_dir, exist_ok=True)

clip_seconds = 5

for f in file_list:
    clip_dir = os.path.join(test_audio_processed_dir, f)
    os.makedirs(clip_dir, exist_ok=True)
    y, sr = librosa.load(os.path.join(test_audio_dir, f + '.ogg'))
    clip_len = clip_seconds * sr
    # Only complete clips are kept; a shorter trailing remainder gets no row.
    for i in range(len(y) // clip_len):
        y_clip = y[i * clip_len:(i + 1) * clip_len]
        melspec = librosa.feature.melspectrogram(y=y_clip, sr=sr)
        melspec = librosa.power_to_db(melspec)
        np.save(os.path.join(clip_dir, f"{f}_clip{i}.npy"), melspec)
        # row_id is "<stem>_<end second of the clip>".
        pred['row_id'].append(f + '_' + str((i + 1) * clip_seconds))

In [None]:
test_set = TestDataset(test_audio_processed_dir)
# shuffle must stay off: predictions are appended positionally and are later
# paired with pred['row_id'] by index, so the clips have to be visited in the
# order the row ids were generated.
test_load = DataLoader(test_set, batch_size=32, shuffle=False)

# Fail early with a clear message instead of producing a misaligned submission.
if len(test_set) != len(pred['row_id']):
    raise RuntimeError(
        f"Found {len(test_set)} processed clips but {len(pred['row_id'])} row ids; "
        "re-run the preprocessing cell on a clean output directory."
    )

# Start from empty columns so re-running this cell does not duplicate rows.
for bird_id in species_list:
    pred[bird_id] = []

model.eval()
# Send inputs to wherever the model's weights actually live.
model_device = next(model.parameters()).device

with torch.no_grad():
    for inputs, _ in test_load:
        probabilities = torch.softmax(model(inputs.to(model_device)), dim=1)
        for clip_probabilities in probabilities.tolist():
            for bird_id, prediction in zip(species_list, clip_probabilities):
                pred[bird_id].append(prediction)

In [None]:
# Build the submission table: row_id first, then one probability column per species.
submission_columns = ['row_id', *species_list]
submission = pd.DataFrame(pred, columns=submission_columns)
print(submission)
submission.to_csv("/kaggle/working/submission.csv", index=False)