In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# (the stock snippet that lists every file under the input directory is not included in this cell; the audio files are enumerated with glob further down)

import os
import matplotlib.pyplot as plt

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import torch
from torch import nn, optim
import torchvision
import torchaudio
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset, random_split
import glob

In [None]:
# Spectrogram pipeline; the dataset applies these callables in order to each mono waveform.
N_MELS = 64            # number of mel bands requested from MelSpectrogram
SPEC_SIZE = (64, 512)  # size passed to Resize for every spectrogram

# NOTE(review): MelSpectrogram's sample_rate is left at the torchaudio default —
# confirm it matches the sample rate of the recordings.
tfms = [
    torchaudio.transforms.MelSpectrogram(n_mels=N_MELS),
    transforms.Resize(SPEC_SIZE),
]

In [None]:
# Root directory of the training audio. The cells below glob "*/*" beneath it and
# use each file's parent folder name as its class label.
# The trailing slash matters: the file-listing cell builds its glob pattern by
# plain string concatenation (path + "*/*").
path = "../input/birdclef-2022/train_audio/"

In [None]:
# Quick inventory: count every entry exactly two levels below `path`
# (i.e. <path>/<sub-directory>/<file>).
audio_pattern = path + "*/*"
files = glob.glob(audio_pattern)
len(files)

In [None]:
class AudioDataset(Dataset):
    """Audio recordings laid out on disk as ``<root>/<class_name>/<recording>``.

    Each item is an ``(audio, class_name)`` pair. The recording is loaded with
    torchaudio, averaged over its channel dimension to a single channel, and
    then passed through the optional transforms in order. The label is the
    name of the recording's parent directory (a string, not an integer index).

    Args:
        root: Directory containing one sub-directory per class.
        transforms: Optional iterable of callables applied sequentially to the
            mono waveform tensor. ``None`` returns the raw mono waveform.

    Attributes:
        root: The directory passed in.
        transforms: The transforms passed in.
        classes: Sorted names of the sub-directories of ``root``.
        files: Sorted paths of every entry matching ``<root>/*/*``.
    """

    def __init__(self, root, transforms=None):
        self.root = root
        self.transforms = transforms
        # Sorted, and restricted to directories, so the class list is stable
        # across runs/filesystems and is not polluted by stray files in root.
        self.classes = sorted(
            entry
            for entry in os.listdir(root)
            if os.path.isdir(os.path.join(root, entry))
        )
        # Search beneath *this* dataset's root rather than a notebook-level
        # global; escape the root so glob metacharacters in it are literal.
        pattern = os.path.join(glob.escape(root), "*", "*")
        self.files = sorted(glob.glob(pattern))

    def __len__(self):
        """Return the number of recordings found under ``root``."""
        return len(self.files)

    def __getitem__(self, idx):
        """Load recording ``idx`` and return ``(audio, class_name)``."""
        file = self.files[idx]
        # Parent directory name is the label; os.path keeps this independent
        # of the platform's path separator.
        class_ = os.path.basename(os.path.dirname(file))
        # torchaudio.load returns (waveform, sample_rate); only the waveform
        # is used here.
        waveform, _sample_rate = torchaudio.load(file, normalize=True)
        # Collapse channels to mono while keeping a leading channel axis.
        audio_mono = waveform.mean(dim=0, keepdim=True)
        if self.transforms is not None:
            for tf in self.transforms:
                audio_mono = tf(audio_mono)
        return audio_mono, class_
        

In [None]:
# Build the full dataset, then size a 75 % / 25 % train/validation split.
dataset = AudioDataset(path, transforms=tfms)
n_items = len(dataset)
train_data_len = int(n_items * 0.75)
# Validation takes the remainder, so the two lengths always sum to len(dataset).
valid_data_len = n_items - train_data_len

In [None]:
# Sanity check: load one item and display the shape of its transformed audio tensor.
sample_audio, _sample_label = dataset[6]
sample_audio.shape

In [None]:
# Seed the split so train/validation membership is repeatable: an unseeded
# random_split draws a new permutation on every run, which makes validation
# scores from different runs incomparable.
split_generator = torch.Generator().manual_seed(42)
train_data, val_data = random_split(
    dataset,
    [train_data_len, valid_data_len],
    generator=split_generator,
)

In [None]:
BATCH_SIZE = 32
NUM_WORKERS = 4  # worker processes used by each loader

# Training batches are reshuffled every epoch. Validation is read in a fixed
# order: shuffling it does not change aggregate metrics and only makes
# per-batch validation output irreproducible.
train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)
val_loader = DataLoader(val_data, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)

In [None]:
# Smoke test: pull a single batch from the training loader to check that loading
# and batching work end to end. The temporary iterator is not kept, so it is
# released right after this one batch.
# NOTE(review): the dataset returns the class folder name as the label, so the
# labels in this batch are presumably strings rather than index tensors — confirm.
batch = next(iter(train_loader))

In [None]:
# Number of classes = number of entries the dataset found under its root;
# used below as the output width of the new classifier layer.
n_classes = len(dataset.classes)
n_classes

In [None]:
# DenseNet-121 backbone initialised with pretrained weights.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
# `weights=` — confirm against the installed version before changing it.
# NOTE(review): this architecture's first convolution expects 3-channel input,
# while the dataset yields single-channel spectrograms — confirm the channel
# dimension is expanded somewhere before the forward pass.
model = torchvision.models.densenet121(pretrained=True)

In [None]:
# Freeze every existing parameter so the pretrained weights receive no gradients.
model.requires_grad_(False)

# Swap in a fresh linear head sized for our classes. It is created after the
# freeze, so its parameters keep the default requires_grad=True and are the
# only trainable ones in the model.
n_in = model.classifier.in_features
model.classifier = nn.Linear(n_in, n_classes)
print(model.classifier)

In [None]:
# Loss for multi-class classification.
# NOTE(review): CrossEntropyLoss needs class indices (or probabilities) as targets,
# while AudioDataset yields class-name strings — confirm the training loop maps
# names to indices.
criterion = nn.CrossEntropyLoss()

# Only the classifier head's parameters are handed to the optimizer.
optimizer = optim.AdamW(params=model.classifier.parameters(), lr=1e-3)

# Halve the learning rate every 3 calls to lr_scheduler.step().
lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.5)