In [1]:
import sys
import glob
import os
import torch
from torch.utils.data.dataset import random_split
from tqdm import tqdm
import pandas as pd
from torch.utils.data import Dataset, TensorDataset, DataLoader
from torchvision import datasets, models, transforms
from datetime import datetime
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
from pathlib import Path
import torchvision


In [2]:
transform = transforms.Compose([ transforms.ToTensor() ])
dataset = datasets.ImageFolder('./kaggle/input/birdclef-2023/train_melspectrogram', transform=transform )

# split dataset
rows = len(dataset) 
test_size = int(rows*0.2)
train_size = rows - test_size

trainDataset, testDataset = random_split(dataset, [train_size, test_size])

trainloader = torch.utils.data.DataLoader(trainDataset, batch_size=64, shuffle=True)

In [4]:
dataset.class_to_idx

{'abethr1': 0,
 'abhori1': 1,
 'abythr1': 2,
 'afbfly1': 3,
 'afdfly1': 4,
 'afecuc1': 5,
 'affeag1': 6,
 'afgfly1': 7,
 'afghor1': 8,
 'afmdov1': 9,
 'afpfly1': 10,
 'afpkin1': 11,
 'afpwag1': 12,
 'afrgos1': 13,
 'afrgrp1': 14,
 'afrjac1': 15,
 'afrthr1': 16,
 'amesun2': 17,
 'augbuz1': 18,
 'bagwea1': 19,
 'barswa': 20,
 'bawhor2': 21,
 'bawman1': 22,
 'bcbeat1': 23,
 'beasun2': 24,
 'bkctch1': 25,
 'bkfruw1': 26,
 'blacra1': 27,
 'blacuc1': 28,
 'blakit1': 29,
 'blaplo1': 30,
 'blbpuf2': 31,
 'blcapa2': 32,
 'blfbus1': 33,
 'blhgon1': 34,
 'blhher1': 35,
 'blksaw1': 36,
 'blnmou1': 37,
 'blnwea1': 38,
 'bltapa1': 39,
 'bltbar1': 40,
 'bltori1': 41,
 'blwlap1': 42,
 'brcale1': 43,
 'brcsta1': 44,
 'brctch1': 45,
 'brcwea1': 46,
 'brican1': 47,
 'brobab1': 48,
 'broman1': 49,
 'brosun1': 50,
 'brrwhe3': 51,
 'brtcha1': 52,
 'brubru1': 53,
 'brwwar1': 54,
 'bswdov1': 55,
 'btweye2': 56,
 'bubwar2': 57,
 'butapa1': 58,
 'cabgre1': 59,
 'carcha1': 60,
 'carwoo1': 61,
 'categr': 62,
 'cc

In [4]:
images, labels = next(iter(trainloader))
print(images[0].size())

torch.Size([3, 200, 300])


In [39]:
from torchsummary import summary
# torch.cuda.empty_cache()

class NeuralNetwork(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.linear_relu_stack = torch.nn.Sequential(  
            torch.nn.Conv2d(kernel_size=6, stride=3, padding=1, in_channels=3, out_channels=6),
            # nn.Dropout(0.25), 
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=4, stride=3), 
            torch.nn.Flatten(),
            # nn.Linear(5544, 5544),
            # nn.ReLU(),
            torch.nn.Linear(4032, len(class_names))
        )

    def forward(self, x: torch.Tensor): 
        logits = self.linear_relu_stack(x) 
        
        return logits
    

model = NeuralNetwork().to(device) 
# print(model)

summary(model, input_size=(3, 200, 300))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 6, 66, 99]             654
              ReLU-2            [-1, 6, 66, 99]               0
         MaxPool2d-3            [-1, 6, 21, 32]               0
           Flatten-4                 [-1, 4032]               0
            Linear-5                  [-1, 264]       1,064,712
Total params: 1,065,366
Trainable params: 1,065,366
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.69
Forward/backward pass size (MB): 0.66
Params size (MB): 4.06
Estimated Total Size (MB): 5.41
----------------------------------------------------------------


In [64]:
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

In [15]:
def train_loop(dataloader, model, criteria, optimizer ):
    epochs = 9 
    size = len(dataloader.dataset)
    for i in range(epochs): 
        progression_bar = tqdm(enumerate(dataloader), total=len(dataloader), leave=False)

        for batch, (X, y) in progression_bar:
            
            X = torch.FloatTensor(X).to(device)
            print(X.shape)
            return
            y = torch.as_tensor(y).to(device)
            pred = model(X) 
            loss = criteria(pred, y)

            # Backpropagation
            optimizer.zero_grad()
            loss.backward() 
            optimizer.step()

            if batch % 100 == 0:
                loss, current = loss.item(), (batch + 1) * len(X)
                progression_bar.set_description(f"loss: {loss:>7f}") 
        
        today = datetime.today().strftime('%Y-%m-%d')
        Path('./models').mkdir(parents=True, exist_ok=True)
        load_name = os.path.join('./models', f'model{today}{i}.pth')
        torch.save({
            'epoch': i,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': criteria, 
            }, load_name)
        
         



# train_loop(trainloader, model, loss_fn, optimizer)

In [60]:
today = datetime.today().strftime('%Y-%m-%d')
Path('./models').mkdir(parents=True, exist_ok=True)
load_name = os.path.join('./models', f'model{today}{0}.pth')
torch.save({
            'epoch': 0,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': loss_fn, 
            }, load_name)

In [3]:
def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for batch, (X, y) in tqdm(enumerate(dataloader), total=len(dataloader), leave=False):
            X = torch.FloatTensor(X).to(device)
            
            y = y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

testloader = torch.utils.data.DataLoader(testDataset, batch_size=128, shuffle=False)

# test_loop(testloader, model, loss_fn )

In [None]:
def visualize_model(model, num_images=6):
    was_training = model.training
    model.eval()
    images_so_far = 0
    fig = plt.figure()

    with torch.no_grad():
        for i, (inputs, labels) in enumerate(dataloaders['val']):
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)

            for j in range(inputs.size()[0]):
                images_so_far += 1
                ax = plt.subplot(num_images//2, 2, images_so_far)
                ax.axis('off')
                ax.set_title(f'predicted: {class_names[preds[j]]}')
                imshow(inputs.cpu().data[j])

                if images_so_far == num_images:
                    model.train(mode=was_training)
                    return
        model.train(mode=was_training)

In [74]:
class_names = sorted(os.listdir('./data/train_audio/'))

model_ft = models.resnet18(pretrained=True)
num_ftrs = model_ft.fc.in_features
model_ft.fc = torch.nn.Linear(num_ftrs, len(class_names))
model_ft = model_ft.to(device)
criterion = torch.nn.CrossEntropyLoss()

# Observe that all parameters are being optimized
optimizer_ft = torch.optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)



In [16]:
model_conv = train_loop(trainloader, model_ft, criterion, optimizer_ft  )

                                        

torch.Size([64, 3, 200, 300])




In [8]:
test_loop(testloader, model_ft, criterion )

                                                 

Test Error: 
 Accuracy: 96.5%, Avg loss: 0.188252 





In [4]:
class_names = sorted(os.listdir('./kaggle/input/birdclef-2023/train_audio'))


In [5]:
model_ft = models.resnet18(pretrained=False)
num_ftrs = model_ft.fc.in_features
model_ft.fc = torch.nn.Linear(num_ftrs, len(class_names))
model_ft = model_ft.to(device)
criterion = torch.nn.CrossEntropyLoss()

# Observe that all parameters are being optimized
optimizer_ft = torch.optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)

checkpoint = torch.load(os.path.join('./models', f'model2023-04-028.pth'))
model_ft.load_state_dict(checkpoint['model_state_dict'])
optimizer_ft.load_state_dict(checkpoint['optimizer_state_dict'])



In [18]:


firstBatch, first_y = dataset[0]
secondBatch, second_y = dataset[4444]
thirdBatch, third_y = dataset[137255]
 
print(f"expected: {[first_y, second_y, third_y]}")

X = firstBatch.unsqueeze(0) 
X = torch.cat((X, secondBatch.unsqueeze(0)), 0)
X = torch.cat((X, thirdBatch.unsqueeze(0)), 0).cuda()
 
model_ft.train() 
with torch.no_grad():
    firstBatch = torch.FloatTensor(firstBatch).cuda()
   
    pred = model_ft(X)
    print(f"predicted: {pred.argmax(1).cpu().numpy()}")


expected: [0, 12, 245]
predicted: [  0  12 245]


In [11]:
print(len(dataset))

147255


expected: [0, 89]
predicted: [202  89]


In [22]:
dataset = datasets.ImageFolder('./kaggle/input/birdclef-2023/test_melspectrogram', transform=transform )


firstBatch, first_y = dataset[0]
 
print(f"expected: {[first_y]}")

X = firstBatch.unsqueeze(0).cuda()
 
 
with torch.no_grad():
    pred = model_ft(X)
    print(f"predicted: {pred.argmax(1).cpu().numpy()}")


expected: [0]
predicted: [243 245]
