In [1]:
import torch
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import torch.optim as optim
import torchvision.models as models
import torch.nn.functional as F
from torch.nn import Conv2d, ReLU, MaxPool2d, Linear, BatchNorm2d, Dropout, Softmax, Flatten
import torch.nn as nn
import matplotlib.pyplot as plt
from torchvision.datasets import ImageFolder
from torchvision.utils import make_grid
import numpy as np

In [None]:
# Using ResNet-50 for transfer learning and fine-tuning
# model  = models.resnet50(pretrained = True)

""" In transfer learning, we freeze the weights of the earlier layers to retain
the learned features and only train the final layers """

# NUM_CLASSES = 512

# # freezing layers 
# for param in model.parameters():
#     param.requires_grad = False

# # note that the ResNet-50 final layer is designed for the 1000 ImageNet classes, so it is replaced with a custom layer
# model.fc = Linear(model.fc.in_features, NUM_CLASSES)

# for fine-tuning, some weights are unfrozen
# for name, param in model.named_parameters():
#     if "layer4" in name or "fc" in name: # fc is the fully connected layer
#         #adjust the layer to be fine tuned 
#         param.requires_grad = True 

# adding more custom layers onto the resnet50 

# class resBirdsDetect(nn.Module):
#     def __init__(self) -> None:
#         super(resBirdsDetect, self).__init__()
#         self.resnet = models.resnet50(pretrained = True)
#         # freeze or unfreeze layers as needed.
    #     for param in self.resnet.parameters():
    #         param.requires_grad = False 
    #     self.resnet.fc = nn.Identity() # remove the default layer 
    #     self.layers = nn.Sequential(
    #         nn.Linear(2048, 1024),
    #         # input size is 2048 because of the ResNet-50 output
    #         nn.ReLU(),
    #         nn.Linear(1024, 512)
    #     )
    
    # def forward(self, x):
    #     x = self.resnet(x)
    #     # forward pass through custom layers 
    #     x = self.layers(x)
    #     return x


In [7]:
class BirdsDetect(nn.Module):
    """Small three-stage CNN classifier for bird-species images.

    Three ``Conv2d -> ReLU -> MaxPool2d`` stages are followed by three fully
    connected layers and a softmax over the class dimension.

    Args:
        num_classes: Size of the output layer. Defaults to 512, the value the
            original code hard-coded.
            NOTE(review): the dataset directory used in this notebook is named
            ``birds_525_species`` -- confirm the real class count (for example
            ``len(dataset.classes)``) and pass it here.
        input_size: ``(height, width)`` of the images the network will receive.
            Defaults to ``(128, 128)``, the size ``createDataset`` resizes to.
            It is used to size the first fully connected layer.
    """

    def __init__(self, num_classes=512, input_size=(128, 128)):
        # Zero-argument super() is bound to this instance. The original
        # ``super(BirdsDetect).__init__()`` never ran nn.Module.__init__, so
        # assigning the first sub-module raised an error.
        super().__init__()

        # Convolutional feature extractor (padding=1 keeps the spatial size).
        # NOTE(review): pool1/pool2 use stride=1 and therefore shrink each
        # spatial dimension by only one pixel. For 128x128 inputs fc1 then
        # receives 256*63*63 features (about 1e9 weights). Consider stride=2
        # if real down-sampling was intended.
        self.conv1 = Conv2d(3, 64, kernel_size=3, stride=1, padding=1)
        self.pool1 = MaxPool2d(kernel_size=2, stride=1)
        self.conv2 = Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.pool2 = MaxPool2d(kernel_size=2, stride=1)
        self.conv3 = Conv2d(128, 256, kernel_size=3, stride=1, padding=1)
        self.pool3 = MaxPool2d(kernel_size=2)

        # ReLU must be a module instance; ``F.relu()`` without an input raises.
        self.relu = ReLU()
        self.flatten = Flatten()
        # Explicit class dimension for (batch, classes) activations.
        # NOTE(review): nn.CrossEntropyLoss expects raw logits; if that loss is
        # used for training, this softmax should be removed from forward().
        self.softmax = Softmax(dim=1)

        # Size fc1 from the real conv output instead of a hand-computed
        # constant (the original 32*32*256 did not match a 128x128 input).
        flat_features = self._count_flat_features(input_size)
        self.fc1 = Linear(flat_features, 1024)
        self.fc2 = Linear(1024, 2048)
        # fc3 consumes fc2's 2048 outputs (the original declared 1024 here);
        # its output size is the number of bird species being categorized.
        self.fc3 = Linear(2048, num_classes)

    def _features(self, x):
        """Run the conv/pool stages and flatten everything but the batch dim."""
        x = self.pool1(self.relu(self.conv1(x)))
        x = self.pool2(self.relu(self.conv2(x)))
        x = self.pool3(self.relu(self.conv3(x)))
        return self.flatten(x)

    def _count_flat_features(self, input_size):
        """Return the flattened feature count for one image of ``input_size``."""
        height, width = input_size
        with torch.no_grad():
            probe = torch.zeros(1, 3, height, width)
            return self._features(probe).shape[1]

    def forward(self, x):
        """Map a ``(batch, 3, H, W)`` image batch to per-class probabilities.

        ``H`` and ``W`` must equal the ``input_size`` given at construction,
        otherwise the flattened features will not fit ``fc1``.
        """
        x = self._features(x)
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        x = self.softmax(x)
        return x


In [9]:
# Notebook configuration. These are defined outside the try block so later
# cells can still reference them when Google Drive cannot be mounted; a bad
# path then fails at dataset creation instead of as a NameError.
DATA_DIR_PATH = "/content/drive/MyDrive/DATA/birdsDatasets/birds_525_species/"
BATCH_SIZE = 64

# Mount Google Drive (only possible on Colab); a failure is reported, not raised.
try:
    from google.colab import drive
    drive.mount('/content/drive')
except Exception as e:
    print(f"failed to mount google drive with error : {e}")

Mounted at /content/drive


In [10]:
def visualizeImages(dataloader:DataLoader, n_columns:int = 5):
    """Plot the first batch of ``dataloader`` as a grid of labelled images.

    Args:
        dataloader: Iterable whose first item is an ``(images, labels)`` pair.
            Each image is permuted with ``(1, 2, 0)`` before plotting, so it is
            assumed to be channel-first (C, H, W) -- TODO confirm for datasets
            other than the one built by ``createDataset``.
        n_columns: Number of subplot columns; the row count is derived from
            the batch size.

    Raises:
        StopIteration: If ``dataloader`` yields no batches.
    """
    # The built-in next() works on any iterator; the ``.next()`` method the
    # original called is not part of the Python 3 iterator protocol.
    images, labels = next(iter(dataloader))
    fig = plt.figure(figsize = (10, 10))
    # Ceiling division: enough rows to hold every image in the batch.
    n_rows = (len(images) + n_columns - 1) // n_columns
    for index, image in enumerate(images):
        ax = fig.add_subplot(n_rows, n_columns, index + 1)
        ax.imshow(image.permute(1, 2, 0))
        ax.set_title(f"Label: {labels[index]}")
        ax.axis("off")
    plt.show()

def showImageGrid(images, n_rows = 8, padding = 2, normalize =False):
    """Tile a batch of images into one grid and display it with matplotlib.

    Args:
        images: Batch of images handed to ``torchvision.utils.make_grid``.
        n_rows: Forwarded as ``make_grid``'s ``nrow`` argument. Despite the
            name, torchvision documents ``nrow`` as the number of images
            displayed in each row of the grid.
        padding: Forwarded to ``make_grid`` as ``padding``.
        normalize: Forwarded to ``make_grid`` as ``normalize``.
    """
    grid = make_grid(images, nrow=n_rows, padding=padding, normalize=normalize)
    # Convert the grid tensor to a channel-last numpy array for display with
    # matplotlib. detach()/cpu() make this work for tensors that track
    # gradients or live on an accelerator, where a bare .numpy() raises.
    np_grid = grid.detach().cpu().permute(1, 2, 0).numpy()
    # Display using matplotlib.
    plt.figure(figsize = (8, 8))
    plt.imshow(np_grid)
    plt.show()


In [19]:
def createDataLoader(root, batch_size, shuffle = True):
    """Build a ``DataLoader`` over the image dataset found at ``root``.

    Args:
        root: Directory passed on to ``createDataset``.
        batch_size: Number of samples per batch.
        shuffle: Whether the loader should shuffle the dataset.

    Returns:
        A ``DataLoader`` wrapping the dataset returned by ``createDataset``.
    """
    dataset = createDataset(root)
    # Honour the caller's arguments: the original ignored both parameters,
    # reading the global BATCH_SIZE and always shuffling.
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)

def createDataset(root):
    """Return an ``ImageFolder`` dataset rooted at ``root``.

    Every image goes through a two-step transform pipeline: a resize to
    128x128 followed by ``ToTensor``.
    """
    resize_step = transforms.Resize((128, 128))
    tensor_step = transforms.ToTensor()
    pipeline = transforms.Compose([resize_step, tensor_step])
    return ImageFolder(root, transform=pipeline)

In [None]:
# Request shuffling for the training split only; the evaluation splits ask for
# a fixed order so that their batches are meant to be repeatable between runs.
train_dataloader = createDataLoader(DATA_DIR_PATH + "train", BATCH_SIZE, shuffle=True)
test_dataloader = createDataLoader(DATA_DIR_PATH + "test", BATCH_SIZE, shuffle=False)
valid_dataloader = createDataLoader(DATA_DIR_PATH + "valid", BATCH_SIZE, shuffle=False)

# Quick visual sanity check of one training batch.
visualizeImages(train_dataloader)