# Notebook example using Kaggle GPU

In [1]:
import numpy as np
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms, models
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from torchvision.utils import save_image
from torchvision.models.detection import maskrcnn_resnet50_fpn

import shutil, sys  
from tqdm import tqdm
import PIL.Image as Image


if not os.path.isdir('./experiments'):
    os.makedirs('./experiments')
    
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [2]:
batch_size = 32
epochs = 16
size=(299,299)
interpolation = Image.BICUBIC


data_transforms = {
    'detect' : transforms.ToTensor(),

    'train' : transforms.Compose([
    #Data augmentation
    transforms.Resize(size, interpolation=interpolation),  
    #transforms.RandomResizedCrop(size=256, scale=(0.8, 1.0)),
    transforms.RandomRotation(degrees=15),
    transforms.ColorJitter(),
    transforms.RandomVerticalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
    ]),

    # Without data augmentation for validation
    'val': transforms.Compose([
    transforms.Resize((384, 384)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])]),
    }

  "Argument interpolation should be of type InterpolationMode instead of int. "


# Bird detection and Image cropping

In [3]:
def detect(path):
    """Detects birds in the dataset then crops the images
    """
    maskrcnn = maskrcnn_resnet50_fpn(pretrained=True)
    if torch.cuda.is_available():
        maskrcnn.cuda()
    maskrcnn.eval()
    print("Detector loaded !\n")

    print("Detecting birds ...\n")
    
    def sort_images(loader, folder, mapping):
        name = 0
        for data, target in tqdm(loader, leave=True, position=0):
            results = maskrcnn(data.cuda())
            for e, result in enumerate(results):
                boxes = result['boxes'].tolist()    # Bounding boxes
                labels =  result['labels'].tolist() # Labels
                scores = result['scores'].tolist()  # Probability associated with bounding box

                # Keep only bird labels and boxes (label 16 in COCO)
                only_bird_boxes = np.array([boxes[i] for i in range(len(boxes)) if labels[i] == 16])
                only_birds_scores= np.array([scores[i] for i in range(len(boxes))  if labels[i] == 16])
                
                # if low confidence -> hard image
                if only_bird_boxes.size == 0 or only_birds_scores.max() < 0.85:   
                    pass
                else : 
                    try:
                        i = np.argmax(only_birds_scores)
                        box = only_bird_boxes[i]

                        a, b, c, d = int(box[0]), int(box[1]), int(box[2]), int(box[3])

                        # Crop image on bird
                        cropped = data[e, :, b:d, a:c]
                        save_image(cropped, folder +"/"+mapping[target[e].item()]+"/"+str(name) +".png", format ="png")
                    except ValueError:
                        # Bounding box outside image (very rare)
                        pass
            name += 1

    train_dataset = ImageFolder(path + '/train_images',
                                        transform=data_transforms['detect'])
                                  
    class_to_id = train_dataset.class_to_idx 
    id_to_class = {v: k for k, v in class_to_id.items()}  # la classe ImageFolder assigne automatiquement un label pour chaque nom de classe (class -> idx)
    
    preprocess_train_loader = DataLoader(train_dataset,batch_size=1, 
                                         num_workers=1, shuffle=True)
            
    preprocess_val_loader = DataLoader(
        datasets.ImageFolder(path + '/val_images',
                             transform=data_transforms['detect']), batch_size=1,  
                             num_workers=1)

    sort_images(preprocess_train_loader, path + "/train_images", id_to_class)
    sort_images(preprocess_val_loader, path + "/val_images", id_to_class)

def pil_loader(path):
    # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835)
    with open(path, 'rb') as f:
        with Image.open(f) as img:
            return img.convert('RGB')

def detect_test(path):
    maskrcnn = maskrcnn_resnet50_fpn(pretrained=True)
    if torch.cuda.is_available():
        maskrcnn.cuda()
    maskrcnn.eval()
    print("Detector loaded !\n")

    for f in tqdm(os.listdir(path +'/test_images/mistery_category')):
        if 'jpg' in f:
            data = data_transforms['detect'](pil_loader(path +'/test_images/mistery_category/' + f))
            data = data.view(1, data.size(0), data.size(1), data.size(2)).cuda() 
            
            results = maskrcnn(data.cuda())

            for e, result in enumerate(results):
                boxes = result['boxes'].tolist()    # Bounding boxes
                labels =  result['labels'].tolist() # Labels
                scores = result['scores'].tolist()  # Confidence associated with bounding box

                # Keep only bird labels and boxes (label 16 in COCO)
                only_bird_boxes = np.array([boxes[i] for i in range(len(boxes)) if labels[i] == 16])
                only_birds_scores= np.array([scores[i] for i in range(len(boxes))  if labels[i] == 16])
                
                # if low confidence -> hard image
                if only_bird_boxes.size == 0 or only_birds_scores.max() < 0.85:   
                    shutil.copy(path +'/test_images/mistery_category/'+f, path+'/test_images/hard_test_images')
                else : 
                    try:
                        i = np.argmax(only_birds_scores)
                        box = only_bird_boxes[i]

                        a, b, c, d = int(box[0]), int(box[1]), int(box[2]), int(box[3])

                        # Crop image on bird
                        cropped = data[e, :, b:d, a:c]

                        shutil.copy(path +'/test_images/mistery_category/'+f, path+'/test_images/easy_test_images')
                    except ValueError:
                        # Bounding box outside image (very rare)
                        pass

Since we do not have permission to modify the 'input data' (data provided by the competition administrators), we copy it into our working directory :

In [4]:
!cp -r ../input/mva-recvis-2021/bird_dataset/train_images ./
!cp -r ../input/mva-recvis-2021/bird_dataset/val_images ./
!cp -r ../input/mva-recvis-2021/bird_dataset/test_images ./

In [5]:
#Adding images cropped on birds into our dataset
detect('./.')
detect_test('./.')

Downloading: "https://download.pytorch.org/models/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth" to /root/.cache/torch/hub/checkpoints/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth


  0%|          | 0.00/170M [00:00<?, ?B/s]

Detector loaded !

Detecting birds ...



100%|██████████| 1082/1082 [01:57<00:00,  9.25it/s]
100%|██████████| 103/103 [00:10<00:00,  9.69it/s]


Detector loaded !



100%|██████████| 517/517 [00:43<00:00, 11.90it/s]


# Data Loading & Augmentation

In [6]:
train_loader = torch.utils.data.DataLoader(
    datasets.ImageFolder('./train_images',transform=data_transforms['train']),
    batch_size=batch_size, shuffle=True, num_workers=1)

val_loader = torch.utils.data.DataLoader(
    datasets.ImageFolder('./val_images',transform=data_transforms['val']),
    batch_size=batch_size, shuffle=True, num_workers=1)

test_loader = torch.utils.data.DataLoader(
    datasets.ImageFolder('./test_images',transform=data_transforms['val']),
    batch_size=1, shuffle=False, num_workers=1)

# Define and tune model


In [7]:

def set_parameter_requires_grad(model, feature_extracting):
    """Function to set which layers are being frozen
    """
    if feature_extracting: 
        #We do feature extracting
        for param in model.parameters():
            param.requires_grad = False #freeze
    else :
        #We do finetunig (but we still freeze some layers)
        for name, module in model.named_children():
            if name not in ['layer3','layer4','fc']:
                for param in module.parameters():
                    param.requires_grad = False  #freeze
    
            
#Initialize and Reshape the Networks
def initialize_model(model_name, num_classes, feature_extract, use_pretrained=True):
    # Initialize these variables which will be set in this if statement. Each of these
    #   variables is model specific.
    model_ft = None
    input_size = 0

    if model_name == "resnext":
        """resnext101_32x8d
        """
        model_ft = torchvision.models.resnext101_32x8d(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs, num_classes)
        input_size = 224

    elif model_name == "alexnet":
        """ Alexnet
        """
        model_ft = models.alexnet(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier[6].in_features
        model_ft.classifier[6] = nn.Linear(num_ftrs,num_classes)
        input_size = 224

    elif model_name == "vgg":
        """ VGG11_bn
        """
        model_ft = models.vgg11_bn(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier[6].in_features
        model_ft.classifier[6] = nn.Linear(num_ftrs,num_classes)
        input_size = 224

    elif model_name == "squeezenet":
        """ Squeezenet
        """
        model_ft = models.squeezenet1_0(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        model_ft.classifier[1] = nn.Conv2d(512, num_classes, kernel_size=(1,1), stride=(1,1))
        model_ft.num_classes = num_classes
        input_size = 224

    elif model_name == "densenet":
        """ Densenet169
        """
        model_ft = models.densenet169(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier.in_features
        model_ft.classifier = nn.Linear(num_ftrs, num_classes)
        input_size = 224

    elif model_name == "inception":
        """ Inception v3
        Be careful, expects (299,299) sized images and has auxiliary output
        """
        model_ft = models.inception_v3(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        # Handle the auxilary net
        num_ftrs = model_ft.AuxLogits.fc.in_features
        model_ft.AuxLogits.fc = nn.Linear(num_ftrs, num_classes)
        # Handle the primary net
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs,num_classes)
        input_size = 299

    else:
        print("Invalid model name, exiting...")
        exit()

    return model_ft, input_size


In [8]:
num_classes = 20
feature_extract = False

# Initialize the model for this run
model, input_size = initialize_model("resnext",num_classes, feature_extract , use_pretrained=True)

# Send the model to GPU
model = model.to(device)

Downloading: "https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth" to /root/.cache/torch/hub/checkpoints/resnext101_32x8d-8ba56ff5.pth


  0%|          | 0.00/340M [00:00<?, ?B/s]

In [9]:
# Gather the parameters to be optimized/updated in this run. If we are
#  finetuning we will be updating all parameters. However, if we are
#  doing feature extract method, we will only update the parameters
#  that we have just initialized, i.e. the parameters with requires_grad
#  is True.
params_to_update = []

for param in model.parameters():
    if param.requires_grad == True :
        params_to_update.append(param)      
    
# Observe that all parameters are being optimized
optimizer = optim.SGD(params_to_update, lr=0.001, momentum=0.9)
#optimizer = optim.Adam(params_to_update, lr=0.001)
criterion = torch.nn.CrossEntropyLoss()

# Train

In [10]:
def train(model, epoch):
    model.train()
    for batch_idx, (data, labels) in enumerate(train_loader):
        data, labels = data.to(device), labels.to(device)
        optimizer.zero_grad()
        #forward
        preds = model(data)
        loss = criterion(preds, labels)
        loss.backward()
        optimizer.step()
        if batch_idx % 25 == 0:
            print('[{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.data.item()))


def validation(model):
    model.eval()
    validation_loss = 0
    correct = 0
    with torch.no_grad():
        for data, labels in val_loader:
            data, labels = data.to(device), labels.to(device)
            preds = model(data)
            # sum up batch loss
            validation_loss += criterion(preds, labels).data.item()
            m = nn.Softmax(dim=1)
            probs = m(preds)
            preds_classes = probs.max(1, keepdim=True)[1]
            correct += preds_classes.eq(labels.data.view_as(preds_classes)).sum()
        validation_loss /= len(val_loader.dataset)
    print('\nValidation set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.format(
        validation_loss, correct, len(val_loader.dataset),
        100. * correct / len(val_loader.dataset)))

In [11]:
for epoch in range(1, epochs + 1):
    print("################################################# EPOCH", epoch)
    train(model, epoch) 
    preds = validation(model)
    model_file = 'experiments' + '/model_' + str(epoch) + '.pth'
    torch.save(model.state_dict(), model_file)

################################################# EPOCH 1

Validation set: Average loss: 0.0535, Accuracy: 149/204 (73%)
################################################# EPOCH 2

Validation set: Average loss: 0.0246, Accuracy: 178/204 (87%)
################################################# EPOCH 3

Validation set: Average loss: 0.0183, Accuracy: 181/204 (89%)
################################################# EPOCH 4

Validation set: Average loss: 0.0142, Accuracy: 185/204 (91%)
################################################# EPOCH 5

Validation set: Average loss: 0.0133, Accuracy: 185/204 (91%)
################################################# EPOCH 6

Validation set: Average loss: 0.0123, Accuracy: 187/204 (92%)
################################################# EPOCH 7

Validation set: Average loss: 0.0130, Accuracy: 186/204 (91%)
################################################# EPOCH 8

Validation set: Average loss: 0.0116, Accuracy: 184/204 (90%)
################################

# Test

In [12]:
preds = np.array([])
model.eval()
with torch.no_grad():
    for i, (data, labels) in tqdm(enumerate(test_loader, 0)):
        data, labels = data.to(device), labels.to(device)
        output1 = model(data)
        sm = nn.Softmax(dim=1)(output1)
        pred = sm.max(1, keepdim=True)[1]    
        preds = np.hstack((preds, torch.squeeze(pred).cpu().numpy()))

517it [00:33, 15.47it/s]


In [13]:
f = open("submission.csv", "w")
f.write("Id,Category\n")
for (n,_),p in zip(test_loader.dataset.samples,preds):
    f.write("{},{}\n".format(n.split('/')[-1].split('.')[0], int(p)))
f.close()