# iWildCam 2019
### Deep Learning Project

This notebook is inspired by https://www.kaggle.com/xhlulu/cnn-baseline-iwildcam-2019, https://www.kaggle.com/xhlulu/reducing-image-sizes-to-32x32 and https://github.com/cfotache/pytorch_objectdetecttrack.

In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, accuracy_score
import seaborn as sns
import collections

import torch
import torchvision
from torch.utils.data.dataloader import DataLoader
from torch.utils.data import TensorDataset, random_split

import torch.nn as nn
import torch.optim as optim

In [2]:
# Run on the first GPU when CUDA is usable, otherwise fall back to the CPU
CUDA = torch.cuda.is_available()
if CUDA:
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print(device)

cuda:0


## 1. Preprocess data
* Load the 32x32 dataset produced with https://www.kaggle.com/xhlulu/reducing-image-sizes-to-32x32
* Change the dimension of the images from [32,32,3] to [3,32,32] (for Darknet model)
* Save data

In [3]:
# Switch for the data-loading cell: when True the channel-first arrays are rebuilt
# from the 32x32 dataset and saved; when False the previously saved arrays are loaded.
preprocess = False

In [4]:
def change_array_dimension(images, labels, num_images, include_empty_images=True):
    """
    Change dimension of images from [32,32,3] to [3,32,32]

    Args:
    images                target images to be converted to different dimension,
                          laid out as [image, height, width, channel]
    labels                one class label per image (label 0 marks an empty image);
                          may be empty when no labels are available
    num_images            total number of images included to result
    include_empty_images  if True, empty images are included to result

    Returns:
    images_temp           array laid out as [image, channel, height, width]
    labels_temp           labels belonging to the returned images. When empty images
                          are included and fewer than num_images labels were given,
                          an empty list is returned.

    Raises:
    IndexError            if num_images is larger than the number of images
    """
    images = np.asarray(images)
    if num_images > len(images):
        raise IndexError("num_images ({}) exceeds the number of images ({})".format(
            num_images, len(images)))

    if include_empty_images:
        selected = images[:num_images]
        labels_temp = labels[:num_images] if len(labels) >= num_images else []
    else:
        # Only images that have a label can be shown to be non-empty, so the
        # selection is limited to the labelled prefix. This also avoids reading
        # labels[i] past the end of the label array.
        label_array = np.asarray(labels)[:num_images]
        keep = np.flatnonzero(label_array != 0)
        selected = images[keep]
        labels_temp = label_array[keep]

    # Move the first three channels in front of the spatial axes in one step;
    # ascontiguousarray turns the transposed view into an independent copy.
    images_temp = np.ascontiguousarray(np.transpose(selected[..., :3], (0, 3, 1, 2)))

    return images_temp, labels_temp

In [5]:
#### Loading the 32x32 dataset ####
# Either rebuild the channel-first arrays from the 32x32 dataset and save them
# (preprocess is True), or load the arrays saved by an earlier preprocessing run.
if preprocess:
    # The data, split between train and test sets:
    x_train = np.load('./input/preprocess/reducing-image-sizes-to-32x32/X_train.npy')
    x_test = np.load('./input/preprocess/reducing-image-sizes-to-32x32/X_test.npy')
    y_train = np.load('./input/preprocess/reducing-image-sizes-to-32x32/y_train.npy')

    # Convert the images to float and scale it to a range of 0 to 1
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    x_train /= 255.
    x_test /= 255.

    # Convert y_train to one numeric value per sample: the index of the largest
    # entry in each row (presumably the rows are one-hot encoded - confirm)
    y_train =  y_train.argmax(axis=1)
        
    # Convert dimensions
    x_train, y_train = change_array_dimension(x_train, y_train, len(y_train))
    # No labels are passed for the test images, so y_test comes back as an empty list
    x_test, y_test = change_array_dimension(x_test, [], x_test.shape[0])
    
    # Save data so later runs can skip the preprocessing
    # NOTE(review): the target directory is not created here - it presumably has to exist already
    np.save('./input/preprocess/changing-image-dimensions-to-3x32x32/x_train.npy', x_train)
    np.save('./input/preprocess/changing-image-dimensions-to-3x32x32/x_test.npy', x_test)
    np.save('./input/preprocess/changing-image-dimensions-to-3x32x32/y_train.npy', y_train)
else:
    # The data, split between train and test sets:
    # (y_test is not defined in this branch)
    x_train = np.load('./input/preprocess/changing-image-dimensions-to-3x32x32/x_train.npy')
    x_test = np.load('./input/preprocess/changing-image-dimensions-to-3x32x32/x_test.npy')
    y_train = np.load('./input/preprocess/changing-image-dimensions-to-3x32x32/y_train.npy')



In [6]:
# Replace the y_train loaded in the previous cell with the category_id column of train.csv.
# NOTE(review): y_train becomes a pandas Series of category ids (the class spread printed
# further down goes up to 22), while num_classes is later derived from the number of
# distinct values. Confirm that this is the label encoding the model is meant to use.
df_data = pd.read_csv('./input/train.csv')
y_train = df_data['category_id']

In [7]:
# Report the dataset sizes and the number of training samples per class
print(len(x_train), 'train samples')
print(len(x_test), 'test samples')
print("Class spread: ", collections.Counter(y_train))

196299 train samples
153730 test samples
Class spread:  Counter({0: 131457, 19: 14106, 13: 8623, 11: 7209, 8: 6938, 1: 6102, 16: 5975, 17: 4759, 3: 3398, 18: 3035, 4: 2210, 14: 1361, 10: 1093, 22: 33})


...

In [None]:
# Balance the training set: every class ends up with exactly max_count samples.
max_count = 3000

# Work on a plain array so that all indexing below is positional, whatever
# container (ndarray or pandas Series) y_train currently is.
labels_array = np.asarray(y_train)

all_indices = []
for class_id, class_count in collections.Counter(labels_array).most_common():
    # Positions of the current class (always 1-D, even for a single sample)
    indices = np.flatnonzero(labels_array == class_id)

    # Up- or down-sample the current class
    if class_count >= max_count:  # Down-sample, take unique indices
        random_indices = np.random.choice(indices, max_count, replace=False)
    else:  # Up-sample, take every index once + randomly drawn duplicates
        duplicates = np.random.choice(indices, max_count - class_count)
        random_indices = np.concatenate([indices, duplicates])

    # Add the selected indices of the class to list
    all_indices.extend(random_indices)

# Shuffle the order of the images
all_indices = np.random.permutation(all_indices)

y_train_len = len(y_train)
y_train2 = labels_array[all_indices]
x_train2 = x_train[all_indices]

print("Reduced the amount of training samples from {} to {}".format(y_train_len, len(y_train2)))
print("Class spread: ",collections.Counter(y_train2).most_common())

In [8]:
# Hold out 10 % of the training data for validation and train on the other 90 %

# Sizes of the two subsets
train_size = int(0.9 * len(x_train))
validation_size = len(x_train) - train_size

# Wrap the arrays in tensors and pair them up as a torch dataset
x_train_tensor = torch.FloatTensor(x_train)
y_train_tensor = torch.LongTensor(y_train)
dataset = TensorDataset(x_train_tensor, y_train_tensor)

# Randomly assign the samples to the two subsets (no seed is set here, so the
# assignment differs between runs)
train_dataset, validation_dataset = random_split(dataset, [train_size, validation_size])
print("Train on {} samples, validate on {} samples".format(train_size, validation_size))

Train on 176669 samples, validate on 19630 samples


## 2. Creating the Model

<img src="darknet-architecture.JPG" width=300 style="float: right;">

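The architecture is based on Darknet-53 from the YOLOv3 paper: https://pjreddie.com/media/files/papers/YOLOv3.pdf

Parts of the model definition are copied from the Darknet code at https://github.com/cfotache/pytorch_objectdetecttrack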

In [9]:
class Darknet(nn.Module):
    """
    Darknet-style convolutional classifier that returns log-probabilities per class.

    Stride-2 convolutions (layers 2, 4, 6, 8, 10) halve the feature map five times.
    Between them, 1x1 -> 3x3 bottleneck blocks (layers 3, 5, 7, 9, 11) are applied
    1, 2, 8, 8 and 4 times in forward(). A repeated block is the same module called
    several times, so its weights are shared between repetitions, and there are no
    skip connections. Attribute names and the order of the sub-modules are part of
    the state_dict keys and must stay as they are for saved checkpoints to load.
    """

    def __init__(self, num_classes, img_size, use_leakyrelu=True):
        """
        Args:
        num_classes       number of classes used in Softmax
        img_size          height/width of the (square) input images; also the number
                          of pooled features fed to the linear head
        use_leakyrelu     if True LeakyReLU is used instead of ReLU
        """
        super(Darknet, self).__init__()
        self.img_size = img_size

        self.layer1 = self._conv_bn(3, 32, stride=1)
        self.layer2 = self._conv_bn(32, 64, stride=2)
        # Layer 3 x1
        self.layer3 = self._bottleneck(64, use_leakyrelu)
        self.layer4 = self._conv_bn(64, 128, stride=2)
        # Layer 5 x2
        self.layer5 = self._bottleneck(128, use_leakyrelu)
        self.layer6 = self._conv_bn(128, 256, stride=2)
        # Layer 7 x8
        self.layer7 = self._bottleneck(256, use_leakyrelu)
        self.layer8 = self._conv_bn(256, 512, stride=2)
        # Layer 9 x8
        self.layer9 = self._bottleneck(512, use_leakyrelu)
        self.layer10 = self._conv_bn(512, 1024, stride=2)
        # Layer 11 x4
        self.layer11 = self._bottleneck(1024, use_leakyrelu)

        # forward() reshapes the features to [batch, img_size, img_size, 1]. The window
        # therefore spans the whole third axis and a single column, which gives one
        # average per group. A square img_size x img_size window does not fit on a
        # width-1 map and is rejected by current PyTorch.
        # NOTE(review): presumably the saved checkpoints were trained on a PyTorch build
        # that accepted the square window and averaged the same img_size values - confirm.
        self.layer12 = nn.AvgPool2d(kernel_size=(img_size, 1))
        self.layer13 = nn.Sequential(
            nn.Linear(img_size, 1000),
            nn.Linear(1000, num_classes))
        # Normalise over the class axis (dim=1); dim=0 would normalise across the
        # samples of a batch and make each prediction depend on its batch mates.
        self.layer14 = nn.LogSoftmax(dim=1)

    @staticmethod
    def _conv_bn(in_channels, out_channels, stride):
        """3x3 convolution without bias followed by batch normalisation."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=(3, 3), stride=(stride, stride),
                      padding=(1, 1), bias=False),
            nn.BatchNorm2d(out_channels, eps=1e-05, momentum=0.1, affine=True,
                           track_running_stats=True))

    @staticmethod
    def _bottleneck(channels, use_leakyrelu):
        """1x1 convolution to channels/2, 3x3 convolution back to channels, then the activation."""
        reduced = channels // 2
        return nn.Sequential(
            nn.Conv2d(channels, reduced, kernel_size=(1, 1), stride=(1, 1), bias=False),
            nn.BatchNorm2d(reduced, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
            nn.Conv2d(reduced, channels, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False),
            nn.BatchNorm2d(channels, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
            nn.LeakyReLU(negative_slope=0.1) if use_leakyrelu else nn.ReLU())

    def forward(self, x):
        """
        Args:
        x    batch of images shaped [batch, 3, img_size, img_size]

        Returns log-probabilities shaped [batch, num_classes].
        """
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        for _ in range(2):
            x = self.layer5(x)
        x = self.layer6(x)
        for _ in range(8):
            x = self.layer7(x)
        x = self.layer8(x)
        for _ in range(8):
            x = self.layer9(x)
        x = self.layer10(x)
        for _ in range(4):
            x = self.layer11(x)

        # After five halvings the map holds 1024 * (img_size / 32)^2 = img_size^2
        # values per sample; split them into img_size groups of img_size values.
        x = x.reshape(x.shape[0], self.img_size, self.img_size, 1)
        # Average each group and flatten to [batch, img_size]. flatten(1) keeps the
        # batch axis even for a single-sample batch, which squeeze() would drop.
        x = self.layer12(x).flatten(1)
        x = self.layer13(x)
        x = self.layer14(x)
        return x

In [12]:
img_size = 32

# Names of the categories
classes = [
    'empty', 'deer', 'moose', 'squirrel', 'rodent', 'small_mammal', 'elk',
    'pronghorn_antelope', 'rabbit', 'bighorn_sheep', 'fox', 'coyote',
    'black_bear', 'raccoon', 'skunk', 'wolf', 'bobcat', 'cat', 'dog',
    'opossum', 'bison', 'mountain_goat', 'mountain_lion',
]

# Number of distinct labels that occur in the training data, which can be smaller
# than len(classes).
# NOTE(review): the model emits indices 0..num_classes-1; if y_train holds raw
# category ids, ids >= num_classes cannot be represented - confirm the label encoding.
num_classes = len(pd.Series(y_train).unique())
print("Number of classes: ", num_classes)

Number of classes:  14


In [None]:
# Build an untrained model and print its layer structure
model = Darknet(num_classes=num_classes, img_size=img_size)
print(model)

## 3. Training the Model

In [13]:
# When True the model is trained below; when False saved weights are loaded instead
train = False

# Batched loaders for the training and validation subsets
batch_size = 16  # 64
train_dataloader = DataLoader(dataset=train_dataset, batch_size=batch_size)
validation_dataloader = DataLoader(dataset=validation_dataset, batch_size=batch_size)

In [14]:
def get_predictions(model, testloader):
    """
    Predict the class of every sample in testloader

    Args:
    model              model used for predicting
    testloader         batched dataset for predicting

    Returns:
    predictions        array with the predicted class index per sample
    real_values        array with the label that testloader supplied per sample
    """
    predictions = []
    real_values = []

    # Predict in eval mode, then put the model back into the mode it was in, so
    # that calling this between training epochs does not leave it in eval mode.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for x, y in testloader:
                outputs = model(x.to(device))
                # Index of the largest output per sample
                predicted = outputs.argmax(dim=1)
                predictions.extend(predicted.cpu().numpy())
                # Labels are only collected, so they never need to be on the device
                real_values.extend(y.cpu().numpy())
    finally:
        model.train(was_training)

    return np.array(predictions), np.array(real_values)

def get_accuracy_score(model, testloader):
    """
    Share of samples in testloader whose predicted class equals the label

    Args:
    model              model used for predicting
    testloader         batched dataset for predicting and validating
    """
    predicted, expected = get_predictions(model, testloader)
    score = accuracy_score(predicted, expected)
    return score

def train_model(model,dataloader,testloader,n_epochs,use_SGD_optimizer=True, verbose=True):
    """
    Train model with negative log-likelihood loss

    Args:
    model              model used for training; expected to return log-probabilities
    dataloader         batched dataset for training
    testloader         batched dataset for validating
    n_epochs           number of epochs used in training
    use_SGD_optimizer  if True, SGD optimizer is used for training. Otherwise Adam.
    verbose            if True, statistics are printed and a checkpoint
                       'darknet_linear1000_<epoch>_epochs.pth' is saved after every epoch
    """
    model.train()
    criterion = nn.NLLLoss()
    if use_SGD_optimizer:
        optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
    else:
        optimizer = optim.Adam(model.parameters(), lr=0.001)

    for epoch in range(n_epochs):
        # The per-epoch evaluation below may leave the model in eval mode, so
        # training mode is re-enabled at the start of every epoch.
        model.train()
        running_loss = 0.0

        for x, y in dataloader:
            x, y = x.to(device), y.to(device)
            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(x)
            loss = criterion(outputs, y)
            loss.backward()
            optimizer.step()

            # get statistics
            running_loss += loss.item()

        # print statistics per epoch
        if verbose:
            training_accuracy = get_accuracy_score(model,dataloader)
            validation_accuracy = get_accuracy_score(model,testloader)
            # max(..., 1) keeps an empty dataloader from dividing by zero
            mean_loss = running_loss / max(len(dataloader), 1)
            print("[{}] loss: {:.3f}, training accuracy: {:.3f}, validation accuracy: {:.3f}".format(
                        epoch+1, mean_loss, training_accuracy, validation_accuracy))
            torch.save(model.state_dict(), 'darknet_linear1000_{}_epochs.pth'.format(epoch+1))

    if verbose: print('Finished Training')

In [15]:
# Create the model, then either train it or load previously trained weights
model = Darknet(num_classes,img_size)

if train:
    model.to(device)

    # Train the model
    num_epochs = 30
    train_model(model,train_dataloader,validation_dataloader,num_epochs)

    # Save the model
    torch.save(model.state_dict(), 'darknet.pth')
else:
    # Build the path with os.path.join: the former literal relied on backslashes
    # that are invalid escape sequences and only form a path on Windows.
    checkpoint_path = os.path.join('.', 'prediction-models', 'darknet_16_sub5.pth')

    # Load the weights onto the CPU first, then move the whole model to the device
    model.load_state_dict(torch.load(checkpoint_path, map_location='cpu'))
    model.to(device)
    # The loaded model is only used for prediction from here on
    model.eval()

## 4. Evaluating model

In [None]:
# Predict on both subsets with the current model
training_predictions, training_real_values = get_predictions(model, train_dataloader)
validation_predictions, validation_real_values = get_predictions(model, validation_dataloader)

# Share of correctly predicted samples per subset
print("Training accuracy: {}, Validation accuracy: {}".format(
    accuracy_score(training_predictions, training_real_values),
    accuracy_score(validation_predictions, validation_real_values),
))

In [None]:
def create_confusion_matrix(y_test, y_pred, classifier_name='Darknet-53', classes=None):
    """
    Plot the confusion matrix of true labels against predictions as a heatmap

    Args:
    y_test           true labels
    y_pred           predicted labels
    classifier_name  name shown in the title of the plot
    classes          labels to include, in plotting order. When None or empty,
                     every label that occurs in y_test or y_pred is used.
    """
    if classes is None or len(classes) == 0:
        labels = np.unique(np.concatenate([np.asarray(y_test).ravel(),
                                           np.asarray(y_pred).ravel()]))
    else:
        labels = list(classes)

    c_matrix = confusion_matrix(y_test, y_pred, labels=labels)
    fig, ax = plt.subplots(1, figsize=(10, 10))
    ax.set_title("{} confusion matrix".format(classifier_name))
    # Draw on the axes created above and hand the labels to seaborn, so that every
    # row and column gets exactly one tick label.
    sns.heatmap(c_matrix, cmap='Blues', annot=True, fmt='g', cbar=False,
                xticklabels=labels, yticklabels=labels, ax=ax)
    ax.set_xlabel('Predictions')
    ax.set_ylabel('True labels')
    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",rotation_mode="anchor")
    plt.setp(ax.get_yticklabels(), rotation=0, ha="right",rotation_mode="anchor")
    plt.show()

In [None]:
# Confusion matrix over the class indices 0 .. num_classes-1 for the training subset
print("Confusion matrix for training images")
create_confusion_matrix(training_real_values, training_predictions, classes=range(num_classes))

In [None]:
# Confusion matrix over the class indices 0 .. num_classes-1 for the validation subset
print("Confusion matrix for validation images")
create_confusion_matrix(validation_real_values, validation_predictions, classes=range(num_classes))

## 5. Predicting labels for submission

In [17]:
# get_predictions expects (x, y) pairs, so the test images are paired with
# all-zero placeholder labels
num_images = x_test.shape[0]
x_test_tensor = torch.FloatTensor(x_test)
y_test_tensor_dummy = torch.zeros(num_images, dtype=torch.long)
dataset = TensorDataset(x_test_tensor, y_test_tensor_dummy)
dataloader = DataLoader(dataset, batch_size=batch_size)

# Only the predictions are of interest; real_values holds the placeholder labels
predictions, real_values = get_predictions(model, dataloader)

# Fill the predictions into the sample submission and write it out
submission_df = pd.read_csv('./input/iwildcam-2019-fgvc6/sample_submission.csv')
submission_df['Predicted'] = predictions
print(submission_df.shape)
submission_df.head()

submission_df.to_csv('darknet_submission5.csv', index=False)

(153730, 2)
