In [3]:
import os, cv2, random
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss
from sklearn.preprocessing import LabelEncoder

import matplotlib.pyplot as plt
from matplotlib import ticker
import seaborn as sns
%matplotlib inline 

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data import sampler

import torchvision.datasets as dset
import torchvision.transforms as T

In [10]:
# Root folder of the training images; get_images() lists one sub-folder per class label here.
TRAIN_DIR = './dataset/the-nature-conservancy-fisheries-monitoring/train/'
# Test-image folder; not referenced by any of the cells shown here.
TEST_DIR = './dataset/the-nature-conservancy-fisheries-monitoring/test_stg1/'
# Class labels; each one is also used as a sub-folder name under TRAIN_DIR.
FISH_CLASSES = ['ALB', 'BET', 'DOL', 'LAG', 'NoF', 'OTHER', 'SHARK', 'YFT']
ROWS = 90  # height (pixels) every image is resized to by read_image()
COLS = 160  # width (pixels) every image is resized to by read_image()
CHANNELS = 3  # colour channels per image (images are read with cv2.IMREAD_COLOR)

### Loading and Preprocessing Data

In [None]:
# This part is from https://www.kaggle.com/jeffd23/deep-learning-in-the-deep-blue-lb-1-279

In [8]:
def get_images(fish):
    """List the training images of one class.

    Args:
        fish: class label; also the name of the sub-folder of TRAIN_DIR to list.

    Returns:
        A list of paths relative to TRAIN_DIR, each of the form '<fish>/<file name>',
        sorted by file name.
    """
    fish_dir = os.path.join(TRAIN_DIR, fish)
    # os.listdir() returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore any seeded split computed from it) reproducible.
    return [fish + '/' + name for name in sorted(os.listdir(fish_dir))]

def read_image(src):
    """Read one image from disk and resize it to ROWS x COLS.

    Args:
        src: path of the image file.

    Returns:
        The resized image as returned by cv2.resize (height ROWS, width COLS).

    Raises:
        IOError: if OpenCV cannot read or decode the file.
    """
    im = cv2.imread(src, cv2.IMREAD_COLOR)
    # cv2.imread signals failure by returning None instead of raising; fail here
    # with the offending path rather than with an opaque error inside cv2.resize.
    if im is None:
        raise IOError('Could not read image: {}'.format(src))
    # cv2.resize takes the target size as (width, height).
    return cv2.resize(im, (COLS, ROWS), interpolation=cv2.INTER_CUBIC)

In [11]:
# Collect the relative path of every training image together with its class label.
files = []
labels = []

for species in FISH_CLASSES:
    species_files = get_images(species)
    files += species_files
    # One label per image of this class.
    labels += [species] * len(species_files)
    print("{0} photos of {1}".format(len(species_files), species))

y_all = np.array(labels)

1719 photos of ALB
200 photos of BET
117 photos of DOL
67 photos of LAG
465 photos of NoF
299 photos of OTHER
176 photos of SHARK
734 photos of YFT


In [13]:
# Sanity check: one label was appended per collected file path.
y_all.shape

(3777,)

In [31]:
# Load every training image into a single uint8 array of shape
# (number of files, ROWS, COLS, CHANNELS).
n_files = len(files)
X_all = np.empty((n_files, ROWS, COLS, CHANNELS), dtype=np.uint8)

for idx, rel_path in enumerate(files):
    X_all[idx] = read_image(TRAIN_DIR + rel_path)
    # Progress report every 1000 images.
    if idx % 1000 == 0:
        print('Processed {} of {}'.format(idx, n_files))

print(X_all.shape)

Processed 0 of 3777
Processed 1000 of 3777
Processed 2000 of 3777
Processed 3000 of 3777
(3777, 90, 160, 3)


In [34]:
# Move the channel axis forward: (N, ROWS, COLS, CHANNELS) -> (N, CHANNELS, ROWS, COLS).
# Guarded on the channels-last shape so re-running this cell does not transpose the
# already converted array a second time.
if X_all.shape[1:] == (ROWS, COLS, CHANNELS):
    X_all = X_all.transpose([0, 3, 1, 2])

In [35]:
# Confirm the channels-first layout produced by the transpose above.
X_all.shape

(3777, 3, 90, 160)

### Splitting the Training Data

In [41]:
def to_categorical(y, num_classes):
    """One-hot encode integer class ids.

    Each id in ``y`` selects the matching row of a ``num_classes`` x ``num_classes``
    identity matrix, so the result has the shape of ``y`` plus a trailing axis of
    length ``num_classes`` and dtype uint8.
    """
    one_hot_rows = np.identity(num_classes, dtype=np.uint8)
    return one_hot_rows[y]

In [42]:
# One-hot encode the labels.
# Only encode while y_all is still the 1-D array of class names, so that re-running
# this cell does not feed the one-hot matrix back into LabelEncoder.
if y_all.ndim == 1:
    y_all = LabelEncoder().fit_transform(y_all)
    # The number of columns follows FISH_CLASSES instead of a hard-coded 8.
    y_all = to_categorical(y_all, len(FISH_CLASSES))

# Hold out 20% of the samples for validation, stratified on the labels; the fixed
# random_state keeps the split repeatable for a given sample order.
X_train, X_valid, y_train, y_valid = train_test_split(X_all, y_all,
                                                    test_size=0.2, random_state=23,
                                                    stratify=y_all)

In [50]:
# Column sums of the one-hot labels = number of training samples per class.
y_train.sum(axis=0)

array([1375,  160,   93,   54,  372,  239,  141,  587], dtype=uint64)

### Model

In [None]:
# Idea to try: add max-pool layers.
# Placeholders; both names are assigned for real after the class definition below.
model = None
optimizer = None

# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

# pass

class ThreeLayerConvNet(nn.Module):
    """Three conv -> ReLU -> 2x2 max-pool stages followed by a linear classifier.

    Args:
        in_channel: number of channels of the input images.
        channel_1: output channels of the first convolution (5x5, padding 2).
        channel_2: output channels of the second convolution (3x3, padding 1).
        channel_3: output channels of the third convolution (3x3, padding 1).
        num_classes: number of scores produced per image.

    The last feature map is adaptively pooled to 4x4 before the linear layer, so
    the network accepts inputs of any spatial size. For 32x32 inputs the map is
    already 4x4 after the three max-pools and the adaptive pool leaves it unchanged.
    """

    def __init__(self, in_channel, channel_1, channel_2, channel_3, num_classes):
        super().__init__()
        ########################################################################
        # TODO: Set up the layers you need for a three-layer ConvNet with the  #
        # architecture defined above.                                          #
        ########################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        # Convolutions keep the spatial size (stride 1, "same" padding).
        self.cn1 = nn.Conv2d(in_channel, channel_1, kernel_size=5, stride=1, padding=2, bias=True)
        nn.init.kaiming_normal_(self.cn1.weight)

        self.cn2 = nn.Conv2d(channel_1, channel_2, kernel_size=3, stride=1, padding=1, bias=True)
        nn.init.kaiming_normal_(self.cn2.weight)

        self.cn3 = nn.Conv2d(channel_2, channel_3, kernel_size=3, stride=1, padding=1, bias=True)
        nn.init.kaiming_normal_(self.cn3.weight)

        # The classifier always sees a channel_3 x 4 x 4 feature map (see adapt_pool).
        self.fc = nn.Linear(channel_3 * 4 * 4, num_classes)
        nn.init.kaiming_normal_(self.fc.weight)

        # NOTE: the batch-norm and dropout layers are registered but forward()
        # does not apply them.
        self.batchnorm_conv1 = nn.BatchNorm2d(channel_1)
        self.batchnorm_conv2 = nn.BatchNorm2d(channel_2)
        # Sized for the output of cn3 (channel_3), not channel_2.
        self.batchnorm_conv3 = nn.BatchNorm2d(channel_3)

        self.dropout1 = nn.Dropout2d(p=0.2)
        self.dropout2 = nn.Dropout2d(p=0.2)
        self.dropout3 = nn.Dropout2d(p=0.2)

        self.pool1 = nn.MaxPool2d(2)
        self.pool2 = nn.MaxPool2d(2)
        self.pool3 = nn.MaxPool2d(2)

        # Brings the final feature map to the fixed 4x4 size that fc expects,
        # whatever the input resolution.
        self.adapt_pool = nn.AdaptiveAvgPool2d((4, 4))

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        ########################################################################
        #                          END OF YOUR CODE                            #       
        ########################################################################

    def forward(self, x):
        """Compute class scores for a batch of images.

        Args:
            x: float tensor of shape (N, in_channel, H, W).

        Returns:
            Tensor of shape (N, num_classes).
        """
        ########################################################################
        # TODO: Implement the forward function for a 3-layer ConvNet. you      #
        # should use the layers you defined in __init__ and specify the        #
        # connectivity of those layers in forward()                            #
        ########################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        # Reach ReLU through nn.functional so no extra import alias is needed.
        relu = nn.functional.relu

        h1_pool = self.pool1(relu(self.cn1(x)))
        h2_pool = self.pool2(relu(self.cn2(h1_pool)))
        h3_pool = self.pool3(relu(self.cn3(h2_pool)))

        features = self.adapt_pool(h3_pool)
        # Flatten everything except the batch axis before the linear layer.
        scores = self.fc(features.reshape(features.size(0), -1))

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        ########################################################################
        #                             END OF YOUR CODE                         #
        ########################################################################
        return scores
    

# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
################################################################################
#                                 END OF YOUR CODE                             
################################################################################


# The classifier emits one score per entry of FISH_CLASSES.
model = ThreeLayerConvNet(in_channel=3, channel_1=64, channel_2=64, channel_3=64,
                          num_classes=len(FISH_CLASSES))
# NOTE(review): learning_rate and train_part34 are not defined in the cells shown
# here — confirm they are defined earlier in the notebook before running this cell.
optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)
# optimizer = optim.Adam(model.parameters(), lr=learning_rate, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False)

# You should get at least 70% accuracy
train_part34(model, optimizer, epochs=10)