Let's break the notebook into separate steps. Feel free to navigate the notebook and comment if you have any suggestions.

Step 0: Import Datasets \
Step 1: Detect Dogs\
Step 2: Create a CNN to Classify Dog Breeds (from Scratch)\
Step 3: Create a CNN to Classify Dog Breeds (using Transfer Learning)\
Step 4: Test

# Initializations
First, we import the libraries the notebook needs. These are the standard imports.


In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models
from torchvision.utils import make_grid

from PIL import Image
from IPython.display import display
import cv2
from PIL import ImageFile
import torchvision.transforms as transforms
ImageFile.LOAD_TRUNCATED_IMAGES = True  # 如果不想花时间把数据集中的破损图片找出来后删除掉, 如果文件时损坏的就跳过。

import glob   
import os
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
%matplotlib inline  
# 直接在python console中生成图像 
import warnings
warnings.filterwarnings('ignore')

We will be using this function mostly everywhere to run our experiments deterministically. Random functions of Numpy and Pandas will behave deterministically after this. To learn more about Deterministic Neural Networks please check out [this notebook](https://www.kaggle.com/bminixhofer/deterministic-neural-networks-using-pytorch)

In [2]:
# Seed every random number generator the notebook relies on.
def seed_everything(seed=1234):
    """Seed Python, NumPy and PyTorch so repeated runs give the same results.

    Args:
        seed: Integer seed applied to every generator.
    """
    random.seed(seed)
    # Read by Python at interpreter start-up, so this only affects processes
    # launched after this point.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not only the current one; ignored without CUDA.
    torch.cuda.manual_seed_all(seed)
    # Ask cuDNN for deterministic kernels and switch off its autotuner, which
    # may otherwise select different algorithms from one run to the next.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


seed_everything(42)

In [None]:
# Shell escape: run `nvidia-smi` to list the NVIDIA GPUs visible to this session.
!nvidia-smi

Have you wondered about why they use 42? Do you want to know about the reason behind 42? Look [Here ](https://en.wikipedia.org/wiki/Phrases_from_The_Hitchhiker%27s_Guide_to_the_Galaxy#Answer_to_the_Ultimate_Question_of_Life.2C_the_Universe_and_Everything_.2842.29):p



In [3]:
# Load the training labels and the list of breed names.
PATH = '../input/dog-breed-identification/'

labels = pd.read_csv(f'{PATH}labels.csv')
print(labels.head())

# Drop the first key of the sample submission and keep the rest as the
# label names (a pandas Index).
sample_keys = pd.read_csv(f'{PATH}sample_submission.csv').keys()
labelnames = sample_keys[1:]
print(len(labelnames))  # 120 classes according to the original note

n_train_files = len(os.listdir(f'{PATH}train'))
print("Train folder has ", n_train_files, 'images which matches with label\'s', len(labels), 'images')

In [4]:
# Scratch cell: build the breed <-> code lookups and inspect the pivoted table.
print(labels.head())
print('--------------------')
# Map every label name to an integer code (and back), in `labelnames` order.
codes = range(len(labelnames))  # range(0, 120) per the original note
breed_to_code = dict(zip(labelnames, codes))
code_to_breed = dict(zip(codes, labelnames))
# Equivalent to: labels['target'] = [breed_to_code[x] for x in labels.breed]
labels['target'] = labels['breed'].apply(lambda x: breed_to_code[x])
print(labels.head())
print('--------------------')
# One row per image id, one column per breed. The arguments are passed by
# keyword because DataFrame.pivot no longer accepts them positionally in
# pandas >= 2.0.
labels_pivot = (
    labels.pivot(index='id', columns='breed', values='target')
    .reset_index()
    .fillna(0)
)
print(labels_pivot.head())
print(labels_pivot.index)
print(labels_pivot.columns)

In [5]:
# Build the label lookups, pivot to one row per image, and split train/valid.
codes = range(len(labelnames))  # range(0, 120) per the original note
breed_to_code = dict(zip(labelnames, codes))
code_to_breed = dict(zip(codes, labelnames))
# Equivalent to: labels['target'] = [breed_to_code[x] for x in labels.breed]
labels['target'] = labels['breed'].apply(lambda x: breed_to_code[x])
print("--------------------------")
print("--------------------------")

# pivot() is a two-dimensional unstack: (index, columns, values). The arguments
# are passed by keyword because pandas >= 2.0 rejects positional ones.
labels_pivot = (
    labels.pivot(index='id', columns='breed', values='target')
    .reset_index()
    .fillna(0)
)

# Sample 85 % of the rows for training; the remaining ids form the validation
# set. A fixed random_state keeps the split identical when the cell is re-run.
train = labels_pivot.sample(frac=0.85, random_state=42)
valid = labels_pivot[~labels_pivot['id'].isin(train['id'])]
assert len(train) + len(valid) == len(labels_pivot), "train/valid split lost or duplicated rows"
print(train.shape, valid.shape)

In [None]:
# Scratch cell: build a separate frame to experiment on.
codes = range(len(labelnames))
breed_to_code_test = dict(zip(labelnames, codes))
# Copy instead of aliasing: with `labels_test = labels` every column added to
# the scratch frame would also be written into the shared `labels` frame.
labels_test = labels.copy()
labels_test['target'] = labels_test['breed'].apply(lambda x: breed_to_code_test[x])

In [None]:
# Scratch cell: rank the ids inside each breed for the first 50 rows and print
# every breed group.
labels_test = labels_test.head(50).copy()
# Rank each id among the ids that share its breed.
labels_test['rank'] = labels_test.groupby('breed')['id'].transform(lambda ids: ids.rank())
# The original cell used `g` without ever defining it; it is the per-breed
# grouping that the loop below walks through.
g = labels_test.groupby('breed')
for name, group in g:
    print(name)
    print(group)
    print()

In [None]:
# 测试程序
# codes1 = range(len(labelnames))
# breed_to_code = dict(zip(labelnames, codes))
# code_to_bread = dict(zip(codes, labelnames))
# for data in breed_to_code:
#     print(data, breed_to_code[data])
# print('-------------------------------')
# # for data in code_to_bread:
# #     print(data, code_to_breed[data])
# labels['target'] = [breed_to_code[x] for x in labels.breed]
# print(len(labels['target']))head 10222
# labels['rank'] = labels.groupby('breed').rank()['id']
# print(labels['rank'])


In [None]:
# Show the first five rows of the label table.
print(labels.iloc[:5])

In [None]:
# Preview the first rows of `train`, the frame sampled from `labels_pivot`.
train.head()

Use the code cells below to write [data loaders](http://pytorch.org/docs/stable/data.html#torch.utils.data.DataLoader) for the training and validation sets of dog images (both are read from the `train` folder and separated by the split created above; a loader for the test images can be added in the same way).  You may find [this documentation on custom datasets](https://pytorch.org/vision/stable/datasets.html) to be a useful resource.  If you are interested in augmenting your training and/or validation data, check out the wide variety of [transforms](https://pytorch.org/vision/stable/transforms.html)!

In [None]:
# Image transformations
# Channel-wise mean / std used to normalise every image tensor.
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]


def _eval_transform():
    """Return the deterministic pipeline shared by the validation and test sets."""
    return transforms.Compose([
        transforms.Resize(size=256),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        transforms.Normalize(_NORM_MEAN, _NORM_STD),
    ])


img_transform = {
    'valid': _eval_transform(),
    'train': transforms.Compose([
        transforms.RandomResizedCrop(size=256),
        transforms.RandomRotation(degrees=30),
        # ColorJitter() with its default arguments changes nothing, so give it
        # non-zero ranges to actually vary brightness, contrast, saturation
        # and hue.
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.05),
        transforms.RandomHorizontalFlip(),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        transforms.Normalize(_NORM_MEAN, _NORM_STD),
    ]),
    'test': _eval_transform(),
}

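I resize the pictures to 256 pixels and then crop them to 224×224 (a random resized crop followed by a center crop for training, a plain resize and center crop for validation and test) to avoid squashed images. After conversion to a tensor, every image is normalized using ImageNet's mean and standard deviation. This applies to the train, validation and test sets.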

For training images, I used data augmentation, which includes a random rotation of up to 30 degrees and a random horizontal flip.

In [None]:
class DogBreedDataset(torch.utils.data.Dataset):
    """Dataset of dog images described by a one-row-per-image label table.

    The first column of ``label`` holds the image id (the file name without the
    ``.jpg`` extension). The remaining columns hold one value per class, and
    the class index of a row is the position of its largest value.
    """

    def __init__(self, img_dir, label, transform):
        """Store the configuration and resolve ids and class indices up front.

        Args:
            img_dir: Directory that contains the ``<id>.jpg`` files.
            label: DataFrame laid out as described in the class docstring.
            transform: Callable applied to each PIL image, or a falsy value to
                return the image unchanged.

        Raises:
            ValueError: If ``label`` is None; the dataset cannot report a
                length or a target without it.
        """
        if label is None:
            raise ValueError('DogBreedDataset requires a label DataFrame')
        self.img_dir = img_dir
        self.transform = transform
        self.label = label
        # Computed once here rather than slicing and arg-maxing a DataFrame
        # row on every __getitem__ call.
        self.ids = label.iloc[:, 0].tolist()
        self.targets = label.iloc[:, 1:].to_numpy(dtype='float').argmax(axis=1)

    def __len__(self):
        """Return the total number of samples."""
        return len(self.ids)

    def __getitem__(self, index):
        """Return ``[image, class_index]`` for the sample at ``index``."""
        fullname = os.path.join(self.img_dir, '{}.jpg'.format(self.ids[index]))
        # Convert to RGB so every image has exactly three channels (a
        # three-channel Normalize would fail on grayscale or RGBA files), and
        # close the file handle as soon as the pixels are loaded.
        with Image.open(fullname) as img:
            image = img.convert('RGB')
        if self.transform:
            image = self.transform(image)
        return [image, int(self.targets[index])]
        

In [None]:
batch_size = 64
num_workers = 4
print(train.shape)
print(valid.shape)
# Both splits read from the same image folder; only the label table and the
# transform pipeline differ.
train_img = DogBreedDataset(PATH+'train/', train, transform=img_transform['train'])
valid_img = DogBreedDataset(PATH+'train/', valid, transform=img_transform['valid'])

# `num_workers` was defined but never handed to the loaders, so all image
# decoding ran in the main process. Set it to 0 if worker processes are not
# usable in your environment.
dataloaders = {
    'train': torch.utils.data.DataLoader(
        train_img, batch_size=batch_size, shuffle=True, num_workers=num_workers),
    'valid': torch.utils.data.DataLoader(
        valid_img, batch_size=batch_size, shuffle=False, num_workers=num_workers),
}

In [None]:
# Record whether a CUDA device is usable, then report it together with the
# installed PyTorch version.
use_cuda = torch.cuda.is_available()
for info in (use_cuda, torch.__version__):
    print(info)

In [None]:
def imshow(axis, inp):
    """Undo the mean/std normalisation of one image tensor and draw it.

    Args:
        axis: Object exposing ``imshow(array)``, e.g. a Matplotlib axes.
        inp: Image tensor laid out as (channels, height, width) with three
            channels, normalised with the mean and std used below.
    """
    # detach()/cpu() let the helper accept tensors that live on the GPU or
    # that carry gradients; a plain .numpy() raises for both.
    inp = inp.detach().cpu().numpy().transpose((1, 2, 0))
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    # Denormalising can overshoot the displayable range slightly, so clip
    # the result to [0, 1] before handing it to the axes.
    inp = np.clip(std * inp + mean, 0.0, 1.0)
    axis.imshow(inp)

In [None]:
from mpl_toolkits.axes_grid1 import ImageGrid

# Draw one training batch as a grid of denormalised images.
img, label = next(iter(dataloaders['train']))
print(img.size(), label.size())

# Size the grid from the batch that was actually drawn; a fixed 8x8 grid
# raises IndexError as soon as a batch holds more than 64 images.
n_images = img.size(0)
n_cols = 8
n_rows = max(1, -(-n_images // n_cols))  # ceiling division

fig = plt.figure(1, figsize=(16, 12))
grid = ImageGrid(fig, 111, nrows_ncols=(n_rows, n_cols), axes_pad=0.05)
for i in range(n_images):
    imshow(grid[i], img[i])

# CNN Model from scratch

In [None]:
import torch.nn as nn
import torch.nn.functional as F

# define the CNN architecture
class Net(nn.Module):
    """Small CNN classifier trained from scratch.

    Five blocks of convolution -> batch norm -> ReLU -> 2x2 max pooling, then a
    two-layer fully connected head with dropout. Each pooling step halves the
    spatial size, so a 224x224 input reaches the head as a 32x7x7 feature map;
    other input sizes do not match the first linear layer.

    Args:
        num_classes: Number of output logits. Defaults to 120.
    """

    def __init__(self, num_classes=120):
        super().__init__()
        # The blocks are unpacked into one flat Sequential so the layer
        # indices, and with them the state_dict keys, stay the same as in a
        # hand-written flat list.
        self.conv = nn.Sequential(
            *self._conv_block(3, 32, kernel_size=3, padding=1),
            *self._conv_block(32, 64, kernel_size=3, padding=1),
            *self._conv_block(64, 128, kernel_size=3, padding=1),
            # 1x1 convolutions reduce the channel count before the dense head.
            *self._conv_block(128, 64, kernel_size=1),
            *self._conv_block(64, 32, kernel_size=1),
        )

        self.linear = nn.Sequential(
            nn.Linear(32 * 7 * 7, 1024),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(1024, num_classes),
        )

    @staticmethod
    def _conv_block(in_channels, out_channels, kernel_size, padding=0):
        """Return the layers of one conv -> batch norm -> ReLU -> max-pool block."""
        return [
            nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size,
                      stride=1, padding=padding),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        ]

    def forward(self, x):
        """Return the class logits for a batch of images."""
        x = self.conv(x)
        # Flatten everything except the batch dimension.
        x = torch.flatten(x, 1)
        x = self.linear(x)
        return x

# instantiate the CNN
model_scratch = Net()

# Move the model to the GPU only when CUDA is available; an unconditional
# .cuda() raises on CPU-only machines.
if torch.cuda.is_available():
    model_scratch = model_scratch.cuda()
model_scratch

I used five convolutional blocks, each made of a convolution, batch normalization, a ReLU activation and a max-pooling layer, followed by two fully connected layers. Between the fully connected layers, dropout with probability 0.5 is used to reduce overfitting.

In [None]:
!pip install torchsummary

In [None]:
# let's see the model
from torchsummary import summary

# torchsummary builds its dummy input on `device` (default "cuda") and does not
# move the model, so pass the device the model's weights actually live on.
summary_device = 'cuda' if next(model_scratch.parameters()).is_cuda else 'cpu'
summary(model_scratch, input_size=(3, 224, 224), device=summary_device)

Use the next code cell to specify a [loss function](http://pytorch.org/docs/stable/nn.html#loss-functions) and an [optimizer](http://pytorch.org/docs/stable/optim.html).  

In [None]:
# Loss: cross-entropy on the class logits.
criterion = nn.CrossEntropyLoss()
# Optimizer: SGD with momentum over the parameters of the scratch model.
optimizer = torch.optim.SGD(
    params=model_scratch.parameters(),
    lr=0.1,
    momentum=0.9,
)

In [None]:
# TensorBoard logging: import the writer class ...
from torch.utils.tensorboard import SummaryWriter

# ... and create a writer that logs under ./logs_dogstrain.
writer = SummaryWriter(log_dir="./logs_dogstrain")

In [None]:
def train(n_epochs, loaders, model, optimizer, criterion, use_cuda, save_path, summary_writer=None):
    """Train ``model`` and checkpoint it whenever the validation loss improves.

    Note that defining this function rebinds the notebook-level name ``train``,
    which earlier cells use for the training DataFrame.

    Args:
        n_epochs: Number of passes over ``loaders['train']``.
        loaders: Mapping with ``'train'`` and ``'valid'`` iterables that yield
            ``(data, target)`` batches.
        model: Network to optimise; it is updated in place.
        optimizer: Optimizer that steps the model's parameters.
        criterion: Loss, called as ``criterion(output, target)``.
        use_cuda: When true, every batch is moved to the GPU before the
            forward pass; when false, batches are left where they are.
        save_path: File that receives ``model.state_dict()`` each time the
            validation loss reaches a new minimum.
        summary_writer: Optional object with ``add_scalar(tag, value, step)``.
            When omitted, the notebook-level ``writer`` is used if it exists;
            otherwise nothing is logged.

    Returns:
        The model as it is after the final epoch. The best weights are the
        ones stored at ``save_path``.
    """
    if summary_writer is None:
        summary_writer = globals().get('writer')

    def log_scalar(tag, value, step):
        """Forward a scalar to the summary writer, if there is one."""
        if summary_writer is not None:
            summary_writer.add_scalar(tag, value, step)

    # float('inf') rather than np.Inf: that alias no longer exists in NumPy 2.0.
    valid_loss_min = float('inf')
    # Number of optimisation steps taken so far.
    total_train_step = 0
    # Number of validation passes completed so far.
    total_valid_step = 0

    for epoch in range(1, n_epochs + 1):
        print("----------第{}轮训练开始-------------".format(epoch))

        ###################
        # train the model #
        ###################
        model.train()
        for batch_idx, (data, target) in enumerate(loaders['train']):
            if use_cuda:
                data, target = data.cuda(), target.cuda()

            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

            total_train_step += 1
            # Report every 50th batch to keep the output readable.
            if (batch_idx + 1) % 50 == 0:
                print('Epoch: %d \tBatch: %d \tTraining Loss: %.6f' % (epoch, batch_idx + 1, loss.item()))
                log_scalar("train_loss", loss.item(), total_train_step)

        ######################
        # validate the model #
        ######################
        model.eval()
        loss_sum = 0.0
        n_correct = 0
        n_seen = 0
        with torch.no_grad():
            for data, target in loaders['valid']:
                if use_cuda:
                    data, target = data.cuda(), target.cuda()
                output = model(data)
                batch_n = target.size(0)
                # Weight each batch by its size so a short final batch does not
                # skew the result. Assumes `criterion` averages over the batch
                # (the default reduction of nn.CrossEntropyLoss).
                loss_sum += criterion(output, target).item() * batch_n
                n_correct += (output.argmax(1) == target).sum().item()
                n_seen += batch_n

        # Normalise by the samples actually evaluated instead of a global
        # DataFrame, so the function works with any validation loader.
        total_valid_loss = loss_sum / max(n_seen, 1)
        valid_accuracy = n_correct / max(n_seen, 1)

        # Save the model whenever the validation loss has decreased.
        if total_valid_loss < valid_loss_min:
            torch.save(model.state_dict(), save_path)
            print('BOOM! Validation loss decreased ({:.4f} --> {:.4f}).  Saving model...'.format(valid_loss_min, total_valid_loss))
            valid_loss_min = total_valid_loss

        print("整体开发集上的loss:{}".format(total_valid_loss))
        print("整体开发集上的正确率：{}".format(valid_accuracy))
        log_scalar("valid_loss", total_valid_loss, total_valid_step)
        log_scalar("valid_accuracy", valid_accuracy, total_valid_step)
        total_valid_step += 1

    # return trained model
    return model

# train the model
# model_scratch = train(20, dataloaders, model_scratch, optimizer, 
#                       criterion, use_cuda, 'model_scratch.pth')

# load the model that got the best validation accuracy
# model_scratch.load_state_dict(torch.load('model_scratch.pth'))

In [None]:
# Train the from-scratch CNN. `optimizer` was built from model_scratch's
# parameters, so it has to be paired with that model; `resnet50` is not even
# defined yet when this cell runs in order.
model_scratch = train(20, dataloaders, model_scratch, optimizer, criterion, use_cuda, 'model_scratch.pth')

As you can see, the loss is still quite large. Training a model from scratch to a good loss is hard in only 10–20 epochs, so let's move on to transfer learning with pretrained models. 

In [None]:
resnet50 = models.resnet50(pretrained=True)
print(resnet50)
# torchsummary creates its dummy input on `device` but leaves the model where
# it is, so move the model to that same device first. The input size is set to
# the 224x224 crops produced by the transform pipelines.
summary_device = 'cuda' if torch.cuda.is_available() else 'cpu'
resnet50 = resnet50.to(summary_device)
summary(resnet50, input_size=(3, 224, 224), batch_size=-1, device=summary_device)

In [None]:
##  Specify model architecture
model_transfer = models.resnet50(pretrained=True)

# Freeze every pretrained parameter so that only the new head is trained.
for param in model_transfer.parameters():
    param.requires_grad = False

# Replace the last fully connected layer with a new Linear layer that has 120
# outputs. Its parameters are created with requires_grad=True, so it is the
# only part of the network left trainable.
in_features = model_transfer.fc.in_features
model_transfer.fc = nn.Linear(in_features, 120)

if use_cuda:
    model_transfer = model_transfer.cuda()

In [None]:
criterion_transfer = nn.CrossEntropyLoss()
# Keep only the parameters that still require gradients. They are collected
# into a list because a filter() iterator can be consumed only once: after the
# optimizer has read it, any later loop over the same name would see nothing.
model_transfer_grad_paramaters = [p for p in model_transfer.parameters() if p.requires_grad]
optimizer_transfer = torch.optim.SGD(model_transfer_grad_paramaters, lr=0.01)

In [None]:
# Scratch cell: list every parameter with its requires_grad flag and shape.
for tensor in model_transfer.parameters():
    print(tensor.requires_grad, tensor.shape)

In [None]:
# List only the trainable parameters. They are re-derived from the model rather
# than read from `model_transfer_grad_paramaters`, which may be a one-shot
# iterator that the optimizer has already consumed.
for param in model_transfer.parameters():
    if param.requires_grad:
        print(param.requires_grad, param.shape)

In [None]:
# Train the transfer-learning model; the best checkpoint goes to model_transfer.pt.
n_epochs = 20
model_transfer = train(
    n_epochs,
    dataloaders,
    model_transfer,
    optimizer_transfer,
    criterion_transfer,
    use_cuda,
    'model_transfer.pt',
)


# Submission

In [None]:
# PATH already ends with a separator; os.path.join avoids the doubled slash
# that PATH + '/sample_submission.csv' produced.
submission = pd.read_csv(os.path.join(PATH, 'sample_submission.csv'))
# Empty frame with the same index and columns as the sample submission, with
# the id column copied over.
output = pd.DataFrame(index=submission.index, columns=submission.keys())
output['id'] = submission['id']
# Placeholder target column, all zeros.
submission['target'] = [0] * len(submission)

#will do this part later :3 

In [6]:
# Re-read the sample submission; os.path.join replaces the PATH + './...'
# concatenation, which left a stray './' in the middle of the path.
submission = pd.read_csv(os.path.join(PATH, 'sample_submission.csv'))

In [12]:
# Every column label of the submission frame except the first one.
submission.columns[1:]

In [None]:
# Scratch cell: make every layer trainable again and train the whole network.
for weight in model_transfer.parameters():
    weight.requires_grad = True

# A new optimizer is required so that all parameters are stepped.
optimizer_all = torch.optim.SGD(params=model_transfer.parameters(), lr=0.01)
n_epochs = 10
model_transfer = train(
    n_epochs,
    dataloaders,
    model_transfer,
    optimizer_all,
    criterion_transfer,
    use_cuda,
    'model_transfer.pth',
)

# 测试代码
# print(submission.head())

# print(output.head())



In [None]:
# Print the flag and shape of every parameter, and the values of the first one
# only. The original counter was never incremented, so `count == 0` stayed true
# and every parameter tensor was dumped in full.
for count, param in enumerate(model_transfer.parameters()):
    print(param.requires_grad, param.shape)
    if count == 0:
        print(param)