# import 

In [19]:
import os
import numpy as np
import cv2
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import pandas as pd
from torch.utils.data import DataLoader, Dataset
import time
from glob import glob
import re
from matplotlib import pyplot as plt
%matplotlib inline

os.environ["CUDA_VISIBLE_DEVICES"]='2' 

from torchsummary import summary
import torchvision.models as models
import torch.nn.functional as F

# data

In [3]:
def get_paths_and_labels(img_type='training', isLbael=True, base_dir=None):
    """List the image files of one dataset split and optionally parse their labels.

    Files are expected to be named ``<label>_<anything>`` (e.g. ``10_42.jpg``);
    the integer before the first underscore of the file name is the class label.

    Args:
        img_type: name of the split sub-directory under ``base_dir``.
        isLbael: when true, also return the labels parsed from the file names.
            (The misspelled name is kept so existing keyword callers still work.)
        base_dir: dataset root directory; defaults to the original project path.

    Returns:
        ``(paths, labels)`` when ``isLbael`` is true, otherwise ``paths`` only.
        ``paths`` is sorted lexicographically and ``labels`` is aligned with it.

    Raises:
        ValueError: if labels are requested and a file name has no leading
            ``<digits>_`` prefix.
    """
    if base_dir is None:
        base_dir = '/workdir/home/feynman52/NTU-ML2020/hw3-Food-Classification-by-CNN/datasets'
    paths = sorted(glob(os.path.join(base_dir, img_type, '*')))

    if not isLbael:
        return paths

    labels = []
    for path in paths:
        # Match against the file name only: searching the whole path would pick
        # up any directory component that happens to look like "<xx>_".
        found = re.match(r'(\d+)_', os.path.basename(path))
        if found is None:
            raise ValueError('cannot parse a label from file name: %s' % path)
        labels.append(int(found.group(1)))
    return paths, labels

In [4]:
# Labelled splits yield (paths, labels); the testing split yields paths only.
x_train_paths, y_train = get_paths_and_labels('training', isLbael=True)
x_valid_paths, y_valid = get_paths_and_labels('validation', isLbael=True)
x_test_paths = get_paths_and_labels('testing', isLbael=False)

# Sanity check: within each labelled split, paths and labels must have equal length.
tuple(map(len, (x_train_paths, y_train, x_valid_paths, y_valid, x_test_paths)))


(9866, 9866, 3430, 3430, 3347)

In [5]:
# Training pipeline: array -> PIL image -> random augmentation -> tensor.
train_transform = transforms.Compose(
    [transforms.ToPILImage()]
    # augmentation: random horizontal flip, then random rotation (argument 15)
    + [transforms.RandomHorizontalFlip(), transforms.RandomRotation(15)]
    + [transforms.ToTensor()]
)

# Evaluation pipeline: same conversions, no augmentation.
test_transform = transforms.Compose(
    [transforms.ToPILImage(), transforms.ToTensor()]
)

In [6]:
class ImgDataset(Dataset):
    """Dataset that reads images from disk on demand and resizes them to 128x128.

    Args:
        img_paths: sequence of image file paths.
        labels: optional sequence (list, NumPy array or tensor) of integer class
            labels aligned with ``img_paths``. When omitted, items are returned
            without a label.
        transform: optional callable applied to the resized image array.
    """

    def __init__(self, img_paths, labels=None, transform=None):
        self.img_paths = img_paths

        # Identity test instead of ``!= None``: equality on array-like labels is
        # elementwise and cannot be used as a truth value.
        if labels is None:
            self.labels = None
        else:
            self.labels = torch.as_tensor(labels, dtype=torch.long)

        self.transform = transform

    def __len__(self):
        """Number of images in the dataset."""
        return len(self.img_paths)

    def __getitem__(self, index):
        """Return the transformed image at ``index``, with its label if available.

        Raises:
            OSError: if the image file cannot be read.
        """
        img_path = self.img_paths[index]
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise OSError('failed to read image: %s' % img_path)
        img = cv2.resize(img, (128, 128))

        if self.transform is not None:
            img = self.transform(img)

        if self.labels is None:
            return img
        return img, self.labels[index]
            

In [7]:
# Training images are augmented; validation images only go through the plain pipeline.
train_set = ImgDataset(img_paths=x_train_paths, labels=y_train, transform=train_transform)
valid_set = ImgDataset(img_paths=x_valid_paths, labels=y_valid, transform=test_transform)

In [8]:
# Fetch the last training sample to inspect the image tensor shape and its label.
x, y = train_set[len(train_set) - 1]
(x.size(), y)

(torch.Size([3, 128, 128]), tensor(9))

In [9]:
# Mini-batch size shared by both loaders.
batch_size = 50
# Only the training loader randomizes the sample order; validation keeps dataset order.
train_loader = DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(dataset=valid_set, batch_size=batch_size, shuffle=False)


In [10]:
# Draw a single batch to verify the shapes of the collated image and label tensors.
g = iter(train_loader)
x_batch, y_batch = next(g)
tuple(t.size() for t in (x_batch, y_batch))

(torch.Size([50, 3, 128, 128]), torch.Size([50]))

In [11]:
# Number of batches in the training and validation loaders.
tuple(len(loader) for loader in (train_loader, valid_loader))

(198, 69)

# model

## teacher model

In [12]:
# Teacher: randomly initialised ResNet-18 with an 11-way output layer, placed on the GPU.
# It is only ever used for inference in this notebook, so switch it to eval mode
# immediately: a freshly built module is in training mode, in which its BatchNorm
# layers normalise with per-batch statistics and overwrite their running statistics
# on every forward pass (including forward passes made under torch.no_grad()).
teacher_net = models.resnet18(pretrained=False, num_classes=11).cuda().eval()

In [13]:
# Restore the teacher's weights from a checkpoint file in the working directory.
# The value returned by load_state_dict is displayed as the cell output.
# NOTE(review): torch.load deserialises the file via pickle -- only load
# checkpoints that come from a trusted source.
teacher_net.load_state_dict(torch.load(f'./teacher_resnet18.bin'))

<All keys matched successfully>

In [14]:
# Print a per-layer table (layer type, output shape, parameter count) of the
# teacher for a single 3x128x128 input.
summary(teacher_net, input_size=(3, 128, 128))


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 64, 64]           9,408
       BatchNorm2d-2           [-1, 64, 64, 64]             128
              ReLU-3           [-1, 64, 64, 64]               0
         MaxPool2d-4           [-1, 64, 32, 32]               0
            Conv2d-5           [-1, 64, 32, 32]          36,864
       BatchNorm2d-6           [-1, 64, 32, 32]             128
              ReLU-7           [-1, 64, 32, 32]               0
            Conv2d-8           [-1, 64, 32, 32]          36,864
       BatchNorm2d-9           [-1, 64, 32, 32]             128
             ReLU-10           [-1, 64, 32, 32]               0
       BasicBlock-11           [-1, 64, 32, 32]               0
           Conv2d-12           [-1, 64, 32, 32]          36,864
      BatchNorm2d-13           [-1, 64, 32, 32]             128
             ReLU-14           [-1, 64,

## student model

In [15]:
class ClassifierPrune(nn.Module):
    """Compact student CNN for 3x128x128 inputs built from depthwise-separable convolutions.

    Five stages of (depthwise 3x3 conv -> 1x1 conv) -> BatchNorm -> ReLU -> 2x2
    max-pool take the input from [3, 128, 128] to [512, 4, 4]; a three-layer
    fully connected head then maps the flattened features to class logits.

    Args:
        num_classes: number of output logits (defaults to 11).
    """

    # (in_channels, out_channels) of each convolutional stage. Every stage
    # halves the spatial size: 128 -> 64 -> 32 -> 16 -> 8 -> 4.
    _STAGE_CHANNELS = ((3, 64), (64, 128), (128, 256), (256, 512), (512, 512))

    def __init__(self, num_classes=11):
        super(ClassifierPrune, self).__init__()

        # Layers are appended in the same order as a hand-written nn.Sequential
        # would list them, so state_dict keys stay cnn.0.0.*, cnn.1.*, ...
        stages = []
        for in_chs, out_chs in self._STAGE_CHANNELS:
            stages.extend([
                self.make_prune_cnn(in_chs, out_chs, 3, 1, 1),
                nn.BatchNorm2d(out_chs),
                nn.ReLU(),
                nn.MaxPool2d(2, 2, 0),
            ])
        self.cnn = nn.Sequential(*stages)

        self.fc = nn.Sequential(
            nn.Linear(512*4*4, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, num_classes)
        )

    def make_prune_cnn(self, in_chs, out_chs, kernel_size, stride, padding):
        """Build a depthwise-separable replacement for ``nn.Conv2d(in_chs, out_chs, ...)``.

        The first convolution uses ``groups=in_chs`` (one filter per input
        channel) and carries the kernel size, stride and padding; the second is
        a 1x1 convolution that mixes channels and changes the channel count.
        """
        prune_cnn = nn.Sequential(
            nn.Conv2d(in_chs, in_chs, kernel_size, stride, padding, groups=in_chs),
            nn.Conv2d(in_chs, out_chs, 1)
        )
        return prune_cnn

    def forward(self, in_):
        """Return class logits of shape ``[batch, num_classes]`` for a batch of images."""
        x = self.cnn(in_)
        # Flatten everything except the batch dimension for the linear head.
        x = x.reshape(x.shape[0], -1)
        out_ = self.fc(x)
        return out_

In [17]:
# Instantiate the student on the GPU and print its per-layer table (layer type,
# output shape, parameter count) for a single 3x128x128 input.
student_net = ClassifierPrune().cuda()
summary(student_net, input_size=(3, 128, 128))


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [-1, 3, 128, 128]              30
            Conv2d-2         [-1, 64, 128, 128]             256
       BatchNorm2d-3         [-1, 64, 128, 128]             128
              ReLU-4         [-1, 64, 128, 128]               0
         MaxPool2d-5           [-1, 64, 64, 64]               0
            Conv2d-6           [-1, 64, 64, 64]             640
            Conv2d-7          [-1, 128, 64, 64]           8,320
       BatchNorm2d-8          [-1, 128, 64, 64]             256
              ReLU-9          [-1, 128, 64, 64]               0
        MaxPool2d-10          [-1, 128, 32, 32]               0
           Conv2d-11          [-1, 128, 32, 32]           1,280
           Conv2d-12          [-1, 256, 32, 32]          33,024
      BatchNorm2d-13          [-1, 256, 32, 32]             512
             ReLU-14          [-1, 256,

# train

## original model

In [20]:
def loss_fn_kd(student_outputs, labels, teacher_outputs, T=20, alpha=0.5):
    """Knowledge-distillation loss: weighted sum of a hard-label and a soft-target term.

    Args:
        student_outputs: student logits, classes along dim 1.
        labels: ground-truth class indices for the cross-entropy term.
        teacher_outputs: teacher logits, same shape as ``student_outputs``.
        T: temperature dividing both sets of logits before the softmax.
        alpha: weight of the soft-target term; the hard term gets ``1 - alpha``.

    Returns:
        Scalar tensor ``(1 - alpha) * CE(student, labels)
        + alpha * T^2 * KL(teacher_T || student_T)``, with the KL term
        averaged over the batch.
    """
    # Temperature-softened distributions: log-probabilities for the student,
    # probabilities for the teacher (the input/target forms kl_div expects).
    student_log_probs = F.log_softmax(student_outputs / T, dim=1)
    teacher_probs = F.softmax(teacher_outputs / T, dim=1)

    distill_term = F.kl_div(student_log_probs, teacher_probs, reduction='batchmean')
    label_term = F.cross_entropy(student_outputs, labels)

    return label_term * (1. - alpha) + (alpha * T * T) * distill_term

In [None]:
print('train')
alpha = 0.5        # weight of the soft (teacher) term in the distillation loss
temperature = 20   # softmax temperature passed to loss_fn_kd
epochs = 10
optimizer = torch.optim.AdamW(student_net.parameters(), lr=1e-3)

# The teacher only supplies soft targets. Keep it in inference mode so that its
# BatchNorm layers use the checkpoint's running statistics; torch.no_grad() alone
# does not prevent train-mode BatchNorm from using and updating batch statistics.
teacher_net.eval()

for epoch in range(epochs):
    epoch_start_time = time.time()

    # Running totals for this epoch: correct predictions and per-sample loss sums.
    train_acc = 0.
    valid_acc = 0.
    train_loss = 0.
    valid_loss = 0.

    # -----------------------
    #  train
    # -----------------------
    student_net.train()
    for x, y in train_loader:
        x, y = x.cuda(), y.cuda()

        optimizer.zero_grad()

        # student
        y_hat_student = student_net(x)

        # teacher (no gradients needed for the targets)
        with torch.no_grad():
            y_hat_teacher = teacher_net(x)

        # loss
        batch_loss = loss_fn_kd(y_hat_student, y, y_hat_teacher, temperature, alpha)
        batch_loss.backward()
        optimizer.step()

        # metrics: batch_loss is a per-sample mean, so weight it by the batch
        # size before the totals are divided by the dataset size below.
        train_acc += (y_hat_student.argmax(dim=1) == y).sum().item()
        train_loss += batch_loss.item() * x.size(0)

    # -----------------------
    #  valid
    # -----------------------
    student_net.eval()
    with torch.no_grad():
        for x, y in valid_loader:
            x, y = x.cuda(), y.cuda()

            y_hat_student = student_net(x)
            y_hat_teacher = teacher_net(x)

            batch_loss = loss_fn_kd(y_hat_student, y, y_hat_teacher, temperature, alpha)

            valid_acc += (y_hat_student.argmax(dim=1) == y).sum().item()
            valid_loss += batch_loss.item() * x.size(0)

    # -----------------------
    #  progress
    # -----------------------
    t = time.time() - epoch_start_time
    train_loss /= len(train_set)
    valid_loss /= len(valid_set)
    train_acc /= len(train_set)
    valid_acc /= len(valid_set)

    # Report 1-based epoch numbers without mutating the loop variable.
    print('epoch = %d, time = %d, train_loss = %.3f, train_acc = %.2f, valid_loss = %.3f, valid_acc = %.2f'%(
        epoch + 1, t, train_loss, train_acc, valid_loss, valid_acc))


train
