In [3]:
import os
import numpy as np
import cv2
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import pandas as pd
from torch.utils.data import DataLoader, Dataset
import time
from glob import glob
import re
from matplotlib import pyplot as plt
%matplotlib inline

os.environ["CUDA_VISIBLE_DEVICES"]='2' 


# read data
- x_train_paths, y_train
- x_valid_paths, y_valid
- x_test_paths

In [4]:
def get_paths_and_labels(img_type='training', isLbael=True):
    base_dir = '/workdir/home/feynman52/NTU-ML2020/hw3-Food-Classification-by-CNN/datasets'
    paths = sorted(glob(os.path.join(base_dir, img_type, '*')))[:]
    
    if isLbael==True: 
        Y = [int(re.search('/(.{1,2})_', path).group(1)) for path in paths]
        return paths, Y
    else:
        return paths

In [5]:
x_train_paths, y_train = get_paths_and_labels(img_type='training', isLbael=True)
x_valid_paths, y_valid = get_paths_and_labels(img_type='validation', isLbael=True)
x_test_paths = get_paths_and_labels(img_type='testing', isLbael=False)

len(x_train_paths), len(y_train), len(x_valid_paths), len(y_valid), len(x_test_paths)


(9866, 9866, 3430, 3430, 3347)

# data

## transform

In [6]:
# training 時做 data augmentation
train_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomHorizontalFlip(), # 隨機將圖片水平翻轉
    transforms.RandomRotation(15), # 隨機旋轉圖片
    transforms.ToTensor(), # 將圖片轉成 Tensor，並把數值 normalize 到 [0,1] (data normalization)
])
# testing 時不需做 data augmentation
test_transform = transforms.Compose([
    transforms.ToPILImage(),                                    
    transforms.ToTensor(),
])

### test
https://pytorch.org/docs/stable/torchvision/transforms.html

## dataset

In [7]:
class ImgDataset(Dataset):
    def __init__(self, img_paths, labels=None, transform=None):
        self.img_paths = img_paths
        
        self.labels = labels
        if self.labels != None:
            self.labels = torch.LongTensor(labels) ###
            
        self.transform = transform
        
    def __len__(self):
        return len(self.img_paths)
    
    def __getitem__(self, index):
        img_path = self.img_paths[index]
        img = cv2.imread(img_path)
        img = cv2.resize(img, (128, 128))
        
        if self.transform!=None: img = self.transform(img)
            
        if self.labels==None: 
            return img
        else:
            label = self.labels[index]
            return img, label
            

In [8]:
train_set = ImgDataset(x_train_paths, y_train, train_transform)
valid_set = ImgDataset(x_valid_paths, y_valid, test_transform)

In [9]:
x, y = train_set[10]

In [10]:
y

tensor(0)

## dataloader

In [8]:
batch_size = 50
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True) # shuffle select index
valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=False)


In [9]:
g = iter(train_loader)
x_batch, y_batch = next(g)
x_batch.shape, y_batch.shape
x_batch.size(), y_batch.size()

(torch.Size([50, 3, 128, 128]), torch.Size([50]))

In [10]:
len(train_loader) # batch_num = round(9866//50)

198

In [11]:
len(valid_loader)

69

### test

# model

In [12]:
# import inspect
# f=(nn.BatchNorm2d)
# inspect.signature(f).parameters

In [13]:
class Classifier(nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()
        # torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
        # torch.nn.MaxPool2d(kernel_size, stride, padding)
        # input 維度 [3, 128, 128]
        self.cnn = nn.Sequential(
            nn.Conv2d(3, 64, 3, 1, 1),  # [64, 128, 128]
            nn.BatchNorm2d(num_features=64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [64, 64, 64]

            nn.Conv2d(64, 128, 3, 1, 1), # [128, 64, 64]
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [128, 32, 32]

            nn.Conv2d(128, 256, 3, 1, 1), # [256, 32, 32]
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [256, 16, 16]

            nn.Conv2d(256, 512, 3, 1, 1), # [512, 16, 16]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),       # [512, 8, 8]
            
            nn.Conv2d(512, 512, 3, 1, 1), # [512, 8, 8]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),       # [512, 4, 4]
        )
        
        self.fc = nn.Sequential(
            nn.Linear(512*4*4, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 11)
        )

    def forward(self, in_):
        x = self.cnn(in_)
        print(x.shape)
        x = x.reshape(x.shape[0], -1) # x.shape = x.sise()
        out_ = self.fc(x)
        return out_

In [14]:
model = Classifier().cuda()

In [11]:
# y = model(x) 
# y = model.__call__(x)
# y = model.forward(x)

In [15]:
print(model)

Classifier(
  (cnn): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU()
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU()
    (11): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (12): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)


In [16]:
from torchsummary import summary
summary(model, input_size=(3, 128, 128))


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 128, 128]           1,792
       BatchNorm2d-2         [-1, 64, 128, 128]             128
              ReLU-3         [-1, 64, 128, 128]               0
         MaxPool2d-4           [-1, 64, 64, 64]               0
            Conv2d-5          [-1, 128, 64, 64]          73,856
       BatchNorm2d-6          [-1, 128, 64, 64]             256
              ReLU-7          [-1, 128, 64, 64]               0
         MaxPool2d-8          [-1, 128, 32, 32]               0
            Conv2d-9          [-1, 256, 32, 32]         295,168
      BatchNorm2d-10          [-1, 256, 32, 32]             512
             ReLU-11          [-1, 256, 32, 32]               0
        MaxPool2d-12          [-1, 256, 16, 16]               0
           Conv2d-13          [-1, 512, 16, 16]       1,180,160
      BatchNorm2d-14          [-1, 512,

# train

In [17]:
import inspect
inspect.signature(torch.optim.Adam).parameters


mappingproxy({'params': <Parameter "params">,
              'lr': <Parameter "lr=0.001">,
              'betas': <Parameter "betas=(0.9, 0.999)">,
              'eps': <Parameter "eps=1e-08">,
              'weight_decay': <Parameter "weight_decay=0">,
              'amsgrad': <Parameter "amsgrad=False">})

In [51]:
print('train')

model = Classifier().cuda()
epochs = 5

# initialize loss
loss = nn.CrossEntropyLoss()

# initialize update_weight
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)

for epoch in range(epochs):
    # initialize time
    epoch_start_time = time.time()
    # initialize metric
    train_acc = 0.
    valid_acc = 0.
    train_loss = 0.
    valid_loss = 0.
    
    # -----------------------
    #  train 
    # -----------------------
    model.train() ###
    for (i, data) in enumerate(train_loader):
        x, y = data[0].cuda(), data[1].cuda()
        
        # reset gradient !!!
        optimizer.zero_grad()
        
        # y_hat, (-1,11)
        y_hat = model(x)
        
        # loss
        batch_loss = loss(y_hat, y)
        
        # gradient
        batch_loss.backward()
        
        # update_weight
        optimizer.step()
        
        # metric, acc, loss
        label_hat = np.argmax(y_hat.cpu().data.numpy(), axis=1)
        label = y.cpu().data.numpy()
        match = (label_hat==label)
        train_acc += sum(match)
        train_loss += batch_loss.item()
        

    
    # -----------------------
    #  valid 
    # -----------------------
    model.eval() ###
    with torch.no_grad(): ###
        for (i, data) in enumerate(valid_loader):
            x, y = data[0].cuda(), data[1].cuda()

            # y_hat
            y_hat = model(x)

            # loss
            batch_loss = loss(y_hat, y)

            # metric
            label_hat = np.argmax(y_hat.cpu().data.numpy(), axis=1)
            label = y.cpu().data.numpy()
            match = (label_hat==label)
            valid_acc += sum(match)

            valid_loss += batch_loss.item()
    
    
    # -----------------------
    #  progress 
    # -----------------------
    epoch += 1
    t = time.time() - epoch_start_time
    train_loss /= len(train_set)
    valid_loss /= len(valid_set)
    train_acc /= len(train_set)
    valid_acc /= len(valid_set)

    print('epoch = %d, time = %d, train_loss = %.3f, train_acc = %.2f, valid_loss = %.3f, valid_acc = %.2f'%(
        epoch, t, train_loss, train_acc, valid_loss, valid_acc))


train
epoch = 1, time = 88, train_loss = 0.043, train_acc = 0.26, valid_loss = 0.040, valid_acc = 0.30
epoch = 2, time = 87, train_loss = 0.036, train_acc = 0.37, valid_loss = 0.040, valid_acc = 0.30
epoch = 3, time = 85, train_loss = 0.033, train_acc = 0.43, valid_loss = 0.038, valid_acc = 0.36
epoch = 4, time = 86, train_loss = 0.030, train_acc = 0.48, valid_loss = 0.031, valid_acc = 0.47
epoch = 5, time = 84, train_loss = 0.028, train_acc = 0.52, valid_loss = 0.030, valid_acc = 0.49


# save model

In [30]:
# model_dict = model.state_dict()
# for key in model_dict:
#     val = model_dict[key]
#     print('%-30s %-30s'%(key, val.shape))

In [52]:
torch.save(model.state_dict(), './best_model.pt')

In [53]:
print(model.state_dict()['cnn.0.weight'][0,0,0,:])

tensor([-0.1079, -0.2099, -0.0213], device='cuda:0')


# load model

In [59]:
best_model = Classifier().cuda()
print(best_model.state_dict()['cnn.0.weight'][0,0,0,:])

best_model.load_state_dict(torch.load('./best_model.pt'))
print(best_model.state_dict()['cnn.0.weight'][0,0,0,:])

tensor([ 0.1091, -0.0359, -0.1072], device='cuda:0')
tensor([-0.1079, -0.2099, -0.0213], device='cuda:0')


# infer

In [60]:
test_set = ImgDataset(x_test_paths, y_train, test_transform)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)

In [61]:
predict = []

best_model.eval()
with torch.no_grad():
    for (i, data) in enumerate(valid_loader):
        x = data[0].cuda()
        y_hat = model(x)
        y_hat = y_hat.cpu().data.numpy()
        predict.append(y_hat)

In [64]:
result = np.concatenate(predict, axis=0)
result_label = np.argmax(result, axis=1)

result.shape, result_label.shape

((3430, 11), (3430,))