In [2]:
import cv2
import numpy as np
import torch 
import torch.nn as nn
from torch.utils.data import DataLoader,Dataset
from torchvision.transforms import transforms
import os
import time

## 利用 cv2 讀入照片並且存在numpy array中 

In [4]:
def readfile(path,label):
    #label 是一個boolean variable ，表示是否需要回傳Y值
    image_dir = sorted(os.listdir(path))
    x = np.zeros((len(image_dir), 128, 128, 3) ,dtype=np.uint8)
    y = np.zeros((len(image_dir)), dtype=np.uint8)
    for i , file in enumerate(image_dir):
        img = cv2.imread(os.path.join(path, file))
        x[i, : , : ] = cv2.resize(img, (128, 128))
        if label:
            y[i] = int(file.split('_')[0])
    
    if label:
        return x,y
    else:
        return x

In [5]:
# 分別將 test , train , valid 用 readfile讀入
workdir = './food-11'
print("Reading data")
train_x , train_y = readfile(os.path.join(workdir,"training"), True)
print("size of training data:{}".format(len(train_x)))
valid_x , valid_y = readfile(os.path.join(workdir,"validation"), True)
print("size of validation data:{}".format(len(valid_x)))
test_x = readfile(os.path.join(workdir,"testing"), False)
print("size of testing data:{}".format(len(test_x)))

Reading data
size of training data:9866
size of validation data:3430
size of testing data:3347


## 包裝成Dataset
- 使用DataLoader 來載入資料

In [6]:
# Training 時做 Augumentation
train_transform = transforms.Compose([
  transforms.ToPILImage(),
  transforms.RandomHorizontalFlip(),
  transforms.RandomRotation(15),
  transforms.ToTensor(),
])
# testing 不需要做 Augumentation
test_transform = transforms.Compose([
  transforms.ToPILImage(),
  transforms.ToTensor(),
])

class imageData(Dataset):
    def __init__(self, x, y=None, transforms=None):
        self.x = x
        self.y = y
        # label is required to be a LongTensor
        if y is not None:
            self.y = torch.LongTensor(y)
        self.transforms = transforms
  
    def __len__(self):
        return len(self.x)
  
    def __getitem__(self, index):
        X = self.x[index]
        if self.transforms is not None:
            X = self.transforms(X)

        if self.y is not None:
            Y = self.y[index]
            return X,Y
        else:
            return X

In [7]:
batch_size = 64
train_set = imageData(train_x,train_y,train_transform)
valid_set = imageData(valid_x,valid_y,test_transform)

train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(valid_set, batch_size=batch_size , shuffle=False)

In [8]:
class Classifier(nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()
        # torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
        # torch.nn.MaxPool2d(kernel_size, stride, padding)
        # input 維度 [3, 128, 128]

        self.cnn = nn.Sequential(
            nn.Conv2d(3, 64, 3, 1, 1), # [64, 128, 128]
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),     # [64, 64, 64]

            nn.Conv2d(64, 128, 3, 1, 1), # [128,64,64]
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),     # [64, 32, 32]

            nn.Conv2d(128, 256, 3, 1, 1), # [256,32,32]
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),     # [256, 16, 16]

            nn.Conv2d(256, 512, 3, 1, 1), # [521,16,16]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),     # [512, 8, 8]
            
            nn.Conv2d(512, 512, 3, 1, 1), # [512,8,8]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),     # [512, 4, 4]
            
        )

        self.fc = nn.Sequential(
            nn.Linear(512*4*4, 1024),
            nn.ReLU(),
            nn.Linear(1024,512),
            nn.ReLU(),
            nn.Linear(512,11)
        )

    def forward(self,x):
        out = self.cnn(x)
        out = out.view(out.size()[0],-1)
        return self.fc(out)

## Training 

In [9]:
model = Classifier().cuda()
print(model)

Classifier(
  (cnn): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU()
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU()
    (11): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (12): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)


## 清空顯卡內存 
```
torch.cuda.empty_cache() # 清空內存
```

In [10]:
torch.cuda.empty_cache() # 清空內存
loss = nn.CrossEntropyLoss() # 因為是classfication 所以用 cross entropy
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
num_epoch= 30

for epoch in range(num_epoch):
    epoch_start_time = time.time()
    train_acc =0.0
    train_loss = 0.0
    val_acc = 0.0
    val_loss = 0.0

    model.train()
    for i , data in enumerate(train_loader):
        optimizer.zero_grad() # 用optimizer 將 model 參數的 gradient 歸零
        train_pred = model(data[0].cuda()) # 呼叫model forward參數去預測
        batch_loss = loss(train_pred , data[1].cuda()) # 計算loss 
        batch_loss.backward() # 利用back propagation 算出每個參數的 gradient
        optimizer.step()
        
        # np.argmax() 該函數回傳最大值的index 
        # 看有沒有與正確答案的index 一樣
        train_acc += np.sum(np.argmax(train_pred.cpu().data.numpy() ,axis =1 ) == data[1].numpy())
        train_loss += batch_loss.item()
    
    model.eval()
    with torch.no_grad():
        for i, data in enumerate(val_loader):
            val_pred = model(data[0].cuda())
            batch_loss = loss(val_pred , data[1].cuda())
            
            val_acc += np.sum(np.argmax(val_pred.cpu().data.numpy() , axis =1 ) == data[1].numpy())
            val_loss += batch_loss.item()
        
        #將結果print 出來
        print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f | Val Acc: %3.6f Loss%3.6f' % \
             (epoch + 1 , num_epoch , time.time()- epoch_start_time, train_acc/train_set.__len__(),\
             train_loss/train_set.__len__(), val_acc/valid_set.__len__(), val_loss/valid_set.__len__()))

[001/030] 37.89 sec(s) Train Acc: 0.250456 Loss: 0.034093 | Val Acc: 0.327988 Loss0.030527
[002/030] 37.83 sec(s) Train Acc: 0.350193 Loss: 0.028700 | Val Acc: 0.319825 Loss0.031760
[003/030] 37.85 sec(s) Train Acc: 0.415366 Loss: 0.026426 | Val Acc: 0.354810 Loss0.028841
[004/030] 37.84 sec(s) Train Acc: 0.464727 Loss: 0.024119 | Val Acc: 0.379883 Loss0.029857
[005/030] 37.87 sec(s) Train Acc: 0.498176 Loss: 0.022730 | Val Acc: 0.494169 Loss0.023268
[006/030] 37.87 sec(s) Train Acc: 0.539935 Loss: 0.020962 | Val Acc: 0.488921 Loss0.025008
[007/030] 37.93 sec(s) Train Acc: 0.562639 Loss: 0.019905 | Val Acc: 0.553644 Loss0.020516
[008/030] 37.96 sec(s) Train Acc: 0.589702 Loss: 0.018516 | Val Acc: 0.430321 Loss0.030290
[009/030] 37.98 sec(s) Train Acc: 0.615447 Loss: 0.017291 | Val Acc: 0.547813 Loss0.021487
[010/030] 37.99 sec(s) Train Acc: 0.640381 Loss: 0.016233 | Val Acc: 0.517784 Loss0.023721
[011/030] 37.96 sec(s) Train Acc: 0.660349 Loss: 0.015379 | Val Acc: 0.583965 Loss0.020182

## 得到好的參數後，我們將 training set 與 validation set 共同訓練，讓資料變多 

In [11]:
train_val_x = np.concatenate((train_x, valid_x) , axis = 0)
train_val_y = np.concatenate((train_y, valid_y) , axis = 0)

train_val_set = imageData(train_val_x, train_val_y, train_transform)
train_val_loader = DataLoader(train_val_set, batch_size=batch_size, shuffle=True)

In [12]:
# 清空顯卡內存
torch.cuda.empty_cache()
model_best = Classifier().cuda()
loss = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model_best.parameters(), lr = 0.001)
num_epoch = 30

for epoch in range(num_epoch):
    epoch_strat_time = time.time()
    train_acc = 0.0
    train_loss = 0.0
    
    model_best.train()
    for i,data in enumerate(train_val_loader):
        optimizer.zero_grad()
        pred = model_best(data[0].cuda())
        batch_loss = loss(pred, data[1].cuda())
        batch_loss.backward()
        optimizer.step()
        
        train_acc += np.sum(np.argmax(pred.cpu().data.numpy() , axis = 1) == data[1].numpy())
        train_loss += batch_loss.item()
        
    print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Train Loss: %3.6f' %\
         (epoch + 1, num_epoch , time.time()-epoch_strat_time , train_acc/train_val_set.__len__(),\
         train_loss/train_val_set.__len__()))

[001/030] 45.42 sec(s) Train Acc: 0.288658 Train Loss: 0.031960
[002/030] 45.30 sec(s) Train Acc: 0.396886 Train Loss: 0.027077
[003/030] 45.46 sec(s) Train Acc: 0.463899 Train Loss: 0.024252
[004/030] 45.42 sec(s) Train Acc: 0.514967 Train Loss: 0.022093
[005/030] 45.35 sec(s) Train Acc: 0.548812 Train Loss: 0.020365
[006/030] 45.33 sec(s) Train Acc: 0.580551 Train Loss: 0.018885
[007/030] 45.36 sec(s) Train Acc: 0.614170 Train Loss: 0.017547
[008/030] 45.43 sec(s) Train Acc: 0.636582 Train Loss: 0.016311
[009/030] 45.43 sec(s) Train Acc: 0.665162 Train Loss: 0.015172
[010/030] 45.42 sec(s) Train Acc: 0.683589 Train Loss: 0.014476
[011/030] 45.42 sec(s) Train Acc: 0.702016 Train Loss: 0.013485
[012/030] 45.37 sec(s) Train Acc: 0.719013 Train Loss: 0.012871
[013/030] 45.39 sec(s) Train Acc: 0.730445 Train Loss: 0.012158
[014/030] 45.44 sec(s) Train Acc: 0.748195 Train Loss: 0.011416
[015/030] 45.38 sec(s) Train Acc: 0.761658 Train Loss: 0.010841
[016/030] 45.43 sec(s) Train Acc: 0.7776

## Testing

In [13]:
test_set = imageData(test_x , transforms=test_transform)
test_loader = DataLoader(test_set, batch_size = batch_size ,shuffle= False)

In [16]:
model_best.eval()
prediction = []
with torch.no_grad():
    for i , data in enumerate(test_loader):
        test_pred = model_best(data.cuda())
        test_label = np.argmax(test_pred.cpu().data.numpy(), axis = 1)
        
        for y in test_label:
            prediction.append(y)

In [20]:
# 將結果寫入csv file
with open('submission.csv','w') as f:
    f.write('Id,Category\n')
    for i , y in enumerate(prediction):
        f.write('{},{}\n'.format(i,y))