### Overview - CNN Image Recognition
Recognizing food types:
Bread, Dairy product, Dessert, Egg, Fried food, Meat, Noodles/Pasta, Rice, Seafood, Soup, Vegetable/Fruit

1. Readfile (training / validation / testing)
2. Augementation / dataset & dataloader
3. Model pipeline
4. Model training
5. Model training (combine traing & val set)
6. Testing recognition

In [3]:
# Test MPS (GPU switch) on MAC
import torch
if torch.backends.mps.is_available():
    mps_device = torch.device("mps")
    x = torch.ones(1, device=mps_device)
    print (x)
else:
    print ("MPS device not found.")

tensor([1.], device='mps:0')


In [1]:
import os
import numpy as np
import cv2
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import pandas as pd
from torch.utils.data import DataLoader, Dataset
import time

#Read image function
def readfile(path, label):
#    '''return np array x & y'''
    image_dir = sorted(os.listdir(path))
    x = np.zeros((len(image_dir), 128, 128, 3), dtype=np.uint8)
    y = np.zeros((len(image_dir)), dtype=np.uint8)
    for i, file in enumerate(image_dir):
        img = cv2.imread(os.path.join(path, file))
        x[i, :, :] = cv2.resize(img,(128, 128))
        if label:
            y[i] = int(file.split("_")[0])
    if label:
        return x, y
    else:
        return x

#Read data
workspace_dir = './food'
print('Read data')
train_x, train_y = readfile(os.path.join(workspace_dir, "training"), True)
print('Size of traing data: {}'.format(len(train_x)))
val_x, val_y = readfile(os.path.join(workspace_dir, 'validation'), True)
print('Size of validation data: {}'.format(len(val_x)))
test_x = readfile(os.path.join(workspace_dir, 'testing'), False)
print('Size of testing data: {}'.format(len(test_x)))

#Augmentation / Data transforming
train_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ToTensor(),
    ])

test_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ToTensor(),
    ])   

#Dataloader / Dataset
class ImgDataset():
    def __init__(self, x, y=None, transform=None):
        self.x = x
        self.y = y
        if y is not None:
            self.y = torch.LongTensor(y)
        self.transform = transform
    def __len__(self):
        return len(self.x)
    def __getitem__(self, index):
        X = self.x[index]
        if self.transform is not None:
            X = self.transform(X)
        if self.y is not None:
            Y = self.y[index]
            return X, Y
        else:
            return X

batch_size = 16
train_set = ImgDataset(train_x, train_y, train_transform)
val_set = ImgDataset(val_x, val_y, test_transform)
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False)

#Model
class Classifier(nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()
        
# torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
# torch.nn.MaxPool2d(kernel_size, stride, padding)
# input 維度 [3, 128, 128]

        self.cnn = nn.Sequential(
            nn.Conv2d(3, 64, 3, 1, 1), #[64, 128, 128]
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0), #[64, 64, 64]
            
            nn.Conv2d(64, 128, 3, 1, 1), # [128, 64, 64]
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0), #[128, 32, 32]
            
            nn.Conv2d(128, 256, 3, 1, 1), # [256, 32, 32]
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0), #[256, 16, 16]
            
            nn.Conv2d(256, 512, 3, 1, 1), # [512, 16, 16]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0), #[512, 8, 8]
            
            nn.Conv2d(512, 512, 3, 1, 1), # [512, 8, 8]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0), #[512, 4, 4]
            )
        self.fc = nn.Sequential(
            nn.Linear(512*4*4, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 11),
            )
        
    def forward(self, x):
        out = self.cnn(x)
        out = out.view(out.size()[0], -1)
        return self.fc(out)



Read data
Size of traing data: 9866
Size of validation data: 3430
Size of testing data: 3347


In [2]:

#Tranining

# For MAC M2
model = Classifier()
mps_device = torch.device("mps")
model= nn.DataParallel(model)
model.to(mps_device)

loss = nn.CrossEntropyLoss() #classification task
optimizer = torch.optim.Adam(model.parameters(), lr=0.001) #Optimizer by Adam
num_epoch = 10

for epoch in range(num_epoch):
    start_time = time.time()
    train_acc = 0
    train_loss = 0
    val_acc = 0
    val_loss = 0
    
    model.train() #train mode on, be able to dropout neurons
    for i, data in enumerate(train_loader):
        optimizer.zero_grad() #Dump gradients in model
        train_pred = model(data[0].to(mps_device))
        batch_loss = loss(train_pred, data[1].to(mps_device)) #pred & label must be either GPU or CPU
        batch_loss.backward()
        optimizer.step()
        
        train_acc += np.sum(np.argmax(train_pred.cpu().data.numpy(), axis=1)==data[1].numpy())
        train_loss += batch_loss.item()
    
    model.eval()
    with torch.no_grad():
        for i, data in enumerate(val_loader):
            val_pred = model(data[0].to(mps_device))
            batch_loss = loss(val_pred, data[1].to(mps_device))
            
            val_acc += np.sum(np.argmax(val_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
            val_loss += batch_loss.item()
        print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f | Val Acc: %3.6f loss: %3.6f' % \
             (epoch + 1, num_epoch, time.time()-start_time, \
              train_acc/train_set.__len__(), train_loss/train_set.__len__(), val_acc/val_set.__len__(), val_loss/val_set.__len__()))
#        print('[{}/{}], time:{}sec(s), train_acc:{}, loss:{}; val_acc:{}, loss:{}'.format(epoch+1, num_epoch, \
#        time.time()-start_time, train_acc/train_set.__len__(), train_loss/train_set.__len__(), \
#        val_acc/val_set.__len__(), val_loss/val_set.__len__())

[001/010] 76.41 sec(s) Train Acc: 0.245996 Loss: 0.133276 | Val Acc: 0.306122 loss: 0.123129
[002/010] 74.68 sec(s) Train Acc: 0.314413 Loss: 0.120558 | Val Acc: 0.285423 loss: 0.134926
[003/010] 74.60 sec(s) Train Acc: 0.371376 Loss: 0.111338 | Val Acc: 0.361224 loss: 0.116603
[004/010] 74.09 sec(s) Train Acc: 0.419724 Loss: 0.103544 | Val Acc: 0.372303 loss: 0.121082
[005/010] 74.18 sec(s) Train Acc: 0.459051 Loss: 0.097178 | Val Acc: 0.469096 loss: 0.097328
[006/010] 74.43 sec(s) Train Acc: 0.487432 Loss: 0.091832 | Val Acc: 0.494752 loss: 0.096134
[007/010] 73.71 sec(s) Train Acc: 0.517028 Loss: 0.086696 | Val Acc: 0.530612 loss: 0.086295
[008/010] 73.40 sec(s) Train Acc: 0.544902 Loss: 0.082444 | Val Acc: 0.491545 loss: 0.096570
[009/010] 73.31 sec(s) Train Acc: 0.564160 Loss: 0.079756 | Val Acc: 0.560641 loss: 0.082161
[010/010] 75.08 sec(s) Train Acc: 0.582810 Loss: 0.074572 | Val Acc: 0.563557 loss: 0.081449


In [3]:
# Combine train set & val set to have a more robust model

train_val_x = np.concatenate((train_x, val_x), axis=0)
train_val_y = np.concatenate((train_y, val_y), axis=0)
train_val_set = ImgDataset(train_val_x, train_val_y, train_transform)
train_val_loader = DataLoader(train_val_set, batch_size=batch_size, shuffle=True)

In [10]:
model_best = Classifier()
model_best
mps_device = torch.device("mps")
model_best = nn.DataParallel(model_best)
model_best.to(mps_device)

loss = nn.CrossEntropyLoss() # 因為是 classification task，所以 loss 使用 CrossEntropyLoss
optimizer = torch.optim.Adam(model_best.parameters(), lr=0.001) # optimizer 使用 Adam
num_epoch = 10

for epoch in range(num_epoch):
    epoch_start_time = time.time()
    train_acc = 0.0
    train_loss = 0.0

    model_best.train()
    for i, data in enumerate(train_val_loader):
        optimizer.zero_grad()
        train_pred = model_best(data[0].to(mps_device))
        batch_loss = loss(train_pred, data[1].to(mps_device))
        batch_loss.backward()
        optimizer.step()

        train_acc += np.sum(np.argmax(train_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
        train_loss += batch_loss.item()

        #將結果 print 出來
    print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f' % \
      (epoch + 1, num_epoch, time.time()-epoch_start_time, \
      train_acc/train_val_set.__len__(), train_loss/train_val_set.__len__()))

[001/010] 91.29 sec(s) Train Acc: 0.269028 Loss: 0.129425
[002/010] 91.77 sec(s) Train Acc: 0.347699 Loss: 0.115679
[003/010] 90.72 sec(s) Train Acc: 0.397939 Loss: 0.107459
[004/010] 90.38 sec(s) Train Acc: 0.448330 Loss: 0.099164
[005/010] 95.35 sec(s) Train Acc: 0.489847 Loss: 0.091481
[006/010] 90.63 sec(s) Train Acc: 0.532717 Loss: 0.085090
[007/010] 91.23 sec(s) Train Acc: 0.553400 Loss: 0.080285
[008/010] 93.20 sec(s) Train Acc: 0.587771 Loss: 0.074901
[009/010] 92.85 sec(s) Train Acc: 0.616426 Loss: 0.069950
[010/010] 94.40 sec(s) Train Acc: 0.638011 Loss: 0.065411


In [11]:
# Predict test set

test_set = ImgDataset(test_x, transform=test_transform)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)

In [13]:
model_best.eval()
prediction = []
with torch.no_grad():
    for i, data in enumerate(test_loader):
        test_pred = model_best(data.to(mps_device))
        test_label = np.argmax(test_pred.cpu().data.numpy(), axis=1)
        for y in test_label:
            prediction.append(y)

In [14]:
#將結果寫入 csv 檔
with open("predict.csv", 'w') as f:
    f.write('Id,Category\n')
    for i, y in  enumerate(prediction):
        f.write('{},{}\n'.format(i, y))