In [None]:
#deep learning example for Covolutional Neural Network
#dataset url
#!wget https://apache-mxnet.s3-accelerate.amazonaws.com/gluon/dataset/hotdog.zip
#!unzip hotdog.zip

In [None]:
#!pip install opencv-python

In [1]:
import os
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as transforms
import numpy as np
import time
import cv2

In [2]:
class ImgClassifier(nn.Module):
    def __init__(self):
        super(ImgClassifier, self).__init__()
        #torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
        #torch.nn.MaxPool2d(kernel_size, stride, padding)
        #input dimensions [3, 128, 128]
        self.cnn = nn.Sequential(
            nn.Conv2d(3, 64, 3, 1, 1),    #[64, 128, 128]
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),        #[64, 64, 64]

            nn.Conv2d(64, 128, 3, 1, 1),  #[128, 64, 64]
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),        #[128, 32, 32]

            nn.Conv2d(128, 256, 3, 1, 1), #[256, 32, 32]
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),        #[256, 16, 16]

            nn.Conv2d(256, 512, 3, 1, 1), #[512, 16, 16]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),        #[512, 8, 8]
            
            nn.Conv2d(512, 512, 3, 1, 1), #[512, 8, 8]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),        #[512, 4, 4]
        )
        
        self.fc = nn.Sequential(
            nn.Linear(512*4*4, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        out = self.cnn(x)
        out = out.view(out.size()[0], -1)
        return self.fc(out)

In [3]:
#create instance of neural network
model = ImgClassifier()

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
model = model.to(device)

#loss = nn.CrossEntropyLoss()
loss = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [4]:
def readfile(path):
    #read hotdog folder
    hotdog_dir = sorted(os.listdir(path+'/hotdog'))
    not_hotdog_dir = sorted(os.listdir(path+'/not-hotdog'))
    x = np.zeros((len(hotdog_dir)+len(not_hotdog_dir), 128, 128, 3), dtype=np.uint8)
    y = np.zeros((len(hotdog_dir)+len(not_hotdog_dir)), dtype=np.uint8)
    
    for i, file in enumerate(hotdog_dir):
        img = cv2.imread(os.path.join(path+'/hotdog', file))
        x[i, :, :] = cv2.resize(img,(128, 128))
        y[i] = 1

    for i, file in enumerate(not_hotdog_dir):
        img = cv2.imread(os.path.join(path+'/not-hotdog', file))
        x[i+len(hotdog_dir), :, :] = cv2.resize(img,(128, 128))
        y[i+len(hotdog_dir)] = 0
    
    return x, y
             
#read training and testing data
print("Reading data")
train_x, train_y = readfile('./hotdog/train')
print("Size of training data = {}".format(len(train_x)))
test_x, test_y = readfile('./hotdog/test')
print("Size of testing data = {}".format(len(test_x)))

Reading data
Size of training data = 2000
Size of testing data = 800


In [5]:
train_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ToTensor(),])
test_transform = transforms.Compose([
    transforms.ToPILImage(),                      
    transforms.ToTensor(),])

class ImgDataset(Dataset):
    def __init__(self, x, y=None, transform=None):
        self.x = x
        # label is required to be a LongTensor
        self.y = y
        if y is not None:
            self.y = torch.LongTensor(y)
        self.transform = transform
    def __len__(self):
        return len(self.x)
    def __getitem__(self, index):
        X = self.x[index]
        if self.transform is not None:
            X = self.transform(X)
        if self.y is not None:
            Y = self.y[index]
            return X, Y
        else:
            return X

In [6]:
train_set = ImgDataset(train_x, train_y, train_transform)
test_set = ImgDataset(test_x, test_y, test_transform)
train_loader = DataLoader(train_set, batch_size=16, shuffle=True)
test_loader = DataLoader(test_set, batch_size=16)

def evaluation(outputs, labels):
    outputs[outputs>=0.5] = 1
    outputs[outputs<0.5] = 0
    correct = torch.sum(torch.eq(outputs, labels)).item()
    return correct

In [7]:
#training
epochs = 30

for epoch in range(epochs):
    epoch_start_time = time.time()
    train_acc = 0.0
    train_loss = 0.0
    test_acc = 0.0
    test_loss = 0.0

    model.train()
    for i, data in enumerate(train_loader):
        inputs = data[0].to(device)
        labels = data[1].to(device, dtype=torch.float)
        
        optimizer.zero_grad()
        train_predicted = model.forward(inputs)
        train_predicted = train_predicted.squeeze()
        batch_loss = loss(train_predicted, labels)
        batch_loss.backward()
        optimizer.step()

        correct = evaluation(train_predicted, labels)
        train_acc += correct
        train_loss += batch_loss.item()
    
    with torch.no_grad():
        model.eval()
        for i, data in enumerate(test_loader):
            inputs = data[0].to(device)
            labels = data[1].to(device, dtype=torch.float)
            
            test_predicted = model.forward(inputs)
            test_predicted = test_predicted.squeeze()
            batch_loss = loss(test_predicted, labels)

            correct = evaluation(test_predicted, labels)
            test_acc += correct
            test_loss += batch_loss.item()

        #print result for each epoch
        print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f | Val Acc: %3.6f loss: %3.6f' % \
            (epoch + 1, epochs, time.time()-epoch_start_time, \
             train_acc/train_set.__len__(), train_loss/train_set.__len__(), test_acc/test_set.__len__(), test_loss/test_set.__len__()))

[001/030] 14.79 sec(s) Train Acc: 0.780000 Loss: 0.035036 | Val Acc: 0.828750 loss: 0.024481
[002/030] 9.18 sec(s) Train Acc: 0.830500 Loss: 0.024508 | Val Acc: 0.872500 loss: 0.021771
[003/030] 9.16 sec(s) Train Acc: 0.842500 Loss: 0.024029 | Val Acc: 0.875000 loss: 0.020952
[004/030] 9.17 sec(s) Train Acc: 0.863500 Loss: 0.022683 | Val Acc: 0.871250 loss: 0.022598
[005/030] 9.21 sec(s) Train Acc: 0.861000 Loss: 0.021379 | Val Acc: 0.857500 loss: 0.022445
[006/030] 9.71 sec(s) Train Acc: 0.869000 Loss: 0.020363 | Val Acc: 0.883750 loss: 0.017537
[007/030] 10.01 sec(s) Train Acc: 0.881500 Loss: 0.018063 | Val Acc: 0.887500 loss: 0.017079
[008/030] 10.60 sec(s) Train Acc: 0.883000 Loss: 0.018008 | Val Acc: 0.776250 loss: 0.034436
[009/030] 10.46 sec(s) Train Acc: 0.899000 Loss: 0.017258 | Val Acc: 0.875000 loss: 0.021093
[010/030] 10.95 sec(s) Train Acc: 0.890500 Loss: 0.017475 | Val Acc: 0.860000 loss: 0.021059
[011/030] 11.16 sec(s) Train Acc: 0.878000 Loss: 0.017938 | Val Acc: 0.8750

In [8]:
#model save and load
torch.save(model.state_dict(), './cnn_model.pt')
model = ImgClassifier()
model.load_state_dict(torch.load('./cnn_model.pt'))
model = model.to(device)

In [9]:
#generate final predictions
prediction = []

with torch.no_grad():
    model.eval()
    for i, data in enumerate(test_loader):
        inputs = data[0].to(device)
        labels = data[1].to(device)

        test_predicted = model.forward(inputs)
        test_predicted = test_predicted.squeeze()
        test_predicted[test_predicted>=0.5]=1
        test_predicted[test_predicted<0.5]=0
        test_label = test_predicted.cpu().data.numpy()
        for y in test_label:
            prediction.append(y)
        
with open("prediction.csv", 'w') as f:
    f.write('Id,Category\n')
    for i, y in  enumerate(prediction):
        f.write('{},{}\n'.format(i, y))

In [10]:
#generate predictions for some new images

#read test_new folder
test_dir = sorted(os.listdir('./hotdog/test_new'))
test_new_x = np.zeros((len(test_dir), 128, 128, 3), dtype=np.uint8)

for i, file in enumerate(test_dir):
    img = cv2.imread(os.path.join('./hotdog/test_new', file))
    test_new_x[i, :, :] = cv2.resize(img,(128, 128))

test_new_set = ImgDataset(test_new_x, transform=test_transform)
test_new_loader = DataLoader(test_new_set, batch_size=8)

prediction = []

with torch.no_grad():
    model.eval()
    for i, data in enumerate(test_new_loader):
        inputs = data.to(device)

        test_predicted = model.forward(inputs)
        test_predicted = test_predicted.squeeze()
        print(test_predicted)
        test_predicted[test_predicted>=0.5]=1
        test_predicted[test_predicted<0.5]=0
        test_label = test_predicted.cpu().data.numpy()
        print(test_label)
        for y in test_label:
            prediction.append(y)
        
with open("prediction_new.csv", 'w') as f:
    f.write('Id,Category\n')
    for i, y in  enumerate(prediction):
        f.write('{},{}\n'.format(test_dir[i], y))

tensor([3.9300e-05, 1.3235e-02, 8.3172e-01, 4.6780e-01, 1.0017e-01, 1.0156e-03,
        8.3702e-02, 4.0218e-06], device='cuda:0')
[0. 0. 1. 0. 0. 0. 0. 0.]
tensor([0.2384, 0.9728, 0.9912, 0.1810], device='cuda:0')
[0. 1. 1. 0.]
