In [1]:
import os
import random
import glob
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.optim import Adam
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
import torchvision.models as models
from PIL import Image

## Build Own Model:

In [22]:
def load_data_valid(img_path, label_path):
    train_image = sorted(glob.glob(os.path.join(img_path, '*.jpg')))
    train_label = pd.read_csv(label_path)
    train_label = train_label.iloc[:,1].values.tolist()
    
    train_data = list(zip(train_image, train_label))
    random.seed(42)
    random.shuffle(train_data)
    
    train_set = train_data[:20000]
    valid_set = train_data[20000:]
    
    return train_set, valid_set

In [3]:
class hw3_dataset(Dataset):
    def __init__(self, data, transform):
        self.data = data
        self.transform = transform
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        img = Image.open(self.data[idx][0])
        img = self.transform(img)
        label = self.data[idx][1]
        return img, label
    
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=5, padding=2),
            nn.LeakyReLU(negative_slope=0.05),
            nn.BatchNorm2d(32),
            nn.MaxPool2d(2)
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.LeakyReLU(negative_slope=0.05),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(2)
        )
        self.conv3 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.LeakyReLU(negative_slope=0.05),
            nn.BatchNorm2d(128),
            nn.MaxPool2d(2)
        )
        self.conv4 = nn.Sequential(
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.LeakyReLU(negative_slope=0.05),
            nn.BatchNorm2d(128),
            nn.MaxPool2d(2)
        )
        self.fc = nn.Sequential(
            nn.Linear(128*3*3, 256),
            nn.ReLU(),
            nn.BatchNorm1d(256),
            nn.Linear(256, 7)
        )
        
    def forward(self, x):
        #image size: (48,48)
        x = self.conv1(x) #(24,24)
        x = self.conv2(x) #(12,12)
        x = self.conv3(x) #(6,6)
        x = self.conv4(x) #(3,3)
        x = x.view(-1, 128*3*3)
        x = self.fc(x)
        return x

In [4]:
if __name__ == '__main__':
    use_gpu = torch.cuda.is_available()
    train_set, valid_set = load_data('data/train_img/', 'data/train.csv')
    
    # transform to tensor, data augmentation
    transform = transforms.Compose([
        transforms.RandomAffine(15, translate=(0.1,0.1), scale=(0.9,1.1), shear=10, fillcolor=0),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        #transforms.Normalize([mean], [std], inplace=False)
    ])
    
    train_dataset = hw3_dataset(train_set, transform)
    train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
    
    valid_dataset = hw3_dataset(valid_set, transform)
    valid_loader = DataLoader(valid_dataset, batch_size=128, shuffle=False)
    
    model = Net()
    if use_gpu:
        model.cuda()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
#     optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
    loss_fn = nn.CrossEntropyLoss()
    
    num_epoch = 10
    for epoch in range(num_epoch):
        model.train()
        train_loss = []
        train_acc = []
        for idx, (img, label) in enumerate(train_loader):
            if use_gpu:
                img = img.cuda()
                label = label.cuda()
            optimizer.zero_grad()
            output = model(img)
            loss = loss_fn(output, label)
            loss.backward()
            optimizer.step()
            
            predict = torch.max(output, 1)[1]
            acc = np.mean((label == predict).cpu().numpy())
            train_acc.append(acc)
            train_loss.append(loss.item())
        print("Epoch: {}, train Loss: {:.4f}, train Acc: {:.4f}".format(epoch + 1, np.mean(train_loss), np.mean(train_acc)))
            
    model.eval()
    with torch.no_grad():
        valid_loss = []
        valid_acc = []
        for idx, (img, label) in enumerate(valid_loader):
            if use_gpu:
                img = img.cuda()
                label = label.cuda()
            output = model(img)
            loss = loss_fn(output, label)
            predict = torch.max(output, 1)[1]
            acc = np.mean((label == predict).cpu().numpy())
            valid_loss.append(loss.item())
            valid_acc.append(acc)
        print("Epoch: {}, valid Loss: {:.4f}, valid Acc: {:.4f}".format(epoch + 1, np.mean(valid_loss), np.mean(valid_acc)))
    
    if np.mean(train_acc) > 0.9:
        checkpoint_path = 'model_{}.pth'.format(epoch+1)
        torch.save(model.state_dict(), checkpoint_path)
        print('model saved to %s' % checkpoint_path)

Epoch: 1, train Loss: 1.7347, train Acc: 0.3272
Epoch: 2, train Loss: 1.4530, train Acc: 0.4357
Epoch: 3, train Loss: 1.3409, train Acc: 0.4888
Epoch: 4, train Loss: 1.2817, train Acc: 0.5138
Epoch: 5, train Loss: 1.2324, train Acc: 0.5278
Epoch: 6, train Loss: 1.2092, train Acc: 0.5393
Epoch: 7, train Loss: 1.1764, train Acc: 0.5543
Epoch: 8, train Loss: 1.1588, train Acc: 0.5568
Epoch: 9, train Loss: 1.1387, train Acc: 0.5691
Epoch: 10, train Loss: 1.1273, train Acc: 0.5706
Epoch: 10, valid Loss: 1.1884, valid Acc: 0.5514


## Use Pretrained Model:

In [2]:
models.resnet50()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [16]:
class hw3_dataset(Dataset):
    def __init__(self, data, transform):
        self.data = data
        self.transform = transform
    def __len__(self):
        return len(self.data)
    def __getitem__(self, idx):
        img = Image.open(self.data[idx][0]).convert('RGB')
        img = self.transform(img)
        label = self.data[idx][1]
        return img, label

class Resnet18(nn.Module):
    def __init__(self):
        super(Resnet18, self).__init__()
        self.resnet = nn.Sequential(*list(models.resnet18(pretrained=True).children())[:-1])
        self.fc = nn.Linear(512,7)
    def forward(self, x):
        x = self.resnet(x)
        x = x.view(-1, 1*1*512)
        x = self.fc(x)
        
        return x

In [41]:
def load_data(img_path, label_path = None):
    images = sorted(glob.glob(os.path.join(img_path, '*.jpg')))
    if label_path:
        labels = pd.read_csv(label_path)
        labels = labels.iloc[:,1].values.tolist()
    else:
        labels = [0 for x in range(len(images))]
    
    data = list(zip(images, labels))
    if label_path:
        random.seed(42)
        random.shuffle(data)
    
    return data

In [43]:
if __name__ == '__main__':
    use_gpu = torch.cuda.is_available()
#     train_set, valid_set = load_data('data/train_img/', 'data/train.csv')
    train_set = load_data('data/train_img/', 'data/train.csv')
    
    transform = transforms.Compose([
#         transforms.RandomAffine(15, translate=(0.1,0.1), scale=(0.9,1.1), shear=10, fillcolor=0),
#         transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.5], [0.5], inplace=False)
    ])
    
    train_dataset = hw3_dataset(train_set, transform)
    train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
#     valid_dataset = hw3_dataset(valid_set, transform)
#     valid_loader = DataLoader(valid_dataset, batch_size=128, shuffle=False)
    
    model = Resnet18()
    if use_gpu:
        model.cuda()
        
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    loss_fn = nn.CrossEntropyLoss()
    num_epoch = 50
    
    for epoch in range(num_epoch):
        model.train()
        train_loss = []
        train_acc = []
        for idx, (img, label) in enumerate(train_loader):
            if use_gpu:
                img = img.cuda()
                label = label.cuda()
            optimizer.zero_grad()
            output = model(img)
            loss = loss_fn(output, label)
            loss.backward()
            optimizer.step()
            
            predict = torch.max(output, 1)[1]
            acc = np.mean((label == predict).cpu().numpy())
            train_acc.append(acc)
            train_loss.append(loss.item())
        print("Epoch: {}, train Loss: {:.4f}, train Acc: {:.4f}".format(epoch + 1, np.mean(train_loss), np.mean(train_acc)))

        if epoch % 10 == 9:
            checkpoint_path = 'models/resnet18_model_{}.pth'.format(epoch+1)
            torch.save(model.state_dict(), checkpoint_path)
            print('model saved to %s' % checkpoint_path)

Epoch: 1, train Loss: 1.3453, train Acc: 0.4907
Epoch: 2, train Loss: 1.0785, train Acc: 0.5980
Epoch: 3, train Loss: 0.9448, train Acc: 0.6507
Epoch: 4, train Loss: 0.8210, train Acc: 0.6974
Epoch: 5, train Loss: 0.6900, train Acc: 0.7458
Epoch: 6, train Loss: 0.5726, train Acc: 0.7937
Epoch: 7, train Loss: 0.4529, train Acc: 0.8407
Epoch: 8, train Loss: 0.3481, train Acc: 0.8781
Epoch: 9, train Loss: 0.2772, train Acc: 0.9033
Epoch: 10, train Loss: 0.2193, train Acc: 0.9238
model saved to models/resnet18_model_10.pth
Epoch: 11, train Loss: 0.1788, train Acc: 0.9387
Epoch: 12, train Loss: 0.1448, train Acc: 0.9503
Epoch: 13, train Loss: 0.1252, train Acc: 0.9576
Epoch: 14, train Loss: 0.1366, train Acc: 0.9530
Epoch: 15, train Loss: 0.1025, train Acc: 0.9662
Epoch: 16, train Loss: 0.0994, train Acc: 0.9670
Epoch: 17, train Loss: 0.0917, train Acc: 0.9683
Epoch: 18, train Loss: 0.0902, train Acc: 0.9702
Epoch: 19, train Loss: 0.0760, train Acc: 0.9754
Epoch: 20, train Loss: 0.0826, tra

KeyboardInterrupt: 

In [44]:
# testing
def write_ans(ans, ansfile):
    print("Writing answers to %s" % ansfile)
    import csv
    with open(ansfile, "w") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["id", "label"])
        for i in range(len(ans)):
            writer.writerow([i, ans[i]])

if __name__ == '__main__':
#     use_gpu = torch.cuda.is_available()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    test_set = load_data('data/test_img/')
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.5], [0.5], inplace=False)
    ])
    test_dataset = hw3_dataset(test_set, transform)
    test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)
    
    model = Resnet18()
    model.load_state_dict(torch.load('models/resnet18_model_40.pth'))
#     if use_gpu:
#         model.cuda()
    model.to(device)
    
    model.eval()
    results = []
    with torch.no_grad():
        for idx, (imgs, _) in enumerate(test_loader):
#             if use_gpu:
#                 imgs.cuda()
            imgs = imgs.to(device)
            outputs = model(imgs)
            predict = torch.max(outputs, 1)[1]
            results += predict.tolist()
    write_ans(results, 'results/ans-resnet18_model_40.csv')
    

Writing answers to results/ans-resnet18_model_40.csv
