In [1]:
import os
import sys
import numpy as np
import matplotlib.pyplot as plt
import pickle
import skimage
import skimage.io
import skimage.feature
import pandas as pd
% matplotlib inline

import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader

from tensorboardX import SummaryWriter 

%env CUDA_VISIBLE_DEVICES=5

env: CUDA_VISIBLE_DEVICES=5


In [2]:
# prepare dataset
def _list_and_read_images(folder):
    """Return (sorted entry names, decoded images) for every entry in ``folder``."""
    names = sorted(os.listdir(folder))
    images = [skimage.io.imread(os.path.join(folder, name)) for name in names]
    return names, images


def _read_id_column(txt_path):
    """Return the second space-separated field of each line in ``txt_path``."""
    with open(txt_path, "r") as handle:
        rows = handle.readlines()
    return [row.strip().split(" ")[1] for row in rows]


def _unpickle(pkl_path):
    """Load and return the object pickled at ``pkl_path`` (trusted local files only)."""
    with open(pkl_path, "rb") as handle:
        return pickle.load(handle)


# Training split: images plus the id column of the text listing.
train_img_folder = "../dlcv_final_2_dataset/train/"
train_img_path, train_X_img = _list_and_read_images(train_img_folder)
train_y = _read_id_column("../dlcv_final_2_dataset/train_id.txt")

# Validation split: same layout as the training split.
valid_img_folder = "../dlcv_final_2_dataset/val/"
valid_img_path, valid_X_img = _list_and_read_images(valid_img_folder)
valid_y = _read_id_column("../dlcv_final_2_dataset/val_id.txt")

# Test split: images only.
test_img_folder = "../dlcv_final_2_dataset/test/"
test_img_path, test_X_img = _list_and_read_images(test_img_folder)

# The pickled label lists replace the labels parsed from the text files above.
train_y = _unpickle("../features/train_y.pkl")
valid_y = _unpickle("../features/valid_y.pkl")


In [3]:
def normalize(image):
    '''
    Convert one raw image into a zero-padded tensor for the model.

    The pipeline is ToPILImage -> Pad -> ToTensor. Despite the function
    name, no mean/std normalization is applied: the ImageNet Normalize step
    was disabled in this notebook, so it is not constructed here either.

    image: a single image accepted by ``transforms.ToPILImage`` (the notebook
           passes arrays returned by ``skimage.io.imread``).
    returns: the tensor produced by ``transforms.ToTensor`` for the padded image.
    '''
    transform_input = transforms.Compose([
        transforms.ToPILImage(),
        # (23, 3) = 23 px on the left/right and 3 px on the top/bottom, filled with 0.
        # NOTE(review): presumably this squares 178x218 inputs to 224x224 - confirm.
        transforms.Pad((23, 3), fill=0, padding_mode='constant'),
        transforms.ToTensor(),
    ])
    return transform_input(image)

def normalize_flip_aug(image):
    '''
    Convert one raw image into a zero-padded, horizontally flipped tensor.

    The pipeline is ToPILImage -> Pad -> RandomHorizontalFlip(p=1) -> ToTensor.
    With p=1 the flip is applied to every image, so the output is a
    deterministic mirrored copy. Despite the function name, no mean/std
    normalization is applied: the ImageNet Normalize step was disabled in this
    notebook, so it is not constructed here either.

    image: a single image accepted by ``transforms.ToPILImage`` (the notebook
           passes arrays returned by ``skimage.io.imread``).
    returns: the tensor produced by ``transforms.ToTensor`` for the padded,
             flipped image.
    '''
    transform_input = transforms.Compose([
        transforms.ToPILImage(),
        # (23, 3) = 23 px on the left/right and 3 px on the top/bottom, filled with 0.
        # NOTE(review): presumably this squares 178x218 inputs to 224x224 - confirm.
        transforms.Pad((23, 3), fill=0, padding_mode='constant'),
        # p=1: always flip, so this is an augmentation copy rather than a random one.
        transforms.RandomHorizontalFlip(p=1),
        transforms.ToTensor(),
    ])
    return transform_input(image)

In [4]:
def _stack_with_flips(images):
    """Stack the padded tensors of ``images`` followed by their flipped copies."""
    plain = [normalize(picture) for picture in images]
    mirrored = [normalize_flip_aug(picture) for picture in images]
    return torch.stack(plain + mirrored)


# Each split becomes one tensor: first all originals, then all flipped copies.
train_X = _stack_with_flips(train_X_img)

valid_X = _stack_with_flips(valid_X_img)

test_X = _stack_with_flips(test_X_img)

In [5]:
# train_X / valid_X hold every image twice (originals followed by flipped
# copies), so the labels are repeated once to stay aligned with them.
# The length guard makes this cell safe to re-run: labels that have already
# been doubled are left alone instead of being doubled again.
if len(train_y) == len(train_X_img):
    train_y = train_y + train_y
if len(valid_y) == len(valid_X_img):
    valid_y = valid_y + valid_y

# Fail early on a label/image mismatch instead of training on shifted labels.
assert len(train_y) == 2 * len(train_X_img), "train labels do not match train images"
assert len(valid_y) == 2 * len(valid_X_img), "valid labels do not match valid images"

In [6]:
# Assign a class index to every distinct training label, in order of first
# appearance: label_encoder maps label -> index, label_set lists the labels
# in index order.
label_encoder = {}
label_set = []
for label in train_y:
    # Membership is tested on the dict (O(1)); scanning label_set instead
    # made this loop quadratic in the number of distinct labels.
    if label not in label_encoder:
        label_encoder[label] = len(label_set)
        label_set.append(label)
index = len(label_set)  # number of distinct labels, as before

# Hard-coded width of the classifier output layer.
output_size = 2360
if index > output_size:
    # A class index outside the output layer would otherwise only surface
    # later, as an error inside the loss computation.
    raise ValueError(
        "found %d distinct labels but output_size is %d" % (index, output_size)
    )

## build model

In [7]:
class DenseNet121(nn.Module):
    """DenseNet-121 backbone with its classifier swapped for one linear layer.

    The backbone is built without pretrained weights. The replacement
    classifier maps the backbone's feature width to ``out_size`` outputs and
    applies no softmax, so ``forward`` returns the linear layer's raw output.
    """

    def __init__(self, out_size):
        super().__init__()
        backbone = torchvision.models.densenet121(pretrained=False)
        feature_width = backbone.classifier.in_features
        # Kept inside nn.Sequential so parameter names stay "classifier.0.*".
        backbone.classifier = nn.Sequential(nn.Linear(feature_width, out_size))
        self.densenet121 = backbone

    def forward(self, x):
        """Run ``x`` through the wrapped network and return its output."""
        return self.densenet121(x)

In [8]:
# Number of samples per optimisation step.
BATCH_SIZE = 64

# Network with ``output_size`` outputs, moved to the GPU before the
# optimizer captures its parameters.
model = DenseNet121(out_size=output_size)
model = model.cuda()

# Adam with beta1 = 0.5 and beta2 = 0.999.
optimizer = torch.optim.Adam(
    params=model.parameters(),
    lr=0.001,
    betas=(0.5, 0.999),
)

# Cross-entropy on the model's raw outputs.
loss_function = nn.CrossEntropyLoss()
loss_function = loss_function.cuda()

  nn.init.kaiming_normal(m.weight.data)


In [None]:
# Train for epochs 401-500 (as printed), validating after every epoch.
# Per-batch loss and per-epoch validation accuracy are logged to TensorBoard.
max_accuracy = 0
model.train()
training_loss_list = []
validation_acc_list = []

# Global step for the per-batch loss curve. When this cell is re-run in a
# live kernel the existing counter is kept so the curve continues; on a
# fresh kernel it starts at 0 instead of raising NameError.
try:
    iteration
except NameError:
    iteration = 0

writer = SummaryWriter(log_dir="../log/densenet_flip_01N")
try:
    for epoch in range(400,500):
        print("Epoch:", epoch+1)
        CE_loss = 0.0  # sum of the batch losses of this epoch
        total_length = len(train_X)
        # shuffle: only the index order is permuted, batches are gathered lazily
        perm_index = np.random.permutation(total_length)

        # construct training batch; a trailing partial batch is dropped
        for index in range(0, total_length - BATCH_SIZE + 1, BATCH_SIZE):
            iteration += 1
            batch_index = perm_index[index:index+BATCH_SIZE]
            input_X = train_X[torch.from_numpy(batch_index).long()]
            input_y = torch.tensor(
                [label_encoder[train_y[i]] for i in batch_index],
                dtype=torch.long,
            )

            # zero the parameter gradients
            optimizer.zero_grad()
            # forward + backward + optimize on the GPU
            output = model(input_X.cuda())
            loss = loss_function(output, input_y.cuda())
            loss.backward()
            optimizer.step()

            # .item() reads the scalar loss; indexing a 0-dim tensor with
            # [0] fails on current PyTorch releases.
            batch_loss = loss.item()
            CE_loss += batch_loss
            writer.add_scalar('loss', batch_loss, iteration)
        print("training loss",CE_loss)
        training_loss_list.append(CE_loss)

        # validation, in batches and without gradient tracking
        same_difference = []  # one bool per validation sample: prediction == label
        with torch.no_grad():
            model.eval()
            num_valid = min(len(valid_X), len(valid_y))
            for start in range(0, num_valid, BATCH_SIZE):
                stop = min(start + BATCH_SIZE, num_valid)
                output = model(valid_X[start:stop].cuda())
                prediction = torch.argmax(output, dim=1).cpu().tolist()
                valid_gt = [label_encoder[y] for y in valid_y[start:stop]]
                same_difference.extend(
                    p == g for p, g in zip(prediction, valid_gt)
                )

            accuracy = np.mean(same_difference)
            validation_acc_list.append(accuracy)
            print("validation accuracy: ",accuracy)
            writer.add_scalar('accuracy', accuracy,  epoch+1)
        if accuracy > max_accuracy:
            max_accuracy = accuracy
            # NOTE: checkpointing is disabled; the best weights are not saved.
        model.train()
finally:
    # Flush and close the event file even if training is interrupted.
    writer.close()

Epoch: 401




training loss 2.6868214337155223
validation accuracy:  0.6275828595201776
Epoch: 402
training loss 2.39652394130826
validation accuracy:  0.6780612952433782
Epoch: 403
training loss 1.9578396053984761
validation accuracy:  0.6879073637498266
Epoch: 404
training loss 3.68998159840703
validation accuracy:  0.6870753016225212
Epoch: 405
training loss 2.1343359649181366
validation accuracy:  0.682429621411732
Epoch: 406
training loss 3.037007248029113
validation accuracy:  0.6757037858826792
Epoch: 407
training loss 2.1978342812508345
validation accuracy:  0.691790320343919
Epoch: 408
training loss 3.196839727461338
validation accuracy:  0.6817362363056442
Epoch: 409
training loss 2.6039224565029144
validation accuracy:  0.6865205935376508
Epoch: 410
training loss 2.881478387862444
validation accuracy:  0.6825682984329496
Epoch: 411
training loss 2.4232459645718336
validation accuracy:  0.6797254194979891
Epoch: 412
training loss 2.448295058682561
validation accuracy:  0.6899181805574817
E

In [None]:
# Map class indices back to labels. Built here so that this cell does not
# depend on a later cell having been run first.
label_encoder_inv = {index: label for label, index in label_encoder.items()}

# test_X is built as all test images followed by their flipped copies.
# Predicting on every row would give two answers per image, so exactly one
# prediction is produced per original image. When the flipped copies are
# present, the model outputs of both views are summed before the argmax.
num_test = len(test_X_img)
has_flipped_copies = len(test_X) == 2 * num_test

test_output = []
with torch.no_grad():
    model.eval()
    for i in range(num_test):
        output = model(test_X[i].unsqueeze(0).cuda())
        if has_flipped_copies:
            output = output + model(test_X[num_test + i].unsqueeze(0).cuda())
        prediction = torch.argmax(output, dim=1).item()
        test_output.append(label_encoder_inv[prediction])

In [None]:
# Inverse of label_encoder: class index -> original label.
label_encoder_inv = dict(zip(label_encoder.values(), label_encoder.keys()))

In [None]:
# load SampleSubmission
submission = pd.read_csv("../SampleSubmission.csv")
submission["ans"] = test_output

In [None]:
# Write the filled-in submission to test_output.csv without the index column.
submission.to_csv(path_or_buf="test_output.csv", index=False)