In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import os
import pandas as pd
import copy
from PIL import Image
%matplotlib inline

In [2]:
# Training split listing: space-separated, no header row. Column 0 holds the
# image path and column 1 the label (both are read by the cells below).
tr = pd.read_csv('./sun397_train_lt.txt', header=None, sep=' ')

In [3]:
# Lookup from label id (column 1 of the training listing) to class name;
# populated by the next cell.
class_names = {}

In [4]:
# Fill the label -> class-name lookup in a single pass over the listing instead
# of one full boolean scan per label. drop_duplicates keeps the first row of
# every label in file order, which is the row the lookup is built from; the
# class name is the directory that directly contains the image file.
first_rows = tr.drop_duplicates(subset=[1])
class_names.update(
    (label, path.split('/')[-2])
    for label, path in zip(first_rows[1].values, first_rows[0])
)

In [5]:
# Prefer the first CUDA GPU; fall back to the CPU when CUDA is unavailable.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

In [6]:
#  device = torch.device('cpu')

In [44]:
# --- Run configuration ---
LOG_DIR = './log'          # directory created below
DATALOADER_WORKERS = 4     # num_workers handed to every DataLoader
LEARNING_RATE = 0.01       # SGD learning rate
MOMENTUM = 0.9             # SGD momentum
EPOCHS = 60                # number of epochs passed to train_model
BATCH_SIZE = 256           # mini-batch size of every DataLoader
DISPLAY_STEP = 1           # print mini-batch metrics every N training steps
NUM_CLASSES = 397          # output width of the replacement fc layer

# exist_ok removes the check-then-create race and keeps the cell re-runnable.
os.makedirs(LOG_DIR, exist_ok=True)

In [8]:
class sun_dataset (torch.utils.data.Dataset):
    """Image dataset backed by a space-separated ``<image path> <label>`` listing.

    Args:
        txt_file: Path of the listing file (no header row). Column 0 is the
            image path, column 1 the label.
        transform: Optional callable applied to each loaded PIL image.
    """

    def __init__ (self, txt_file, transform=None):
        super().__init__()
        self.df = pd.read_csv(txt_file, header=None, sep=' ')
        self.transform = transform

    def __len__ (self):
        """Return the number of rows in the listing."""
        return len(self.df)

    def __getitem__ (self, idx):
        """Return ``(image, label)`` for row ``idx``.

        The image is always decoded to 3-channel RGB and then passed through
        ``transform`` when one was given. The label is the listed value minus
        one.
        """
        image_path = self.df.iloc[idx, 0]
        label = self.df.iloc[idx, 1] - 1

        # Decode inside a context manager so the file handle is released right
        # away, and force RGB so grayscale / CMYK / RGBA files yield the same
        # channel count as every other sample (required for batching and for
        # 3-channel normalisation).
        with Image.open(image_path) as raw_image:
            image = raw_image.convert('RGB')

        if self.transform:
            image = self.transform(image)

        return image, label

In [9]:
# Per-split preprocessing.
#
# Every split is resized before cropping: a bare RandomCrop(224) raises on any
# image with a side shorter than 224 px and, on large images, only ever sees a
# small patch of the scene.
#   * train: random scale/aspect crop resized to 224x224 plus horizontal flip.
#   * val / test: deterministic Resize(256) followed by CenterCrop(224).

# Per-channel mean / std documented for torchvision's ImageNet-pretrained models.
_normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

# Shared by 'val' and 'test'; the pipeline holds no per-call state.
_eval_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    _normalize,
])

data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        _normalize,
    ]),
    'val': _eval_transform,
    'test': _eval_transform,
}

In [10]:
# One dataset, loader and size entry per split, each read from
# ./sun397_<split>_lt.txt.
# NOTE: `datasets` rebinds the name imported from torchvision in the first
# cell; the name is kept because other cells may index it by split.
_splits = ('train', 'val', 'test')

datasets = {
    split: sun_dataset(txt_file='./sun397_%s_lt.txt' % split, transform=data_transforms[split])
    for split in _splits
}

# Shuffle only the training split: evaluation metrics do not depend on sample
# order, and a fixed order keeps val/test passes reproducible.
dataloaders = {
    split: torch.utils.data.DataLoader(
        datasets[split],
        batch_size=BATCH_SIZE,
        shuffle=(split == 'train'),
        num_workers=DATALOADER_WORKERS,
    )
    for split in _splits
}

dataset_sizes = {split: len(datasets[split]) for split in _splits}

In [11]:
# Earlier per-split setup, kept for reference only; the live setup is the dict-based cell above.
# train_set = sun_dataset(txt_file='./sun397_train_lt.txt', transform=data_transforms['train'])
# train_loader = torch.utils.data.DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)

# val_set = sun_dataset(txt_file='./sun397_val_lt.txt', transform=data_transforms['val'])
# val_loader = torch.utils.data.DataLoader(val_set, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

# test_set = sun_dataset(txt_file='./sun397_test_lt.txt', transform=data_transforms['test'])
# test_loader = torch.utils.data.DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

In [12]:
# def plot_image (img_tensor, title=None):
    
#     img = img_tensor.numpy().transpose((1, 2, 0))
    
#     mean = np.array([0.485, 0.456, 0.406])
#     std = np.array([0.229, 0.224, 0.225])
#     img = img * std + mean
    
#     img = np.clip(img, 0, 1)
    
#     plt.figure(figsize=(15, 10))
#     plt.imshow(img)
#     if title:
#         plt.title(title)
    

# images, labels = next(iter(dataloaders['train']))

# image_grid = torchvision.utils.make_grid(images)

# plot_image(image_grid, [class_names[l.item()] for l in labels ])

In [13]:
def train_model (model, loss_function, optimizer, scheduler, num_epochs, model_id=None):
    """Train ``model`` with per-epoch validation and return it with the best weights loaded.

    Every epoch runs a 'train' pass followed by a 'val' pass over the
    notebook-level ``dataloaders``. The weights that reach the highest
    validation accuracy are kept, loaded back into ``model`` once training
    finishes, and written to ``model_<model_id>_checkpoint.pth.tar`` in the
    current working directory.

    Args:
        model: Network to optimise; it is expected to already be on ``device``
            (only the batches are moved here).
        loss_function: Callable ``(logits, labels)`` returning a scalar loss tensor.
        optimizer: Optimizer stepped once per training mini-batch.
        scheduler: Learning-rate scheduler stepped once per epoch, after the
            training pass.
        num_epochs: Number of train/val epochs to run.
        model_id: Tag interpolated into the checkpoint file name.

    Returns:
        ``model`` with the best-validation weights loaded.

    Reads the notebook globals ``dataloaders``, ``dataset_sizes``, ``device``
    and ``DISPLAY_STEP``.
    """

    # Snapshot of the best weights seen so far (initially the starting weights)
    best_model_weights = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    training_step = 0
    for epoch in range(num_epochs):

        for phase in ['train', 'val']:
            is_training = phase == 'train'

            if is_training:
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_correct = 0

            for inputs, labels in dataloaders[phase]:

                inputs, labels = inputs.to(device), labels.to(device)
                # The last batch of a pass can be smaller than the configured
                # batch size, so every per-batch average uses the real count.
                batch_size = inputs.shape[0]

                optimizer.zero_grad()

                # Track gradients only while training
                with torch.set_grad_enabled(is_training):

                    logits = model(inputs)
                    _, preds = torch.max(logits, 1)
                    loss = loss_function(logits, labels)

                    if is_training:
                        loss.backward()
                        optimizer.step()
                        training_step += 1

                batch_loss = loss.item()
                batch_correct = (preds == labels).sum().item()

                if is_training and training_step % DISPLAY_STEP == 0:
                    print('Epoch: %d, Step: %5d, Minibatch_loss: %.3f, Minibatch_accuracy: %.3f' % (epoch, training_step, batch_loss, batch_correct / batch_size))

                # Weight the batch-mean loss by the batch size so the epoch
                # figure is a per-sample average
                running_loss += batch_loss * batch_size
                running_correct += batch_correct

            # Advance the schedule only after this epoch's optimizer steps;
            # stepping it first would skip the first value of the schedule.
            if is_training:
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_correct / dataset_sizes[phase]

            print('Epoch: %d, Phase: %s, Epoch_loss: %.3f, Epoch_accuracy: %.3f' % (epoch, phase, epoch_loss, epoch_acc))

            # Keep a copy of the weights whenever validation accuracy improves
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_weights = copy.deepcopy(model.state_dict())

    print()
    print('Training Complete.')
    print('Best validation accuracy: %.3f' % best_acc)

    # Restore the best weights before saving / returning
    model.load_state_dict(best_model_weights)

    # 'epoch' is the number of completed epochs. Using num_epochs rather than
    # the loop variable keeps this valid when num_epochs is 0.
    model_states = {'epoch': num_epochs,
                    'state_dict': model.state_dict(),
                    'best_acc': best_acc,
                    'optimizer' : optimizer.state_dict()}

    torch.save(model_states, 'model_%s_checkpoint.pth.tar' % model_id)

    return model
        

In [45]:
# Start from the pretrained ResNet-152 provided by torchvision
resnet = models.resnet152(pretrained=True)
# Switch off gradient tracking for every parameter of the loaded network
for frozen_param in resnet.parameters():
    frozen_param.requires_grad = False

In [46]:
# Reset the fc layer: replace the classifier head with a fresh linear layer
# that has NUM_CLASSES outputs. The new layer is created after the freeze loop,
# so its parameters keep the default requires_grad=True.
num_features = resnet.fc.in_features  # input width of the existing head
resnet.fc = nn.Linear(num_features, NUM_CLASSES)

In [47]:
# Move the model's parameters and buffers to the selected device.
resnet = resnet.to(device)

In [48]:
# Cross-entropy loss on the logits
loss_function = nn.CrossEntropyLoss()

# SGD is handed the parameters of the replacement fc head only
optimizer = optim.SGD(
    resnet.fc.parameters(),
    lr=LEARNING_RATE,
    momentum=MOMENTUM,
)

# Multiply the learning rate by 0.1 every 30 scheduler steps
exp_lr_scheduler = lr_scheduler.StepLR(
    optimizer,
    step_size=30,
    gamma=0.1,
)

In [49]:
# Train the fc head for EPOCHS epochs; the checkpoint file is tagged 'plain'.
resnet = train_model(
    model=resnet,
    loss_function=loss_function,
    optimizer=optimizer,
    scheduler=exp_lr_scheduler,
    num_epochs=EPOCHS,
    model_id='plain',
)

Epoch: 0, Step:     1, Minibatch_loss: 6.046, Minibatch_accuracy: 0.004
Epoch: 0, Step:     2, Minibatch_loss: 6.008, Minibatch_accuracy: 0.000
Epoch: 0, Step:     3, Minibatch_loss: 6.018, Minibatch_accuracy: 0.012
Epoch: 0, Step:     4, Minibatch_loss: 5.948, Minibatch_accuracy: 0.020
Epoch: 0, Step:     5, Minibatch_loss: 5.851, Minibatch_accuracy: 0.023
Epoch: 0, Step:     6, Minibatch_loss: 5.735, Minibatch_accuracy: 0.031
Epoch: 0, Step:     7, Minibatch_loss: 5.723, Minibatch_accuracy: 0.016
Epoch: 0, Step:     8, Minibatch_loss: 5.462, Minibatch_accuracy: 0.035
Epoch: 0, Step:     9, Minibatch_loss: 5.602, Minibatch_accuracy: 0.016
Epoch: 0, Step:    10, Minibatch_loss: 5.274, Minibatch_accuracy: 0.051
Epoch: 0, Step:    11, Minibatch_loss: 5.389, Minibatch_accuracy: 0.059
Epoch: 0, Step:    12, Minibatch_loss: 5.325, Minibatch_accuracy: 0.043
Epoch: 0, Step:    13, Minibatch_loss: 5.143, Minibatch_accuracy: 0.066
Epoch: 0, Step:    14, Minibatch_loss: 5.027, Minibatch_accuracy

Process Process-24:
Process Process-23:
Process Process-22:
Process Process-21:
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/zhmiao/miniconda3/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/zhmiao/miniconda3/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/zhmiao/miniconda3/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/zhmiao/miniconda3/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/zhmiao/miniconda3/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 52, in _worker_loop
    r = index_queue.get()
  File "/home/zhmiao/miniconda3/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 52, in _worker_loop
    r = index_queue.get()
Traceback (most recent call last):
  File "/home/zhmiao/mini

RuntimeError: DataLoader worker (pid 18865) exited unexpectedly with exit code 1.

In [55]:
# Scratch check of the mini-batch log format string using dummy values.
print('Epoch: %d, Step: %5d, Minibatch_loss: %.3f, Minibatch_accuracy: %.3f'
      % (1, 1, 1, 1))

Epoch: 1, Step:     1, Minibatch_loss: 1.000, Minibatch_accuracy: 1.000


In [50]:
# Reload a saved training checkpoint. map_location remaps the stored tensors
# onto the active device, so a checkpoint written on a GPU machine still loads
# on a CPU-only one.
# NOTE(review): torch.load unpickles the file - only load checkpoints you trust.
chp = torch.load('./log/model_plain_10_checkpoint.pth.tar', map_location=device)

In [52]:
# Restore the optimizer state stored under the checkpoint's 'optimizer' key.
optimizer.load_state_dict(chp['optimizer'])