In [None]:
'''

Data Split
Use train_dataset and eval_dataset as train / test sets

'''
from torchvision.datasets import EMNIST
from torch.utils.data import ConcatDataset, Subset
from torchvision.transforms import ToTensor, Compose
import numpy as np
    
# For convenience, show image at index in dataset
def show_image(dataset, index):
    """Draw one sample of ``dataset`` as a grayscale image.

    The sample is indexed as ``dataset[index][0][0]``: element 0 of the
    sample tuple, then element 0 of that (presumably the single channel of a
    (1, 28, 28) image, as the notebook's example comment states).

    Args:
        dataset: indexable collection of samples.
        index: position of the sample to display.
    """
    from matplotlib import pyplot

    sample = dataset[index]
    first_plane = sample[0][0]
    pyplot.imshow(first_plane, cmap=pyplot.get_cmap('gray'))

def get_datasets(split='balanced', save=False):
    """Build the train / eval subsets of the combined EMNIST data.

    The EMNIST test and train portions for ``split`` are downloaded into
    ``./data`` (when missing) and concatenated, test portion first. Both
    returned datasets are ``Subset`` views into that concatenation.

    Args:
        split: EMNIST split name forwarded to ``EMNIST(split=...)``.
        save: When False (the setting intended for students) the index arrays
            are read from ``./train_test_split.npz``. When True, a seeded
            shuffle is split 80/20, each side is cut to half, and the result
            is written to ``train_test_split.npz``.

    Returns:
        ``(train_dataset, eval_dataset)``.
    """
    download_folder = './data'
    transform = Compose([ToTensor()])

    # Order matters: the stored indices address [test portion, train portion].
    portions = [
        EMNIST(root=download_folder, split=split, download=True, train=is_train, transform=transform)
        for is_train in (False, True)
    ]
    dataset = ConcatDataset(portions)

    if not save:
        # just use save=False for students: load train test split indices
        with np.load('./train_test_split.npz') as archive:
            train_indices = archive['train']
            eval_indices = archive['test']
    else:
        # Only relevant when (re)generating the split file.
        random_seed = 4211 # do not change
        eval_size = 0.2
        n_samples = len(dataset)
        n_eval = int(np.floor(eval_size * n_samples))

        indices = list(range(n_samples))
        np.random.seed(random_seed)
        np.random.shuffle(indices)

        # First n_eval shuffled indices go to eval, the remainder to train.
        eval_indices = indices[:n_eval]
        train_indices = indices[n_eval:]

        # cut to half
        train_indices = train_indices[:len(train_indices)//2]
        eval_indices = eval_indices[:len(eval_indices)//2]

        np.savez('train_test_split.npz', train=train_indices, test=eval_indices)

    return Subset(dataset, indices=train_indices), Subset(dataset, indices=eval_indices)

# TODO
# 1. build your own CNN classifier with the given structure. DO NOT COPY OR USE ANY TRICK
# 2. load pretrained encoder from 'pretrained_encoder.pt' and build a CNN classifier on top of the encoder
# 3. load pretrained encoder from 'pretrained_encoder.pt' and build a Convolutional Autoencoder on top of the encoder (just need to implement decoder)
# *** Note that all the above tasks include implementation, training, analyzing, and reporting

# example main code
# each img has size (1, 28, 28) and each label is in {0, ..., 46}, a total of 47 classes
#if __name__=='__main__':
  #train_ds, eval_ds = get_datasets()
  
  #img_index = 10
  #show_image(train_ds, img_index)
  #show_image(eval_ds, img_index)

In [None]:
#helper function to print mean and S.D of optimal loss and top 1 accuracy and top 3 accuracy
from statistics import *
def print_stat(optimal_loss,optimal_acc1,optimal_acc3):
    """Print mean and standard deviation of the per-run optimal metrics.

    All three metrics are summarised the same way: arithmetic mean and
    population standard deviation (``statistics.pstdev``). Previously the
    loss used ``torch.std`` (sample SD) while the accuracies used ``pstdev``,
    so the reported SDs were not comparable.

    Args:
        optimal_loss: sequence of losses, one per run. Each entry may be a
            plain number or a single-element tensor; it is converted with
            ``float()``.
        optimal_acc1: sequence of top-1 accuracies (numbers), one per run.
        optimal_acc3: sequence of top-3 accuracies (numbers), one per run.

    Raises:
        statistics.StatisticsError: if any of the sequences is empty.
    """
    # float() accepts both Python numbers and 0-dim / single-element tensors,
    # so this helper no longer needs torch to be imported before it is called.
    loss_values = [float(value) for value in optimal_loss]
    print('mean of optimal loss: ', mean(loss_values))
    print('SD of optimal loss: ', pstdev(loss_values))
    print('mean of optimal top1 accuracy: ', mean(optimal_acc1))
    print('SD of optimal top1 accuracy: ', pstdev(optimal_acc1))
    print('mean of optimal top3 accuracy: ', mean(optimal_acc3))
    print('SD of optimal top3 accuracy: ', pstdev(optimal_acc3))

In [None]:
from torchvision.utils import make_grid
def train(model, loaders, optimizer, n_epochs, hyperpara,writer=None):
    """Train a classifier and evaluate it after every epoch.

    Args:
        model: an ``nn.Module`` that is callable on an image batch and also
            provides ``loss(logits, labels)``, ``top1_accuracy(logits, labels)``
            and ``top3_accuracy(logits, labels)``.
        loaders: mapping with the keys ``'train'`` and ``'eval'``; each value
            is a sized iterable yielding ``(images, labels)`` batches.
        optimizer: optimizer over ``model``'s parameters.
        n_epochs: number of train + eval passes to run.
        hyperpara: dict with the keys ``'opt'`` and ``'lr'``; only used to
            build the summary-writer tags.
        writer: optional summary writer exposing ``add_scalars``. When it is
            None the per-epoch summary is printed instead.

    Returns:
        ``(loss_list, top1_list, top3_list)``: the per-batch values of every
        evaluation pass, concatenated over all epochs. The losses are
        detached tensors; the accuracies are whatever the model's accuracy
        methods return.

    Raises:
        ValueError: if one of the loaders yields no batches.
    """
    losses = dict()
    accs1 = dict()
    accs3 = dict()
    loss_list = []
    top1_list = []
    top3_list = []

    def run_epoch(train_or_eval, epoch):
        is_train = train_or_eval == 'train'
        batches = loaders[train_or_eval]
        total_step = len(batches) if is_train else None

        # Switch layers such as dropout / batch norm to the matching mode.
        model.train(is_train)

        epoch_loss = 0.
        epoch_acc1 = 0.
        epoch_acc3 = 0.
        n_batches = 0
        # Evaluation needs no autograd graph; building one wastes memory and,
        # because eval losses are stored, used to keep every graph alive.
        with torch.set_grad_enabled(is_train):
            for step, (images, labels) in enumerate(batches, 1):
                n_batches = step
                if is_train:
                    optimizer.zero_grad()
                logits = model(images)
                batch_loss = model.loss(logits, labels)
                batch_acc1 = model.top1_accuracy(logits, labels)
                batch_acc3 = model.top3_accuracy(logits, labels)

                epoch_loss += batch_loss.item()
                epoch_acc1 += batch_acc1
                epoch_acc3 += batch_acc3

                if is_train:
                    batch_loss.backward()
                    optimizer.step()
                    # `step` is already 1-based, so it is reported as-is
                    # (the old `i + 1` labelled every batch one step ahead).
                    if step % 100 == 0 or step == total_step:
                        print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Top 1 Accuracy: {:.2f}%, Top 3 Accuracy: {:.2f}%'
                              .format(epoch + 1, n_epochs, step, total_step, batch_loss.item(),batch_acc1,batch_acc3))
                else:
                    loss_list.append(batch_loss.detach())
                    top1_list.append(batch_acc1)
                    top3_list.append(batch_acc3)

        if n_batches == 0:
            raise ValueError("loaders['{}'] produced no batches".format(train_or_eval))

        epoch_loss /= n_batches
        epoch_acc1 /= n_batches
        epoch_acc3 /= n_batches
        losses[train_or_eval] = epoch_loss
        accs1[train_or_eval] = epoch_acc1
        accs3[train_or_eval] = epoch_acc3

        if writer is None:
            print('epoch {} [{}] Loss: {:.4f} Top 1 Accuracy: {:.2f}% Top 3 Accuracy: {:.2f}%'.format(epoch, train_or_eval, epoch_loss, epoch_acc1, epoch_acc3))
        elif train_or_eval == 'eval':
            # Logged after the eval pass so both train and eval values exist.
            tag_suffix = (model.__class__.__name__,hyperpara['opt'],hyperpara['lr'])
            writer.add_scalars('%s_loss_%s_%s' % tag_suffix,
                               {'train': losses['train'],
                                'eval': losses['eval']},
                               epoch)
            writer.add_scalars('%s_top1_accuracy_%s_%s' % tag_suffix,
                               {'train': accs1['train'],
                                'eval': accs1['eval']},
                               epoch)
            writer.add_scalars('%s_top3_accuracy_%s_%s' % tag_suffix,
                               {'train': accs3['train'],
                                'eval': accs3['eval']},
                               epoch)

    for epoch in range(n_epochs):
        run_epoch('train', epoch)
        run_epoch('eval', epoch)
    return loss_list,top1_list,top3_list

In [21]:
from torchvision.utils import make_grid
def train_cae(model, loaders, optimizer, n_epochs, hyperpara,writer=None):
    """Train a convolutional autoencoder and evaluate it after every epoch.

    Args:
        model: callable on an image batch and providing
            ``loss(reconstruction, images)``.
        loaders: mapping with the keys ``'train'`` and ``'eval'``; each value
            is a sized iterable yielding ``(images, labels)`` batches. The
            labels are ignored.
        optimizer: optimizer over ``model``'s parameters.
        n_epochs: number of train + eval passes to run.
        hyperpara: dict with the keys ``'opt'`` and ``'lr'``; only used to
            build the summary-writer tags.
        writer: optional summary writer exposing ``add_image`` and
            ``add_scalars``.

    Returns:
        List with the (detached) loss tensor of every evaluation batch,
        concatenated over all epochs.

    Raises:
        ValueError: if one of the loaders yields no batches.
    """
    losses = dict()
    loss_list = []

    def run_epoch(train_or_eval, epoch):
        is_train = train_or_eval == 'train'
        batches = loaders[train_or_eval]
        total_step = len(batches) if is_train else None

        epoch_loss = 0.
        n_batches = 0
        # No autograd graph during evaluation: the stored eval losses used to
        # keep every batch's graph alive.
        with torch.set_grad_enabled(is_train):
            for step, (images, labels) in enumerate(batches, 1):
                n_batches = step
                if is_train:
                    optimizer.zero_grad()
                logits = model(images)
                batch_loss = model.loss(logits, images)
                epoch_loss += batch_loss.item()

                if is_train:
                    batch_loss.backward()
                    optimizer.step()
                    # `step` is already 1-based, so it is reported as-is
                    # (the old `i + 1` labelled every batch one step ahead).
                    if step % 100 == 0 or step == total_step:
                        print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                              .format(epoch + 1, n_epochs, step, total_step, batch_loss.item()))
                else:
                    loss_list.append(batch_loss.detach())

                if writer is not None:
                    # NOTE(review): one image grid is written per batch (train
                    # and eval) under the same tag and step; consider logging
                    # once per epoch to keep the event file small.
                    img_grid = make_grid(logits.detach()) # reconstructed image
                    writer.add_image('%s/images_%s_%s_%s' % (model.__class__.__name__,hyperpara['opt'],hyperpara['lr'],epoch), img_grid, epoch)

        if n_batches == 0:
            raise ValueError("loaders['{}'] produced no batches".format(train_or_eval))

        epoch_loss /= n_batches
        losses[train_or_eval] = epoch_loss
        print('epoch {} [{}] Loss: {:.4f} '.format(epoch, train_or_eval, epoch_loss))
        if writer is not None and train_or_eval == 'eval':
            # Logged after the eval pass so both train and eval values exist.
            writer.add_scalars('%s_loss_%s_%s' % (model.__class__.__name__,hyperpara['opt'],hyperpara['lr']),
                               {'train': losses['train'],
                                'eval': losses['eval']},
                               epoch)

    for epoch in range(n_epochs):
        run_epoch('train', epoch)
        run_epoch('eval', epoch)
    return loss_list

In [None]:
#partition training data randomly into 80% training, 20% validation
from torch.optim import Adam, SGD
def holdout(model_name,dataset,hyperpara_list,writer=None,n_epochs=3,batch_size=32):
    """Pick the best hyper-parameter setting with one 80/20 hold-out split.

    ``dataset`` is split randomly into 80% training and 20% validation data,
    one fresh model is trained per entry of ``hyperpara_list``, and the entry
    whose run reached the highest validation top-1 accuracy is returned.

    Args:
        model_name: ``'cnn'`` (builds ``CnnClassifier``) or
            ``'cnn_pretrained'`` (builds ``FcClassifier``).
        dataset: sized, indexable dataset to split.
        hyperpara_list: non-empty list of dicts with the keys ``'H'``
            (hidden units), ``'opt'`` (``'ADAM'`` selects Adam, anything else
            SGD) and ``'lr'``.
        writer: optional summary writer forwarded to ``train``.
        n_epochs: epochs to train each candidate (default 3).
        batch_size: batch size of both data loaders (default 32).

    Returns:
        The winning element of ``hyperpara_list`` (first one on ties).

    Raises:
        ValueError: for an unknown ``model_name`` or an empty
            ``hyperpara_list``.
    """
    # Validate up front: an unknown name used to leave `model` unbound (or
    # silently reuse the previous iteration's model).
    if model_name not in ('cnn', 'cnn_pretrained'):
        raise ValueError("unknown model_name {!r}; expected 'cnn' or 'cnn_pretrained'".format(model_name))
    if len(hyperpara_list) == 0:
        raise ValueError('hyperpara_list must contain at least one setting')

    # Derive the validation size from the remainder: int(0.8*n) + int(0.2*n)
    # can be smaller than n, which makes random_split raise.
    n_train = int(0.8*len(dataset))
    n_validation = len(dataset) - n_train
    train_ds,validation_ds = torch.utils.data.random_split(dataset,[n_train,n_validation])
    dataloaders = {
        'train': DataLoader(dataset=train_ds, batch_size=batch_size, shuffle=True),
        'eval': DataLoader(dataset=validation_ds, batch_size=batch_size, shuffle=False)
    }

    # train all the models
    loss_list = []
    top1_acc_list = []
    for hyperpara in hyperpara_list:
        if model_name == 'cnn':
            model = CnnClassifier(hyperpara['H'])
        else:
            model = FcClassifier(hyperpara['H'])
        if (hyperpara['opt'] == 'ADAM'):
            optimizer = Adam(model.parameters(), lr=hyperpara['lr'])
        else:
            optimizer = SGD(model.parameters(), lr=hyperpara['lr'])
        loss,top1_acc,_ = train(model,dataloaders,optimizer,n_epochs,hyperpara,writer)
        loss_list.append(loss)
        top1_acc_list.append(top1_acc)

    # print the optimal results
    # NOTE(review): the values returned by train() are per-batch, so "optimal"
    # here means the best single validation batch - confirm this is the
    # intended selection criterion.
    optimal_acc1 = [] # top 1 acc for criteria
    for i in range(0,len(hyperpara_list)):
        print('optimal loss for hyperparameter setting {}: {}'.format(i+1,float(min(loss_list[i]))))
        print('optimal top1 accuracy for hyperparameter setting {}: {}%'.format(i+1,max(top1_acc_list[i])))
        optimal_acc1.append(max(top1_acc_list[i]))

    # choose the best accuracy model
    max_index = optimal_acc1.index(max(optimal_acc1))
    return hyperpara_list[max_index]

In [None]:
import torch
import torch.nn as nn
class CnnClassifier(nn.Module):
    """CNN classifier: a convolutional encoder followed by a two-layer head.

    n_hidden: number of units at the last fc layer (the hidden layer that
    feeds the 47-way output layer).
    """

    def __init__(self,n_hidden):
        super().__init__()
        # CNN encoder. For a 28x28 input the spatial size shrinks
        # 28 -> 26 -> 12 -> 5 -> 3 (max pool) -> 1, leaving 32 features.
        encoder_stages = [
            nn.Conv2d(1, 4, kernel_size=3, stride=1, padding=0),
            nn.ReLU(),
            nn.Conv2d(4, 8, kernel_size=3, stride=2, padding=0),
            nn.ReLU(),
            nn.Conv2d(8, 16, kernel_size=3, stride=2, padding=0),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=1, padding=0),
            nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=0),
            nn.Sigmoid(),
        ]
        self.cnn_layers = nn.Sequential(*encoder_stages)

        # CNN predictor: maps the flattened encoder features to the logits
        # that a softmax would consume.
        head_stages = [
            nn.Linear(32, n_hidden),
            nn.ReLU(),
            nn.Linear(n_hidden, 47),
        ]
        self.linear = nn.Sequential(*head_stages)

        self.softmax = nn.Softmax(dim=1)
        self.loss_function = nn.CrossEntropyLoss(reduction='sum')

    def forward(self, in_data):
        """Return the class logits for a batch of images."""
        batch_size = in_data.size(0)
        encoded = self.cnn_layers(in_data)
        flat_features = encoded.view(batch_size, -1)
        return self.linear(flat_features)

    def loss(self, logits, labels):
        """Summed cross entropy divided by the batch size."""
        summed = self.loss_function(logits, labels)
        return summed / logits.size(0)

    def top1_accuracy(self, logits, labels):
        """Percentage of rows whose highest logit is the label."""
        n_samples = labels.size(0)
        best_class = torch.max(logits.data, 1)[1]
        n_hits = (predicted_equals(best_class, labels)).sum().item() if False else (best_class == labels).sum().item()
        return (n_hits / n_samples) * 100

    def top3_accuracy(self, logits, labels):
        """Percentage of rows whose label is among the three highest logits."""
        n_samples = labels.size(0)
        top3_classes = torch.topk(logits, 3, dim=1)[1]
        # Repeat the labels across three columns to line up with top3_classes.
        repeated_labels = torch.stack([labels] * 3, 1)
        n_hits = torch.sum(top3_classes.eq(repeated_labels)).item()
        return (n_hits / n_samples) * 100

In [None]:
class FcClassifier(nn.Module):
    """Classifier that puts a two-layer head on top of a pretrained encoder.

    The encoder is read from ``pretrained_encoder.pt`` (entry ``'model'``,
    element 0) when the object is constructed.

    n_hidden: number of units at the last fc layer (the hidden layer that
    feeds the 47-way output layer).
    """

    def __init__(self,n_hidden):
        super(FcClassifier, self).__init__()

        # map_location='cpu': the rest of the notebook feeds CPU tensors and
        # never moves the model, so a checkpoint saved on a GPU must be
        # remapped or it fails to load / causes a device mismatch.
        # NOTE(security): torch.load unpickles the file; only load
        # checkpoints from a trusted source.
        # NOTE(review): recent PyTorch releases default to weights_only=True,
        # which rejects pickled modules - confirm against the installed version.
        saved = torch.load('pretrained_encoder.pt', map_location='cpu')
        self.cnn_layers = saved['model'][0]

        # CNN predictor
        # linear layers transforms flattened image features into logits before the softmax layer
        # (assumes the encoder emits 32 features per image - TODO confirm)
        self.linear = nn.Sequential(
          nn.Linear(32, n_hidden),
          nn.ReLU(),
          nn.Linear(n_hidden, 47)
        )
        self.softmax = nn.Softmax(dim=1)
        self.loss_function = nn.CrossEntropyLoss(reduction='sum')

    def forward(self, in_data):
        """Return the class logits for a batch of images."""
        img_features = self.cnn_layers(in_data).view(in_data.size(0), -1)
        logits = self.linear(img_features)
        return logits

    def loss(self, logits, labels):
        """Summed cross entropy divided by the batch size."""
        return self.loss_function(logits, labels) / logits.size(0)

    def top1_accuracy(self, logits, labels):
        """Percentage of rows whose highest logit is the label."""
        total = labels.size(0)
        _, predicted = torch.max(logits.detach(), 1)
        correct = (predicted == labels).sum().item()
        return (correct / total) * 100

    def top3_accuracy(self, logits, labels):
        """Percentage of rows whose label is among the three highest logits."""
        total = labels.size(0)
        _, pred = torch.topk(logits,3,dim=1)
        # Repeat the labels across three columns to line up with `pred`.
        stack_labels = torch.stack([labels,labels,labels],1)
        correct_tensor = pred.eq(stack_labels)
        correct = torch.sum(correct_tensor).item()
        return (correct / total) * 100

In [None]:
# CAE decoder
class Decoder(nn.Module):
    """Convolutional autoencoder: pretrained encoder plus a trainable decoder.

    The encoder is read from ``pretrained_encoder.pt`` (entry ``'model'``,
    element 0) when the object is constructed; only the transposed
    convolution decoder is defined here.
    """

    def __init__(self):
        super(Decoder, self).__init__()

        # map_location='cpu': the rest of the notebook feeds CPU tensors and
        # never moves the model, so a checkpoint saved on a GPU must be
        # remapped or it fails to load / causes a device mismatch.
        # NOTE(security): torch.load unpickles the file; only load
        # checkpoints from a trusted source.
        # NOTE(review): recent PyTorch releases default to weights_only=True,
        # which rejects pickled modules - confirm against the installed version.
        saved = torch.load('pretrained_encoder.pt', map_location='cpu')
        self.cnn_layers = saved['model'][0]

        # With no padding each layer maps size s to (s - 1) * stride + kernel,
        # i.e. 1 -> 3 -> 5 -> 11 -> 13 -> 28 (assumes the encoder emits
        # (batch, 32, 1, 1) features - TODO confirm).
        self.decoder_layers = nn.Sequential(
          nn.ConvTranspose2d(in_channels=32, out_channels=16, kernel_size=3, stride=1, padding=0),
          nn.ReLU(),
          nn.ConvTranspose2d(in_channels=16, out_channels=8, kernel_size=3, stride=1, padding=0),
          nn.ReLU(),
          nn.ConvTranspose2d(in_channels=8, out_channels=8, kernel_size=3, stride=2, padding=0),
          nn.ReLU(),
          nn.ConvTranspose2d(in_channels=8, out_channels=4, kernel_size=3, stride=1, padding=0),
          nn.ReLU(),
          nn.ConvTranspose2d(in_channels=4, out_channels=1, kernel_size=4, stride=2, padding=0),
          nn.Sigmoid()
        )
        self.loss_function = nn.MSELoss(reduction='sum')

    def forward(self, in_data):
        """Encode ``in_data`` and return the decoder's reconstruction."""
        img_features = self.cnn_layers(in_data)
        logits = self.decoder_layers(img_features)
        return logits

    def loss(self, logits, images):
        """Summed squared error divided by the batch size."""
        return self.loss_function(logits, images) / logits.size(0)

In [None]:
def holdout_cae(dataset,hyperpara_list,writer=None,n_epochs=3,batch_size=32):
    """Pick the best autoencoder hyper-parameters with one 80/20 hold-out split.

    ``dataset`` is split randomly into 80% training and 20% validation data,
    one fresh ``Decoder`` is trained per entry of ``hyperpara_list``, and the
    entry whose run reached the lowest validation loss is returned.

    Args:
        dataset: sized, indexable dataset to split.
        hyperpara_list: non-empty list of dicts with the keys ``'opt'``
            (``'ADAM'`` selects Adam, anything else SGD) and ``'lr'``.
        writer: optional summary writer forwarded to ``train_cae``.
        n_epochs: epochs to train each candidate (default 3).
        batch_size: batch size of both data loaders (default 32).

    Returns:
        The winning element of ``hyperpara_list`` (first one on ties).

    Raises:
        ValueError: if ``hyperpara_list`` is empty.
    """
    if len(hyperpara_list) == 0:
        raise ValueError('hyperpara_list must contain at least one setting')

    # Derive the validation size from the remainder: int(0.8*n) + int(0.2*n)
    # can be smaller than n, which makes random_split raise.
    n_train = int(0.8*len(dataset))
    n_validation = len(dataset) - n_train
    train_ds,validation_ds = torch.utils.data.random_split(dataset,[n_train,n_validation])
    dataloaders = {
        'train': DataLoader(dataset=train_ds, batch_size=batch_size, shuffle=True),
        'eval': DataLoader(dataset=validation_ds, batch_size=batch_size, shuffle=False)
    }

    # train all the models
    loss_list = []
    for hyperpara in hyperpara_list:
        model = Decoder()
        if (hyperpara['opt'] == 'ADAM'):
            optimizer = Adam(model.parameters(), lr=hyperpara['lr'])
        else:
            optimizer = SGD(model.parameters(), lr=hyperpara['lr'])
        loss = train_cae(model,dataloaders,optimizer,n_epochs,hyperpara,writer)
        loss_list.append(loss)

    # print the optimal results
    # NOTE(review): the values returned by train_cae() are per-batch, so
    # "optimal" here means the best single validation batch - confirm this is
    # the intended selection criterion.
    optimal_loss = [] # lowest validation loss per setting, used as criterion
    for i in range(0,len(hyperpara_list)):
        best_loss = min(loss_list[i])
        print('optimal loss for hyperparameter setting {}: {}'.format(i+1,float(best_loss)))
        optimal_loss.append(best_loss)

    # choose the setting with the lowest loss
    min_index = optimal_loss.index(min(optimal_loss))
    return hyperpara_list[min_index]

In [None]:
#main function
from tensorboardX import SummaryWriter
from torch.utils.data import DataLoader
import time
#from torch.optim import Adam, SGD
# Load the fixed train / test split and wrap both parts in data loaders.
train_ds, test_ds = get_datasets()
dataloaders = {
    'train': DataLoader(dataset=train_ds, batch_size=32, shuffle=True),
    'eval': DataLoader(dataset=test_ds, batch_size=32, shuffle=False)
}
writer = SummaryWriter('log')
print('=========running holdout validation for cnn============')
# Candidate settings: hidden units H, optimizer and learning rate.
cnn_hyper_list = [{'H' : 32, 'opt' : 'ADAM', 'lr' : 0.001},
                  {'H' : 32, 'opt' : 'SGD', 'lr' : 0.1},
                  {'H' : 32, 'opt' : 'SGD', 'lr' : 0.01},
                  {'H' : 64, 'opt' : 'ADAM', 'lr' : 0.001},
                  {'H' : 64, 'opt' : 'SGD', 'lr' : 0.1},
                  {'H' : 64, 'opt' : 'SGD', 'lr' : 0.01}
                  ]
cnn_best_hyperpara = holdout('cnn',train_ds,cnn_hyper_list)
print('cnn hyperparameter chosen: ',cnn_best_hyperpara)
print('=========finish holdout validation==============')
# #After validation, learn from scratch using the best parameter
print('============training model from scratch with optimal hyperparameters===========')
loss_list = []
top1_acc_list = []
top3_acc_list = []
optimal_loss = []
optimal_acc1 = []
optimal_acc3 = []
# train the model 5 times
t0 = time.time()
for i in range(5):
    # Build a fresh model and optimizer for every repetition. Reusing one
    # model made the five "runs" a single 15-epoch training, so the reported
    # mean / SD were not statistics over independent runs.
    best_model = CnnClassifier(cnn_best_hyperpara['H'])
    if (cnn_best_hyperpara['opt'] == 'ADAM'):
        optimizer = Adam(best_model.parameters(), lr=cnn_best_hyperpara['lr'])
    else:
        optimizer = SGD(best_model.parameters(), lr=cnn_best_hyperpara['lr'])
    loss,top1_acc,top3_acc = train(best_model,dataloaders, optimizer, 3, cnn_best_hyperpara,writer)
    loss_list.append(loss)
    top1_acc_list.append(top1_acc)
    top3_acc_list.append(top3_acc)
    # Best per-batch evaluation values of this repetition.
    optimal_loss.append(min(loss))
    optimal_acc1.append(max(top1_acc))
    optimal_acc3.append(max(top3_acc))
print('training time: {} seconds'.format(time.time() - t0))
print_stat(optimal_loss,optimal_acc1,optimal_acc3)
print('=============finish training=======================')

In [None]:
print('=============learning from pre-trained Encoder==================')
# Same hyper-parameter candidates as the plain CNN, evaluated on FcClassifier.
cnn_best_hyperpara = holdout('cnn_pretrained',train_ds,cnn_hyper_list)
print('cnn hyperparameter chosen: ',cnn_best_hyperpara)
print('=========finish holdout validation==============')
print('============training model from scratch with optimal hyperparameters===========')
loss_list = []
top1_acc_list = []
top3_acc_list = []
optimal_loss = []
optimal_acc1 = []
optimal_acc3 = []
# train the model 5 times
t0 = time.time()
for i in range(5):
    # Build a fresh model (reloading the pretrained encoder) and optimizer
    # for every repetition. Reusing one model made the five "runs" a single
    # 15-epoch training, so the reported mean / SD were not statistics over
    # independent runs.
    best_model = FcClassifier(cnn_best_hyperpara['H'])
    if (cnn_best_hyperpara['opt'] == 'ADAM'):
        optimizer = Adam(best_model.parameters(), lr=cnn_best_hyperpara['lr'])
    else:
        optimizer = SGD(best_model.parameters(), lr=cnn_best_hyperpara['lr'])
    loss,top1_acc,top3_acc = train(best_model,dataloaders, optimizer, 3, cnn_best_hyperpara,writer)
    loss_list.append(loss)
    top1_acc_list.append(top1_acc)
    top3_acc_list.append(top3_acc)
    # Best per-batch evaluation values of this repetition.
    optimal_loss.append(min(loss))
    optimal_acc1.append(max(top1_acc))
    optimal_acc3.append(max(top3_acc))
print('training time: {} seconds'.format(time.time() - t0))
print_stat(optimal_loss,optimal_acc1,optimal_acc3)
print('==============finish training====================')

In [None]:
print('=========running holdout validation for cae============')
# Candidate settings for the autoencoder: optimizer and learning rate only.
cae_hyper_list = [{'opt' : 'ADAM', 'lr' : 0.001},
                  {'opt' : 'SGD', 'lr' : 0.1},
                  {'opt' : 'SGD', 'lr' : 0.01}
                  ]
cae_best_hyperpara = holdout_cae(train_ds,cae_hyper_list,writer) #reconstruct images
print('cae hyperparameter chosen: ',cae_best_hyperpara)
print('===========finish holdout validation=====================')
print('============training model from scratch with optimal hyperparameters===========')
loss_list = []
optimal_loss = []
best_cae = Decoder()
# Use the optimizer selected by the hold-out validation. A leftover debug
# line used to replace it with SGD(lr=0.01) unconditionally, so the final
# model was not trained with the chosen hyper-parameters.
if (cae_best_hyperpara['opt'] == 'ADAM'):
    optimizer = Adam(best_cae.parameters(), lr=cae_best_hyperpara['lr'])
else:
    optimizer = SGD(best_cae.parameters(), lr=cae_best_hyperpara['lr'])
loss_cae = train_cae(best_cae,dataloaders, optimizer, 3, cae_best_hyperpara,writer)
loss_list.append(loss_cae)
# Take the minimum over this run's per-batch losses. min(loss_list) operated
# on a list of lists and returned the whole per-batch list.
optimal_loss.append(min(loss_cae))
print('optimal loss: ', float(min(optimal_loss)))
print('=============finish training=======================')

Epoch [1/3], Step [100/1645], Loss: 66.2611
Epoch [1/3], Step [200/1645], Loss: 67.4874
Epoch [1/3], Step [300/1645], Loss: 69.8884
Epoch [1/3], Step [400/1645], Loss: 66.8353
Epoch [1/3], Step [500/1645], Loss: 68.0339
Epoch [1/3], Step [600/1645], Loss: 70.9821
Epoch [1/3], Step [700/1645], Loss: 64.2436
Epoch [1/3], Step [800/1645], Loss: 67.1752
Epoch [1/3], Step [900/1645], Loss: 69.2359
Epoch [1/3], Step [1000/1645], Loss: 68.4411
Epoch [1/3], Step [1100/1645], Loss: 65.3218
Epoch [1/3], Step [1200/1645], Loss: 64.5302
Epoch [1/3], Step [1300/1645], Loss: 59.7491
Epoch [1/3], Step [1400/1645], Loss: 59.6748
Epoch [1/3], Step [1500/1645], Loss: 60.1718
Epoch [1/3], Step [1600/1645], Loss: 46.4357
Epoch [1/3], Step [1645/1645], Loss: 52.3768
epoch 0 [train] Loss: 66.0263 
epoch 0 [eval] Loss: 48.2940 
Epoch [2/3], Step [100/1645], Loss: 48.1434
Epoch [2/3], Step [200/1645], Loss: 45.4923
Epoch [2/3], Step [300/1645], Loss: 39.5051
Epoch [2/3], Step [400/1645], Loss: 46.2952
Epoch [