In [8]:
import os
import numpy as np
import time
import matplotlib.pyplot as plt
import pandas as pd
from skimage import io, transform
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
import torchvision

from PIL import Image

import PIL


# Load data

In [9]:
class imagDataset(Dataset):
    """
    Image dataset described by a CSV label file.

    The CSV is parsed with ``pandas.read_csv`` defaults. Column 0 is used as
    the image file name (joined onto ``root_dir``) and column 1 as the label.
    """

    def __init__(self, csv_file, root_dir, transform = None):
        """
        :param csv_file:  path of the CSV file listing file names and labels
        :param root_dir:  directory the file names in the CSV are relative to
        :param transform: optional callable applied to the RGB PIL image
        """
        self.label_list = pd.read_csv(csv_file)
        self.root_dir   = root_dir
        self.transform  = transform

    def __len__(self):
        """Number of rows in the label file."""
        return len(self.label_list)

    def __getitem__(self, item):
        """
        Load one sample.

        :param item: row position (an int, or a tensor holding one)
        :return: dict with keys ``'image'`` (the RGB image, transformed when a
                 transform was given) and ``'label'`` (value of column 1)
        """
        if torch.is_tensor(item):
            item = item.tolist()

        # Positional .iloc lookups read a single cell; `.values[...]` would
        # materialise the whole frame as a fresh array on every access.
        img_name = os.path.join(self.root_dir, self.label_list.iloc[item, 0])
        label = self.label_list.iloc[item, 1]

        # convert() returns an independent image, so the underlying file can
        # be released deterministically as soon as the block exits.
        with Image.open(img_name) as raw_image:
            image = raw_image.convert('RGB')

        if self.transform:
            image = self.transform(image)

        return {'image': image, 'label': label}

In [10]:

def get_data_set(path, fig_size, batch_size):
    """
    Build the training and validation data loaders.

    path:       directory holding train_labels.csv / train_set and
                val_labels.csv / val_set
    fig_size:   side length the images are cropped to
    batch_size: number of samples per batch
    return:     [train_dataloader, val_dataloader]
    """
    # Per-channel statistics — presumably the ImageNet values the torchvision
    # pretrained models expect; confirm if the backbone changes.
    channel_norm = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                        std=[0.229, 0.224, 0.225])

    # Training pipeline: random rotation / crop / flip, then tensor + normalise.
    augment_train = transforms.Compose([
        transforms.RandomRotation(degrees = 90),
        transforms.RandomResizedCrop(fig_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        channel_norm,
    ])
    # Validation pipeline: fixed resize and centre crop, no random steps.
    preprocess_val = transforms.Compose([
        transforms.Resize(300),
        transforms.CenterCrop(fig_size),
        transforms.ToTensor(),
        channel_norm,
    ])

    def _dataset(csv_name, image_dir, pipeline):
        # One split = label CSV + image folder under `path`.
        return imagDataset(os.path.join(path, csv_name),
                           os.path.join(path, image_dir),
                           transform=pipeline)

    train_split = _dataset("train_labels.csv", "train_set", augment_train)
    val_split = _dataset("val_labels.csv", "val_set", preprocess_val)

    # Both loaders shuffle and use four worker processes.
    loaders = [
        DataLoader(split, batch_size=batch_size, shuffle=True, num_workers=4)
        for split in (train_split, val_split)
    ]
    return loaders


# Check accuracy

In [11]:


class AverageMeter(object):
    """Tracks the latest value and the running weighted mean of a metric.

    Adapted from the PyTorch example scripts.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear the latest value, mean, weighted sum and total weight."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` with weight ``n`` and refresh the running mean."""
        self.val = val
        self.sum = self.sum + val * n
        self.count = self.count + n
        # Mean over everything seen since the last reset().
        self.avg = self.sum / self.count

        
def check_accuracy( dataloader, num_of_batch,  model, device):
    """
    Measure top-1 and top-3 accuracy of ``model`` on the first batches of a loader.

    :param dataloader:   iterable of dict batches with 'image' and 'label' tensors
    :param num_of_batch: number of batches to evaluate; iteration stops when the
                         batch index equals this value, so a value that is never
                         reached (e.g. -1) evaluates the whole loader
    :param model:        module mapping an image batch to (batch, classes) scores
    :param device:       device the batches are moved to (cpu or gpu)
    :return: [top1_accuracy, top3_accuracy] as floats in [0, 1];
             [0.0, 0.0] when no sample was evaluated
    """
    num_correct   = 0  # top-1 hits
    num_correct_3 = 0  # top-3 hits
    num_samples   = 0
    model.eval()  # set model to evaluation mode

    with torch.no_grad():
        for i_batch, sample_batched in enumerate(dataloader):
            if i_batch == num_of_batch:
                break
            x = sample_batched['image'].to(device=device, dtype=torch.float)  # move to device, e.g. GPU
            y = sample_batched['label'].to(device=device, dtype=torch.long)
            scores = model(x)

            # Never ask for more candidates than there are classes.
            k = min(3, scores.size(1))
            _, preds = scores.topk(k, 1)

            # hits[b, j] is True when the j-th ranked class of sample b is the label;
            # y needs a trailing axis so it broadcasts against (batch, k).
            hits = preds == y.unsqueeze(1)
            num_correct   += hits[:, 0].sum().item()
            num_correct_3 += hits.any(dim=1).sum().item()
            num_samples   += y.size(0)

    if num_samples == 0:
        # Empty loader or num_of_batch == 0: nothing to average over.
        print('No samples evaluated, accuracy reported as 0')
        return [0.0, 0.0]

    acc  = float(num_correct) / num_samples
    acc3 = float(num_correct_3) / num_samples
    print('Got %d / %d correct (%.2f), top3 correct(%.2f)' % (num_correct, num_samples, 100 * acc, 100 * acc3) )
    return [acc, acc3]

# Label smoothing loss

In [12]:
# Label-smoothing loss (implementation adapted from an online source)
class LabelSmoothingLoss(nn.Module):
    """
    Cross-entropy against a smoothed target distribution.

    The true class receives probability ``1 - smoothing``; the remaining
    ``smoothing`` mass is split evenly over the other ``classes - 1`` classes.
    """

    def __init__(self, classes, smoothing=0.0, dim=-1):
        """
        :param classes:   number of classes (size of ``pred`` along ``dim``)
        :param smoothing: probability mass taken away from the true class
        :param dim:       axis of ``pred`` that indexes the classes
        """
        super(LabelSmoothingLoss, self).__init__()
        self.confidence = 1.0 - smoothing
        self.smoothing = smoothing
        self.cls = classes
        self.dim = dim

    def forward(self, pred, target):
        """
        :param pred:   raw scores, with the class axis at ``self.dim``
        :param target: integer (long) class indices, shaped like ``pred``
                       without its class axis
        :return: scalar tensor, the mean smoothed cross-entropy
        """
        log_prob = pred.log_softmax(dim=self.dim)

        # With a single class there are no "other" classes to share mass with.
        off_value = self.smoothing / (self.cls - 1) if self.cls > 1 else 0.0

        with torch.no_grad():
            true_dist = torch.full_like(log_prob, off_value)
            # Scatter along the class axis itself (not a fixed axis 1), so
            # inputs with extra leading axes or a class-first layout line up.
            true_dist.scatter_(self.dim, target.unsqueeze(self.dim), self.confidence)

        return torch.mean(torch.sum(-true_dist * log_prob, dim=self.dim))

# Chain model and pretrained model

In [None]:
def initilize_pretrained_model(model_name, num_classes): # get pretrained model
    """
    Build a pretrained torchvision backbone with a fresh classification head.

    :param model_name:  one of 'res50', 'res101', 'resnext'
    :param num_classes: output size of the replacement ``fc`` layer
    :return: the torchvision model with ``model.fc`` replaced
    :raises ValueError: if ``model_name`` is not one of the supported names
    """
    # Short name -> constructor name inside torchvision.models.
    constructors = {
        'res50':   'resnet50',
        'res101':  'resnet101',
        'resnext': 'resnext101_32x8d',
    }
    if model_name not in constructors:
        # Fail here with a clear message instead of falling through to an
        # unbound `model` at the return statement.
        raise ValueError("can't find model %s" % (model_name,))

    model = getattr(torchvision.models, constructors[model_name])(pretrained=True)

    # Size the new head from the layer it replaces.
    in_feature = model.fc.in_features
    model.fc = nn.Linear(in_feature, num_classes) # modify last layer
    return model


class chain_model(nn.Module):
    """Pretrained backbone with a new ``fc`` layer, followed by two more linear layers.

    Every parameter the backbone owns at construction time is frozen; the
    replacement ``fc`` layer and the two added layers are created afterwards
    and therefore stay trainable.
    """

    def __init__(self, pre_trained, num_classes):
        """
        :param pre_trained: backbone module exposing an ``fc`` linear layer
        :param num_classes: size of the final output
        """
        super().__init__()
        self.pre_trained = pre_trained

        # Freeze the existing backbone weights before any new layer is attached.
        for frozen in self.pre_trained.parameters():
            frozen.requires_grad = False

        backbone_width = self.pre_trained.fc.in_features
        hidden_a, hidden_b = 2000, 2000

        # Layers are built in the order the data flows through them.
        self.pre_trained.fc = nn.Linear(backbone_width, hidden_a)
        self.rl1 = nn.LeakyReLU()
        self.fc1 = nn.Linear(hidden_a, hidden_b)
        self.rl2 = nn.LeakyReLU()
        self.fc2 = nn.Linear(hidden_b, num_classes)

    def forward(self, x):
        """Run the backbone, then LeakyReLU -> fc1 -> LeakyReLU -> fc2."""
        for stage in (self.pre_trained, self.rl1, self.fc1, self.rl2, self.fc2):
            x = stage(x)
        return x
        
        
        

def initilize_pretrained_chain_model(model_name, num_classes): # initialize chained model
    """
    Build a ``chain_model`` on top of a pretrained torchvision backbone.

    :param model_name:  one of 'res50', 'res101', 'resnext'
    :param num_classes: output size handed to ``chain_model``
    :return: ``chain_model(pre_trained, num_classes)``
    :raises ValueError: if ``model_name`` is not one of the supported names
    """
    # Short name -> constructor name inside torchvision.models.
    constructors = {
        'res50':   'resnet50',
        'res101':  'resnet101',
        'resnext': 'resnext101_32x8d',
    }
    if model_name not in constructors:
        # Fail here with a clear message instead of reaching chain_model()
        # with `pre_trained` unbound.
        raise ValueError("can't find model %s" % (model_name,))

    pre_trained = getattr(torchvision.models, constructors[model_name])(pretrained=True)
    return chain_model(pre_trained, num_classes)


def initilize_optimizer(model, lr , reg, if_train_full_model): # initialize corresponding optimizer
    """
    Create a Nesterov-momentum SGD optimizer over the model's trainable parameters.

    if_train_full_model = True.  Every parameter is first switched to
                                 requires_grad = True, so all layers are optimized.
    if_train_full_model = False. Only parameters that already have
                                 requires_grad = True are optimized.

    lr is the learning rate and reg the weight decay; momentum is fixed at 0.9.
    """
    if if_train_full_model:
        # Unfreeze everything before collecting the trainable set.
        for _, weight in model.named_parameters():
            weight.requires_grad = True

    trainable = [
        weight
        for _, weight in model.named_parameters()
        if weight.requires_grad == True
    ]
    return optim.SGD(trainable, lr=lr, nesterov=True, momentum=0.9, weight_decay=reg)
    
        

# Train

In [13]:
# Main training 
def train(save_path, data, model, optim_, epochs, i_save, num_of_batch_for_val, device,if_search_para = False,
          num_classes = 251, smoothing = 0.1):
    """
    Train the model with a label-smoothing loss, validating and checkpointing as it goes.

    :param save_path:  directory that checkpoints and CSV logs are written to
    :param data: [train_data, val_data], data loaders yielding dict batches
                 with 'image' and 'label'
    :param model: network to train; moved to ``device``
    :param optim_: [optimizer, scheduler_warmup]
    :param epochs: iterable of epoch numbers to run (used in file names)
    :param i_save: validate and checkpoint every ``i_save`` iterations
                   (iteration 0 is skipped)
    :param num_of_batch_for_val:  number of batches used for each validation
    :param device: device the model and batches are moved to
    :param if_search_para: when True, return right after the first
                   intermediate validation instead of training on
    :param num_classes: number of classes given to the label-smoothing loss
    :param smoothing: smoothing factor given to the label-smoothing loss
    :return: ``(average loss, top-1 accuracy, top-3 accuracy)`` when
             ``if_search_para`` is True and an intermediate validation is
             reached; otherwise None
    """

    dtype = torch.float

    # load data, model, optimizer
    train_data, val_data = data
    model = model.to(device=device)
    optimizer, scheduler_warmup = optim_

    loss_list = []   # [epoch, batch, loss], one row every five iterations
    val_list = []    # [epoch, batch, top-1 accuracy]
    val3_list = []   # [epoch, batch, top-3 accuracy]
    batch_time = AverageMeter() # average time per iteration
    losses = AverageMeter()     # average loss per sample

    loss_layer = LabelSmoothingLoss(num_classes, smoothing) # use label smoothing to compute loss

    def _print_hyper_parameters():
        # Report the weight decay and learning rate of every parameter group.
        for param_group in optimizer.param_groups:
            print("reg = ", param_group['weight_decay'])
            print("lr = ", param_group['lr'])

    def _save_checkpoint(epoch, val_acc):
        # The whole model and optimizer objects are pickled, not just state dicts.
        check_point = {'model': model, 'optim': (optimizer, scheduler_warmup), 'val_acc':val_acc }
        torch.save( check_point,os.path.join(save_path, "model_%d"%epoch) )

    end = time.time()
    for e in epochs:
        print("epoch %d"%e)
        _print_hyper_parameters()

        i_batch = -1  # stays -1 when the training loader yields nothing
        for i_batch, sample_batched in enumerate(train_data):

            model.train()  # check_accuracy() leaves the model in eval mode

            x = sample_batched['image'].to(device=device, dtype=dtype)
            y = sample_batched['label'].to(device=device, dtype=torch.long)
            scores = model(x)
            loss = loss_layer(scores, y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Weight the running loss by the real batch size; len(sample_batched)
            # would be the number of dict keys, not the number of samples.
            losses.update(loss.item(), x.size(0))

            batch_time.update( time.time() - end )
            end = time.time()

            if(i_batch % 5 == 0): # log every five iterations
                print('Iteration %d, loss = %.4f, avg loss = %.4f, avg time = %.4f' % (i_batch, loss.item(), losses.avg, batch_time.avg))

                loss_list.append( [e, i_batch, loss.item()] ) # save

            if (i_batch % i_save == 0 and i_batch != 0 ) : # check accuracy and save the result
                [val_acc,val3_acc] = check_accuracy(val_data, num_of_batch=num_of_batch_for_val, model=model, device=device)

                val_list.append( [e,i_batch, val_acc] ) # save
                val3_list.append( [e,i_batch, val3_acc])

                if( if_search_para):
                    return (losses.avg, val_acc, val3_acc)

                _save_checkpoint(e, val_acc)

        print("epoch %d"%e)
        _print_hyper_parameters() # values that were used during this epoch

        # Advance the schedule only after the epoch's optimizer steps, so the
        # first value of the schedule is not skipped; the checkpoint below then
        # carries the scheduler state for the next epoch.
        scheduler_warmup.step()

        # save the result for this epoch
        [val_acc, val3_acc] = check_accuracy(val_data, num_of_batch=num_of_batch_for_val, model=model, device=device)
        _save_checkpoint(e, val_acc)

        val_list.append( [e,i_batch, val_acc] )
        val3_list.append( [e,i_batch, val3_acc] )

        # The logs are cumulative: each epoch's CSV holds all rows so far.
        df = pd.DataFrame(np.array(loss_list),columns=['epoch', 'batch', 'loss'])
        df.to_csv(os.path.join(save_path, "loss_%d.csv"%e), index=False)

        df = pd.DataFrame(np.array(val_list),columns=['epoch', 'batch', 'val'])
        df.to_csv(os.path.join(save_path, "val_%d.csv"%e), index=False)

        df = pd.DataFrame(np.array(val3_list),columns=['epoch', 'batch', 'val'])
        df.to_csv(os.path.join(save_path, "val3_%d.csv"%e), index=False)


In [14]:
# load previous model 

def load_model(path, map_location=None):
    """
    Load a checkpoint written by the training loop.

    :param path: checkpoint file; expected to hold a dict with the keys
                 'model', 'optim' and 'val_acc'
    :param map_location: forwarded to ``torch.load``; pass e.g. 'cpu' to open a
                 checkpoint saved on a GPU machine on a CPU-only host
    :return: ``(model, optim_)`` taken from the checkpoint, or None when
             ``path`` is not an existing file
    """
    print(path)
    if not os.path.isfile(path):
        print(" model doesn't exist")
        return None

    # NOTE(security): torch.load unpickles the file, which can execute
    # arbitrary code — only load checkpoints from a trusted source.
    check_point = torch.load(path, map_location=map_location)

    model   = check_point['model']
    optim_  = check_point['optim']
    val_acc = check_point['val_acc']
    print("load model")
    print(" val_acc = %.4f"%( val_acc))

    return (model, optim_)


# Chain model

# Main

In [None]:
# Select the compute device and build the data loaders

USE_GPU = True
# Use CUDA only when it is both requested and available; otherwise run on the CPU.
device = torch.device('cuda' if USE_GPU and torch.cuda.is_available() else 'cpu')
print('using device:', device)

fig_size = 224    # side length of the crops fed to the network
batch_size = 16

# The data set is expected in ./data relative to the working directory.
root_path = os.path.join(os.getcwd(), 'data')

train_data, val_data = get_data_set(root_path, fig_size, batch_size)


In [18]:
learning_rate = 1e-2 # initial learning rate
reg = 0.0            # weight decay
if_train_full_model = False # True would set requires_grad on every parameter

load_ = load_model( "../working/resnet/model_7")  # try to resume from a checkpoint
if( load_ is None): # no checkpoint: start from the pretrained backbone
    model = initilize_pretrained_chain_model('res101', 251)
    optimizer = initilize_optimizer(model, learning_rate, reg, if_train_full_model)
    scheduler_warmup = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1) # step-decay lr
    optim_ = [optimizer, scheduler_warmup]
else:
    # Resume: take the model, optimizer and scheduler stored in the checkpoint,
    # otherwise none of these names would be defined below.
    model, optim_ = load_
    optimizer, scheduler_warmup = optim_


data = [train_data, val_data]

save_path = "../working/resnet" # path to save result

epochs = range(8,10)  # training epochs
# No gradients exist yet, so this step should not change any weight; presumably
# it is here so the scheduler sees an optimizer step before its own first step.
optimizer.zero_grad()
optimizer.step()

# main training procedure
train(save_path, data, model, optim_, epochs, i_save = 1, num_of_batch_for_val = 1, device = device)
    

../working/resnet/model_7
 model doesn't exist


Downloading: "https://download.pytorch.org/models/resnet101-5d3b4d8f.pth" to /Users/huhaoyu/.cache/torch/checkpoints/resnet101-5d3b4d8f.pth


HBox(children=(FloatProgress(value=0.0, max=178728960.0), HTML(value='')))


epoch 8
reg =  0.0
lr =  0.01
Iteration 0, loss = 5.5247, avg loss = 5.5247, avg time = 7.0974
Iteration 5, loss = 5.5164, avg loss = 5.5239, avg time = 6.0057
Iteration 10, loss = 5.5200, avg loss = 5.5171, avg time = 5.8814
Iteration 15, loss = 5.4959, avg loss = 5.5224, avg time = 5.9027
Iteration 20, loss = 5.4920, avg loss = 5.5258, avg time = 6.1011
Iteration 25, loss = 5.5688, avg loss = 5.5246, avg time = 6.0378
Iteration 30, loss = 5.5117, avg loss = 5.5283, avg time = 6.0192


Traceback (most recent call last):
  File "/anaconda3/envs/py3/lib/python3.7/multiprocessing/queues.py", line 242, in _feed
    send_bytes(obj)
  File "/anaconda3/envs/py3/lib/python3.7/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "/anaconda3/envs/py3/lib/python3.7/multiprocessing/connection.py", line 404, in _send_bytes
    self._send(header + buf)
  File "/anaconda3/envs/py3/lib/python3.7/multiprocessing/connection.py", line 368, in _send
    n = write(self._handle, buf)
BrokenPipeError: [Errno 32] Broken pipe


KeyboardInterrupt: 