In [1]:
import matplotlib.pyplot as plt
import matplotlib.image as img
%matplotlib inline
import numpy as np
from collections import defaultdict
import collections
import os
import cv2
import pandas as pd
import PIL
from PIL import Image
from shutil import copy
from shutil import copytree, rmtree
from tqdm.notebook import tqdm
import json

In [2]:
# Import necessary PyTorch libraries
import torch
from torch import nn
from torch import optim
import torch.utils.data as data
import torch.nn.functional as F
from torchvision import datasets, transforms, models

In [3]:
batch_size = 64
mini_data_set = False

In [4]:
import pandas as pd
import numpy as np
train_df = pd.read_hdf("../data-frames/train_df.h5")
val_df = pd.read_hdf("../data-frames/val_df.h5")
test_df = pd.read_hdf("../data-frames/test_df.h5")
if mini_data_set:
    train_df = train_df[:128]
    val_df = val_df[:128]
    test_df = test_df[:128]

In [5]:
col_names = list(train_df.columns.values)
ing_names = col_names[:-3]
targets = ing_names

In [6]:
image_transform = transforms.Compose([transforms.Resize((384,384)),
                                       transforms.ToTensor()])

In [7]:
class CustomDataLoader(data.Dataset):
    ''' Data wrapper for pytorch's data loader function '''
    def __init__(self, image_df):
        self.dataset = image_df

    def __getitem__(self, index):
        c_row = self.dataset.iloc[index]
        target_arr = []
        for item in c_row[targets].values:
            target_arr.append(item)
        #print(target_arr)
        image_path, target = c_row['path'], torch.from_numpy(np.array(target_arr)).float()  #image and target
        #read as rgb image, resize and convert to range 0 to 1
        image = cv2.imread(image_path, 1)
        image = PIL.Image.fromarray(image)
        image = image_transform(image) 
        return image, target, c_row['class_id']

    def __len__(self):
        return self.dataset.shape[0] 

In [8]:
augmentation_transforms = transforms.Compose([
        transforms.RandomResizedCrop(size=256, scale=(0.75, 1.0)),
        transforms.RandomRotation(degrees=(0,45)),
        transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.5),
        transforms.RandomHorizontalFlip(.5),
        transforms.RandomPerspective(distortion_scale=0.25, p=.5),
        transforms.RandomVerticalFlip(.5),
        transforms.RandomGrayscale(.5), 
        transforms.RandomAdjustSharpness(p=0.5,sharpness_factor=0.5),
        transforms.Resize((384,384)),
        transforms.ToTensor()
        ])

In [9]:
class AugmentedDataLoader(data.Dataset):
    ''' Data wrapper for pytorch's data loader function '''
    def __init__(self, image_df):
        self.dataset = image_df

    def __getitem__(self, index):
        c_row = self.dataset.iloc[index]
        target_arr = []
        for item in c_row[targets].values:
            target_arr.append(item)
        #print(target_arr)
        image_path, target = c_row['path'], torch.from_numpy(np.array(target_arr)).float()  #image and target
        #read as rgb image, resize and convert to range 0 to 1
        image = cv2.imread(image_path, 1)
        image = PIL.Image.fromarray(image)
        image = augmentation_transforms(image) 
        return image, target, c_row['class_id']

    def __len__(self):
        return self.dataset.shape[0] 

In [10]:
train_dataset = CustomDataLoader(train_df)
# train_loader = torch.utils.data.DataLoader(train_dataset,shuffle=True, batch_size=batch_size, pin_memory=True)
# create and then concatenate the augmented dataset
augmented_train_dataset = AugmentedDataLoader(train_df)
concat_train_dataset = data.ConcatDataset(datasets=[train_dataset, augmented_train_dataset])
train_loader = torch.utils.data.DataLoader(concat_train_dataset,shuffle=True, batch_size=batch_size, 
                                           pin_memory=True)



val_dataset = CustomDataLoader(val_df)
val_loader = torch.utils.data.DataLoader(val_dataset,shuffle=True, batch_size=batch_size, pin_memory=True)

test_dataset = CustomDataLoader(test_df)
test_loader = torch.utils.data.DataLoader(test_dataset,shuffle=True, batch_size=batch_size, pin_memory=True)

In [11]:
concat_train_dataset

<torch.utils.data.dataset.ConcatDataset at 0x2143c9bcf70>

In [12]:
# Analyzing the shape of one batch
train_images, train_labels, class_ids = next(iter(train_loader))
test_images, test_labels, class_ids = next(iter(val_loader))
print(train_images.shape)
print(train_labels.shape)
print(test_images.shape)
print(test_labels.shape)
print(len(train_dataset))
print(len(val_dataset))
print(len(test_dataset))
print(len(class_ids))

torch.Size([64, 3, 384, 384])
torch.Size([64, 227])
torch.Size([64, 3, 384, 384])
torch.Size([64, 227])
128
128
128
64


In [51]:
# Load the Resnet-50 model pretraned on ImageNet 
import warnings
warnings.filterwarnings("ignore", category=UserWarning) 
model = models.resnet50(pretrained=True)
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, len(targets))

In [52]:
ct = 0
for name, child in model.named_children():
    ct += 1
    if ct < 8:
        for name2, params in child.named_parameters():
            params.requires_grad = False

In [53]:
def check_freeze(model):
    for name ,layer in model._modules.items():
        s = []
        for l in layer.parameters():
          s.append(l.requires_grad)
        print(name ,all(s))
check_freeze(model)

conv1 False
bn1 False
relu True
maxpool True
layer1 False
layer2 False
layer3 False
layer4 True
avgpool True
fc True


In [54]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
#device="cpu"
print(device)

cuda:0


In [55]:
# move the model to device
model.to(device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [56]:
from torchsummary import summary
model.cuda()
summary(model, (3, 384, 384), batch_size)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [64, 64, 192, 192]           9,408
       BatchNorm2d-2         [64, 64, 192, 192]             128
              ReLU-3         [64, 64, 192, 192]               0
         MaxPool2d-4           [64, 64, 96, 96]               0
            Conv2d-5           [64, 64, 96, 96]           4,096
       BatchNorm2d-6           [64, 64, 96, 96]             128
              ReLU-7           [64, 64, 96, 96]               0
            Conv2d-8           [64, 64, 96, 96]          36,864
       BatchNorm2d-9           [64, 64, 96, 96]             128
             ReLU-10           [64, 64, 96, 96]               0
           Conv2d-11          [64, 256, 96, 96]          16,384
      BatchNorm2d-12          [64, 256, 96, 96]             512
           Conv2d-13          [64, 256, 96, 96]          16,384
      BatchNorm2d-14          [64, 256,

  total_output += np.prod(summary[layer]["output_shape"])


In [57]:
from torch.optim import lr_scheduler
# specify loss function (categorical cross-entropy)
criterion = nn.BCEWithLogitsLoss()

# specify optimizer (stochastic gradient descent with momentum)
# optimizer = optim.SGD(model.fc.parameters(), lr=0.01, momentum=0.9)
#optimizer = optim.Adam(model.fc.parameters(), lr=0.001, betas=[0.9, 0.999])  
optimizer = torch.optim.Adam(model.parameters())

# specify learning rate scheduler (if there is no further decrease in loss for next 5 epochs 
# then lower the learning rate by 0.1)
#scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience = 5)
scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=3, eta_min=0.005 )
#scheduler=None

In [58]:
def hamming_score(y_true, y_pred, normalize=True, sample_weight=None):
    '''
    Compute the Hamming score (a.k.a. label-based accuracy) for the multi-label case
    https://stackoverflow.com/q/32239577/395857
    '''
    acc_list = []
    for i in range(y_true.shape[0]):
        set_true = set( np.where(y_true[i])[0] )
        set_pred = set( np.where(y_pred[i])[0] )
        #print('\nset_true: {0}'.format(set_true))
        #print('set_pred: {0}'.format(set_pred))
        tmp_a = None
        if len(set_true) == 0 and len(set_pred) == 0:
            tmp_a = 1
        else:
            tmp_a = len(set_true.intersection(set_pred))/\
                    float( len(set_true.union(set_pred)) )
        #print('tmp_a: {0}'.format(tmp_a))
        acc_list.append(tmp_a)
    return np.mean(acc_list)

In [59]:
from sklearn.metrics import f1_score, recall_score, precision_score
def calculate_scores(all_outputs, all_targets, scores_dict):
    all_outputs = np.array(all_outputs)
    all_targets = np.array(all_targets)
    f1score_samples = f1_score(y_true=all_targets, y_pred=all_outputs, average='samples')
    f1score_macro = f1_score(y_true=all_targets, y_pred=all_outputs, average='macro')
    f1score_weighted = f1_score(y_true=all_targets, y_pred=all_outputs, average='weighted')
    recall = recall_score(y_true=all_targets, y_pred=all_outputs, average='samples')
    prec = precision_score(y_true=all_targets, y_pred=all_outputs, average='samples')
    hamming = hamming_score(y_true=all_targets, y_pred=all_outputs)
    
    scores_dict["f1score_samples"].append(f1score_samples)
    scores_dict["f1score_macro"].append(f1score_macro)
    scores_dict["f1score_weighted"].append(f1score_weighted)
    scores_dict["recall"].append(recall)
    scores_dict["prec"].append(prec)
    scores_dict["hamming"].append(hamming)


In [62]:
def train(model, dataloader, optimizer, criterion, scores_dict, scheduler=None):
    model.train()
    # Record total loss
    total_loss = 0.
    total_accuracy = 0.
    total = 0.
    all_outputs = []
    all_targets = []
    for inputs, labels,class_ids in tqdm(dataloader, desc='Training'): 
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        output = model(inputs)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()
        
        # predict targets
        preds = torch.sigmoid(output).data >= 0.5
        target_data = (labels==1.0)
        
      
        total_loss += (loss.item())
        accuracy = torch.sum((preds == target_data.to(device)).to(torch.float)).item()
        total_accuracy += accuracy
        
        total_batch = (labels.size(0) * labels.size(1))
        total += total_batch
        
        # collect predictions and targets
        for arr1,arr2 in zip(preds, target_data):
            all_outputs.append(list(arr1.cpu().numpy()))
            all_targets.append(list(arr2.cpu().numpy()))

    calculate_scores(all_outputs, all_targets,scores_dict)
    return  total_accuracy / float(total), total_loss / len(dataloader)

In [63]:
def evaluate(model, dataloader, criterion, scores_dict):
    model.eval()
    total_loss = 0.
    total_accuracy = 0.
    all_outputs = []
    all_targets = []
    all_class_ids = []
    total = 0.
    with torch.no_grad():
        for inputs, labels, class_ids in tqdm(dataloader, desc='Validation'):
            inputs, labels, = inputs.to(device), labels.to(device)
            all_class_ids.extend(class_ids.cpu().numpy())
            output = model(inputs)
            loss = criterion(output, labels)
                 
            # predict targets
            preds = torch.sigmoid(output).data >= 0.5
            preds = preds.to(torch.float32)
            target_data = (labels==1.0)
           
            total_loss += (loss.item())
            accuracy = torch.sum((preds == target_data.to(device)).to(torch.float)).item()
            total_accuracy += accuracy
            
            total_batch = (labels.size(0) * labels.size(1))
            total += total_batch
            
            #collect predictions and targets
            for arr1,arr2 in zip(preds, target_data):
                all_outputs.append(list(arr1.cpu().numpy()))
                all_targets.append(list(arr2.cpu().numpy()))
                
    calculate_scores(all_outputs, all_targets, scores_dict)              
    return  total_accuracy / float(total), total_loss / len(dataloader), all_outputs, all_targets, all_class_ids

In [60]:
check_point_path = r'../checkpoints/food-101.pt'
train_scores_path = r'../checkpoints/train_scores.json'
val_scores_path = r'../checkpoints/val_scores.json'

In [61]:
def initialize_scores_dict(scores_dict):
    scores_dict["f1score_samples"]=[]
    scores_dict["f1score_macro"]=[]
    scores_dict["f1score_weighted"]=[]
    scores_dict["recall"]=[]
    scores_dict["prec"]=[]
    scores_dict["hamming"]=[]
    scores_dict["avg_accuracy"]=[]
    scores_dict["avg_losses"]=[]

In [32]:
# Train model
EPOCHS=5
valid_loss_min = np.Inf
train_scores_dict = dict()
initialize_scores_dict(train_scores_dict)
val_scores_dict = dict()
initialize_scores_dict(val_scores_dict)
for epoch_idx in range(EPOCHS):
    print("-----------------------------------")
    print("Epoch %d" % (epoch_idx+1))
    print("-----------------------------------")
    
    avg_train_accuracy, avg_train_loss = train(model, train_loader, optimizer, criterion, train_scores_dict)
    print("Training Accuracy: %.4f. Training Loss: %.4f. " % (avg_train_accuracy, avg_train_loss))

    avg_val_accuracy, avg_val_loss, all_outputs, all_targets, all_class_ids = evaluate(model, val_loader, criterion, val_scores_dict)
    print("Validation Accuracy: %.4f. Validation Loss: %.4f. " % (avg_val_accuracy, avg_val_loss))
    
       
    if avg_val_loss <= valid_loss_min:
            checkpoint = {"model": model,
                      "criterion": criterion,
                      "epochs": epoch_idx,
                      "optimizer_state": optimizer.state_dict(),
                      "model_state": model.state_dict(),
                      "valid_loss_min": avg_val_loss}
            print("Validation loss decreased,  Saving model")
            torch.save(checkpoint, check_point_path)
            valid_loss_min=avg_val_loss
    
    train_scores_dict["avg_accuracy"].append(avg_train_accuracy)
    train_scores_dict["avg_losses"].append(avg_train_loss)
    val_scores_dict["avg_accuracy"].append(avg_val_accuracy)
    val_scores_dict["avg_losses"].append(avg_val_loss)
    
# save scores
with open(train_scores_path, 'w') as t_handle:
    json.dump(train_scores_dict, t_handle)
with open(val_scores_path, 'w') as v_handle:
    json.dump(val_scores_dict, v_handle)

    
 

-----------------------------------
Epoch 1
-----------------------------------


Training:   0%|          | 0/4 [00:00<?, ?it/s]

Training Accuracy: 0.8831. Training Loss: 0.3062. 


Validation:   0%|          | 0/2 [00:00<?, ?it/s]

Validation Accuracy: 0.9799. Validation Loss: 0.1171. 
Validation loss decreased,  Saving model
-----------------------------------
Epoch 2
-----------------------------------


Training:   0%|          | 0/4 [00:00<?, ?it/s]

Training Accuracy: 1.0000. Training Loss: 0.0105. 


Validation:   0%|          | 0/2 [00:00<?, ?it/s]

Validation Accuracy: 0.9799. Validation Loss: 0.1538. 
-----------------------------------
Epoch 3
-----------------------------------


Training:   0%|          | 0/4 [00:00<?, ?it/s]

Training Accuracy: 1.0000. Training Loss: 0.0007. 


Validation:   0%|          | 0/2 [00:00<?, ?it/s]

Validation Accuracy: 0.9799. Validation Loss: 0.2171. 
-----------------------------------
Epoch 4
-----------------------------------


Training:   0%|          | 0/4 [00:00<?, ?it/s]

Training Accuracy: 1.0000. Training Loss: 0.0001. 


Validation:   0%|          | 0/2 [00:00<?, ?it/s]

Validation Accuracy: 0.9799. Validation Loss: 0.2378. 
-----------------------------------
Epoch 5
-----------------------------------


Training:   0%|          | 0/4 [00:00<?, ?it/s]

Training Accuracy: 1.0000. Training Loss: 0.0000. 


Validation:   0%|          | 0/2 [00:00<?, ?it/s]

Validation Accuracy: 0.9799. Validation Loss: 0.2396. 


In [64]:
with open(train_scores_path, 'r') as t_handle:
    train_scores_dict = json.load(t_handle)
with open(val_scores_path, 'r') as v_handle:
    val_scores_dict = json.load(v_handle)
print(train_scores_dict)
print(val_scores_dict)

{'f1score_samples': [0.760278380296568, 1.0, 1.0, 1.0, 1.0], 'f1score_macro': [0.02858276717597583, 0.030837004405286344, 0.030837004405286344, 0.030837004405286344, 0.030837004405286344], 'f1score_weighted': [0.9268983069923591, 1.0, 1.0, 1.0, 1.0], 'recall': [0.8677455357142857, 1.0, 1.0, 1.0, 1.0], 'prec': [0.7496322765175103, 1.0, 1.0, 1.0, 1.0], 'hamming': [0.7493910407272044, 1.0, 1.0, 1.0, 1.0], 'avg_accuracy': [0.8830878303964758, 1.0, 1.0, 1.0, 1.0], 'avg_losses': [0.30619468726217747, 0.010536933259572834, 0.0007287617190741003, 0.00012676171900238842, 4.0379452457273146e-05]}
{'f1score_samples': [0.6963541666666667, 0.6963541666666666, 0.6963541666666667, 0.6963541666666668, 0.6963541666666667], 'f1score_macro': [0.0250862611488466, 0.0250862611488466, 0.0250862611488466, 0.0250862611488466, 0.0250862611488466], 'f1score_weighted': [0.5617424616007516, 0.5617424616007516, 0.5617424616007516, 0.5617424616007516, 0.5617424616007516], 'recall': [0.689453125, 0.689453125, 0.6894

In [65]:
def check_freeze(model):
    for name ,layer in model._modules.items():
        s = []
        for l in layer.parameters():
          s.append(l.requires_grad)
        print(name ,all(s))

In [66]:
# Inference
import warnings
warnings.filterwarnings("ignore", category=UserWarning) 
test_model = models.resnet50(pretrained=True)
num_ftrs = test_model.fc.in_features
test_model.fc = nn.Linear(num_ftrs, len(targets))

save_path = r'./checkpoints/food-101.pt'
checkpoint = torch.load(save_path)
test_model.load_state_dict(checkpoint['model_state'])
test_model.to(device)


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [67]:
check_freeze(test_model)

conv1 True
bn1 True
relu True
maxpool True
layer1 True
layer2 True
layer3 True
layer4 True
avgpool True
fc True


In [68]:
test_scores_dict = dict()
initialize_scores_dict(test_scores_dict)
avg_test_accuracy, avg_test_loss, all_outputs, all_targets, all_class_ids = evaluate(test_model, test_loader, criterion, test_scores_dict)


Validation:   0%|          | 0/395 [00:00<?, ?it/s]

In [69]:
test_scores_dict

{'f1score_samples': [0.8038613519177972],
 'f1score_macro': [0.8223669901338875],
 'f1score_weighted': [0.8391371353984649],
 'recall': [0.7918676595106183],
 'prec': [0.8541864510103001],
 'hamming': [0.7603885281627365],
 'avg_accuracy': [],
 'avg_losses': []}

In [70]:
print(len(all_class_ids))
len(all_targets)
unique_classes = set(all_class_ids)
all_targets = np.array(all_targets)
all_outputs = np.array(all_outputs)
all_class_ids = np.array(all_class_ids)
food_type_dict=dict()
for entry in unique_classes:
    class_indices = [i for i, e in enumerate(all_class_ids) if e == entry]
    class_targets = all_targets[class_indices]
    class_outputs = all_outputs[class_indices]
    food_type_dict[entry]=f1_score(y_true=class_targets, y_pred=class_outputs, average='weighted')

food_type_dict = {k: v for k, v in sorted(food_type_dict.items(), key=lambda item: item[1])}
food_type_dict

25250


{39: 0.5517733630081957,
 77: 0.5550570611449737,
 0: 0.6279234196927453,
 82: 0.6413243318772154,
 93: 0.7149085913760341,
 59: 0.7218627088369649,
 56: 0.7338429991613579,
 8: 0.7367424174774294,
 67: 0.7412380756375411,
 37: 0.7516114829583519,
 50: 0.7741754059449272,
 99: 0.7744369683629663,
 5: 0.7819973399252615,
 46: 0.789743097015692,
 49: 0.7979732462321973,
 57: 0.8001899389317134,
 87: 0.8005197376624624,
 84: 0.8040777187481831,
 9: 0.8091415750117049,
 47: 0.8125684249509275,
 10: 0.8138622093697752,
 15: 0.8146330808829365,
 53: 0.8194405745685454,
 18: 0.8237230640257336,
 36: 0.8276561438263274,
 58: 0.828034723383405,
 22: 0.8300966870348713,
 66: 0.8361542579146333,
 62: 0.8369515795436642,
 73: 0.8373640908663847,
 26: 0.8484663502828405,
 85: 0.8488961143950757,
 16: 0.8490842374640399,
 80: 0.8527618704674506,
 42: 0.8534819788001898,
 98: 0.8591539252546864,
 19: 0.8659187710798463,
 4: 0.8683378261620622,
 89: 0.8690079239784705,
 11: 0.8783456476446986,
 96: 0.