In [1]:
from __future__ import print_function, division

# Standard library
import os
import random
import time
import warnings

# Third-party
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.utils.data
import torchvision
from PIL import Image
from scipy.io import loadmat
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data.sampler import WeightedRandomSampler
from torchvision import datasets
from torchvision import transforms

# Project-local helpers
import utils

# Ignore warnings
warnings.filterwarnings("ignore")

# Seed every random number generator the notebook has in scope so that
# splitting, sampling and augmentation are repeatable after a kernel restart.
seed = 10
# Python's own RNG is imported above but was never seeded; some torchvision
# versions draw their augmentation parameters from it.
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
# Ask cuDNN for deterministic algorithms and disable its autotuner.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [None]:
from utils import Load_Images

In [None]:
import utils

In [None]:

# Locations of the training images and of the devkit annotation / metadata files.
root_dir = "data/cars_train/"
car_annotations_path = "data/devkit/cars_train_annos.mat"
car_metadata_path = "data/devkit/cars_meta.mat"

# Read the metadata .mat file and flatten its "class_names" entry into one array.
meta_data = np.concatenate(loadmat(car_metadata_path)["class_names"][0])

# One class per entry in the metadata.
nb_classes = len(meta_data)

# Build the image split; later cells read dataset["training"] and
# dataset["validation"] from the result.
dataset = Load_Images(
    root_dir=root_dir,
    annotations_path=car_annotations_path,
    seed=seed,
    train_split=0.8,
)


# Command-line invocation of the training script, kept for reference:
#vgg16_tl.py --data-folder="data/cars_train/" --meta-file="data/devkit/cars_meta.mat" --annotation-file="data/devkit/cars_train_annos.mat" --nb_epochs=2 --batch_size=15 --nb_classes=196 --enable_cuda 

In [None]:
  --annotation-file ANNOTATION
                        annotation file path
  --meta-file META      meta data file path
  --training-split [TRAINING_SPLIT]
                        training and validation split. Default at .8
  --nb_epochs [EPOCHS]  Number of epochs to train the model
  --batch_size [BATCH_SIZE]
                        batch size
  --nb_classes NB_CLASSES
                        Number of classes
  --enable_cuda         Start using CUDA

In [None]:
class car_dataset(Dataset):
    """Dataset of car images with class, year, maker and type labels.

    Parameters
    ----------
    files : sequence of annotation records. Each record is read as
        ``record[-1][0]`` (image file name) and ``record[-2][0]`` (1-based
        class id). Presumably rows of the devkit annotation .mat file --
        TODO confirm against ``Load_Images``.
    root_dir : directory that contains the image files.
    meta_data : array of class names, indexed by 0-based class id.
    image_transform : optional callable applied to the PIL image.
    """

    def __init__(self, files, root_dir, meta_data, image_transform=None):

        self.root_dir = root_dir
        self.image_transform = image_transform

        # Image file names
        self.image_files = [file[-1][0] for file in files]

        # Class ID, shifted by one so that ids start at 0 as the PyTorch
        # classification losses expect.
        self.id = [file[-2][0] - 1 for file in files]

        # Class name looked up from the metadata with the 0-based id
        self.class_name = [meta_data[file[-2][0] - 1][0] for file in files]

        # Year, maker and type are derived from the class name by the project
        # helpers; each returns the values and their integer ids.
        self.carYear, self.carYear_ID = utils.get_Year(self.class_name)
        self.carMaker, self.carMaker_ID = utils.get_Maker(self.class_name)
        self.carType, self.carType_ID = utils.get_Type(self.class_name)

    def __len__(self):
        """Return the number of annotated images."""
        return len(self.id)

    def __getitem__(self, idx):
        """Load image ``idx`` and return it together with its labels.

        Returns a dict with keys 'Image', 'class_ID', 'class_name',
        'year_ID', 'maker_ID' and 'type_ID'.
        """
        img_path = os.path.join(self.root_dir, self.image_files[idx])
        # Force three channels: a grayscale, palette or RGBA file would
        # otherwise give a tensor whose channel count differs from its
        # neighbours, breaking batch collation and the 3-channel model input.
        with Image.open(img_path) as raw_image:
            img = raw_image.convert("RGB")

        if self.image_transform:
            img = self.image_transform(img)

        # self.id[idx] is indexed with [0] after conversion, i.e. it is treated
        # as a one-element array (assumed from the annotation layout).
        target = torch.from_numpy(np.array(self.id[idx]))[0]

        sample = {'Image':img, 'class_ID':target, "class_name":self.class_name[idx],
                 'year_ID':self.carYear_ID[idx], 'maker_ID':self.carMaker_ID[idx],
                 'type_ID':self.carType_ID[idx]}

        return sample

In [None]:
class ImbalancedDatasetSampler(torch.utils.data.sampler.Sampler):
    """Draw dataset indices with probability inversely proportional to the
    frequency of each sample's label, so rare labels are seen as often as
    common ones.

    Arguments:
        dataset: indexable dataset whose items are mappings; the value stored
            under ``class_type`` must expose ``.item()``
        class_type (str): key of the label to balance on (e.g. "year_ID")
    """

    def __init__(self, dataset, class_type):

        self.indices = list(range(len(dataset)))
        self.num_samples = len(self.indices)

        # Fetch every label exactly once. Indexing the dataset may load and
        # transform the whole sample, so reading it a second time for the
        # weights would double the start-up cost.
        labels = [self._get_label(dataset, idx, class_type) for idx in self.indices]

        # distribution of classes in the dataset
        label_to_count = {}
        for label in labels:
            label_to_count[label] = label_to_count.get(label, 0) + 1

        # weight for each sample
        weights = [1.0 / label_to_count[label] for label in labels]
        self.weights = torch.DoubleTensor(weights)

    def _get_label(self, dataset, idx, class_type):
        """Return the Python scalar stored under ``class_type`` for sample ``idx``."""
        return dataset[idx][class_type].item()

    def __iter__(self):
        # torch.multinomial cannot draw from an empty weight vector.
        if self.num_samples == 0:
            return iter(())
        drawn = torch.multinomial(self.weights, self.num_samples, replacement=True)
        return (self.indices[i] for i in drawn.tolist())

    def __len__(self):
        return self.num_samples

In [None]:
#Training Weighted Random Sampler

"""targets = [i[-2][0][0] for i in dataset["training"]]
class_sample_counts=[len(np.where(targets == t)[0]) for t in np.unique(targets)]
weight = 1. / np.array(class_sample_counts)
samples_weight = np.array([weight[t-1] for t in targets])
samples_weight = torch.from_numpy(samples_weight)
samples_weight = samples_weight.double()
training_sampler = WeightedRandomSampler(samples_weight, num_samples=len(samples_weight), replacement=False)
"""
# Mini-batch size shared by both loaders.
bs = 15

# Preprocessing pipelines keyed by split: random augmentation for training,
# a plain resize for validation.
# NOTE(review): no Normalize step is applied and images are resized to
# 244x244 -- confirm both are intended for the pretrained model used later.
image_transformers = {}
image_transformers['train'] = transforms.Compose([
    transforms.Resize((244, 244)),
    transforms.RandomRotation(degrees=50),
    transforms.RandomHorizontalFlip(0.8),
    transforms.RandomPerspective(),
    transforms.RandomResizedCrop(size=(244, 244), scale=(0.8, 1.0)),
    transforms.ColorJitter(brightness=0.8, contrast=0.8),
    transforms.ToTensor(),
])
image_transformers['validation'] = transforms.Compose([
    transforms.Resize((244, 244)),
    transforms.ToTensor(),
])

# Training split: batches are drawn through the label-balancing sampler,
# keyed on the "year_ID" label.
training_data = car_dataset(
    dataset["training"],
    root_dir=root_dir,
    meta_data=meta_data,
    image_transform=image_transformers["train"],
)
train_loader = DataLoader(
    training_data,
    batch_size=bs,
    sampler=ImbalancedDatasetSampler(training_data, "year_ID"),
)

# Validation split: fixed order, no augmentation.
validation_data = car_dataset(
    dataset["validation"],
    root_dir=root_dir,
    meta_data=meta_data,
    image_transform=image_transformers["validation"],
)
validation_loader = DataLoader(validation_data, batch_size=bs, shuffle=False)

In [None]:
# Peek at the first training sample: a dict holding the transformed image and
# its class / year / maker / type labels.
training_data[0]

In [None]:
# Read the year labels straight from the dataset's label list. Iterating over
# `training_data` itself would open and augment every image only to keep the
# same `carYear_ID[idx]` value that __getitem__ puts under "year_ID".
training_ids = [training_data.carYear_ID[i] for i in range(len(training_data))]
#validation_ids = [i["class_name"] for i in validation_data]

In [None]:
# Number of distinct year labels in the training split. pandas is imported in
# the notebook's first cell so this works on a fresh kernel.
pd.Series(training_ids).nunique()

In [None]:
# Replace the class count derived from the metadata: the model built below is
# trained on the "year_ID" labels instead.
# NOTE(review): 16 is hard-coded -- presumably the number of distinct year
# labels counted in the previous cell; confirm before reuse.
nb_classes = 16

In [None]:
# Release cached GPU memory and report the device. Guarded so the cell does
# not raise on a machine without CUDA.
if torch.cuda.is_available():
    torch.cuda.empty_cache()
    print("GPU: {}".format(torch.cuda.get_device_name(0)))
else:
    print("GPU: none detected, running on CPU")

In [None]:
# Use the first CUDA device when one is present, otherwise fall back to the
# CPU so the notebook still runs end to end.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [None]:
## Load the model based on VGG19
vgg_based = torchvision.models.vgg19(pretrained=True)

'''
for param in vgg_based.parameters():
    param.requires_grad = False
'''


# Freeze the parameter tensors at positions 0..35; anything after that keeps
# its current requires_grad setting.
# NOTE(review): 35 is a positional cut-off -- confirm which layers it covers
# for the torchvision version in use.
for idx,param in enumerate(vgg_based.parameters()):
    if idx > 35:
        continue
    param.requires_grad = False

# Swap the final classifier layer for a fresh Linear layer with `nb_classes` outputs.
number_features = vgg_based.classifier[6].in_features
features = list(vgg_based.classifier.children())[:-1]
features.append(torch.nn.Linear(number_features, nb_classes))
vgg_based.classifier = torch.nn.Sequential(*features)

vgg_based = vgg_based.to(device)

print(vgg_based)
#torch.optim.Adagrad(params, lr=0.01, lr_decay=0, weight_decay=0, initial_accumulator_value=0)
#
# Loss and optimiser used by the training cell.
criterion = torch.nn.CrossEntropyLoss()

optimizer_ft = torch.optim.Adam(
    vgg_based.parameters(),
    lr=0.0001,
    weight_decay=1e-7,
)

In [None]:
#optimizer_ft = torch.optim.Adam(vgg_based.parameters(), lr= 0.001, weight_decay=1e-5)
#torch.optim.Adam(vgg_based.parameters(), lr= 0.0001, weight_decay=1e-7)

In [None]:
def train_model(model, criterion, optimizer, num_epochs=1,
                train_batches=None, valid_batches=None, run_device=None):
    """Train `model` and evaluate it on the validation batches after every epoch.

    Parameters
    ----------
    model : torch.nn.Module to optimise (updated in place).
    criterion : loss callable taking ``(outputs, labels)``.
    optimizer : optimiser bound to the model's parameters.
    num_epochs : number of passes over the training batches.
    train_batches, valid_batches : optional iterables of batch dicts with
        "Image" and "year_ID" entries. When omitted, the notebook-level
        ``train_loader`` / ``validation_loader`` are used.
    run_device : optional torch.device for inputs and labels. When omitted,
        the notebook-level ``device`` is used.

    Returns
    -------
    (model, history) : ``history`` holds one
        ``[avg_train_loss, avg_train_acc, avg_valid_loss, avg_valid_acc]``
        row per epoch. Averages are taken over the samples actually seen.
    """
    if train_batches is None:
        train_batches = train_loader
    if valid_batches is None:
        valid_batches = validation_loader
    if run_device is None:
        run_device = device

    def _run_batches(batches, training):
        # One pass over `batches`; returns (mean loss, mean accuracy).
        total_loss, total_correct, total_seen = 0.0, 0, 0
        for data in batches:
            inputs = data["Image"].to(run_device)
            labels = data["year_ID"].to(run_device).long()

            if training:
                optimizer.zero_grad()

            with torch.set_grad_enabled(training):
                outputs = model(inputs)
                loss = criterion(outputs, labels)

            if training:
                loss.backward()
                optimizer.step()

            batch_size = inputs.size(0)
            # loss is a batch mean, so weight it by the batch size.
            total_loss += loss.item() * batch_size

            _, arg_maxs = torch.max(outputs, dim=1)
            total_correct += arg_maxs.eq(labels.view_as(arg_maxs)).sum().item()
            total_seen += batch_size

        seen = max(total_seen, 1)  # avoid dividing by zero on an empty loader
        return total_loss / float(seen), total_correct / float(seen)

    since = time.time()
    history = []
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch + 1, num_epochs))
        print('-' * 30)

        # Switch back to training mode every epoch: the validation pass below
        # puts the model in eval mode, which would otherwise leave dropout and
        # batch-norm layers disabled for all later epochs.
        model.train()
        avg_train_loss, avg_train_acc = _run_batches(train_batches, training=True)

        model.eval()
        avg_valid_loss, avg_valid_acc = _run_batches(valid_batches, training=False)

        history.append([avg_train_loss, avg_train_acc, avg_valid_loss, avg_valid_acc])
        print("Epoch : {:03d}, Training: Loss: {:.4f}, Accuracy: {:.4f}%, \n\t\tValidation : Loss : {:.4f}, Accuracy: {:.4f}%".format(epoch + 1, avg_train_loss, avg_train_acc*100, avg_valid_loss, avg_valid_acc*100))

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))


    return model, history

In [None]:
# Fine-tune for 20 epochs. `mods` is the trained model and `history` gets one
# row of loss / accuracy averages per epoch.
mods, history = train_model(vgg_based, criterion, optimizer_ft, num_epochs=20)

In [None]:
# Collect the model outputs and the matching ground-truth labels for every
# validation batch.
with torch.no_grad():
    yhats = []
    ys = []
    mods.eval()

    for batch_idx, data in enumerate(validation_loader):
        # The model is trained on "year_ID" labels (see train_model), so the
        # ground truth must come from the same field. Reading "class_ID" here
        # would compare predictions against a different label set.
        labels = data["year_ID"].long()
        ys.append(labels)
        inputs = data["Image"].to(device)
        # Keep the outputs on the CPU so they do not hold GPU memory.
        yhats.append(mods(inputs).cpu())

In [None]:
preds = []
for i in yhats:
    for g in i:
        _, ind = torch.max(g, 0)
        preds.append(ind.item())

In [None]:
targets

In [None]:
preds

In [None]:
values, indices = torch.max(yhats[0], 0)
print(values, indices)

In [None]:
ys

In [None]:
targets = []
for i in ys:
    for g in i.numpy():
        targets.append(g)

In [None]:
targets
im

In [None]:
import pandas as pd

In [None]:
# Plot validation and training accuracy per epoch from the training history.
(
    pd.DataFrame(
        history,
        columns=["avg_train_loss", "avg_train_acc", "avg_valid_loss", "avg_valid_acc"],
    )[["avg_valid_acc", "avg_train_acc"]]
    .plot()
)

In [None]:
def predict(model, test_image_name, transform=None, idx_to_class=None):
    """Classify a single image file, show it, and print the predicted class.

    Parameters
    ----------
    model : trained torch.nn.Module returning one raw score per class.
    test_image_name : path of the image to classify.
    transform : optional preprocessing callable. Defaults to the notebook's
        ``image_transformers['validation']`` pipeline.
    idx_to_class : optional mapping or sequence from class index to a
        readable label. When omitted, the numeric index is printed.

    Returns
    -------
    int : index of the highest-scoring class.
    """
    if transform is None:
        transform = image_transformers['validation']

    # Force three channels so grayscale or RGBA files match the model input.
    test_image = Image.open(test_image_name).convert("RGB")
    plt.imshow(test_image)

    # Add the batch dimension with unsqueeze instead of a hard-coded
    # view(1, 3, 224, 224): the transform decides the spatial size, and a
    # mismatching view would raise. Run on whatever device the model is on.
    model_device = next(model.parameters()).device
    test_image_tensor = transform(test_image).unsqueeze(0).to(model_device)

    with torch.no_grad():
        model.eval()
        # The model is trained with CrossEntropyLoss, so it returns raw
        # scores; softmax turns them into probabilities.
        out = model(test_image_tensor)
        ps = torch.nn.functional.softmax(out, dim=1)
        topk, topclass = ps.topk(1, dim=1)

    predicted = int(topclass.item())
    label = idx_to_class[predicted] if idx_to_class is not None else predicted
    print("Output class :  ", label)
    return predicted

In [None]:
import torch
import numpy as np
from torchvision import datasets
from torchvision import transforms
from torch.utils.data.sampler import SubsetRandomSampler

class CustomDatasetFromCSV(Dataset):
    """Dataset of 48x48 grayscale faces stored as pixel strings in a CSV file.

    The CSV must provide an 'emotion' column (the label, one-hot encoded
    here) and a 'pixels' column holding space-separated integer pixel values.

    Parameters
    ----------
    csv_path : path of the CSV file.
    transform : optional callable applied to the uint8 face array.
    """

    def __init__(self, csv_path, transform=None):
        self.data = pd.read_csv(csv_path)
        # DataFrame.as_matrix() no longer exists in current pandas; `.values`
        # works on old and new versions. The cast keeps the one-hot rows
        # numeric regardless of the dtype get_dummies returns.
        self.labels = pd.get_dummies(self.data['emotion']).values.astype('uint8')
        self.height = 48
        self.width = 48
        self.transform = transform

    def __getitem__(self, index):
        """Return ``(face, label)`` for one row: a (height, width) uint8 array
        and its one-hot label vector."""
        pixel_sequence = self.data['pixels'][index]
        face = [int(pixel) for pixel in pixel_sequence.split(' ')]
        # The pixels already describe a height x width image, so a reshape is
        # enough; the former resize to the same size did nothing and needed
        # cv2, which the notebook never imports.
        face = np.asarray(face).reshape(self.height, self.width).astype('uint8')
        if self.transform is not None:
            face = self.transform(face)
        label = self.labels[index]

        return face, label

    def __len__(self):
        return len(self.labels)


# Example: split a CSV-backed dataset into training / validation loaders.
# NOTE(review): `my_path` is not defined anywhere in this notebook -- set it to
# the CSV location before running. This cell also rebinds `dataset`,
# `train_loader` and `validation_loader`, which earlier cells rely on.
dataset = CustomDatasetFromCSV(my_path)
batch_size = 16
validation_split = .2
shuffle_dataset = True
random_seed= 42

# Creating data indices for training and validation splits:
dataset_size = len(dataset)
indices = list(range(dataset_size))
split = int(np.floor(validation_split * dataset_size))
if shuffle_dataset :
    np.random.seed(random_seed)
    np.random.shuffle(indices)
# The first `split` indices form the validation set, the rest the training set.
train_indices, val_indices = indices[split:], indices[:split]

# Creating PT data samplers and loaders:
train_sampler = SubsetRandomSampler(train_indices)
valid_sampler = SubsetRandomSampler(val_indices)

train_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, 
                                           sampler=train_sampler)
validation_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                                sampler=valid_sampler)

# Usage Example:
num_epochs = 10
for epoch in range(num_epochs):
    # Train:   
    for batch_index, (faces, labels) in enumerate(train_loader):
        # ...
        pass  # placeholder: a comment-only loop body is a syntax error

In [None]:
class ImbalancedDatasetSampler(torch.utils.data.sampler.Sampler):
    """Weighted sampler that draws indices with probability inversely
    proportional to the frequency of each index's label.

    NOTE(review): this redefines the ImbalancedDatasetSampler declared earlier
    in the notebook with a different constructor signature.

    Arguments:
        dataset: a torchvision MNIST or ImageFolder dataset
        indices (list, optional): a list of indices
        num_samples (int, optional): number of samples to draw
    """

    def __init__(self, dataset, indices=None, num_samples=None):

        # Without explicit indices, consider every element of the dataset.
        if indices is None:
            indices = list(range(len(dataset)))
        self.indices = indices

        # Without an explicit count, draw one sample per candidate index.
        if num_samples is None:
            num_samples = len(self.indices)
        self.num_samples = num_samples

        # Tally how many candidate indices carry each label.
        label_to_count = {}
        for idx in self.indices:
            label = self._get_label(dataset, idx)
            label_to_count[label] = label_to_count.get(label, 0) + 1

        # Each sample is weighted by the inverse frequency of its label.
        self.weights = torch.DoubleTensor([
            1.0 / label_to_count[self._get_label(dataset, idx)]
            for idx in self.indices
        ])

    def _get_label(self, dataset, idx):
        """Return the label of sample ``idx`` for the supported dataset types."""
        kind = type(dataset)
        if kind is torchvision.datasets.MNIST:
            return dataset.train_labels[idx].item()
        if kind is torchvision.datasets.ImageFolder:
            return dataset.imgs[idx][1]
        raise NotImplementedError

    def __iter__(self):
        # Positions are drawn with replacement according to the weights and
        # then mapped back to dataset indices.
        drawn = torch.multinomial(self.weights, self.num_samples, replacement=True)
        return (self.indices[position] for position in drawn)

    def __len__(self):
        return self.num_samples

In [None]:

# Root folder with one sub-folder per split, plus the preprocessing constants.
data_dir = "alien_pred"
input_shape = 224
mean = [0.5, 0.5, 0.5]
std = [0.5, 0.5, 0.5]

#data transformation
# Both splits use the same pipeline: centre crop, tensor conversion, normalisation.
data_transforms = {
    split: transforms.Compose([
        transforms.CenterCrop(input_shape),
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ])
    for split in ['train', 'validation']
}

# One ImageFolder dataset per split, read from <data_dir>/<split>.
image_datasets = dict(
    (split, datasets.ImageFolder(os.path.join(data_dir, split),
                                 transform=data_transforms[split]))
    for split in ['train', 'validation']
)

# Shuffled loaders with 32 images per batch and 4 worker processes.
dataloaders = dict(
    (split, torch.utils.data.DataLoader(image_datasets[split], batch_size=32,
                                        shuffle=True, num_workers=4))
    for split in ['train', 'validation']
)

dataset_sizes = dict(
    (split, len(image_datasets[split])) for split in ['train', 'validation']
)

print(dataset_sizes)
class_names = image_datasets['train'].classes








criterion = torch.nn.CrossEntropyLoss()
# `optim` is never imported on its own in this notebook (only `lr_scheduler` is
# taken from torch.optim), so reach the optimiser through `torch.optim`.
optimizer_ft = torch.optim.SGD(vgg_based.parameters(), lr=0.001, momentum=0.9)


def train_model(model, criterion, optimizer, num_epochs=25):
   """Train `model` on ``dataloaders['train']`` and return it.

   NOTE(review): this redefines the train_model declared earlier in the
   notebook, which returns ``(model, history)``; this version returns only
   the model.

   Parameters
   ----------
   model : torch.nn.Module to optimise (updated in place).
   criterion : loss callable taking ``(outputs, labels)``.
   optimizer : optimiser bound to the model's parameters.
   num_epochs : number of passes over the training loader.

   Relies on the notebook-level ``dataloaders``, ``dataset_sizes`` and ``device``.
   """
   since = time.time()

   for epoch in range(num_epochs):
       print('Epoch {}/{}'.format(epoch, num_epochs - 1))
       print('-' * 10)

       # Make sure dropout / batch-norm layers are in training mode: the model
       # may arrive in eval mode from an earlier evaluation.
       model.train()

       train_loss = 0

       # Iterate over data.
       for i, data in enumerate(dataloaders['train']):
           inputs , labels = data
           inputs = inputs.to(device)
           labels = labels.to(device)

           optimizer.zero_grad()

           with torch.set_grad_enabled(True):
               outputs  = model(inputs)
               loss = criterion(outputs, labels)

           loss.backward()
           optimizer.step()

           # loss is a batch mean, so weight it by the batch size.
           train_loss += loss.item() * inputs.size(0)

       # Report once per epoch. The running total divided by the dataset size
       # is only an average after the whole epoch has been accumulated.
       print('{} Loss: {:.4f}'.format(
           'train', train_loss / dataset_sizes['train']))

   time_elapsed = time.time() - since
   print('Training complete in {:.0f}m {:.0f}s'.format(
       time_elapsed // 60, time_elapsed % 60))

   return model

def visualize_model(model, num_images=6):
   """Plot validation images titled with their predicted and true class names.

   Parameters
   ----------
   model : trained torch.nn.Module; its training / eval mode is restored on exit.
   num_images : number of images to show, laid out two per row.

   Relies on the notebook-level ``dataloaders``, ``device``, ``class_names``,
   ``mean`` and ``std``.
   """
   if num_images <= 0:
       return

   was_training = model.training
   model.eval()
   images_so_far = 0
   # Round the row count up so an odd num_images still fits in the grid.
   rows = (num_images + 1) // 2
   fig = plt.figure()

   try:
       with torch.no_grad():
           for i, (inputs, labels) in enumerate(dataloaders['validation']):
               inputs = inputs.to(device)
               labels = labels.to(device)

               outputs = model(inputs)
               _, preds = torch.max(outputs, 1)

               for j in range(inputs.size()[0]):
                   images_so_far += 1
                   ax = plt.subplot(rows, 2, images_so_far)
                   ax.axis('off')
                   ax.set_title('predicted: {} truth: {}'.format(class_names[preds[j]], class_names[labels[j]]))
                   # Channels-first tensor -> channels-last array, then undo
                   # the normalisation applied by the data transforms.
                   img = inputs.cpu().data[j].numpy().transpose((1, 2, 0))
                   img = np.asarray(std) * img + np.asarray(mean)
                   # Keep values in the [0, 1] range imshow expects for floats.
                   ax.imshow(np.clip(img, 0, 1))

                   if images_so_far == num_images:
                       return
   finally:
       # Restore the caller's mode on every exit path, including exceptions.
       model.train(mode=was_training)
    
# Train with the `train_model` defined in this cell, which returns only the model.
vgg_based = train_model(vgg_based, criterion, optimizer_ft, num_epochs=25)

# Show a few validation images with their predicted and true class names.
visualize_model(vgg_based)