In [1]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import models,transforms
from scipy.io import loadmat
import numpy as np
import os
from PIL import Image
from sklearn.preprocessing import OneHotEncoder

In [2]:
model = models.resnet50(pretrained=True)
model._modules['fc'] = nn.Linear(in_features=2048, out_features=196, bias=True)
model.cuda();

In [3]:
# model

In [4]:
classname_file = 'devkit/cars_meta.mat'
data = loadmat(classname_file)
data = np.array(data['class_names'])
data[0].shape

(196,)

In [5]:
train_file = 'devkit/cars_train_annos.mat'
# test_file = 'devkit/cars_test_annos.mat'

data = loadmat(train_file)
data = np.array(data['annotations'])
val_number = 6000
train_data = data[0][:val_number]
val_data = data[0][val_number:]
# test_data = loadmat(test_file)
# test_data = np.array(test_data['annotations'])

In [6]:
train_data.shape
# test_data[0]

(6000,)

In [7]:
def get_labels(data):
    labels = []
    for d in data:
        _,_,_,_,label,_ = d
        labels.append(label)
    labels = np.array(labels).squeeze()
#     labels = labels.reshape(-1,1)
#     enc = OneHotEncoder()
#     labels = enc.fit_transform(labels).toarray()
    return labels

train_labels = get_labels(train_data)
val_labels = get_labels(val_data)

In [8]:
train_labels.shape

(6000,)

In [9]:
class CarDataset(Dataset):
    """Car dataset."""

    def __init__(self, labels, root_dir, transform=None):
        """
        Args:
            labels (numpy array): array of class labels of the cars.
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.labels = labels
        self.root_dir = root_dir
        self.files = os.listdir(self.root_dir)
        self.transform = transform

    def __len__(self):
        return self.labels.shape[0]

    def __getitem__(self, idx):
        img_name = os.path.join(self.root_dir,self.files[idx])
        image = Image.open(img_name).convert("RGB")
        if self.transform:
            image = self.transform(image)
        return np.array([self.labels[idx]]), image

In [10]:
### load data
train_img_dir = "cars_train"
val_img_dir = "cars_val"

transform = transforms.Compose([
                        transforms.Resize((256,256)),
                        transforms.RandomResizedCrop(224),
                        transforms.RandomHorizontalFlip(),
                        transforms.ToTensor(),
                        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
])
train_loader = DataLoader(CarDataset(train_labels,train_img_dir,transform), batch_size=4, shuffle=True, drop_last=True)
val_loader = DataLoader(CarDataset(val_labels,val_img_dir,transform), batch_size=4, shuffle=True, drop_last=True)

In [11]:
criterion = nn.CrossEntropyLoss().cuda()

optimizer = torch.optim.SGD(model.parameters(), 0.002,
                            momentum=0.9,
                            weight_decay=1e-4)

In [19]:
def accuracy(output, target):
    """Computes the accuracy over the k top predictions for the specified values of k"""
    with torch.no_grad():
        batch_size = target.size(0)

        _, pred = output.topk(1, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))
        correct_k = correct[:1].view(-1).float().sum(0, keepdim=True)
        res = correct_k.mul_(100.0 / batch_size)
        return res

In [25]:
for epoch in range(20):
    model.train()
    train_loss = 0
    train_accuracy=0
    for counter, (label, img) in enumerate(train_loader):
        label = label.cuda()
        img = img.cuda()
        out = model(img)
        loss = criterion(out,label.long().squeeze())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        acc = accuracy(out,label.long().squeeze())
        train_loss += loss
        train_accuracy += acc
#         print(counter)
    train_loss = train_loss/len(train_loader)
    train_accuracy = train_accuracy/len(train_loader)

    print('for epoch {},train loss is {} and train accuracy is {}'
                  .format(epoch, train_loss, train_accuracy))

    model.eval()
    val_loss = 0
    val_accuracy = 0
    for label, img in val_loader:
        label = label.cuda()
        img = img.cuda()
        with torch.no_grad():
            out = model(img)
            loss = criterion(out,label.long().squeeze())
            acc = accuracy(out,label.long().squeeze())
        val_loss += loss
        val_accuracy += acc
    val_loss = val_loss/len(val_loader)
    val_accuracy = val_accuracy/len(val_loader)

    print('for epoch {},val loss is {} and val accuracy is {}'
                  .format(epoch, val_loss, val_accuracy))

for epoch 0,train loss is 1.0623408555984497 and train accuracy is tensor([73.5667], device='cuda:0')
for epoch 0,val loss is 1.4957767724990845 and val accuracy is tensor([64.8787], device='cuda:0')
for epoch 1,train loss is 1.0063170194625854 and train accuracy is tensor([74.0167], device='cuda:0')
for epoch 1,val loss is 1.4115499258041382 and val accuracy is tensor([66.5112], device='cuda:0')
for epoch 2,train loss is 1.0685181617736816 and train accuracy is tensor([72.9833], device='cuda:0')
for epoch 2,val loss is 1.2874196767807007 and val accuracy is tensor([68.1903], device='cuda:0')
for epoch 3,train loss is 1.0044759511947632 and train accuracy is tensor([74.3500], device='cuda:0')
for epoch 3,val loss is 1.2833750247955322 and val accuracy is tensor([69.9160], device='cuda:0')
for epoch 4,train loss is 0.9807437062263489 and train accuracy is tensor([75.2667], device='cuda:0')
for epoch 4,val loss is 1.3199161291122437 and val accuracy is tensor([68.7034], device='cuda:0')


KeyboardInterrupt: 

In [27]:
torch.save(model.state_dict(), 'first_attempt.pkl')