In [1]:
from __future__ import print_function, division
import os
import torch
import pandas as pd
from skimage import io, transform
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torchvision
import matplotlib
import torch.nn as nn
import torch.optim as optim
import warnings
warnings.filterwarnings('ignore', category=FutureWarning)
plt.ion()   # interactive mode

In [2]:
class LoadDataset(Dataset):
    """Loads a dataset and applies relevant transformations."""

    def __init__(self, csv_file, root_dir, imgcol, labelcol, transform=None):
        """
        Args:
            csv_file (string): Path to the csv file with annotations.
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.dataframe = pd.read_csv(csv_file)
        # if necessary, create dummies for a certain column
        self.dataframe = pd.get_dummies(self.dataframe, columns=['Label'])
        self.root_dir = root_dir
        self.transform = transform
        self.imgcol = imgcol
        self.labelcol = labelcol

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        img_name = os.path.join(self.root_dir,
                                self.dataframe.iloc[idx, self.imgcol]) # fill with correct column
        image = io.imread(img_name, as_gray=False, pilmode="RGB") 
        label = self.dataframe.iloc[idx, self.labelcol] #fill with correct column of label
        label = np.array(float(label))
        sample = {'image': image, 'label': label}

        if self.transform:
            sample = self.transform(sample)

        return sample

In [49]:
class Rescale(object):
    """Rescale the image in a sample to a given size.

    Args:
        output_size (tuple or int): Desired output size. If tuple, output is
            matched to output_size. If int, smaller of image edges is matched
            to output_size keeping aspect ratio the same.
    """

    def __init__(self, output_size):
        assert isinstance(output_size, (int, tuple))
        self.output_size = output_size

    def __call__(self, sample):
        image, label = sample['image'], sample['label']

        h, w = image.shape[:2]
        if isinstance(self.output_size, int):
            if h > w:
                new_h, new_w = self.output_size * h / w, self.output_size
            else:
                new_h, new_w = self.output_size, self.output_size * w / h
        else:
            new_h, new_w = self.output_size

        new_h, new_w = int(new_h), int(new_w)

        img = transform.resize(image, (new_h, new_w))
        return {'image': img, 'label': label}
    
class CenterCrop(object):
    """Crop randomly the image in a sample.

    Args:
        output_size (tuple or int): Desired output size. If int, square crop
            is made.
    """

    def __init__(self, output_size):
        assert isinstance(output_size, (int, tuple))
        if isinstance(output_size, int):
            self.output_size = (output_size, output_size)
        else:
            assert len(output_size) == 2
            self.output_size = output_size

    def __call__(self, sample):
        image, label = sample['image'], sample['label']

        h, w = image.shape[:2]
        new_h, new_w = self.output_size

        top = np.random.randint(0, (h - new_h))
        left = np.random.randint(0, (w - new_w))

        image = image[top: top + new_h,
                      left: left + new_w]

        return {'image': image, 'label': label}
    
class ToTensor(object):
    """Convert ndarrays in sample to Tensors."""

    def __call__(self, sample):
        image, label = sample['image'], sample['label']
        image = image.transpose((2, 0, 1))

        return {'image': torch.from_numpy(image),
                'label': torch.from_numpy(label)}
    
   
class Normalize(object):
    """
    Args:
        mean (sequence): Sequence of means for each channel.
        std (sequence): Sequence of standard deviations for each channel.
        inplace(bool,optional): Bool to make this operation in-place.

    """

    def __init__(self, mean, std, inplace=False):
        self.mean = mean
        self.std = std
        self.inplace = inplace

    def __call__(self, sample):

        image, label = sample['image'], sample['label']

        dtype = image.dtype
        self.mean = torch.as_tensor(self.mean, dtype=dtype, device=image.device)
        self.std = torch.as_tensor(self.std, dtype=dtype, device=image.device)
        if (self.std == 0).any():
            raise ValueError('std evaluated to zero after conversion to {}, leading to division by zero.'.format(dtype))
        if self.mean.ndim == 1:
            self.mean = self.mean[:, None, None]
        if self.std.ndim == 1:
            self.std = self.std[:, None, None]
        image.sub_(self.mean).div_(self.std)
        return {'image': image, 'label': label}

In [57]:
normalize = Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

transformed_dataset = LoadDataset(csv_file='data_metadata.csv',
                                    root_dir='data/train/1',
                                      imgcol=1,
                                      labelcol = 5,
                                           transform=transforms.Compose([
                                               Rescale(256),
                                               CenterCrop(224),
                                               ToTensor(),
                                               normalize
                                           ]))

In [60]:
batch_size = 10
dataloader = DataLoader(transformed_dataset, batch_size = batch_size, shuffle = True)


In [61]:
dataiter= iter(dataloader)
data = dataiter.next()
features = data["image"]
labels = data["label"]
print(features.shape,labels.shape, len(features), len(labels))


torch.Size([10, 3, 224, 224]) torch.Size([10]) 10 10


In [85]:
transformed_dataset[12]

{'image': tensor([[[-0.3397, -0.3054, -0.2883,  ...,  0.3126,  0.3111,  0.3593],
          [-0.2714, -0.2635, -0.2171,  ...,  0.4773,  0.4728,  0.4408],
          [-0.2045, -0.2217, -0.2171,  ...,  0.5083,  0.4149,  0.3729],
          ...,
          [-0.2339, -0.1107,  0.0171,  ...,  0.4695,  0.4679,  0.4679],
          [-0.1355, -0.0265,  0.1146,  ...,  0.5499,  0.4799,  0.4637],
          [-0.0646,  0.0802,  0.1907,  ...,  0.5684,  0.5193,  0.4902]],
 
         [[-0.2178, -0.1828, -0.1653,  ...,  0.4490,  0.4475,  0.4968],
          [-0.1480, -0.1400, -0.0924,  ...,  0.6174,  0.6128,  0.5801],
          [-0.0796, -0.0972, -0.0924,  ...,  0.6491,  0.5537,  0.5107],
          ...,
          [-0.1097,  0.0163,  0.1469,  ...,  0.6094,  0.6078,  0.6078],
          [-0.0091,  0.1023,  0.2466,  ...,  0.6917,  0.6201,  0.6035],
          [ 0.0635,  0.2114,  0.3244,  ...,  0.7106,  0.6604,  0.6306]],
 
         [[ 0.0054,  0.0403,  0.0577,  ...,  0.6693,  0.6677,  0.7168],
          [ 0.0749,

In [87]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(in_channels = 3, out_channels = 6, kernel_size= 5, stride=1, padding=2)
        self.pool1 = nn.MaxPool2d(kernel_size = 2, stride = 1, padding = 0)
        self.conv2 = nn.Conv2d(in_channels = 6, out_channels = 16, kernel_size = 5, stride=1, padding=2)
        self.pool2 = nn.MaxPool2d(kernel_size = 2, stride = 1, padding = 0)
        self.fc1 = nn.Linear(in_features = 16 * 222 * 222, out_features = 2) # outfeatues = Number of classes

    def forward(self, x):
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 222 * 222)
        x = self.fc1(x) 
        return x

net = Net()

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")
net = net.to(device)




In [90]:
import torchvision.models as models
model = models.alexnet(pretrained=True).cuda()
model.classifier[6] = torch.nn.Linear(model.classifier[6].in_features, 2)
model = model.to(device)


In [93]:
for epoch in range(5):  # loop over the dataset multiple times
    print("epoch " + str(epoch+1))
    running_loss = 0.0
    running_corrects = 0.0
    for i, data in enumerate(dataloader, 0):
        inputs = data["image"].to(device).float()
        labels = data["label"].to(device).long()

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        # print statistics
        running_loss += loss.item()
        running_corrects += torch.sum(predicted == labels.data)
        if i % 100 == 99:    # print every 100 mini-batches
            print('[%d, %5d] loss: %.3f accuracy: %.3f' % (epoch + 1, i + 1, running_loss / 100, running_corrects / 100))
            running_loss = 0.0
            running_corrects = 0.0

print('Finished Training')

epoch 1
[1,   100] loss: 0.604 accuracy: 7.350
[1,   200] loss: 0.595 accuracy: 7.390
[1,   300] loss: 0.634 accuracy: 6.870
[1,   400] loss: 0.612 accuracy: 7.120
[1,   500] loss: 0.596 accuracy: 7.190
epoch 2
[2,   100] loss: 0.594 accuracy: 7.290
[2,   200] loss: 0.591 accuracy: 7.430
[2,   300] loss: 0.630 accuracy: 6.970


KeyboardInterrupt: 

In [73]:
torch.save(net.state_dict(), './cifar_net.pth')


In [90]:
correct = 0
total = 0
with torch.no_grad():
    for data in dataloader:
        inputs = data["image"].to(device).float()
        labels = data["label"].to(device).long()
        outputs = net(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the train images: %d %%' % (
    100 * correct / total))

Accuracy of the network on the  test images: 90 %


In [81]:
outputs.data

tensor([[ 2.9492, -2.9909],
        [ 1.7901, -1.8474],
        [ 1.9008, -1.9491],
        [ 1.0942, -1.1554],
        [ 2.0068, -2.0782],
        [ 2.3491, -2.3672],
        [ 1.2740, -1.3063],
        [ 1.6492, -1.6642],
        [-0.7391,  0.6873],
        [-2.2627,  2.2208]], device='cuda:0')