In [0]:
import os
from torch.utils.data import Dataset, DataLoader
import csv
from PIL import Image
import numpy as np
import torch
from torch.autograd import Variable


# TODO create a class for each dataset EXCEPT MNIST (this is already built into pytorch)
# If the dataset comes pre-split into train/test we should write a separate class for each.

# For example the street view house number might look something like this...
# The cropped version of the dataset is in a weird *.mat format, see https://stackoverflow.com/questions/29185493/read-svhn-dataset-in-python for instructions to load with numpy
class SVHNDataset(Dataset):
    """`SVHN <http://ufldl.stanford.edu/housenumbers/>`_ Dataset.
    Note: The SVHN dataset assigns the label `10` to the digit `0`. However, in this Dataset,
    we assign the label `0` to the digit `0` to be compatible with PyTorch loss functions which
    expect the class labels to be in the range `[0, C-1]`
    Args:
        data_dir (string): directory of dataset where directory
            ``SVHN`` exists.
        split (string): One of {'train', 'test', 'extra'}.
            Accordingly dataset is selected. 'extra' is Extra training set.
        transform (callable, optional): A function/transform that  takes in an PIL image
            and returns a transformed version. E.g, ``transforms.RandomCrop``
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
    """
    filename = ""
    filepath = ""
    split_list = {
        'train': "train_32x32.mat",
        'test': "test_32x32.mat",
        'extra': "extra_32x32.mat"}

    def __init__(self, data_dir='./datasets', split='train',
                 transform=None, target_transform=None, download=False):
        self.data_dir = os.path.expanduser(data_dir)
        self.transform = transform
        self.target_transform = target_transform
        self.split = split  # training set or test set or extra set

        if self.split not in self.split_list:
            raise ValueError('Wrong split entered! Please use split="train" '
                             'or split="extra" or split="test"')

        self.filename = self.split_list[split]
        self.filepath = os.path.join(self.data_dir, self.filename)
        
        # import here rather than at top of file because this is
        # an optional dependency for torchvision
        import scipy.io as sio
        
        
        # judge if .mat exist
        if not os.path.isfile(self.filepath):
            raise RuntimeError('Dataset not found or corrupted.' +
                ' You can use fetch_data.sh to download it')
        
        # reading(loading) mat file as array
        loaded_mat = sio.loadmat(self.filepath)

        self.data = loaded_mat['X']
        # loading from the .mat file gives an np array of type np.uint8
        # converting to np.int64, so that we have a LongTensor after
        # the conversion from the numpy array
        # the squeeze is needed to obtain a 1D tensor
        self.labels = loaded_mat['y'].astype(np.int64).squeeze()

        # the svhn dataset assigns the class label "10" to the digit 0
        # this makes it inconsistent with several loss functions
        # which expect the class labels to be in the range [0, C-1]
        np.place(self.labels, self.labels == 10, 0)
        self.data = np.transpose(self.data, (3, 2, 0, 1))

    def __getitem__(self, index):
        """
        Args:
            index (int): Index
        Returns:
            tuple: (image, target) where target is index of the target class.
        """
        img, target = self.data[index], self.labels[index]

        # doing this so that it is consistent with all other datasets
        # to return a PIL Image
        img = Image.fromarray(np.transpose(img, (1, 2, 0)))

        if self.transform is not None:
            img = self.transform(img)

        if self.target_transform is not None:
            target = self.target_transform(target)
        
        return img, target

    def __len__(self):
        return len(self.data)
    
class EmojiDataset(Dataset):
    '''
    Dataset of 1 million bitmoji images.
    start_idx - image number dataset should start at
    end_idx - data number where dataset ends
    '''
    def __init__(self, data_dir, start_idx=0, end_idx=1000000, transform=None):
        self.data_dir = data_dir
        self.transform = transform
        self.data_len = end_idx - start_idx
    
    def __getitem__(self, idx):
        """
        Args:
            index (int): Index
        """
        img_name = os.path.join(self.data_dir, 'emoji_{}.png'.format(idx))
        img = Image.open(img_name)
        img = img.convert('RGB') # b/c it's a png

        if self.transform is not None:
            img = self.transform(img)
                                   
        return img

    def __len__(self):
        return self.data_len    

class CelebADataset(Dataset):
    '''
    CelebA face image dataset. This is the aligned and cropped version. 
    data_dir - directory of image data
    ann_dir - directory of annotation data
    split - either 'train', 'eval', or 'test'
    '''
    def __init__(self, data_dir, ann_dir, split, transform=None):
                
        data_splits = ['train', 'eval', 'test']
        self.data_dir = data_dir
        self.transform = transform
        
        split = data_splits.index(split)
        split_data = []
        with open(os.path.join(ann_dir, 'list_eval_partition.txt')) as split_file:
            reader = csv.reader(split_file, delimiter=' ')
            for row in reader:
                split_data.append(row)
        bbox_data = []
        with open(os.path.join(ann_dir, 'list_bbox_celeba.txt')) as bbox_file:
            reader = csv.reader(bbox_file, delimiter=' ', skipinitialspace=True)
            test_row = next(reader) # header row
            test_row = next(reader) # header row
            for row in reader:
                bbox_data.append(row)
                
        split_data = np.array(split_data)
        bbox_data = np.array(bbox_data)
        split_inds = np.where(split_data[:,1] == str(split))[0]
        
        self.split_info = split_data[split_inds, :]
        self.bbox_info = bbox_data[split_inds, :]
        self.data_len = self.split_info.shape[0]

    def __getitem__(self, idx):
        """
        Args:
            index (int): Index
        """
        img_name = os.path.join(self.data_dir, self.split_info[idx, 0])
        img = Image.open(img_name)
        
        if self.transform is not None:
            img = self.transform(img)
                           
        return img

    def __len__(self):
        return self.data_len
    
class MSCeleb1MDataset(Dataset):
    '''
    MS-Celeb-1M face image dataset. This is the aligned and cropped version. 
    data_dir - directory of data. This directory should contain annotation files and a subdirectory for image data.
    split - either 'train' or 'test'
    '''
    def __init__(self, data_dir, split, transform=None):
        data_splits = ['train', 'test']
        self.transform = transform
        
        split = data_splits.index(split)
        if split == 0:
            info_path = 'train_data_info.txt'
            self.data_path = os.path.join(data_dir, 'images_train/')
        elif split == 1:
            info_path = 'test_data_info.txt'
            self.data_path = os.path.join(data_dir, 'images_test/')
        
        info_data = []
        with open(os.path.join(data_dir, info_path)) as info_file:
            reader = csv.reader(info_file, delimiter=' ')
            for row in reader:
                info_data.append(row)
                
        self.info = np.array(info_data)
        self.data_len = self.info.shape[0]

    def __getitem__(self, idx):
        """
        Args:
            index (int): Index
        """
        img_name = os.path.join(self.data_path, self.info[idx, 0])
        img = Image.open(img_name)
        
        if self.transform is not None:
            img = self.transform(img)
                       
        return img

    def __len__(self):
        return self.data_len
    
class ResizeTransform(object):
    ''' Resizes a PIL image to (size, size) to feed into OpenFace net and returns a torch tensor.'''
    def __init__(self, size):
        self.size = size
        
    def __call__(self, sample):
        img = sample.resize((self.size, self.size), Image.BILINEAR)
        img = np.transpose(img, (2, 0, 1))
        img = img.astype(np.float32) / 255.0
        return torch.from_numpy(img)
    
class ZeroPadBottom(object):
    ''' Zero pads batch of image tensor Variables on bottom to given size. Input (B, C, H, W) - padded on H axis. '''
    def __init__(self, size, use_gpu=True):
        self.size = size
        self.use_gpu = use_gpu
        
    def __call__(self, sample):
        B, C, H, W = sample.size()
        diff = self.size - H
        padding = Variable(torch.zeros(B, C, diff, W), requires_grad=False)
        if self.use_gpu:
            padding = padding.cuda()
        zero_padded = torch.cat((sample, padding), dim=2)
        return zero_padded
    
class NormalizeRangeTanh(object):
    ''' Normalizes a tensor with values from [0, 1] to [-1, 1]. '''
    def __init__(self):
        pass
    
    def __call__(self, sample):
        sample = sample * 2.0 - 1.0
        return sample
    
class UnNormalizeRangeTanh(object):
    ''' Unnormalizes a tensor with values from [-1, 1] to [0, 1]. '''
    def __init__(self):
        pass
    
    def __call__(self, sample):
        sample = (sample + 1.0) * 0.5
        return sample
        
    
class UnNormalize(object):
    ''' from https://discuss.pytorch.org/t/simple-way-to-inverse-transform-normalization/4821/3'''
    def __init__(self, mean, std):
        mean_arr = []
        for dim in range(len(mean)):
            mean_arr.append(dim)
        std_arr = []
        for dim in range(len(std)):
            std_arr.append(dim)
        self.mean = torch.Tensor(mean_arr).view(1, len(mean), 1, 1)
        self.std = torch.Tensor(std_arr).view(1, len(std), 1, 1)

    def __call__(self, tensor):
        """
        Args:
            tensor (Tensor): Tensor image of size (B, C, H, W) to be normalized.
        Returns:
            Tensor: Normalized image.
        """
        tensor *= self.std
        tensor += self.mean
        return tensor

In [0]:
import copy
import numpy as np
import matplotlib.pyplot as plt

import torch
import torchvision
import torch.optim as optim
from torch.autograd import Variable
import torch.nn as nn
import torchvision.transforms as transforms
import time
import os

In [0]:
SVHN_transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,0.5,0.5),(0.5,0.5,0.5))])
s_train_set = torchvision.datasets.SVHN(root = './SVHN/', split='extra',download = True, transform = SVHN_transform)
s_train_loader = torch.utils.data.DataLoader(s_train_set, batch_size=128,
                                          shuffle=True, num_workers=8)
s_test_set = torchvision.datasets.SVHN(root = './SVHN/', split='test', download = True, transform = SVHN_transform)
s_test_loader = torch.utils.data.DataLoader(s_test_set, batch_size=128,
                                         shuffle=False, num_workers=8)
        

Using downloaded and verified file: ./SVHN/extra_32x32.mat
Using downloaded and verified file: ./SVHN/test_32x32.mat


In [0]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from skimage import color
from skimage import io
import cv2
import torch.nn as nn
import torch.nn.functional as F
import keras
from torchsummary import summary
import torch
import tensorflow

Using TensorFlow backend.


In [0]:
class Reshape(nn.Module):
    def __init__(self, *args):
        super(Reshape, self).__init__()
        self.shape = args

    def forward(self, x):
        return x.view(self.shape)
class F_me(nn.Module):
	'''
	MNIST digit classifier.
	'''
	def __init__(self):
		super(F_me, self).__init__()
		self.use_gpu = True
		self.classify = nn.Sequential(
                nn.Conv2d(3, 64, kernel_size=3, padding=1),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(2),               
                nn.Conv2d(64, 128, kernel_size=3, padding=1),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(2),
                nn.Conv2d(128, 256, kernel_size=3, padding=1),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(2),
                nn.Conv2d(256, 128, kernel_size=4, padding=0),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(4),
                Reshape(-1,128),
                nn.Linear(128, 10),
                nn.Softmax(),
              )
		if self.use_gpu:        
			self.type(torch.cuda.FloatTensor)


	def forward(self, input):
		# TODO implement the forward pass
		return self.classify(input)



In [0]:
model = F_me()
summary(model, (3, 32, 32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 32, 32]           1,792
              ReLU-2           [-1, 64, 32, 32]               0
         MaxPool2d-3           [-1, 64, 16, 16]               0
            Conv2d-4          [-1, 128, 16, 16]          73,856
              ReLU-5          [-1, 128, 16, 16]               0
         MaxPool2d-6            [-1, 128, 8, 8]               0
            Conv2d-7            [-1, 256, 8, 8]         295,168
              ReLU-8            [-1, 256, 8, 8]               0
         MaxPool2d-9            [-1, 256, 4, 4]               0
           Conv2d-10            [-1, 128, 1, 1]         524,416
             ReLU-11            [-1, 128, 1, 1]               0
        MaxPool2d-12            [-1, 128, 1, 1]               0
          Reshape-13                  [-1, 128]               0
           Linear-14                   

  input = module(input)


In [0]:
type(F_me)

type

In [0]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adadelta(model.parameters())

for epoch in range(10):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(s_train_loader, 0):
        # get the inputs
        inputs, labels = data
        inputs = inputs.cuda()
        labels = labels.cuda()
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')

  input = module(input)


[1,  2000] loss: 1.818
[1,  4000] loss: 1.656
[2,  2000] loss: 1.586
[2,  4000] loss: 1.578
[3,  2000] loss: 1.554
[3,  4000] loss: 1.498
[4,  2000] loss: 1.494
[4,  4000] loss: 1.493
[5,  2000] loss: 1.491
[5,  4000] loss: 1.492
[6,  2000] loss: 1.489
[6,  4000] loss: 1.490
[7,  2000] loss: 1.489
[7,  4000] loss: 1.489
[8,  2000] loss: 1.488
[8,  4000] loss: 1.488
[9,  2000] loss: 1.487
[9,  4000] loss: 1.487
[10,  2000] loss: 1.487
[10,  4000] loss: 1.487
Finished Training


In [0]:
correct = 0
total = 0
with torch.no_grad():
    for data in s_test_loader:
        images, labels = data
        images = images.cuda()
        labels = labels.cuda()
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print( (100 * correct / total))

  input = module(input)


93.3159188690842


In [0]:
torch.save({'state_dict': model.state_dict()},'f_model.tar')

In [0]:
class F(nn.Module):
	'''
	MNIST digit classifier.
	'''
	def __init__(self,):
		super(F, self).__init__()
		self.use_gpu = True
		self.classify = nn.Sequential(
                nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1),
                nn.ReLU(inplace=True),
                #nn.LeakyReLU(0.2, inplace=True),
                
                nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1),
                nn.ReLU(inplace=True),
                #nn.LeakyReLU(0.2, inplace=True),

                nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1),
                nn.ReLU(inplace=True),
                #nn.LeakyReLU(0.2, inplace=True),
            
                nn.Conv2d(256, 128, kernel_size=4, stride=1, padding=0),
                nn.ReLU(inplace=True),
                #nn.LeakyReLU(0.2, inplace=True),

#                 Flatten(),
                #nn.Linear(128, 10)
                #nn.Conv2d(128, 10, kernel_size=1, stride=1, padding=0)
                Reshape(-1,128),
                nn.Linear(128, 10),
                nn.Softmax()
              )
		if self.use_gpu:        
			self.type(torch.cuda.FloatTensor)


	def forward(self, input):
		# TODO implement the forward pass
		return self.classify(input)
  
model2 = F()
summary(model2, (3, 32, 32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 16, 16]           1,792
              ReLU-2           [-1, 64, 16, 16]               0
            Conv2d-3            [-1, 128, 8, 8]          73,856
              ReLU-4            [-1, 128, 8, 8]               0
            Conv2d-5            [-1, 256, 4, 4]         295,168
              ReLU-6            [-1, 256, 4, 4]               0
            Conv2d-7            [-1, 128, 1, 1]         524,416
              ReLU-8            [-1, 128, 1, 1]               0
           Reshape-9                  [-1, 128]               0
           Linear-10                   [-1, 10]           1,290
          Softmax-11                   [-1, 10]               0
Total params: 896,522
Trainable params: 896,522
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/

  input = module(input)


In [0]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adadelta(model2.parameters())

for epoch in range(10):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(s_train_loader, 0):
        # get the inputs
        inputs, labels = data
        inputs = inputs.cuda()
        labels = labels.cuda()
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model2(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')

  input = module(input)


[1,  2000] loss: 1.902
[1,  4000] loss: 1.651
[2,  2000] loss: 1.602
[2,  4000] loss: 1.594
[3,  2000] loss: 1.587
[3,  4000] loss: 1.584
[4,  2000] loss: 1.580
[4,  4000] loss: 1.579
[5,  2000] loss: 1.576
[5,  4000] loss: 1.575
[6,  2000] loss: 1.573
[6,  4000] loss: 1.573
[7,  2000] loss: 1.562
[7,  4000] loss: 1.499
[8,  2000] loss: 1.496
[8,  4000] loss: 1.495
[9,  2000] loss: 1.493
[9,  4000] loss: 1.493
[10,  2000] loss: 1.491
[10,  4000] loss: 1.492
Finished Training


In [0]:
correct = 0
total = 0
with torch.no_grad():
    for data in s_test_loader:
        images, labels = data
        images = images.cuda()
        labels = labels.cuda()
        outputs = model2(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))

  input = module(input)


Accuracy of the network on the 10000 test images: 91 %
