In [None]:
import torch
import torchvision
from torchvision import transforms, datasets, models
from torch.autograd import Variable
import numpy as np
import io
import pickle
import os
import sys
import glob
import shutil
from skimage import io, transform
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
import cv2
import matplotlib.pyplot as plt
from PIL import Image
import torch
from torchvision import transforms, models
from torch.autograd import Variable

In [2]:
class ImageFolderWithPaths(datasets.ImageFolder):
    """Custom dataset that includes image file paths. Extends
    torchvision.datasets.ImageFolder
    """

    # override the __getitem__ method. this is the method that dataloader calls
    def __getitem__(self, index):
        # this is what ImageFolder normally returns 
        original_tuple = super(ImageFolderWithPaths, self).__getitem__(index)
        # the image file path
        path = self.imgs[index][0]
        # make a new tuple that includes original and the path
        tuple_with_path = (original_tuple + (path,))
        return tuple_with_path

We load a VGG pretrained model and only select the first 23 layers.

In [4]:
vgg16 = models.vgg16(pretrained=True)
newmodel = torch.nn.Sequential(*(list(vgg16.features[:24])))

We set up the dataloaders and their augmentations here

In [9]:
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.RandomHorizontalFlip(),
    #transforms.RandomVerticalFlip(),
    transforms.RandomRotation((-20,20)),
    transforms.ToTensor(),
    transforms.Normalize(mean = [ 0.485, 0.456, 0.406 ], #default
    std  = [ 0.229, 0.224, 0.225 ]) # default
    #transforms.Normalize(mean = [ 0.71, 0.67, 0.59 ],
    #std  = [ 0.064, 0.059, 0.05]),
    
    ])

transform2 = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean = [ 0.485, 0.456, 0.406 ], #default
    std  = [ 0.229, 0.224, 0.225 ]) # default
    #transforms.Normalize(mean = [ 0.71, 0.67, 0.59 ],
    #std  = [ 0.064, 0.059, 0.05]),
    
    ])


# transform = transforms.Compose([
#     transforms.Resize(224),
#     transforms.ToTensor(),
#     transforms.Normalize(mean = [ 0.485, 0.456, 0.406 ], #default
#     std  = [ 0.229, 0.224, 0.225 ]), # default
#     #transforms.Normalize(mean = [ 0.71, 0.67, 0.59 ],
#     #std  = [ 0.064, 0.059, 0.05]),
    
#     ])

trainData = datasets.ImageFolder('../data/processed/classifier_training', transform)
trainLoader = torch.utils.data.DataLoader(dataset=trainData, batch_size=1, shuffle=True)

Here we extract vector encodings for drawings and photos from the VGG model.

In [10]:
drawing_encodings = []
photo_encodings = []
for images, labels in trainLoader:
    images = Variable(images)
    labels = Variable(labels)
    encoding = newmodel(images)
    encoding = encoding.detach().numpy().flatten()
    if labels == 0:
        drawing_encodings.append(encoding)
    else:
        photo_encodings.append(encoding)
        
photos = np.asarray(photo_encodings)
drawings = np.asarray(drawing_encodings)

Next we use these encodings to train a Linear SVC classifier

In [11]:
clf = svm.LinearSVC(random_state=0)
X = np.concatenate((drawings, photos),axis=0)
y = np.concatenate((np.zeros(len(drawing_encodings)), np.ones(len(photo_encodings))),axis=0)

# TODO SAVE CLASSIFIER

In [12]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.10, random_state=42)
clf.fit(X_train, y_train)

LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=0, tol=0.0001,
     verbose=0)

In [13]:
outscore = cross_val_score(clf,X,y, cv=10)
print('our accuracy is estimated at: ' + str(outscore)) #layer 24

our accuracy is estimated at: [0.96078431 0.9        0.92       0.84       0.94       0.84
 0.92       0.92       0.87755102 0.93877551]


In [17]:
np.round(np.mean(outscore), 2)
np.round(np.std(outscore), 2)

0.04

In [16]:
filename = '../data/processed/finalized_photo_illustration_model.sav'
pickle.dump(clf, open(filename, 'wb'))

# Predicting type of image
Here we predict which type of image (photo or drawing) we are dealing with. <br>
The images are then placed in folders belonging to this type. 

In [14]:
predictData = ImageFolderWithPaths('../data/processed/gender_/', transform2)
predictLoader = torch.utils.data.DataLoader(dataset=predictData, batch_size=1, shuffle=False)

In [16]:
# check if dir exists
for dir_ in ['test', 'train', 'validation']:
    os.mkdir('../data/processed/gender_/' + dir_ + '/f_photo')
    os.mkdir('../data/processed/gender_/' + dir_ + '/m_photo')
    os.mkdir('../data/processed/gender_/' + dir_ + '/f_drawing')
    os.mkdir('../data/processed/gender_/' + dir_ + '/m_drawing')

In [17]:
for images, labels, paths in predictLoader:
    path = ' '.join(paths)
    path_ = path.split('/')
    images = Variable(images)
    encoding = newmodel(images)
    encoding = encoding.detach().numpy().flatten()
    y_predict = clf.predict((np.asarray(encoding).reshape(1,-1)))
    if y_predict == 0:
        newpath ='../data/processed/gender_/' + path_[4] + '/' + path_[5] + '_drawing/' + path_[6]
        shutil.copy(path, newpath)
    else:
        newpath = '../data/processed/gender_/' + path_[4] + '/' + path_[5] + '_photo/' + path_[6]
        shutil.copy(path, newpath)
        

In [20]:
img_path = '../data/processed/KB_faces/1978/KBNRC01-000026215-mpeg21-a0080.jpg'

preprocess_img = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
    ])

vgg16 = models.vgg16(pretrained=True)
newmodel = torch.nn.Sequential(*(list(vgg16.features[:24])))

filename = "finalized_photo_illustration_model.sav.sav"
clf = pickle.load(open(filename, 'rb'))

In [21]:
img = Image.open(img_path)
img_tensor = preprocess_img(img)
img_tensor.unsqueeze_(0)
images = Variable(img_tensor)
encoding = newmodel(images)
encoding = encoding.detach().numpy().flatten()
prediction = clf.predict((np.asarray(encoding).reshape(1,-1)))

In [22]:
prediction

array([0.])