In [0]:
# http://pytorch.org/
from os import path
from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag
platform = '{}{}-{}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag())
accelerator = 'cu80' if path.exists('/opt/bin/nvidia-smi') else 'cpu'
!pip install -q http://download.pytorch.org/whl/{accelerator}/torch-0.3.0.post4-{platform}-linux_x86_64.whl torchvision

!pip install --no-cache-dir -I pillow
!wget https://www.dropbox.com/s/zxedrp2ar549v84/FDDB.tar.gz?dl=0

In [0]:
!ls

In [0]:
! tar -xf FDDB.tar.gz\?dl\=0

In [0]:
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import torchvision.datasets as dsets

from PIL import Image
from sklearn.model_selection import train_test_split
import pickle
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline

In [2]:
# Change this path to your FDDB Dataset path
FDDB_path = 'FDDB'

annotation_folder = FDDB_path+'/FDDB-folds/'
data_folder = FDDB_path+'/originalPics/'

X_train = []
Y_train = []
img_size = 100
max_count = 50

def get_coords(line):
    line = line.replace('\n','')   
    line = line.split(' ')
    line.remove('')
    line = [float(i) for i in line]    
    [major_axis_radius, minor_axis_radius, angle, center_x, center_y, _] = line

    # The FDDB dataset gives the face annotation in the form of an ellipse.
    # We take it's end points and consider it as a rectangle 
    x = int(center_x - minor_axis_radius)
    y = int(center_y - major_axis_radius)
    w = int(2*minor_axis_radius)
    h = int(2*major_axis_radius)
    
    return [x, y, w, h]


for index in range(1, 11):
    print("index = ", index)
    file_name = 'FDDB-fold-'+str(index).zfill(2)+'-ellipseList.txt'
    file_path = FDDB_path+'/FDDB-folds/'+file_name
    fp = open(file_path, 'r')
    lines = fp.readlines()
    count = 0
    for i in range (len(lines)):
        line = lines[i].replace('\n','')
        
        # Images which have only one face
        if(line == '1'):
            image_path = lines[i-1].replace('\n','')+'.jpg'
            image_path = data_folder+image_path
            line_coords = lines[i+1]
            [x,y,w,h] = get_coords(line_coords)
            img = Image.open(image_path)
            [W, H] = img.size
            
            img_face = img.crop((x,y,x+w,y+h))

            # crop face image
            # crop random area from original image not contatining any face              
            img_nonface = img.crop((x+w, y+h, W, H))
            
            img_face = img_face.resize((img_size,img_size))
            img_nonface = img_nonface.resize((img_size,img_size))
            
            img_face = np.array(img_face).astype('float32')
            img_nonface = np.array(img_nonface).astype('float32')
            
            # Check is the image is RGB
            if( (img_face.shape == (img_size,img_size, 3)) and ((img_nonface.shape == (img_size,img_size, 3))) and (count != max_count) ):
                X_train.append(img_face)
                Y_train.append(0)
                X_train.append(img_nonface) 
                Y_train.append(1)
                count += 1
               
               
X_train = np.array(X_train)
Y_train = np.array(Y_train)

#X_train = X_train[:,:,:,np.newaxis]


X_train, X_CV, Y_train, Y_CV = train_test_split(X_train, Y_train, test_size=0.20, random_state=42)  



print(X_train.shape, Y_train.shape)
print(X_CV.shape, Y_CV.shape)


index =  1
index =  2
index =  3
index =  4
index =  5
index =  6
index =  7
index =  8
index =  9
index =  10
(800, 100, 100, 3) (800,)
(200, 100, 100, 3) (200,)


In [3]:
X_train = np.moveaxis(X_train, 3, 1)
X_CV = np.moveaxis(X_CV, 3, 1)
print(X_train.shape, Y_train.shape)
print(X_CV.shape, Y_CV.shape)

(800, 3, 100, 100) (800,)
(200, 3, 100, 100) (200,)


In [0]:
class MyDataset(Dataset):
    def __init__(self, X, Y, transform=None):
        self.X = X
        self.Y = Y 
        self.transform = transform
        
    def __len__(self):
        return self.X.shape[0]
    
    def __getitem__(self, index):
        return (self.X[index], self.Y[index])

In [0]:
train_dataset = MyDataset(X_train, Y_train)
test_dataset = MyDataset(X_CV, Y_CV)

In [0]:
batch_size = 64
epochs = 10

train_loader = torch.utils.data.DataLoader(dataset = train_dataset, 
                                           batch_size = batch_size, 
                                           shuffle = True)

test_loader = torch.utils.data.DataLoader(dataset = test_dataset, 
                                          batch_size = batch_size, 
                                          shuffle = False)

In [0]:
class CNNModel(nn.Module):
    def __init__(self):
        super(CNNModel, self).__init__()
        
        self.cnn1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=5, stride=1, padding=2)
        self.relu1 = nn.ReLU()
        self.maxpool1 = nn.MaxPool2d(kernel_size=2)
        
        self.cnn2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5, stride=1, padding=2)
        self.relu2 = nn.ReLU()
        self.maxpool2 = nn.MaxPool2d(kernel_size=2)
       
        
        self.fc1 = nn.Linear(32 * 25 * 25, 64)
        self.act1 = nn.ReLU()
        
        self.fc2 = nn.Linear(64, 2)
        
        
    def forward(self, x):
        out = self.cnn1(x)
        out = self.relu1(out)
        out = self.maxpool1(out)
        out = self.cnn2(out)
        out = self.relu2(out)
        out = self.maxpool2(out)
        
        out = out.view(out.size(0), -1)
        
        out = self.fc1(out)
        out = self.act1(out)
        out = self.fc2(out)
        
        return out

In [0]:
model = CNNModel()

In [0]:
criterion = nn.CrossEntropyLoss()

In [0]:
learning_rate = 0.0001
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [11]:
print(model)

CNNModel(
  (cnn1): Conv2d (3, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (relu1): ReLU()
  (maxpool1): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), dilation=(1, 1))
  (cnn2): Conv2d (16, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (relu2): ReLU()
  (maxpool2): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), dilation=(1, 1))
  (fc1): Linear(in_features=20000, out_features=64)
  (act1): ReLU()
  (fc2): Linear(in_features=64, out_features=2)
)


In [12]:
print(len(list(model.parameters())))

8


In [13]:
for i in range(len(list(model.parameters()))):
    print(list(model.parameters())[i].size())

torch.Size([16, 3, 5, 5])
torch.Size([16])
torch.Size([32, 16, 5, 5])
torch.Size([32])
torch.Size([64, 20000])
torch.Size([64])
torch.Size([2, 64])
torch.Size([2])


In [15]:
for epoch in range(epochs):
    epoch += 1
    for i,(images, labels) in enumerate(train_loader):
        images = Variable(images)
        labels = Variable(labels)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()    
    total = 0
    correct = 0
    
    for images, labels in test_loader:
        images = Variable(images)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (labels == predicted).sum()
    accuracy = 100.0 * correct / total
    print('Epoch: {} Loss: {} Accuracy: {}'.format(epoch, loss.data[0], accuracy))
    

Epoch: 1 Loss: 0.18475979566574097 Accuracy: 93.0
Epoch: 2 Loss: 0.3373740315437317 Accuracy: 94.0
Epoch: 3 Loss: 0.203416645526886 Accuracy: 90.5
Epoch: 4 Loss: 0.16922907531261444 Accuracy: 93.5
Epoch: 5 Loss: 0.16625669598579407 Accuracy: 94.0
Epoch: 6 Loss: 0.1356663852930069 Accuracy: 93.0
Epoch: 7 Loss: 0.17945149540901184 Accuracy: 92.5
Epoch: 8 Loss: 0.11183133721351624 Accuracy: 91.0
Epoch: 9 Loss: 0.4622647166252136 Accuracy: 94.0
Epoch: 10 Loss: 0.205274298787117 Accuracy: 93.5
