In [113]:
import numpy as np
import os

In [114]:
from PIL import Image
path = 'images/train/'



In [115]:
expressions = [name for name in os.listdir('images/train') if os.path.isdir('images/train')]


In [116]:
train_set=[]
labels=[]
labels_plain=[]
k=0
for idx, expression in enumerate(expressions):
    img_path='images/train/'+expression+'/'
    for i in os.listdir(img_path):
        train_set.append(np.asarray(Image.open(img_path+i)))
        labels.append(np.zeros(len(expressions)))
        labels_plain.append(idx)
        labels[k][idx]=1
        k=k+1
train_set=np.array(train_set)       
labels=np.array(labels)  
labels_plain=np.array(labels_plain)  

In [117]:
print(labels.shape[1])

7


In [118]:
print(train_set.shape)


(28821, 48, 48)


In [119]:
train_set=train_set.reshape(train_set.shape[0],train_set.shape[2]*train_set.shape[1])
train_set=train_set/255.


In [152]:
def define_layers(set,labels):
    layers=[set.shape[1],50,40,30,labels.shape[1]] 
    return layers

In [153]:
layers = define_layers(train_set,labels)
params={} # parameters

In [176]:
def initialize(layers):
    L=len(layers)-1
    for l in range(1,L+1):
        params['W'+str(l)]=np.random.randn(layers[l],layers[l-1])*np.sqrt(2/layers[l-1]) #using he initialization
        params['b'+str(l)]=np.zeros((layers[l],1))*np.sqrt(2/layers[l-1])
    return params

In [177]:
def relu(x):
    return (np.maximum(0,x))
    

In [178]:
def softmax(x):
    x = np.exp(x)/np.sum(np.exp(x),axis=0,keepdims=True)

    return x

In [179]:
def adam(parameters):
    L = len(parameters) // 2 # number of layers in the neural networks
    v = {}
    s = {}
    
    for l in range(L):
        v['dW' + str(l+1)] = np.zeros((parameters['W' + str(l+1)].shape[0], parameters['W' + str(l+1)].shape[1]))
        v['db' + str(l+1)] = np.zeros((parameters['b' + str(l+1)].shape[0], parameters['b' + str(l+1)].shape[1]))
        s['dW' + str(l+1)] = np.zeros((parameters['W' + str(l+1)].shape[0], parameters['W' + str(l+1)].shape[1]))
        s['db' + str(l+1)] = np.zeros((parameters['b' + str(l+1)].shape[0], parameters['b' + str(l+1)].shape[1]))
    return v,s
    

In [180]:
def forward_propagation(params,X):

    z1 = np.dot(params['W1'],np.transpose(X))+params['b1']
    a1 = relu(z1)
    z2 = np.dot(params['W2'],a1)+params['b2']
    a2 = relu(z2)
    z3 = np.dot(params['W3'],a2)+params['b3']
    a3 = relu(z3)
    z4 = np.dot(params['W4'],a3)+params['b4']
    a4 = softmax(z4)
    
    cache = (z1, a1,z2, a2, z3, a3, z4, a4)
    return cache ,a4

In [181]:
def compute_cost(Y,a,lambd, params):
    m=Y.shape[1]
    L2_regularization_cost = lambd/(2*m)*(np.sum(np.square(params['W1']))+np.sum(np.square(params['W2']))+np.sum(np.square(params['W3']))+np.sum(np.square(params['W4'])))
    a = np.clip(a, 1e-15, 1 - 1e-15) #to avoid log(0) returning infinite cost
    softmax_cost = -np.sum(Y*np.log(np.transpose(a)))/m 

    cost = softmax_cost + L2_regularization_cost
    return cost

In [182]:
def relu_derivative(x):
    for i in np.nditer(x):
        if i>0: i=1 
        else: i = 0
    return i

In [183]:
def back_propagation(X,Y,cache,params,lambd):
    m=X.shape[1]
    (z1, a1, z2, a2, z3, a3, z4, a4)=cache
    dz4 = a4-np.transpose(Y)
    dW4=np.dot(dz4,np.transpose(a3))/m + (lambd/m)*params['W4']
    db4 = np.sum(dz4,axis=1,keepdims=True)/m
    
    dz3= np.dot(np.transpose(params['W4']),dz4)*relu_derivative(dz4)
    dW3=np.dot(dz3,np.transpose(a2))/m + (lambd/m)*params['W3']
    db3 = np.sum(dz3,axis=1,keepdims=True)/m
    
    dz2= np.dot(np.transpose(params['W3']),dz3)*relu_derivative(dz3)
    dW2=np.dot(dz2,np.transpose(a1))/m + + (lambd/m)*params['W2']
    db2 = np.sum(dz2,axis=1,keepdims=True)/m
    
    dz1= np.dot(np.transpose(params['W2']),dz2)*relu_derivative(dz2)
    dW1=np.dot(dz1,X)/m + + (lambd/m)*params['W1']
    db1 = np.sum(dz1,axis=1,keepdims=True)/m
    
    grads = {
        'dW4': dW4 , 'db4':db4,
        'dW3': dW3 , 'db3':db3,
        'dW2': dW2 , 'db2':db2,
        'dW1': dW1 , 'db1':db1,
    }
    return grads

In [184]:
def update_parameters(params,grads,lr,v,s,t,beta1 = 0.9, beta2 = 0.999,  epsilon = 1e-8):
    L=len(params)//2
    
    v_corrected = {}
    s_corrected = {}    
    for l in range(L):
        
        v['dW' + str(l+1)] = beta1*v['dW' + str(l+1)] + (1 - beta1)*grads['dW' + str(l+1)]
        v['db' + str(l+1)] = beta1*v['db' + str(l+1)] + (1 - beta1)*grads['db' + str(l+1)]

        v_corrected['dW' + str(l+1)] = v['dW' + str(l+1)]/(1 - beta1**t)
        v_corrected['db' + str(l+1)] = v['db' + str(l+1)]/(1 - beta1**t)

        s['dW' + str(l+1)] = beta2*s['dW' + str(l+1)] + (1 - beta2)*np.square(grads['dW' + str(l+1)])
        s['db' + str(l+1)] = beta2*s['db' + str(l+1)] + (1 - beta2)*np.square(grads['db' + str(l+1)])

        s_corrected['dW' + str(l+1)] = s['dW' + str(l+1)]/(1 - beta2**t)
        s_corrected['db' + str(l+1)] = s['db' + str(l+1)]/(1 - beta2**t)

        params['W' + str(l+1)] = params['W' + str(l+1)] - lr*v_corrected['dW' + str(l+1)]/(np.sqrt(s_corrected['dW' + str(l+1)])+epsilon)
        params['b' + str(l+1)] = params['b' + str(l+1)] - lr*v_corrected['db' + str(l+1)]/(np.sqrt(s_corrected['db' + str(l+1)])+epsilon)
    
    return params, v,s
    
    


In [192]:
def model(X, Y, params,layers, lr, iterations = 500, lambd=0.6):
    params=initialize(layers)
    v,s = adam(params)
   

    for i in range(iterations):
        cache, a4=forward_propagation(params,X)
        cost = compute_cost(a4,Y,lambd,params)
        grads = back_propagation(X,Y,cache,params,lambd)
        params,v,s = update_parameters(params,grads,lr,v,s,t=2)
        if i%10==0:
            print('Cost after iteration %i: %f'%(i,cost))
    return params
    

In [186]:
validation_set=[]
labels_validation=[]
k=0
for idx, expression in enumerate(expressions):
    img_path='images/validation/'+expression+'/'
    for i in os.listdir(img_path):
        validation_set.append(np.asarray(Image.open(img_path+i)))
        labels_validation.append(np.zeros(len(expressions)))
        labels_validation[k][idx]=1
        k=k+1
validation_set=np.array(validation_set)       
labels_validation=np.array(labels_validation) 

In [187]:
validation_set=validation_set.reshape(validation_set.shape[0],validation_set.shape[2]*validation_set.shape[1])
validation_set=validation_set/255.

In [188]:
print(validation_set.shape)

(7066, 2304)


In [194]:
params = model(train_set,labels,params,layers=layers,lr=0.02)

Cost after iteration 0: 29.670789
Cost after iteration 10: 29.132905
Cost after iteration 20: 28.931836
Cost after iteration 30: 28.796598
Cost after iteration 40: 28.732376
Cost after iteration 50: 28.692312
Cost after iteration 60: 28.665573
Cost after iteration 70: 28.649507
Cost after iteration 80: 28.636502
Cost after iteration 90: 28.623877
Cost after iteration 100: 28.614059
Cost after iteration 110: 28.607525
Cost after iteration 120: 28.602376
Cost after iteration 130: 28.597554
Cost after iteration 140: 28.593226
Cost after iteration 150: 28.589592
Cost after iteration 160: 28.586529
Cost after iteration 170: 28.583855
Cost after iteration 180: 28.581487
Cost after iteration 190: 28.579394
Cost after iteration 200: 28.577543
Cost after iteration 210: 28.575898
Cost after iteration 220: 28.574432
Cost after iteration 230: 28.573123
Cost after iteration 240: 28.571951
Cost after iteration 250: 28.570898
Cost after iteration 260: 28.569951
Cost after iteration 270: 28.569098
Cos

In [190]:
def predict(params,X):
    prediction=[]
    cache, a4=forward_propagation(params,X)
    prediction=np.argmax(a4,axis=0)
    return prediction


In [195]:
preditcions = predict(params,train_set)
accuracy = np.sum(labels_plain==preditcions)/len(labels_plain)
print(accuracy)
#0.2274

0.2485687519517019
