# Emotion Detection using OpenCV, CNN and VGG-16 Transfer Learning

In [64]:
import cv2 

import pandas as pd
import numpy as np
import os

from keras.utils import  np_utils

from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from keras.models import Model
from keras.preprocessing.image import ImageDataGenerator

from keras.models import Sequential
from keras.layers import Dense,Flatten,Dropout
from keras.layers import Conv2D,MaxPooling2D


from keras.applications.vgg16 import VGG16
from keras.applications.vgg16 import preprocess_input

In [65]:
cam = cv2.VideoCapture(0)

# Create emotion dataset

In [3]:
def create_emotions_dataset(category, emotion_code, num_frames=301):
    """Capture webcam frames and save them as JPEGs under ``data1/<category>/``.

    Frames are read from the module-level ``cam`` capture and written as
    ``<emotion_code><index>.jpg``. A preview window is shown while recording;
    press ``q`` in that window to stop early. The capture is released and all
    OpenCV windows are closed when the function returns.

    Parameters
    ----------
    category : str
        Name of the sub-directory of ``data1`` that receives the images.
    emotion_code : str
        Prefix used for every saved file name.
    num_frames : int, optional
        Maximum number of frames to save. The default of 301 keeps the
        original behaviour (indices 0 through 300).
    """
    out_dir = os.path.join('data1', category)
    # cv2.imwrite returns False instead of raising when the directory is missing
    os.makedirs(out_dir, exist_ok=True)

    # The capture is released at the end of every call, so reopen it when a
    # previous call (or anything else) has already closed it.
    if not cam.isOpened():
        cam.open(0)

    try:
        for curr_frame in range(num_frames):
            # ret is False (and frame is None) when no frame could be grabbed
            ret, frame = cam.read()
            if not ret:
                print('Could not read a frame from the camera; stopping capture')
                break

            file_name = emotion_code + str(curr_frame) + '.jpg'
            cv2.imwrite(os.path.join(out_dir, file_name), frame)

            # waitKey only receives key presses while a HighGUI window exists,
            # so show a preview; 1 ms keeps the capture loop refreshing.
            cv2.imshow('capture', frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # always free the camera and close the preview, even on error
        cam.release()
        cv2.destroyAllWindows()
    

In [4]:
# NOTE: create_emotions_dataset() releases the shared `cam` capture before returning,
# which is presumably why only one category is recorded per run and the other calls
# are left commented out.
#create_emotions_dataset('Neutral','ne')
#create_emotions_dataset('Angry','an')
#create_emotions_dataset('Sad','sa')
create_emotions_dataset('Smile','sm')

In [5]:
# Haar-cascade frontal face detector; the XML path is relative to the working directory.
face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')

In [66]:
# os.listdir returns entries in arbitrary order; sort them so the
# category -> index mapping is reproducible and matches the hard-coded
# index -> name tables used by the live-prediction cells.
categories = sorted(os.listdir('data1'))

labels = list(range(len(categories)))

face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
# CascadeClassifier does not raise on a missing or invalid file; it just stays empty
if face_cascade.empty():
    raise IOError('Could not load haarcascade_frontalface_default.xml')

label_dict = dict(zip(categories, labels))  # category name -> integer class label

print(categories)
print(label_dict)

['Angry', 'Neutral', 'Sad', 'Smile']
{'Angry': 0, 'Neutral': 1, 'Sad': 2, 'Smile': 3}


In [67]:
# Build the training arrays: detect the face in every captured image, save the
# crop under dataset1/<category>_face/ and keep a grayscale img_size x img_size
# copy in `data` with its integer label in `target`.
data=[]
target=[]
img_size=100

for category in categories:
    src_dir = os.path.join('data1', category)
    face_dir = os.path.join('dataset1', category + '_face')
    # cv2.imwrite returns False instead of raising when the directory is missing
    os.makedirs(face_dir, exist_ok=True)

    images = os.listdir(src_dir)
    print(category,"has ",len(images)," images")

    for image in images:
        img = cv2.imread(os.path.join(src_dir, image))
        if img is None:
            # imread returns None for unreadable or non-image files
            print('Skipping unreadable file:', image)
            continue

        try:
            # a detection only counts as a face when it collects at least
            # minNeighbors overlapping responses
            faces = face_cascade.detectMultiScale(img, minNeighbors=10, minSize=(64,64))

            for (x,y,w,h) in faces:
                sub_face = img[y:y + h, x:x + w]
                # when several faces are found, later crops overwrite earlier
                # ones on disk (same file name), but every crop is kept in `data`
                cv2.imwrite(os.path.join(face_dir, image), sub_face)

                gray = cv2.cvtColor(sub_face, cv2.COLOR_BGR2GRAY)
                resized = cv2.resize(gray, (img_size, img_size))
                data.append(resized)
                target.append(label_dict[category])

            # report images with zero or several detections
            multi = len(faces)
            if multi != 1:
                print(str(multi) + " " + image)

        except cv2.error as e:
            print('Exception:',e)

Angry has  300  images
Neutral has  300  images
Sad has  300  images
Smile has  300  images


In [68]:
len(data)

1200

In [69]:
pd.Series(target).value_counts()

3    300
2    300
1    300
0    300
dtype: int64

In [70]:
# Stack the list of grayscale face crops into a single ndarray
# (the name `data` is rebound from a list to an array here).
data= np.array(data)
data.shape

(1200, 100, 100)

In [71]:
#Lets see the first image
data[0]

array([[ 84,  76,  74, ...,  69,  74, 112],
       [ 75,  70,  70, ...,  69,  69, 102],
       [ 67,  66,  66, ...,  66,  74, 103],
       ...,
       [ 72,  66,  60, ...,  42,  44,  44],
       [ 73,  62,  58, ...,  44,  45,  45],
       [ 66,  61,  57, ...,  42,  46,  47]], dtype=uint8)

In [72]:
# Normalize the entire data set: scale the 8-bit pixel values into [0, 1].
# NOTE: `data` is rebound, so re-running this cell divides by 255 a second time.
data= data/255.0
data[0]

array([[0.32941176, 0.29803922, 0.29019608, ..., 0.27058824, 0.29019608,
        0.43921569],
       [0.29411765, 0.2745098 , 0.2745098 , ..., 0.27058824, 0.27058824,
        0.4       ],
       [0.2627451 , 0.25882353, 0.25882353, ..., 0.25882353, 0.29019608,
        0.40392157],
       ...,
       [0.28235294, 0.25882353, 0.23529412, ..., 0.16470588, 0.17254902,
        0.17254902],
       [0.28627451, 0.24313725, 0.22745098, ..., 0.17254902, 0.17647059,
        0.17647059],
       [0.25882353, 0.23921569, 0.22352941, ..., 0.16470588, 0.18039216,
        0.18431373]])

In [73]:
# Add a trailing single-channel axis so each image matches the CNN input layout
data = data.reshape((len(data), img_size, img_size, 1))
data.shape

(1200, 100, 100, 1)

In [74]:
# Convert the list of integer class labels to an ndarray
target= np.array(target)

In [75]:
# Hold out 20% of the samples for testing; random_state fixes the shuffle.
# No `stratify` argument is passed, so class counts in the training set are
# only approximately balanced (see the value_counts output).
x_train,x_test,y_train,y_test=train_test_split(data,target,test_size=0.2, random_state=1)
pd.Series(y_train).value_counts()

2    248
1    239
3    238
0    235
dtype: int64

In [76]:
# One-hot encode the training labels for the categorical_crossentropy loss.
# y_test stays as integer labels so it can be compared with argmax predictions.
# NOTE: `y_train` is rebound, so this cell must not be run twice in a row.
y_train = np_utils.to_categorical(y_train)
#y_test = np_utils.to_categorical(y_test)

In [94]:
# Small CNN for single-channel img_size x img_size face crops:
# four Conv -> MaxPool -> Dropout stages followed by a dense classifier head.
model= Sequential()
kernel_size=(3,3)

model.add(Conv2D(filters= 32, kernel_size= kernel_size ,activation='relu', input_shape= (img_size,img_size,1)))
model.add(MaxPooling2D(2,2))
model.add(Dropout(0.3))

model.add(Conv2D(filters= 64, kernel_size= kernel_size ,activation='relu'))
model.add(MaxPooling2D(2,2))
model.add(Dropout(0.3))

model.add(Conv2D(filters= 64, kernel_size= kernel_size ,activation='relu'))
model.add(MaxPooling2D(2,2))
model.add(Dropout(0.3))

model.add(Conv2D(filters= 32, kernel_size= kernel_size ,activation='relu'))
model.add(MaxPooling2D(2,2))
model.add(Dropout(0.3))

model.add(Flatten())
model.add(Dense(128, activation='relu'))
# one softmax unit per emotion category, sized from the data like the VGG head
# instead of a hard-coded 4
model.add(Dense(len(categories), activation='softmax'))

# categorical_crossentropy expects one-hot encoded targets
model.compile(optimizer='adam',loss='categorical_crossentropy', metrics =['accuracy'])

In [78]:
# Train the CNN for 15 epochs, holding out 20% of x_train for validation
model.fit(x_train,y_train, epochs=15, validation_split=0.2)

Train on 768 samples, validate on 192 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.callbacks.History at 0x1c0d08e89b0>

In [79]:
# Per-class probabilities for every held-out sample
y_pred = model.predict(x_test)

# Index of the most probable class along the last axis
y_classes = np.argmax(y_pred, axis=-1)

y_classes

array([2, 2, 3, 0, 3, 0, 2, 1, 1, 2, 0, 0, 1, 0, 2, 1, 3, 0, 1, 3, 3, 1,
       3, 1, 0, 2, 1, 0, 3, 1, 3, 3, 3, 2, 0, 0, 1, 3, 3, 0, 1, 1, 0, 0,
       2, 1, 2, 3, 2, 1, 3, 1, 2, 0, 1, 2, 1, 1, 1, 3, 1, 0, 0, 2, 0, 1,
       3, 3, 3, 3, 1, 2, 1, 3, 0, 3, 0, 0, 2, 1, 3, 1, 0, 1, 1, 0, 2, 2,
       2, 1, 3, 1, 3, 2, 0, 3, 0, 2, 2, 1, 3, 2, 3, 1, 1, 0, 1, 1, 0, 3,
       3, 2, 3, 0, 3, 0, 3, 0, 1, 1, 0, 0, 1, 0, 2, 0, 0, 1, 0, 2, 2, 3,
       1, 0, 0, 2, 0, 3, 0, 0, 2, 1, 3, 3, 0, 3, 3, 2, 1, 3, 2, 1, 2, 1,
       3, 2, 0, 1, 0, 2, 2, 0, 3, 1, 0, 3, 1, 0, 0, 2, 3, 2, 1, 2, 3, 3,
       2, 2, 0, 1, 2, 3, 0, 3, 1, 1, 1, 2, 2, 3, 3, 3, 1, 0, 1, 2, 3, 0,
       3, 2, 0, 2, 3, 0, 2, 1, 0, 0, 3, 2, 1, 0, 0, 0, 0, 0, 3, 1, 0, 1,
       3, 2, 0, 2, 0, 1, 1, 3, 3, 1, 3, 2, 3, 3, 0, 0, 2, 3, 2, 1],
      dtype=int64)

In [80]:
y_test

array([2, 2, 3, 0, 3, 0, 2, 1, 1, 2, 0, 0, 1, 0, 2, 1, 3, 0, 1, 3, 3, 1,
       3, 1, 0, 2, 1, 0, 3, 1, 3, 3, 3, 2, 0, 0, 1, 3, 3, 0, 1, 1, 0, 0,
       2, 1, 2, 3, 2, 1, 3, 1, 2, 0, 1, 2, 1, 1, 1, 3, 1, 0, 0, 2, 0, 1,
       3, 3, 3, 3, 1, 2, 1, 3, 0, 3, 0, 0, 2, 1, 3, 1, 0, 1, 1, 0, 2, 2,
       2, 1, 3, 1, 3, 2, 0, 3, 0, 2, 2, 1, 3, 2, 3, 1, 1, 0, 1, 1, 0, 3,
       3, 2, 3, 0, 3, 0, 3, 0, 1, 1, 0, 0, 1, 0, 2, 0, 0, 1, 0, 2, 2, 3,
       1, 0, 0, 2, 0, 3, 0, 0, 2, 1, 3, 3, 0, 3, 3, 2, 1, 3, 2, 1, 2, 1,
       3, 2, 0, 1, 0, 2, 2, 0, 3, 1, 0, 3, 1, 0, 0, 2, 3, 2, 1, 2, 3, 3,
       2, 2, 0, 1, 2, 3, 0, 3, 1, 1, 1, 2, 2, 3, 3, 3, 1, 0, 1, 2, 3, 0,
       3, 2, 0, 2, 3, 0, 2, 1, 0, 0, 3, 2, 1, 0, 0, 0, 0, 0, 3, 1, 0, 1,
       3, 2, 0, 2, 0, 1, 1, 3, 3, 1, 3, 2, 3, 3, 0, 0, 2, 3, 2, 1])

In [81]:
accuracy_score(y_test, y_classes)

1.0

In [82]:
# model.evaluate needs one-hot targets to match the categorical_crossentropy loss;
# with metrics=['accuracy'] the result is [loss, accuracy].
y_test_m = np_utils.to_categorical(y_test)
score= model.evaluate(x_test,y_test_m)



In [83]:
score

[0.00030502786103170365, 1.0]

In [100]:
# Live demo: detect faces in the webcam stream and label each one with the
# emotion predicted by the CNN. Press 'q' in the preview window to quit.
cap = cv2.VideoCapture(0)

# index -> category name, derived from the training label mapping so the two
# tables cannot drift apart
dict_opp = {index: name for name, index in label_dict.items()}

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # no frame could be grabbed (frame is None); nothing to process
            print('Could not read a frame from the camera; stopping')
            break

        try:
            faces = face_cascade.detectMultiScale(frame, minNeighbors=10, minSize=(64,64))

            for (x,y,w,h) in faces:
                # same preprocessing as the training data: crop, grayscale,
                # resize, scale to [0, 1], add batch and channel axes
                sub_face = frame[y:y + h, x:x + w]
                gray = cv2.cvtColor(sub_face, cv2.COLOR_BGR2GRAY)
                resized = cv2.resize(gray, (img_size, img_size))
                normalized = resized/255.0
                reshaped = np.reshape(normalized, (1, img_size, img_size, 1))

                result = model.predict(reshaped)
                y_class = np.argmax(result, axis=1)[0]

                cv2.rectangle(frame,(x,y),(x+w,y+h),(255,0,0),2)
                cv2.putText(frame, dict_opp[y_class], (x, y-10),cv2.FONT_HERSHEY_SIMPLEX,0.8,(255,255,255),2)

        except Exception as e:
            # best effort: a failure on one frame should not end the demo
            print('Exception:',e)

        cv2.imshow('frame',frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # always free the camera and close the preview, even on error or interrupt
    cap.release()
    cv2.destroyAllWindows()

In [87]:
model.save('cnn_model.h5')  # writes the trained CNN to the HDF5 file 'cnn_model.h5'

# Image Data Generator

In [95]:
# Augmenting generator: rescale pixels to [0, 1] plus random shear, zoom and
# horizontal flips
train_datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)

# Stream grayscale face crops from dataset1 with one-hot labels, one class per
# sub-directory
training_set = train_datagen.flow_from_directory(
    'dataset1',
    target_size=(img_size, img_size),
    batch_size=32,
    color_mode='grayscale',
    class_mode='categorical',
)

# fit the model
model.fit_generator(training_set, epochs=10)

Found 1204 images belonging to 4 classes.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.callbacks.History at 0x1c0c6202828>

In [101]:
# Live demo with the CNN after generator-based training: detect faces in the
# webcam stream and label each one with the predicted emotion.
# Press 'q' in the preview window to quit.
cap = cv2.VideoCapture(0)

# class index -> emotion name
dict_opp={0:'Angry',1: 'Neutral',2:'Sad',3:'Smile'}

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # no frame could be grabbed (frame is None); nothing to process
            print('Could not read a frame from the camera; stopping')
            break

        try:
            faces = face_cascade.detectMultiScale(frame, minNeighbors=10, minSize=(64,64))

            for (x,y,w,h) in faces:
                # crop, grayscale, resize, scale to [0, 1], add batch and
                # channel axes to match the model input
                sub_face = frame[y:y + h, x:x + w]
                gray = cv2.cvtColor(sub_face, cv2.COLOR_BGR2GRAY)
                resized = cv2.resize(gray, (img_size, img_size))
                normalized = resized/255.0
                reshaped = np.reshape(normalized, (1, img_size, img_size, 1))

                result = model.predict(reshaped)
                y_class = np.argmax(result, axis=1)[0]

                cv2.rectangle(frame,(x,y),(x+w,y+h),(255,0,0),2)
                cv2.putText(frame, dict_opp[y_class], (x, y-10),cv2.FONT_HERSHEY_SIMPLEX,0.8,(255,255,255),2)

        except Exception as e:
            # best effort: a failure on one frame should not end the demo
            print('Exception:',e)

        cv2.imshow('frame',frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # always free the camera and close the preview, even on error or interrupt
    cap.release()
    cv2.destroyAllWindows()

# VGG

In [109]:
# do not include top layer that was built for imagenet classification
# Load only the ImageNet-pretrained convolutional base; the classifier that was
# built for ImageNet is left out
vgg = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Freeze every pretrained layer so its weights are not updated during training
for layer in vgg.layers:
    layer.trainable = False

In [110]:
vgg.summary()

Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0     

In [111]:
# New classification head on top of the VGG base: flatten the feature maps and
# map them straight to one softmax unit per emotion category.
x = Flatten()(vgg.output)
#x = Dense(128, activation='relu')(x)
prediction = Dense(len(categories), activation='softmax')(x)

# create a model object
vgg_m = Model(inputs=vgg.input, outputs=prediction)

In [112]:
vgg_m.summary()

Model: "model_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0   

In [113]:
# Same optimizer, loss and metric as the plain CNN above
vgg_m.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [114]:
# NOTE: this rebinds the notebook-wide `img_size` from 100 to 224 (the VGG input
# size), so the earlier CNN cells no longer work if re-run after this one.
img_size=224
# Augmenting generator: rescale pixels to [0, 1] plus random shear, zoom and flips
train_datagen = ImageDataGenerator(rescale = 1./255,
                                   shear_range = 0.2,
                                   zoom_range = 0.2,
                                   horizontal_flip = True)


# No color_mode is passed here (unlike the grayscale generator used for the CNN),
# because the VGG base was built with a 3-channel input.
training_set = train_datagen.flow_from_directory('dataset1',
                                                 target_size = (img_size, img_size),
                                                 batch_size = 32,
                                                 class_mode = 'categorical')


# fit the model
vgg_m.fit_generator(
  training_set,
  epochs=5)

Found 1204 images belonging to 4 classes.
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.callbacks.History at 0x1c1012110f0>

# Spotify Songs

In [4]:
# Load the song table (the first CSV column becomes the index); the `result`
# column holds the mood label used for the song lookups below.
songs= pd.read_csv('Spotify_songs.csv',index_col=0)
songs.head()

Unnamed: 0,name,length,danceability,acousticness,energy,instrumentalness,liveness,valence,loudness,speechiness,tempo,result
54,The Odyssey,223424,0.252,0.694,0.291,0.951,0.105,0.146,-16.362,0.0391,78.662,calm
21,Feel It Still,162092,0.799,0.0427,0.797,7.5e-05,0.0907,0.758,-5.157,0.0624,79.088,happy
16,Another One of Those Days,246041,0.603,0.449,0.546,0.0,0.0851,0.375,-9.365,0.0267,134.966,sad
67,"10,000 Hours (with Justin Bieber)",167693,0.654,0.153,0.63,0.0,0.111,0.43,-4.644,0.0259,89.991,happy
31,Million Reasons,205280,0.666,0.494,0.423,0.0,0.106,0.154,-8.012,0.043,129.89,sad


In [5]:
# Name of the first song labelled 'happy': pick the column by label and the row
# by position, rather than relying on the deprecated positional Series[0] lookup
songs.loc[songs['result']=='happy', 'name'].iloc[0]

'Feel It Still'

In [115]:
# Live demo: detect faces in the webcam stream, classify the emotion with the
# VGG-based model and overlay a matching song. Press 'q' in the preview window
# to quit.
cap = cv2.VideoCapture(0)

# class index -> emotion name
dict_opp={0:'Angry',1: 'Neutral',2:'Sad',3:'Smile'}

# emotion name -> mood label looked up in songs['result']
mood_for_emotion = {'Smile': 'happy', 'Sad': 'sad', 'Neutral': 'neutral', 'Angry': 'calm'}

# for demo purpose first song is taken , in actual random songs would be selected
# after the song is played for its duration(minutes)
# Resolved once, outside the frame loop; None when the table has no song for a mood.
song_for_emotion = {}
for emotion, mood in mood_for_emotion.items():
    matching_names = songs.loc[songs['result'] == mood, 'name']
    song_for_emotion[emotion] = matching_names.iloc[0] if len(matching_names) else None

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # no frame could be grabbed (frame is None); nothing to process
            print('Could not read a frame from the camera; stopping')
            break

        try:
            faces = face_cascade.detectMultiScale(frame, minNeighbors=10, minSize=(64,64))

            for (x,y,w,h) in faces:
                sub_face = frame[y:y + h, x:x + w]

                # OpenCV frames are BGR, while the generator that trained vgg_m
                # loads images as RGB; convert so the channel order matches
                rgb_face = cv2.cvtColor(sub_face, cv2.COLOR_BGR2RGB)
                resized = cv2.resize(rgb_face, (img_size, img_size))
                normalized = resized/255.0
                reshaped = np.reshape(normalized, (1, img_size, img_size, 3))

                result = vgg_m.predict(reshaped)
                y_class = np.argmax(result, axis=1)[0]
                emotion = dict_opp[y_class]

                cv2.rectangle(frame,(x,y),(x+w,y+h),(255,0,0),2)
                cv2.putText(frame, emotion, (x, y-10),cv2.FONT_HERSHEY_SIMPLEX,0.8,(255,255,255),2)

                display_txt = song_for_emotion[emotion]
                if display_txt is not None:
                    cv2.putText(frame, display_txt + ' will be played', (x-100, y+200),cv2.FONT_HERSHEY_SIMPLEX,0.5,(255,255,255),2)

        except Exception as e:
            # best effort: a failure on one frame should not end the demo
            print('Exception:',e)

        cv2.imshow('frame',frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # always free the camera and close the preview, even on error or interrupt
    cap.release()
    cv2.destroyAllWindows()