In [2]:
import numpy as np
import cv2
import pandas as pd
import matplotlib.pyplot as plt
from zipfile import ZipFile
from PIL import Image
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing import image
import collections
from google.colab.patches import cv2_imshow
import glob
from skimage.transform import resize


Extracting the train and test datasets

In [3]:
TrainFile = "/content/data.zip"

with ZipFile(TrainFile, 'r') as zip:
	zip.printdir()
	print('Extracting all the files now...')
	zip.extractall()
	print('Done!')

File Name                                             Modified             Size
data/farida/                                   2023-05-04 16:55:14            0
data/farida/10.jpeg                            2023-04-29 17:32:36       154769
data/farida/8.jpeg                             2023-04-29 17:32:44       191340
data/farida/WhatsApp Image 2023-05-04 at 3.35.40 PM.jpeg 2023-05-04 15:38:24        67007
data/farida/WhatsApp Image 2023-05-04 at 3.41.09 PM.jpeg 2023-05-04 15:41:30       243209
data/jumana/                                   2023-05-04 15:38:06            0
data/jumana/5.jpeg                             2023-04-29 17:34:02        91490
data/jumana/6.jpeg                             2023-04-29 17:34:02        72694
data/jumana/7.jpeg                             2023-04-29 17:34:02        82340
data/jumana/WhatsApp Image 2022-02-18 at 5.57.32 PM.jpeg 2022-02-18 19:03:42       160873
data/jumana/WhatsApp Image 2023-01-26 at 3.10.28 AM.jpeg 2023-01-26 04:10:48       145507


Haar Cascade function: takes an image finds the faces and returns them cropped 


In [4]:
def haarCascade(img):
 face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
 eye_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_eye.xml')
 gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
 faces = face_cascade.detectMultiScale(gray, 1.3, 5,minSize=(30, 30))
 
 for (x,y,w,h) in faces:
    img = cv2.rectangle(img,(x,y),(x+w,y+h),(0,255,0),2)
    roi_color = img[y:y+h, x:x+w]
    newimg=np.array(roi_color)
    return newimg


In [5]:
path = "/content/data/*/*"
arr=[]
target_face_size=(388,388)

for file in glob.glob(path):
  if('jumana' in file):
    mytuple=(resize(haarCascade(cv2.imread(file)), target_face_size),'jumana')
    arr.append(mytuple)
  elif('farida' in file):
    mytuple=(resize(haarCascade(cv2.imread(file)), target_face_size),'farida')
    arr.append(mytuple)
  elif('maher'in file):
    mytuple=(resize(haarCascade(cv2.imread(file)),target_face_size),'maher')
    arr.append(mytuple)
  elif('khaled'in file):
    mytuple=(resize(haarCascade(cv2.imread(file)),target_face_size),'khaled')
    arr.append(mytuple)
  elif('kroush'in file):
    mytuple=(resize(haarCascade(cv2.imread(file)),target_face_size),'kroush')
    arr.append(mytuple)

   
df = pd.DataFrame(arr,columns=['Image', 'label'])
df

Unnamed: 0,Image,label
0,"[[[0.0, 1.0, 0.0], [0.0, 1.0, 0.0], [0.0, 1.0,...",farida
1,"[[[1.4157049750033398e-06, 0.9999993882756281,...",farida
2,"[[[2.014554801044556e-06, 0.9999926003852498, ...",farida
3,"[[[0.0, 1.0, 0.0], [0.0, 1.0, 0.0], [0.0, 1.0,...",farida
4,"[[[0.0, 1.0, 0.0], [0.0, 1.0, 0.0], [0.0, 1.0,...",jumana
5,"[[[0.0, 1.0, 0.0], [0.0, 1.0, 0.0], [0.0, 1.0,...",jumana
6,"[[[9.404875252516209e-25, 1.0, 1.4587153452882...",jumana
7,"[[[8.455598692295072e-24, 0.9999999999999998, ...",jumana
8,"[[[3.645986932191094e-12, 0.9999999999926645, ...",jumana
9,"[[[0.0, 0.9999999999999998, 0.0], [0.0, 0.9999...",kroush


In [6]:
for i in range(18):
 print(df.Image[i].shape)

(388, 388, 3)
(388, 388, 3)
(388, 388, 3)
(388, 388, 3)
(388, 388, 3)
(388, 388, 3)
(388, 388, 3)
(388, 388, 3)
(388, 388, 3)
(388, 388, 3)
(388, 388, 3)
(388, 388, 3)
(388, 388, 3)
(388, 388, 3)
(388, 388, 3)
(388, 388, 3)
(388, 388, 3)
(388, 388, 3)


orientations:This parameter determines the number of histogram channels in the HOG feature vector.

pixels_per_cell: The size of the cell over which to calculate the histograms and determines the size of the spatial binning

cells_per_block: The number of cells to include in each block of the HOG feature vector.

block_norm:The default is 'L2-Hys' which applies L2 normalization followed by a square-root operation.

In [10]:
import numpy as np
from skimage import feature
from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split

images = df['Image']
labels = df['label']


features = []
for i in range(len(images)):
    # Extracting the HOG features from the images
    hog = feature.hog(images[i], orientations=9, pixels_per_cell=(8, 8), cells_per_block=(2, 2), block_norm='L2-Hys', channel_axis=2)
    features.append(hog)
features = np.array(features)

X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

#SVM model
SVM = LinearSVC(random_state=42)
SVM.fit(X_train, y_train)

# Test SVM classifier
accuracy = SVM.score(X_test, y_test)
print("Accuracy: ",accuracy * 100)

Accuracy:  50.0


*we try predicting the labels of all images of the df and compare to actual labels*

In [11]:
for i in range(19):
  img = df.Image[i]
  hog = feature.hog(img, orientations=9, pixels_per_cell=(8, 8), cells_per_block=(2, 2), block_norm='L2-Hys', channel_axis=2)
  hog = hog.reshape(1, -1)
  label = SVM.predict(hog)
  print("Predicted label: ",label)


Predicted label:  ['jumana']
Predicted label:  ['farida']
Predicted label:  ['farida']
Predicted label:  ['farida']
Predicted label:  ['jumana']
Predicted label:  ['jumana']
Predicted label:  ['jumana']
Predicted label:  ['jumana']
Predicted label:  ['jumana']
Predicted label:  ['kroush']
Predicted label:  ['kroush']
Predicted label:  ['khaled']
Predicted label:  ['khaled']
Predicted label:  ['khaled']
Predicted label:  ['khaled']
Predicted label:  ['khaled']
Predicted label:  ['maher']
Predicted label:  ['maher']
Predicted label:  ['maher']
