# MAJOR PROJECT ML B1

In [30]:
!pip install PyWavelets



In [31]:
!pip install opencv-python



In [32]:
!pip install seaborn



In [33]:
import numpy as np
import cv2
import matplotlib
from matplotlib import pyplot as plt
%matplotlib inline

In [34]:
face_cascade = cv2.CascadeClassifier('./opencv/haarcascades/haarcascade_frontalface_default.xml')
eye_cascade = cv2.CascadeClassifier('./opencv/haarcascades/haarcascade_eye.xml')

In [35]:
def get_cropped_image_if_2_eyes(image_path):
    img = cv2.imread(image_path)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, 1.05, 3)
    for (x,y,w,h) in faces:
        roi_gray = gray[y:y+h, x:x+w]
        roi_color = img[y:y+h, x:x+w]
        eyes = eye_cascade.detectMultiScale(roi_gray)
        if len(eyes) >= 2:
            return roi_color

In [36]:
path_to_data = "./humandataset/"
path_to_cr_data = "./humandataset/cropped/"

In [37]:
import os
img_dirs = []
for entry in os.scandir(path_to_data):
    if entry.is_dir():
        img_dirs.append(entry.path)

In [38]:
img_dirs

['./humandataset/cropped',
 './humandataset/Indian faces',
 './humandataset/Other country faces']

In [39]:
import shutil
if os.path.exists(path_to_cr_data):
     shutil.rmtree(path_to_cr_data)
os.mkdir(path_to_cr_data)

In [40]:
#cropped folder will be created and in that all the the existing folders will be repeated and will have only the human
#image faces

cropped_image_dirs = []
celebrity_file_names_dict = {}

for img_dir in img_dirs:
    count = 1
    celebrity_name = img_dir.split('/')[-1]
    print(celebrity_name)
    
    celebrity_file_names_dict[celebrity_name] = []
    
    for entry in os.scandir(img_dir):
        roi_color = get_cropped_image_if_2_eyes(entry.path)
        if roi_color is not None:
            cropped_folder = path_to_cr_data + celebrity_name
            if not os.path.exists(cropped_folder):
                os.makedirs(cropped_folder)
                cropped_image_dirs.append(cropped_folder)
                print("Generating cropped images in folder: ",cropped_folder)
                
            cropped_file_name = celebrity_name + str(count) + ".png"
            cropped_file_path = cropped_folder + "/" + cropped_file_name 
            
            cv2.imwrite(cropped_file_path, roi_color)
            celebrity_file_names_dict[celebrity_name].append(cropped_file_path)
            count += 1  

print("Done")

cropped
Indian faces
Generating cropped images in folder:  ./humandataset/cropped/Indian faces
Other country faces
Generating cropped images in folder:  ./humandataset/cropped/Other country faces
Done


In [41]:
import numpy as np
import pywt
import cv2    

def w2d(img, mode='haar', level=1):
    imArray = img
    #Datatype conversions
    #convert to grayscale
    imArray = cv2.cvtColor( imArray,cv2.COLOR_RGB2GRAY )
    #convert to float
    imArray =  np.float32(imArray)   
    imArray /= 255;
    # compute coefficients 
    coeffs=pywt.wavedec2(imArray, mode, level=level)

    #Process Coefficients
    coeffs_H=list(coeffs)  
    coeffs_H[0] *= 0;  

    # reconstruction
    imArray_H=pywt.waverec2(coeffs_H, mode);
    imArray_H *= 255;
    imArray_H =  np.uint8(imArray_H)

    return imArray_H

In [42]:
class_dict = {}
count = 0
if (len(celebrity_file_names_dict)==3):
    celebrity_file_names_dict.pop('cropped')
for celebrity_name in celebrity_file_names_dict.keys():
    class_dict[celebrity_name] = count
    count = count+1
class_dict

{'Indian faces': 0, 'Other country faces': 1}

In [43]:
x, y = [], []
for celebrity_name, training_files in celebrity_file_names_dict.items():
    for training_image in training_files:
        img = cv2.imread(training_image)
        scalled_raw_img = cv2.resize(img, (32, 32))
        img_har = w2d(img,'db1',5)
        scalled_img_har = cv2.resize(img_har, (32, 32))
        combined_img = np.vstack((scalled_raw_img.reshape(32*32*3,1),scalled_img_har.reshape(32*32,1)))
        x.append(combined_img)
        y.append(class_dict[celebrity_name]) 
        
x = np.array(x).reshape(len(x),4096).astype(float)
x

array([[ 23.,  38.,  45., ..., 252., 236., 234.],
       [  6.,   1.,   2., ...,   5.,   2.,   2.],
       [ 83., 107., 134., ..., 216., 215., 254.],
       ...,
       [ 57.,  66.,  75., ...,   3.,   7., 167.],
       [103., 121., 165., ..., 189.,  59.,  37.],
       [191., 181., 193., ...,  38.,  35.,  19.]])

In [44]:
y

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,


In [45]:
#splitting data into training and test dataset
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=0)
x_train

array([[ 70.,  82., 118., ..., 252.,   0.,   0.],
       [ 59., 194., 142., ..., 175.,  29.,  55.],
       [ 49.,  55.,  74., ..., 238., 241., 233.],
       ...,
       [ 63.,  68.,  83., ...,  60., 225.,  38.],
       [ 47.,  43.,  49., ..., 242., 235.,   4.],
       [ 64.,  71.,  96., ...,   0.,   1.,   0.]])

In [62]:
from sklearn.model_selection import GridSearchCV
from sklearn import svm
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report
pipe = Pipeline([('scaler', StandardScaler()), ('svc', SVC(kernel = 'rbf', C = 10))])
pipe.fit(x_train, y_train)
pipe.score(x_test, y_test)

0.7303921568627451

In [47]:
print(classification_report(y_test, pipe.predict(x_test)))

              precision    recall  f1-score   support

           0       0.71      0.83      0.77       109
           1       0.76      0.61      0.68        95

    accuracy                           0.73       204
   macro avg       0.74      0.72      0.72       204
weighted avg       0.74      0.73      0.73       204



In [48]:
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import GridSearchCV

In [49]:
model_params = {
    'svm': {
        'model': svm.SVC(gamma='auto',probability=True),
        'params' : {
            'svc__C': [1,10,100,1000],
            'svc__kernel': ['rbf','linear']
        }  
    },
    'random_forest': {
        'model': RandomForestClassifier(),
        'params' : {
            'randomforestclassifier__n_estimators': [1,5,10]
        }
    },
    'logistic_regression' : {
        'model': LogisticRegression(solver='liblinear',multi_class='auto'),
        'params': {
            'logisticregression__C': [1,5,10]
        }
    }
}

In [50]:
#Should get 6 times done
scores = []
best_estimators = {}
import pandas as pd
for algo, mp in model_params.items():
    pipe = make_pipeline(StandardScaler(), mp['model'])
    clf =  GridSearchCV(pipe, mp['params'], cv=5, return_train_score=False)
    print("Done")
    clf.fit(x_train, y_train)
    print("Done")
    scores.append({
        'model': algo,
        'best_score': clf.best_score_,
        'best_params': clf.best_params_
    })
    best_estimators[algo] = clf.best_estimator_

Done
Done
Done
Done
Done
Done


In [51]:
df = pd.DataFrame(scores,columns=['model','best_score','best_params'])
df

Unnamed: 0,model,best_score,best_params
0,svm,0.745092,"{'svc__C': 100, 'svc__kernel': 'rbf'}"
1,random_forest,0.666609,{'randomforestclassifier__n_estimators': 10}
2,logistic_regression,0.709606,{'logisticregression__C': 10}


In [52]:
y_pred=clf.predict(x_test)
y_pred

array([1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0,
       0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0,
       0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0,
       0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0,
       1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0,
       1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0,
       1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1,
       0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1,
       0, 1, 0, 0, 0, 1])

In [53]:
from sklearn.metrics import accuracy_score,confusion_matrix
accuracy_score(y_pred,y_test)

0.7303921568627451

In [54]:
#saving model using pickle
import pickle
pickle.dump(clf,open('human images model.p','wb'))


In [55]:
model=pickle.load(open('human images model.p','rb'))

In [None]:
import shutil
import os.path
from os import path
import ntpath
import os
from matplotlib import pyplot as plt
flat_data=[]
urls=[]
flag=0
indianfolder="C:/Users/sathwik palakurty/Desktop/Human Faces Major Project B1/model/humandataset/Indian faces"
otherfolder="C:/Users/sathwik palakurty/Desktop/Human Faces Major Project B1/model/humandataset/Other country faces"
key_list=list(class_dict.keys())
val_list=list(class_dict.values())
n=int(input('How many images do you want to insert?'))
for i in range(0,n):
    url=input('Enter your path of your downloaded image->')
    file=ntpath.basename(url)
    img = cv2.imread(url)
    scalled_raw_img = cv2.resize(img, (32, 32))
    img_har = w2d(img,'db1',5)
    scalled_img_har = cv2.resize(img_har, (32, 32))
    combined_img = np.vstack((scalled_raw_img.reshape(32*32*3,1),scalled_img_har.reshape(32*32,1)))
    flat_data.append(combined_img)
    flat_data = np.array(flat_data).reshape(len(flat_data),4096).astype(float)
    y_out=model.predict(flat_data)
    position=val_list.index(y_out[0])
    y_out=key_list[position]
    print(f' PREDICTED OUTPUT FOR {i+1} IMAGE: {y_out}')
    plt.imshow(img)
    if(y_out=='Indian faces'):
        for filename in os.listdir(indianfolder):
            if(filename==file):
                print("This file already exists in indian faces folder")
                flag=1        
        if(flag==0):
            print("Saving image in indian faces folder")
            shutil.move(url,indianfolder)
            print("Done")
    
    if(y_out=="Other country faces"):
        flag=0
        for filename in os.listdir(otherfolder):
            if(filename==file):
                print("This file already exists in other country faces folder")
                flag=1
        if(flag==0):
            print("Saving image in other country faces folder")
            shutil.move(url,otherfolder)
            print("Done")
    flat_data=[]

How many images do you want to insert?4
Enter your path of your downloaded image->C:/Users/sathwik palakurty/Desktop/ziraffe.jpg
 PREDICTED OUTPUT FOR 1 IMAGE: Other country faces
Saving image in other country faces folder
Done
Enter your path of your downloaded image->C:/Users/sathwik palakurty/Desktop/ricky.jpg
 PREDICTED OUTPUT FOR 2 IMAGE: Other country faces
Saving image in other country faces folder
Done
Enter your path of your downloaded image->C:/Users/sathwik palakurty/Desktop/marycom.jpg
 PREDICTED OUTPUT FOR 3 IMAGE: Indian faces
Saving image in indian faces folder
Done
