In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import cv2
import matplotlib
from matplotlib import pyplot as plt
%matplotlib inline

In [2]:
face_cascade = cv2.CascadeClassifier('./opencv/haarcascades/haarcascade_frontalface_default.xml')
eye_cascade = cv2.CascadeClassifier('./opencv/haarcascades/haarcascade_eye.xml')


## Defining a function to crop the given proper image 

In [3]:
def get_cropped_image_if_2_eyes(image_path):
    img = cv2.imread(image_path)
    if img is not None:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        faces = face_cascade.detectMultiScale(gray, 1.3, 5)
        for (x,y,w,h) in faces:
            roi_gray = gray[y:y+h, x:x+w]
            roi_color = img[y:y+h, x:x+w]
            eyes = eye_cascade.detectMultiScale(roi_gray)
            if len(eyes) >= 2:
                return roi_color

## Code to crop all the images and save them

In [496]:
path_to_data = "./dataset/full_images/"
path_to_cr_data = "./dataset/cropped/"

In [497]:
import os
import shutil
if os.path.exists(path_to_cr_data):
     shutil.rmtree(path_to_cr_data)

In [498]:
img_dirs = []
for entry in os.scandir(path_to_data):
    if entry.is_dir():
        img_dirs.append(entry.path)
img_dirs


['./dataset/full_images/other', './dataset/full_images/person']

In [499]:
os.mkdir(path_to_cr_data)

In [8]:
# for img_dir in img_dirs:
#     count = 1
#     individual_name = img_dir.split('/')[-1]
#     print(individual_name)

In [500]:
individual_file_names_dict = {}
for img_dir in img_dirs:
    count = 1
    individual_name = img_dir.split('/')[-1]
    
    individual_file_names_dict[individual_name] = []
    
    cropped_individual_folder = path_to_cr_data + individual_name
    os.mkdir(cropped_individual_folder)
    
    for entry in os.scandir(img_dir):
        roi_color = get_cropped_image_if_2_eyes(entry.path)
        if roi_color is not None:          
            cropped_file_name = "image_cropped" + str(count) + ".png"
            cropped_file_path = cropped_individual_folder + '/' + cropped_file_name 
            
            cv2.imwrite(cropped_file_path, roi_color)
            individual_file_names_dict[individual_name].append(cropped_file_path)
            count += 1    

In [501]:
individual_to_number_dict = {"person" : 1, "other" : 0}

# Data Preprocessing

## Wavelet Transformation to extract features from the cropped images

In [11]:
import pywt

In [12]:
def w2d(img, mode='haar', level=1):
    imArray = img
    #Datatype conversions
    #convert to grayscale
    imArray = cv2.cvtColor( imArray,cv2.COLOR_RGB2GRAY )
    #convert to float
    imArray =  np.float32(imArray)   
    imArray /= 255;
    # compute coefficients 
    coeffs=pywt.wavedec2(imArray, mode, level=level)

    #Process Coefficients
    coeffs_H=list(coeffs)  
    coeffs_H[0] *= 0;  

    # reconstruction
    imArray_H=pywt.waverec2(coeffs_H, mode);
    imArray_H *= 255;
    imArray_H =  np.uint8(imArray_H)

    return imArray_H

## Images in cropped folder can be used for model training. We will use these raw images along with wavelet transformed images to train our classifier. Let's prepare X and y now

In [502]:
X, y = [], []

for cropped_dir in os.scandir(path_to_cr_data):
    individual_name = cropped_dir.path.split('/')[-1]
    
    for training_image in os.scandir(cropped_dir):
        img = cv2.imread(training_image.path)
        scalled_raw_img = cv2.resize(img, (32, 32))
        img_har = w2d(img,'db1',5)
        scalled_img_har = cv2.resize(img_har, (32, 32))
        combined_img = np.vstack((scalled_raw_img.reshape(32*32*3,1),scalled_img_har.reshape(32*32,1)))
        X.append(combined_img)
        y.append(individual_to_number_dict[individual_name]) 


In [503]:
X

[array([[ 12],
        [ 12],
        [ 25],
        ...,
        [131],
        [  7],
        [248]], dtype=uint8),
 array([[201],
        [203],
        [204],
        ...,
        [ 13],
        [ 48],
        [241]], dtype=uint8),
 array([[106],
        [ 82],
        [ 82],
        ...,
        [  7],
        [253],
        [211]], dtype=uint8),
 array([[  2],
        [  2],
        [ 11],
        ...,
        [126],
        [177],
        [151]], dtype=uint8),
 array([[ 34],
        [ 49],
        [ 55],
        ...,
        [ 25],
        [ 38],
        [245]], dtype=uint8),
 array([[145],
        [145],
        [145],
        ...,
        [193],
        [207],
        [197]], dtype=uint8),
 array([[ 22],
        [ 22],
        [ 22],
        ...,
        [ 63],
        [  4],
        [194]], dtype=uint8),
 array([[ 9],
        [ 5],
        [17],
        ...,
        [12],
        [69],
        [35]], dtype=uint8),
 array([[ 56],
        [ 55],
        [ 54],
        ...,
    

In [504]:
y

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,


In [505]:
y_df = pd.DataFrame(y)
y_df.value_counts()

1    840
0    577
dtype: int64

In [506]:
X = np.array(X).reshape(len(X),4096).astype(float)
X.shape

(1417, 4096)

In [18]:
import sklearn
from sklearn.pipeline import make_pipeline
from sklearn.metrics import classification_report
from sklearn.preprocessing import QuantileTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import AdaBoostClassifier
from xgboost import XGBClassifier

In [19]:
model_params = {
    'svm': {
        'model': SVC(gamma='auto',probability=True),
        'params' : {
            'svc__C': [1,10,100,1000],
            'svc__kernel': ['rbf','linear']
        }  
    },
    'random_forest': {
        'model': RandomForestClassifier(random_state=42),
        'params' : {
            'randomforestclassifier__n_estimators': [5,10,20],
            'randomforestclassifier__max_features': ['auto', 'sqrt', 'log2'],
            'randomforestclassifier__bootstrap': [True],
            'randomforestclassifier__min_samples_leaf': [1,2,3, 4, 5],
            'randomforestclassifier__min_samples_split': [3, 4, 6, 8, 10, 12]
        }
    },
    'logistic_regression' : {
        'model': LogisticRegression(multi_class='auto'),
        'params': {
#             'logisticregression__solver': ['newton-cg', 'lbfgs', 'liblinear'],
            'logisticregression__solver': ['liblinear'],
            'logisticregression__C': [1,5,10]
#             'logisticregression__penalty': [None,'l1','l2','elasticnet']
        }
    },
    'kneighborsclassifier': {
        'model': KNeighborsClassifier(),
        'params': {
            'kneighborsclassifier__n_neighbors': list(range(1,30))
#             'kneighborsclassifier__p': [1,2],
#             'kneighborsclassifier__leaf_size': list(range(1,50))
        }
    },
    'adaboostclassifier': {
        'model': AdaBoostClassifier(),
        'params': {
            'adaboostclassifier__n_estimators': [5,10,20]
        }
    },
    'gaussiannb': {
        'model': GaussianNB(),
        'params': {
            'gaussiannb__var_smoothing': np.logspace(0,-9, num=100)
        }
    }

    
}

In [20]:
scores = []
best_estimators = {}

for algo, mp in model_params.items():
    pipe = make_pipeline(QuantileTransformer(n_quantiles=320), mp['model'])
    clf =  GridSearchCV(pipe, mp['params'], cv=5, return_train_score=False)
    clf.fit(X,y)
    scores.append({
        'model': algo,
        'best_score': clf.best_score_,
        'best_params': clf.best_params_
    })
    best_estimators[algo] = clf.best_estimator_
    
df = pd.DataFrame(scores,columns=['model','best_score','best_params'])
df

  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


Unnamed: 0,model,best_score,best_params
0,svm,0.996279,"{'svc__C': 10, 'svc__kernel': 'rbf'}"
1,random_forest,1.0,"{'randomforestclassifier__bootstrap': True, 'r..."
2,logistic_regression,0.988846,"{'logisticregression__C': 1, 'logisticregressi..."
3,kneighborsclassifier,0.987916,{'kneighborsclassifier__n_neighbors': 1}
4,adaboostclassifier,0.971176,{'adaboostclassifier__n_estimators': 20}
5,gaussiannb,0.977683,{'gaussiannb__var_smoothing': 0.15199110829529...


In [21]:
best_estimators

{'svm': Pipeline(steps=[('quantiletransformer', QuantileTransformer(n_quantiles=320)),
                 ('svc', SVC(C=10, gamma='auto', probability=True))]),
 'random_forest': Pipeline(steps=[('quantiletransformer', QuantileTransformer(n_quantiles=320)),
                 ('randomforestclassifier',
                  RandomForestClassifier(max_features='log2',
                                         min_samples_split=8, n_estimators=20,
                                         random_state=42))]),
 'logistic_regression': Pipeline(steps=[('quantiletransformer', QuantileTransformer(n_quantiles=320)),
                 ('logisticregression',
                  LogisticRegression(C=1, solver='liblinear'))]),
 'kneighborsclassifier': Pipeline(steps=[('quantiletransformer', QuantileTransformer(n_quantiles=320)),
                 ('kneighborsclassifier', KNeighborsClassifier(n_neighbors=1))]),
 'adaboostclassifier': Pipeline(steps=[('quantiletransformer', QuantileTransformer(n_quantiles=320)),
 

In [22]:
df['best_params'][1]

{'randomforestclassifier__bootstrap': True,
 'randomforestclassifier__max_features': 'log2',
 'randomforestclassifier__min_samples_leaf': 1,
 'randomforestclassifier__min_samples_split': 8,
 'randomforestclassifier__n_estimators': 20}

In [23]:
clf = best_estimators['random_forest']

In [24]:
clf2 = best_estimators['logistic_regression']

In [25]:
clf2

In [816]:
path_to_data_test = "./images_to_classified/"
path_to_cr_data_test = "./images_to_classified_cr/"

In [817]:
if os.path.exists(path_to_cr_data_test):
    shutil.rmtree(path_to_cr_data_test)
os.mkdir(path_to_cr_data_test)

In [818]:
count = 1    
path2 = "./other_person2/"
for entry in os.scandir(path_to_data_test):
    roi_color = get_cropped_image_if_2_eyes(entry.path)
    if roi_color is not None:          
        cropped_file_name = "image_cropped_test" + str(count) + ".png"
        cropped_file_path = path_to_cr_data_test + cropped_file_name 
            
        cv2.imwrite(cropped_file_path, roi_color)
        count += 1    

In [819]:
X_test, y_test = [], []

path2 = "./person/"
for test_image in os.scandir(path_to_cr_data_test):
    img = cv2.imread(test_image.path)
    scalled_raw_img = cv2.resize(img, (32, 32))
    img_har = w2d(img,'db1',5)
    scalled_img_har = cv2.resize(img_har, (32, 32))
    combined_img = np.vstack((scalled_raw_img.reshape(32*32*3,1),scalled_img_har.reshape(32*32,1)))
    X_test.append(combined_img)
    y_test.append(1) 


In [820]:
X_test = np.array(X_test).reshape(len(X_test),4096).astype(float)
X_test.shape

(111, 4096)

In [31]:
clf.score(X_test,y_test)

0.0

In [32]:
clf.predict(X_test)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [270]:
clf2.score(X_test,y_test)

1.0

In [271]:
clf2.predict(X_test)

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

In [258]:
clf2.predict_proba(X_test)

array([[0.04663687, 0.95336313],
       [0.06009313, 0.93990687],
       [0.14925212, 0.85074788],
       [0.0910443 , 0.9089557 ],
       [0.0910443 , 0.9089557 ],
       [0.07661555, 0.92338445],
       [0.12394792, 0.87605208],
       [0.0555973 , 0.9444027 ],
       [0.0555973 , 0.9444027 ],
       [0.08022762, 0.91977238],
       [0.08022762, 0.91977238],
       [0.0344996 , 0.9655004 ],
       [0.0344996 , 0.9655004 ],
       [0.14158335, 0.85841665],
       [0.06009313, 0.93990687],
       [0.07661555, 0.92338445],
       [0.14158335, 0.85841665],
       [0.06306868, 0.93693132],
       [0.06306868, 0.93693132],
       [0.05650292, 0.94349708],
       [0.05650292, 0.94349708],
       [0.05400509, 0.94599491],
       [0.05400509, 0.94599491],
       [0.02948095, 0.97051905],
       [0.03484364, 0.96515636],
       [0.02948095, 0.97051905],
       [0.13475213, 0.86524787],
       [0.04107514, 0.95892486],
       [0.04107514, 0.95892486],
       [0.04809619, 0.95190381],
       [0.

In [35]:
clf.score(X,y)

1.0

In [36]:
clf2.score(X,y)

1.0

In [37]:
clf3 = best_estimators['adaboostclassifier']

In [38]:
clf3.score(X_test,y_test)

0.06504065040650407

In [39]:
clf3.predict(X_test)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
       0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [40]:
clf5 = best_estimators['kneighborsclassifier']

In [41]:
clf5.score(X_test,y_test)

0.0

In [42]:
clf5.predict(X_test)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

## h

In [43]:
from sklearn.pipeline import Pipeline
pipe_extra = Pipeline([('scaler', QuantileTransformer(n_quantiles=320)), ('randomforest', RandomForestClassifier(random_state=42, bootstrap=True,
 max_features='auto',
 min_samples_leaf= 1,
 min_samples_split=3,
 n_estimators=10))])
pipe_extra.fit(X,y)
pipe_extra.predict(X_test)

  warn(


array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [44]:
model_params2 = {
    'adaboostclassifier' : {
        'model' : AdaBoostClassifier(),
        'params' : {
            'adaboostclassifier__n_estimators' : [100, 200, 300, 400, 500],
            'adaboostclassifier__random_state' : [42]
        }
    },
    'logistic_regression' : {
        'model': LogisticRegression(multi_class='auto'),
        'params': {
#             'logisticregression__solver': ['newton-cg', 'lbfgs', 'liblinear'],
            'logisticregression__solver': ['liblinear'],
            'logisticregression__C': [1,5,10],
            'logisticregression__penalty': ['l1']
        }
    }
}

In [45]:
scores2 = []
best_estimators2 = {}
for algo, mp in model_params2.items():
    pipe4 = make_pipeline(StandardScaler(), mp['model'])
    clf4 =  GridSearchCV(pipe4, mp['params'], cv=10, return_train_score=False)
    clf4.fit(X,y)
    scores2.append({
        'model': algo,
        'best_score': clf4.best_score_,
        'best_params': clf4.best_params_
    })
    best_estimators2[algo] = clf4.best_estimator_
    
df = pd.DataFrame(scores2,columns=['model','best_score','best_params'])
df

Unnamed: 0,model,best_score,best_params
0,adaboostclassifier,1.0,"{'adaboostclassifier__n_estimators': 500, 'ada..."
1,logistic_regression,0.998139,"{'logisticregression__C': 10, 'logisticregress..."


## Getting accuracy with extra test_data

In [46]:
# from sklearn.pipeline import Pipeline
# params2 = {'logisticregression__solver': ['newton-cg', 'lbfgs', 'liblinear'],
#             'logisticregression__C': [1,5,10],
#             'logisticregression__penalty': [None,'l1','l2','elasticnet']}
# pipe2 = Pipeline([('scaler', QuantileTransformer(n_quantiles=320)), ('logisticregression', LogisticRegression())])
# clf2 =  GridSearchCV(pipe2, param_grid=params2, cv=5, return_train_score=False)
# clf2.fit(X,y)
# df_lg = pd.DataFrame({
#     'model': 'logisticregression',
#     'best_score': clf2.best_score_,
#     'best_params': clf2.best_params_
# })
# df_lg

In [47]:
# best_estimator2 = clf2.best_estimator_

In [48]:
# best_estimator2

In [49]:
# test_dir = './test_images/'
# testimages_list = []

# for entry in os.scandir(test_dir):
#     testimages_list.append(entry.path)

In [50]:
# count=1
# path_croppedimgs= []
# for entry in os.scandir(test_dir):
#         roi_color = get_cropped_image_if_2_eyes(entry.path)
#         if roi_color is not None:
#             cropped_folder = test_dir + 'croppedtest_img'
#             if not os.path.exists(cropped_folder):
#                 os.makedirs(cropped_folder)
                
#             cropped_file_name = str(count) + ".png"
#             cropped_file_path = cropped_folder + "/" + cropped_file_name 
            
#             cv2.imwrite(cropped_file_path, roi_color)
#             path_croppedimgs.append(cropped_file_path)
#             count += 1    

In [51]:
# cropped_img2 = []

# for image in os.scandir(cropped_folder):
#     cropped_img2.append(image.path)
    

In [52]:
# from sklearn.metrics import confusion_matrix

# X_test_img = []

# for training_image in cropped_img2:
#         img = cv2.imread(training_image)
#         scalled_raw_img = cv2.resize(img, (32, 32))
#         img_har = w2d(img,'db1',5)
#         scalled_img_har = cv2.resize(img_har, (32, 32))
#         combined_img = np.vstack((scalled_raw_img.reshape(32*32*3,1),scalled_img_har.reshape(32*32,1)))
#         X_test_img.append(combined_img)
        
# X_test_img = np.array(X_test_img).reshape(len(cropped_img2),4096).astype(float)


In [53]:
# X_test_img[0]

In [54]:
# class_dict

In [55]:
# y_test_img = np.array([2,2,2,2,2,2,5,5,5,5,5,3,3,3,0,3,3,3,3,6,6,6,6,6,6,1,1,1,1,1,1,1,1,1,0,0,2,2,2,2,2,2,2,2,2,6,6])

In [56]:
# len(y_test_img)

In [57]:
# y_test_img

In [58]:
# res_extra = best_estimator2.predict(X_test_img)

In [59]:
# res_extra

In [60]:
# best_estimator2.score(X_test_img,y_test_img)

In [61]:
# fpr, tpr, thresholds = metrics.roc_curve(y_test_img, res_extra, pos_label=2)
# metrics.auc(fpr, tpr)

## Getting accuracy with train-test split 

In [62]:
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline

In [63]:
X_train_split, X_test_split, y_train_split, y_test_split = train_test_split(X, y, random_state=0)

lnreg = LogisticRegression(C=1, penalty='l1', solver='liblinear', random_state=42)
pipe_tt = Pipeline([('scaler', StandardScaler()), ('logisticregression', lnreg)])
pipe_tt.fit(X_train_split, y_train_split)

In [281]:
pipe_tt.score(X_test_split, y_test_split)

0.9962962962962963

In [282]:
pipe_tt.predict(X_test_split)

array([0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0,
       1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1,
       1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
       1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1,
       0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1,
       0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
       1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
       1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0,
       1, 1, 1, 1, 1, 1])

In [283]:
pipe_tt.score(X_test, y_test)

1.0

In [284]:
pipe_tt.predict(X_test)

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

In [507]:
X_train_split2, X_test_split2, y_train_split2, y_test_split2 = train_test_split(X, y, random_state=42)

xgbst = XGBClassifier(max_depth=3, min_child_weight=3, scale_pos_weight=3)
pipe_tt_xgb = Pipeline([('scaler', StandardScaler()), ('xgbclassifier', xgbst)])
pipe_tt_xgb.fit(X_train_split, y_train_split)

In [508]:
pipe_tt_xgb.score(X_test_split2, y_test_split2)

0.9549295774647887

In [808]:
pipe_tt_xgb.score(X_test, y_test)

0.9743589743589743

In [809]:
pipe_tt_xgb.predict(X_test)

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1])

In [810]:
pred_prob1 =pipe_tt_xgb.predict_proba(X_test)

In [811]:
pred_prob1

array([[0.04162866, 0.95837134],
       [0.07014704, 0.92985296],
       [0.04849869, 0.9515013 ],
       [0.04849869, 0.9515013 ],
       [0.04849869, 0.9515013 ],
       [0.04849869, 0.9515013 ],
       [0.04849869, 0.9515013 ],
       [0.04849869, 0.9515013 ],
       [0.03948158, 0.9605184 ],
       [0.03948158, 0.9605184 ],
       [0.07352239, 0.9264776 ],
       [0.07352239, 0.9264776 ],
       [0.07014704, 0.92985296],
       [0.11275601, 0.887244  ],
       [0.11275601, 0.887244  ],
       [0.10089684, 0.89910316],
       [0.10089684, 0.89910316],
       [0.04849869, 0.9515013 ],
       [0.04849869, 0.9515013 ],
       [0.04849869, 0.9515013 ],
       [0.04849869, 0.9515013 ],
       [0.05103594, 0.94896406],
       [0.05103594, 0.94896406],
       [0.02180082, 0.9781992 ],
       [0.03948158, 0.9605184 ],
       [0.03948158, 0.9605184 ],
       [0.07352239, 0.9264776 ],
       [0.07352239, 0.9264776 ],
       [0.22851634, 0.77148366],
       [0.22851634, 0.77148366],
       [0.

In [812]:
arr_person_prob = pred_prob1[:,(1)]
arr_person_prob

array([0.95837134, 0.92985296, 0.9515013 , 0.9515013 , 0.9515013 ,
       0.9515013 , 0.9515013 , 0.9515013 , 0.9605184 , 0.9605184 ,
       0.9264776 , 0.9264776 , 0.92985296, 0.887244  , 0.887244  ,
       0.89910316, 0.89910316, 0.9515013 , 0.9515013 , 0.9515013 ,
       0.9515013 , 0.94896406, 0.94896406, 0.9781992 , 0.9605184 ,
       0.9605184 , 0.9264776 , 0.9264776 , 0.77148366, 0.77148366,
       0.89098066, 0.89098066, 0.9264776 , 0.9264776 , 0.9781992 ,
       0.9245334 , 0.9245334 , 0.9264776 , 0.9264776 , 0.887244  ,
       0.887244  , 0.92342657, 0.92342657, 0.887244  , 0.887244  ,
       0.9681616 , 0.9264776 , 0.9264776 , 0.9264776 , 0.9264776 ,
       0.9264776 , 0.9264776 , 0.79040277, 0.79040277, 0.9264776 ,
       0.9264776 , 0.9681616 , 0.94281596, 0.94281596, 0.9264776 ,
       0.9264776 , 0.878213  , 0.878213  , 0.94281596, 0.94246376,
       0.94246376, 0.82874393, 0.82874393, 0.9211957 , 0.88551784,
       0.88551784, 0.9362694 , 0.9362694 , 0.93625206, 0.93625

In [813]:
result_array1 = []
for i in arr_person_prob:
    if i>0.70:
        result_array1.append(1)
    else:
        result_array1.append(0)

In [814]:
result_array1

[1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1]

In [815]:
avg = sum(result_array1)/sum(y_test)
avg

0.9487179487179487

In [77]:
# from sklearn import metrics

In [79]:
# auc_score1 = metrics.roc_auc_score(y_test, pred_prob1[:,1])
# print(auc_score1)

In [None]:
# fpr, tpr, thresholds = metrics.roc_curve(y_test, res_tt, pos_label=2)
# metrics.auc(fpr, tpr)

## Save the trained model

In [824]:
!pip install joblib
import joblib 
# Save the model as a pickle in a file 
joblib.dump(pipe_tt_xgb, 'saved_model.pkl') 



['saved_model.pkl']

## Save class dictionary

In [825]:
import json
with open("class_dictionary.json","w") as f:
    f.write(json.dumps(individual_to_number_dict))