In [25]:
import numpy as np
import pandas as pd
import seaborn as sns
import cv2
import matplotlib
from matplotlib import pyplot as plt
%matplotlib inline

In [39]:
face_cascade = cv2.CascadeClassifier('./opencv/haarcascades/haarcascade_frontalface_default.xml')
eye_cascade = cv2.CascadeClassifier('./opencv/haarcascades/haarcascade_eye.xml')


array([[ 48, 107, 188, 188]])

## Defining a function to crop the given proper image 

In [44]:
def get_cropped_image_if_2_eyes(image_path):
    img = cv2.imread(image_path)
#     print(img.shape)
    if img is not None:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        faces = face_cascade.detectMultiScale(gray, 1.3, 5)
        for (x,y,w,h) in faces:
            roi_gray = gray[y:y+h, x:x+w]
            roi_color = img[y:y+h, x:x+w]
            eyes = eye_cascade.detectMultiScale(roi_gray)
            if len(eyes) >= 2:
                return roi_color

In [48]:
cropped_img = get_cropped_image_if_2_eyes('./test_images/amitabh_testimage.jpg')
if cropped_img is not None:
    plt.imshow(cropped_img, cmap='gray')


## Code to crop all the images and save them

In [49]:
path_to_data = "./dataset/"
path_to_cr_data = "./dataset/cropped/"

In [50]:
import os
import shutil
if os.path.exists(path_to_cr_data):
     shutil.rmtree(path_to_cr_data)

In [51]:
img_dirs = []
for entry in os.scandir(path_to_data):
    if entry.is_dir():
        img_dirs.append(entry.path)
img_dirs


['./dataset/amitabh_bachchan',
 './dataset/hrithik_roshan',
 './dataset/kangana_ranaut',
 './dataset/kareena_kapoor',
 './dataset/kim_kardashian',
 './dataset/sharukh_khan',
 './dataset/varun_dhawan']

In [52]:
if os.path.exists(path_to_cr_data):
     shutil.rmtree(path_to_cr_data)
os.mkdir(path_to_cr_data)


In [53]:
cropped_image_dirs = []
celebrity_file_names_dict = {}

for img_dir in img_dirs:
    count = 1
    celebrity_name = img_dir.split('/')[-1]
    print(celebrity_name)
    
    celebrity_file_names_dict[celebrity_name] = []
    
    for entry in os.scandir(img_dir):
        roi_color = get_cropped_image_if_2_eyes(entry.path)
        if roi_color is not None:
            cropped_folder = path_to_cr_data + celebrity_name
            if not os.path.exists(cropped_folder):
                os.makedirs(cropped_folder)
                cropped_image_dirs.append(cropped_folder)
                
            cropped_file_name = celebrity_name + str(count) + ".png"
            cropped_file_path = cropped_folder + "/" + cropped_file_name 
            
            cv2.imwrite(cropped_file_path, roi_color)
            celebrity_file_names_dict[celebrity_name].append(cropped_file_path)
            count += 1    

amitabh_bachchan
hrithik_roshan
kangana_ranaut
kareena_kapoor
kim_kardashian
sharukh_khan
varun_dhawan


In [54]:
cropped_image_dirs

['./dataset/cropped/amitabh_bachchan',
 './dataset/cropped/hrithik_roshan',
 './dataset/cropped/kangana_ranaut',
 './dataset/cropped/kareena_kapoor',
 './dataset/cropped/kim_kardashian',
 './dataset/cropped/sharukh_khan',
 './dataset/cropped/varun_dhawan']

## Manually delete the face images which are not of that celebrity

# Data Preprocessing

## Wavelet Transformation to extract features from the cropped images

In [58]:
import pywt

In [59]:
def w2d(img, mode='haar', level=1):
    imArray = img
    #Datatype conversions
    #convert to grayscale
    imArray = cv2.cvtColor( imArray,cv2.COLOR_RGB2GRAY )
    #convert to float
    imArray =  np.float32(imArray)   
    imArray /= 255;
    # compute coefficients 
    coeffs=pywt.wavedec2(imArray, mode, level=level)

    #Process Coefficients
    coeffs_H=list(coeffs)  
    coeffs_H[0] *= 0;  

    # reconstruction
    imArray_H=pywt.waverec2(coeffs_H, mode);
    imArray_H *= 255;
    imArray_H =  np.uint8(imArray_H)

    return imArray_H

In [63]:
celebrity_file_names_dict = {}
for img_dir in cropped_image_dirs:
    celebrity_name = img_dir.split('/')[-1]
    file_list = []
    for entry in os.scandir(img_dir):
        file_list.append(entry.path)
    celebrity_file_names_dict[celebrity_name] = file_list

## Images in cropped folder can be used for model training. We will use these raw images along with wavelet transformed images to train our classifier. Let's prepare X and y now

In [64]:
class_dict = {}
count = 0
for celebrity_name in celebrity_file_names_dict.keys():
    class_dict[celebrity_name] = count
    count = count + 1
class_dict

{'amitabh_bachchan': 0,
 'hrithik_roshan': 1,
 'kangana_ranaut': 2,
 'kareena_kapoor': 3,
 'kim_kardashian': 4,
 'sharukh_khan': 5,
 'varun_dhawan': 6}

In [65]:
X, y = [], []

for celebrity_name, training_files in celebrity_file_names_dict.items():
    for training_image in training_files:
        img = cv2.imread(training_image)
        scalled_raw_img = cv2.resize(img, (32, 32))
        img_har = w2d(img,'db1',5)
        scalled_img_har = cv2.resize(img_har, (32, 32))
        combined_img = np.vstack((scalled_raw_img.reshape(32*32*3,1),scalled_img_har.reshape(32*32,1)))
        X.append(combined_img)
        y.append(class_dict[celebrity_name]) 


In [66]:
X

[array([[255],
        [255],
        [255],
        ...,
        [  0],
        [  0],
        [  0]], dtype=uint8),
 array([[255],
        [255],
        [255],
        ...,
        [236],
        [221],
        [ 69]], dtype=uint8),
 array([[120],
        [ 49],
        [ 59],
        ...,
        [  0],
        [  2],
        [  1]], dtype=uint8),
 array([[121],
        [161],
        [156],
        ...,
        [252],
        [247],
        [244]], dtype=uint8),
 array([[38],
        [34],
        [65],
        ...,
        [ 0],
        [ 0],
        [ 0]], dtype=uint8),
 array([[164],
        [179],
        [195],
        ...,
        [231],
        [  3],
        [ 16]], dtype=uint8),
 array([[163],
        [167],
        [178],
        ...,
        [254],
        [ 11],
        [253]], dtype=uint8),
 array([[ 16],
        [ 14],
        [ 12],
        ...,
        [255],
        [254],
        [253]], dtype=uint8),
 array([[152],
        [137],
        [135],
        ...,
    

In [67]:
y

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,


In [68]:
df = pd.DataFrame(y)

In [69]:
df.value_counts()

4    74
3    71
2    68
6    55
1    53
5    47
0    24
dtype: int64

In [70]:
X = np.array(X).reshape(len(X),4096).astype(float)
X.shape

(392, 4096)

In [71]:
import sklearn
from sklearn.pipeline import make_pipeline
from sklearn.metrics import classification_report
from sklearn.preprocessing import QuantileTransformer
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import AdaBoostClassifier

In [72]:
model_params = {
    'svm': {
        'model': SVC(gamma='auto',probability=True),
        'params' : {
            'svc__C': [1,10,100,1000],
            'svc__kernel': ['rbf','linear']
        }  
    },
    'random_forest': {
        'model': RandomForestClassifier(random_state=42),
        'params' : {
            'randomforestclassifier__n_estimators': [5,10,20],
            'randomforestclassifier__max_features': ['auto', 'sqrt', 'log2'],
            'randomforestclassifier__bootstrap': [True],
            'randomforestclassifier__min_samples_leaf': [1,2,3, 4, 5],
            'randomforestclassifier__min_samples_split': [3, 4, 6, 8, 10, 12]
        }
    },
    'logistic_regression' : {
        'model': LogisticRegression(multi_class='auto'),
        'params': {
#             'logisticregression__solver': ['newton-cg', 'lbfgs', 'liblinear'],
            'logisticregression__solver': ['liblinear'],
            'logisticregression__C': [1,5,10]
#             'logisticregression__penalty': [None,'l1','l2','elasticnet']
        }
    },
    'kneighborsclassifier': {
        'model': KNeighborsClassifier(),
        'params': {
            'kneighborsclassifier__n_neighbors': list(range(1,30))
#             'kneighborsclassifier__p': [1,2],
#             'kneighborsclassifier__leaf_size': list(range(1,50))
        }
    },
    'adaboostclassifier': {
        'model': AdaBoostClassifier(),
        'params': {
            'adaboostclassifier__n_estimators': [5,10,20]
        }
    },
    'gaussiannb': {
        'model': GaussianNB(),
        'params': {
            'gaussiannb__var_smoothing': np.logspace(0,-9, num=100)
        }
    }

    
}

In [125]:
scores = []
best_estimators = {}

for algo, mp in model_params.items():
    pipe = make_pipeline(QuantileTransformer(n_quantiles=320), mp['model'])
    clf =  GridSearchCV(pipe, mp['params'], cv=5, return_train_score=False)
    clf.fit(X,y)
    scores.append({
        'model': algo,
        'best_score': clf.best_score_,
        'best_params': clf.best_params_
    })
    best_estimators[algo] = clf.best_estimator_
    
df = pd.DataFrame(scores,columns=['model','best_score','best_params'])
df

  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


Unnamed: 0,model,best_score,best_params
0,svm,0.705902,"{'svc__C': 100, 'svc__kernel': 'rbf'}"
1,random_forest,0.551762,"{'randomforestclassifier__bootstrap': True, 'r..."
2,logistic_regression,0.735321,"{'logisticregression__C': 5, 'logisticregressi..."
3,kneighborsclassifier,0.480699,{'kneighborsclassifier__n_neighbors': 1}
4,adaboostclassifier,0.26224,{'adaboostclassifier__n_estimators': 20}
5,gaussiannb,0.45146,{'gaussiannb__var_smoothing': 0.0657933224657568}


In [126]:
best_estimators

{'svm': Pipeline(steps=[('quantiletransformer', QuantileTransformer(n_quantiles=320)),
                 ('svc', SVC(C=100, gamma='auto', probability=True))]),
 'random_forest': Pipeline(steps=[('quantiletransformer', QuantileTransformer(n_quantiles=320)),
                 ('randomforestclassifier',
                  RandomForestClassifier(max_features='auto',
                                         min_samples_split=6, n_estimators=20,
                                         random_state=42))]),
 'logistic_regression': Pipeline(steps=[('quantiletransformer', QuantileTransformer(n_quantiles=320)),
                 ('logisticregression',
                  LogisticRegression(C=5, solver='liblinear'))]),
 'kneighborsclassifier': Pipeline(steps=[('quantiletransformer', QuantileTransformer(n_quantiles=320)),
                 ('kneighborsclassifier', KNeighborsClassifier(n_neighbors=1))]),
 'adaboostclassifier': Pipeline(steps=[('quantiletransformer', QuantileTransformer(n_quantiles=320)),


In [130]:
df['best_params'][2]

{'logisticregression__C': 5, 'logisticregression__solver': 'liblinear'}

## Getting accuracy with extra test_data

In [120]:
from sklearn.pipeline import Pipeline
params2 = {'logisticregression__solver': ['newton-cg', 'lbfgs', 'liblinear'],
            'logisticregression__C': [1,5,10],
            'logisticregression__penalty': [None,'l1','l2','elasticnet']}
pipe2 = Pipeline([('scaler', QuantileTransformer(n_quantiles=320)), ('logisticregression', LogisticRegression())])
clf2 =  GridSearchCV(pipe2, param_grid=params2, cv=5, return_train_score=False)
clf2.fit(X,y)
df_lg = pd.DataFrame({
    'model': 'logisticregression',
    'best_score': clf2.best_score_,
    'best_params': clf2.best_params_
})
df_lg

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

120 fits failed out of a total of 180.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
45 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Python3105\lib\site-packages\sklearn\model_selection\_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Python3105\lib\site-packages\sklearn\pipeline.py", line 382, in fit
    self._final_estimator.fit(Xt, y, **fit_params_last_step)
  File "C:\Python3105\lib\site-packages\sklearn\linear_model\_logistic.py", line 1091, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\Python3105\lib\site-packages\sklearn\linear_model\_logistic.py", line 55, in _check_solver
    raise ValueError(
Va

Unnamed: 0,model,best_score,best_params
logisticregression__C,logisticregression,0.727361,5
logisticregression__penalty,logisticregression,0.727361,l2
logisticregression__solver,logisticregression,0.727361,lbfgs


In [121]:
best_estimator2 = clf2.best_estimator_

In [122]:
best_estimator2

In [77]:
test_dir = './test_images/'
testimages_list = []

for entry in os.scandir(test_dir):
    testimages_list.append(entry.path)

In [78]:
count=1
path_croppedimgs= []
for entry in os.scandir(test_dir):
        roi_color = get_cropped_image_if_2_eyes(entry.path)
        if roi_color is not None:
            cropped_folder = test_dir + 'croppedtest_img'
            if not os.path.exists(cropped_folder):
                os.makedirs(cropped_folder)
                
            cropped_file_name = str(count) + ".png"
            cropped_file_path = cropped_folder + "/" + cropped_file_name 
            
            cv2.imwrite(cropped_file_path, roi_color)
            path_croppedimgs.append(cropped_file_path)
            count += 1    

In [79]:
cropped_img2 = []

for image in os.scandir(cropped_folder):
    cropped_img2.append(image.path)
    

In [80]:
from sklearn.metrics import confusion_matrix

X_test_img = []

for training_image in cropped_img2:
        img = cv2.imread(training_image)
        scalled_raw_img = cv2.resize(img, (32, 32))
        img_har = w2d(img,'db1',5)
        scalled_img_har = cv2.resize(img_har, (32, 32))
        combined_img = np.vstack((scalled_raw_img.reshape(32*32*3,1),scalled_img_har.reshape(32*32,1)))
        X_test_img.append(combined_img)
        
X_test_img = np.array(X_test_img).reshape(len(cropped_img2),4096).astype(float)


In [81]:
X_test_img[0]

array([ 52.,  59., 122., ...,  46.,  50.,  49.])

In [82]:
class_dict

{'amitabh_bachchan': 0,
 'hrithik_roshan': 1,
 'kangana_ranaut': 2,
 'kareena_kapoor': 3,
 'kim_kardashian': 4,
 'sharukh_khan': 5,
 'varun_dhawan': 6}

In [83]:
y_test_img = np.array([2,2,2,2,2,2,5,5,5,5,5,3,3,3,0,3,3,3,3,6,6,6,6,6,6,1,1,1,1,1,1,1,1,1,0,0,2,2,2,2,2,2,2,2,2,6,6])

In [84]:
len(y_test_img)

47

In [85]:
y_test_img

array([2, 2, 2, 2, 2, 2, 5, 5, 5, 5, 5, 3, 3, 3, 0, 3, 3, 3, 3, 6, 6, 6,
       6, 6, 6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 6, 6])

In [123]:
res_extra = best_estimator2.predict(X_test_img)

In [124]:
res_extra

array([2, 5, 5, 4, 3, 3, 4, 5, 3, 3, 3, 2, 3, 3, 4, 1, 6, 1, 6, 1, 1, 2,
       3, 1, 3, 1, 1, 2, 1, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 6, 2,
       2, 5, 2])

In [125]:
best_estimator2.score(X_test_img,y_test_img)

0.2978723404255319

In [88]:
fpr, tpr, thresholds = metrics.roc_curve(y_test_img, res_extra, pos_label=2)
metrics.auc(fpr, tpr)

NameError: name 'metrics' is not defined

## Getting accuracy with train-test split 

In [92]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [93]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

lnreg = LogisticRegression(C=5, penalty='l2', solver='lbfgs', random_state=42)
pipe_tt = Pipeline([('scaler', StandardScaler()), ('logisticregression', lnreg)])
pipe_tt.fit(X_train, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [94]:
res_tt = pipe_tt.predict(X_test)

In [95]:
pipe_tt.score(X_test, y_test)

0.7551020408163265

In [96]:
np.array(y_test)

array([2, 4, 1, 6, 5, 4, 5, 5, 2, 2, 1, 3, 4, 2, 4, 6, 4, 1, 2, 5, 3, 2,
       1, 6, 5, 1, 5, 4, 3, 1, 2, 6, 1, 2, 3, 5, 1, 2, 3, 0, 0, 5, 5, 2,
       4, 4, 6, 2, 4, 2, 3, 0, 1, 1, 5, 6, 6, 3, 3, 1, 0, 3, 4, 0, 2, 3,
       0, 6, 0, 5, 4, 4, 4, 4, 2, 1, 1, 4, 3, 4, 4, 5, 3, 1, 0, 5, 3, 6,
       5, 4, 4, 1, 4, 3, 6, 2, 6, 3])

In [107]:
res_tt

array([2, 4, 1, 6, 5, 4, 1, 6, 2, 2, 1, 3, 1, 2, 4, 6, 2, 1, 2, 5, 3, 2,
       1, 1, 5, 1, 3, 4, 3, 1, 3, 6, 1, 4, 4, 5, 1, 2, 3, 0, 0, 5, 5, 2,
       2, 4, 1, 2, 4, 3, 3, 0, 1, 1, 5, 6, 1, 3, 3, 1, 0, 2, 3, 3, 4, 3,
       5, 6, 0, 1, 4, 4, 4, 2, 2, 1, 1, 4, 3, 4, 4, 5, 4, 6, 0, 5, 3, 6,
       3, 4, 4, 1, 4, 3, 6, 2, 1, 3])

In [112]:
res_tt2 = pipe_tt.predict(X_test_img)

In [113]:
pipe_tt.score(X_test_img, y_test_img)

0.2553191489361702

In [350]:
fpr, tpr, thresholds = metrics.roc_curve(y_test, res_tt, pos_label=2)
metrics.auc(fpr, tpr)

0.29216269841269843

## Save the trained model

In [126]:
!pip install joblib
import joblib 
# Save the model as a pickle in a file 
joblib.dump(best_estimator2, 'saved_model.pkl') 




[notice] A new release of pip available: 22.2.1 -> 22.2.2
[notice] To update, run: python.exe -m pip install --upgrade pip


['saved_model.pkl']

## Save class dictionary

In [127]:
import json
with open("class_dictionary.json","w") as f:
    f.write(json.dumps(class_dict))