**Implementing a Neural Net**

In [None]:
import pandas as pd
from sklearn.neural_network import MLPClassifier

from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.feature_extraction import DictVectorizer

from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
import numpy as np

In [None]:
from google.colab import files
uploaded = files.upload()


Saving to_neural_net.csv to to_neural_net.csv


In [None]:
import io
face_recog = pd.read_csv(io.BytesIO(uploaded['to_neural_net.csv']))

In [None]:
face_recog['age_bins'] = pd.cut(x=face_recog['age'], 
                                bins=[0, 9, 29, 39, 49, 59, 69, 79, 89, 99, 130])
face_recog['age_encoded'] = pd.cut(x=face_recog['age'], 
                                bins=[0, 9, 29, 39, 49, 59, 69, 79, 89, 99, 130], 
                                labels = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
face_recog.head()

Unnamed: 0,age,ethnicity,gender,path,landmarks,nose2lip,lip2chin,len_between_eyes,left_eyelid_len,right_eyelid_len,nose_width,size_of_mouth,nose_len,bottom_lip_width,left_eye_size,right_eye_size,age_bins,age_encoded
0,1,Asian,Male,images\img0.jpg,"[[-7, 51], [-5, 81], [-1, 110], [5, 136], [18,...",0.127843,0.172767,0.237232,0.187906,0.179991,0.153473,0.274431,0.283629,0.060333,0.145301,0.109268,"(0, 9]",0
1,1,Asian,Male,images\img1.jpg,"[[6, 65], [10, 93], [17, 119], [24, 144], [35,...",0.11551,0.14611,0.275589,0.212916,0.20171,0.155617,0.248926,0.263344,0.078643,0.122885,0.08479,"(0, 9]",0
2,1,Asian,Male,images\img2.jpg,"[[11, 75], [12, 102], [17, 129], [23, 153], [3...",0.09378,0.21422,0.256911,0.193784,0.204183,0.151439,0.279955,0.26479,0.07723,0.121487,0.094426,"(0, 9]",0
3,1,Asian,Male,images\img3.jpg,"[[-6, 78], [-4, 105], [-1, 131], [3, 156], [13...",0.087586,0.22128,0.266966,0.17097,0.172216,0.18105,0.357491,0.256329,0.069628,0.121071,0.074953,"(0, 9]",0
4,1,Asian,Male,images\img4.jpg,"[[-4, 79], [-2, 104], [-1, 129], [0, 155], [8,...",0.093075,0.274444,0.275523,0.153957,0.150599,0.191064,0.422118,0.252346,0.054845,0.09594,0.049664,"(0, 9]",0


In [None]:
#Split the data into train test

train, test = train_test_split(face_recog, test_size=0.2)

**Use Neural Net to Predict Age**

In [None]:
#Use the encoded age for train and test labels
train_label = train["age_encoded"].values
test_label = test["age_encoded"].values
test_bins = test["age_bins"].values


In [None]:
#These are the bins for age
test_bins

[(29, 39], (9, 29], (9, 29], (9, 29], (29, 39], ..., (9, 29], (9, 29], (9, 29], (39, 49], (9, 29]]
Length: 4505
Categories (10, interval[int64]): [(0, 9] < (9, 29] < (29, 39] < (39, 49] ... (69, 79] < (79, 89] <
                                   (89, 99] < (99, 130]]

In [None]:
#drop all the unused columns for train and test data
train = train.drop(columns= ["age", "path", "landmarks", "age_bins", "age_encoded"])
test = test.drop(columns= ["age", "path", "landmarks", "age_bins", "age_encoded"])


In [None]:
train

Unnamed: 0,ethnicity,gender,nose2lip,lip2chin,len_between_eyes,left_eyelid_len,right_eyelid_len,nose_width,size_of_mouth,nose_len,bottom_lip_width,left_eye_size,right_eye_size
9153,Black,Female,0.085961,0.199681,0.304821,0.189048,0.189917,0.208529,0.422406,0.290376,0.108867,0.117434,0.072578
3775,Indian,Female,0.081170,0.205487,0.270880,0.157329,0.150953,0.192776,0.348992,0.307935,0.102566,0.128270,0.084681
19791,White,Male,0.097257,0.286406,0.246428,0.112761,0.130750,0.221662,0.394000,0.317598,0.056036,0.098445,0.056381
1621,Asian,Male,0.111072,0.260914,0.276424,0.181847,0.175167,0.175949,0.397107,0.287636,0.067666,0.092484,0.056388
5905,White,Female,0.080210,0.234181,0.292758,0.144601,0.139483,0.187774,0.347977,0.300376,0.083424,0.099021,0.055893
...,...,...,...,...,...,...,...,...,...,...,...,...,...
19844,White,Male,0.148659,0.324462,0.276398,0.101365,0.088786,0.209758,0.359351,0.301872,0.046795,0.075677,0.040629
13402,Black,Male,0.114636,0.237090,0.262336,0.119661,0.115132,0.229560,0.372205,0.296395,0.087156,0.123257,0.080354
5386,Indian,Male,0.087886,0.212709,0.231815,0.122711,0.132164,0.179161,0.321341,0.303805,0.077610,0.113720,0.084179
10178,White,Male,0.100304,0.136163,0.253352,0.130540,0.131476,0.176500,0.352912,0.240250,0.067881,0.096281,0.062659


In [None]:
#encode ethnicity and gender variables for train data
encoder_e = LabelEncoder()
encoder_e.fit(train["ethnicity"])
encoder_e.classes_

encoder_g = LabelEncoder()
encoder_g.fit(train["gender"])
encoder_g.classes_

e_train = encoder_e.transform(train["ethnicity"])
g_train = encoder_g.transform(train["gender"])
train.insert(13, "ethnicity_encoded", e_train)
train.insert(14, "gender_encoded", g_train)
train = train.drop(columns= ["ethnicity", "gender"])


In [None]:
#encode ethnicity and gender variables for train data
encoder_e = LabelEncoder()
encoder_e.fit(test["ethnicity"])
encoder_e.classes_

encoder_g = LabelEncoder()
encoder_g.fit(test["gender"])
encoder_g.classes_

e_test = encoder_e.transform(test["ethnicity"])
g_test = encoder_g.transform(test["gender"])
test.insert(13, "ethnicity_encoded", e_test)
test.insert(14, "gender_encoded", g_test)
test = test.drop(columns= ["ethnicity", "gender"])

In [None]:
test

Unnamed: 0,nose2lip,lip2chin,len_between_eyes,left_eyelid_len,right_eyelid_len,nose_width,size_of_mouth,nose_len,bottom_lip_width,left_eye_size,right_eye_size,ethnicity_encoded,gender_encoded
10061,0.115568,0.227058,0.250648,0.155741,0.182729,0.185332,0.325621,0.282593,0.070832,0.129008,0.094848,3,0
212,0.098010,0.218729,0.218903,0.162711,0.158036,0.166827,0.328102,0.254107,0.075609,0.116169,0.090036,2,1
12289,0.064217,0.287702,0.242610,0.107301,0.121843,0.237160,0.425810,0.310214,0.063441,0.097812,0.064217,4,0
5317,0.077721,0.212329,0.262217,0.141553,0.153535,0.192489,0.513331,0.282323,0.070776,0.096648,0.066038,2,0
13681,0.097381,0.282108,0.246321,0.166804,0.157566,0.202813,0.415313,0.292649,0.068859,0.110228,0.069097,3,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
22121,0.123448,0.251453,0.258238,0.072688,0.093452,0.219796,0.406644,0.330468,0.059838,0.091802,0.053276,3,0
5455,0.090643,0.256493,0.277824,0.158034,0.169184,0.217230,0.525350,0.277757,0.059908,0.083133,0.049016,4,0
7450,0.102423,0.212243,0.265333,0.109943,0.103129,0.178530,0.336520,0.283168,0.090574,0.128090,0.084563,4,0
11540,0.106772,0.227523,0.266318,0.157872,0.154427,0.206081,0.412039,0.269641,0.059566,0.072352,0.032803,0,1


In [None]:
#run a neural net 

clf = MLPClassifier(hidden_layer_sizes=(10, 10), max_iter=100, alpha=0.0001,
                     solver='lbfgs', verbose=10,  random_state=21,tol=0.000000001)
clf.fit(train, train_label)
pred = clf.predict(test)
print(accuracy_score(test_label, pred))

0.4514983351831299


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Since the accuracy is really low, we decided to look more into different methods to differentiate age. Furthermore, with the additional features, I think we can run a neural net to predict ethnicity. We can probably find better results from predicting ethnicity. 


In [None]:
#This shows you the best variables to use for the model
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2

bestfeatures = SelectKBest(score_func=chi2, k=13)
fit = bestfeatures.fit(train, train_label)
dfscores = pd.DataFrame(fit.scores_)
dfcolumns = pd.DataFrame(train.columns)

featureScores = pd.concat([dfcolumns,dfscores],axis=1)
featureScores.columns = ['Specs','Score']  #naming the dataframe columns
print(featureScores.nlargest(20,'Score'))

                Specs       Score
11  ethnicity_encoded  899.185997
12     gender_encoded  452.768857
6       size_of_mouth   19.593100
10     right_eye_size   19.154289
1            lip2chin   16.788170
9       left_eye_size   15.264984
8    bottom_lip_width   10.824309
5          nose_width    8.891710
3     left_eyelid_len    7.656982
4    right_eyelid_len    7.185666
0            nose2lip    4.351754
7            nose_len    1.424096
2    len_between_eyes    0.255390


In [None]:
pd.DataFrame({"Predictions":pred, "Actual":test_label, "Bins": test_bins})

Unnamed: 0,Predictions,Actual,Bins
0,1,0,"(0, 9]"
1,0,0,"(0, 9]"
2,4,2,"(29, 39]"
3,1,1,"(9, 29]"
4,1,2,"(29, 39]"
...,...,...,...
4500,4,7,"(79, 89]"
4501,1,1,"(9, 29]"
4502,1,1,"(9, 29]"
4503,1,2,"(29, 39]"


**Using Neural Net to Predict ethnicity**

In [None]:
import random
random.seed(20)
train, test = train_test_split(face_recog, test_size=0.2)

In [None]:
train = train.drop(columns= ["path", "landmarks", "age_bins", "age_encoded"])
test = test.drop(columns= ["path", "landmarks", "age_bins", "age_encoded"])


In [None]:
train

Unnamed: 0,age,ethnicity,gender,nose2lip,lip2chin,len_between_eyes,left_eyelid_len,right_eyelid_len,nose_width,size_of_mouth,nose_len,bottom_lip_width,left_eye_size,right_eye_size
18283,54,Asian,Male,0.126927,0.278419,0.274986,0.169380,0.161800,0.175063,0.360294,0.257361,0.057775,0.089134,0.057775
22057,85,Asian,Male,0.108209,0.236260,0.250000,0.163055,0.173294,0.223354,0.367554,0.278193,0.063205,0.068438,0.037116
19901,61,Indian,Male,0.106471,0.306086,0.260048,0.124647,0.109861,0.225000,0.425118,0.282994,0.069530,0.083276,0.046193
18730,56,Indian,Male,0.090260,0.318023,0.336809,0.193716,0.197213,0.228026,0.378360,0.269385,0.052821,0.090876,0.053084
14743,4,Asian,Female,0.075431,0.230361,0.250020,0.155482,0.168407,0.192288,0.398997,0.260680,0.087044,0.096672,0.059972
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12646,35,Indian,Female,0.065738,0.241198,0.248551,0.131477,0.132830,0.212927,0.466967,0.289341,0.065511,0.100664,0.060300
9749,29,Black,Female,0.097651,0.211030,0.277610,0.194112,0.162039,0.224122,0.421578,0.288226,0.103735,0.111726,0.063296
13863,37,Indian,Female,0.058946,0.268242,0.258368,0.134121,0.139073,0.233481,0.496769,0.277316,0.060997,0.103147,0.061249
10676,30,Black,Male,0.117964,0.220063,0.237812,0.115594,0.103283,0.237751,0.389047,0.303773,0.091334,0.112026,0.073158


In [None]:
encoder_e = LabelEncoder()
encoder_e.fit(train["ethnicity"])
encoder_e.classes_

encoder_g = LabelEncoder()
encoder_g.fit(train["gender"])
encoder_g.classes_

e_train = encoder_e.transform(train["ethnicity"])
g_train = encoder_g.transform(train["gender"])
train.insert(14, "ethnicity_encoded", e_train)
train.insert(15, "gender_encoded", g_train)
train = train.drop(columns= ["ethnicity", "gender"])



In [None]:
encoder_e = LabelEncoder()
encoder_e.fit(test["ethnicity"])
encoder_e.classes_

encoder_g = LabelEncoder()
encoder_g.fit(test["gender"])
encoder_g.classes_

e_test = encoder_e.transform(test["ethnicity"])
g_test = encoder_g.transform(test["gender"])
test.insert(14, "ethnicity_encoded", e_test)
test.insert(15, "gender_encoded", g_test)
test = test.drop(columns= ["ethnicity", "gender"])

In [None]:
train_label = train["ethnicity_encoded"].values
test_label = test["ethnicity_encoded"].values



In [None]:
train_label

array([0, 0, 3, ..., 3, 1, 0])

In [None]:
train.drop(columns = ["ethnicity_encoded"])
test.drop(columns=["ethnicity_encoded"])

Unnamed: 0,age,nose2lip,lip2chin,len_between_eyes,left_eyelid_len,right_eyelid_len,nose_width,size_of_mouth,nose_len,bottom_lip_width,left_eye_size,right_eye_size,gender_encoded
4100,23,0.059314,0.215059,0.272338,0.138321,0.170802,0.208327,0.363157,0.320715,0.075709,0.120775,0.081977,0
10918,30,0.109280,0.246838,0.276281,0.096393,0.077469,0.226573,0.436577,0.287225,0.088484,0.094961,0.055195,0
18463,55,0.103177,0.219777,0.265034,0.154921,0.163515,0.170065,0.465004,0.256334,0.062776,0.099944,0.068208,0
21916,81,0.159627,0.316462,0.270254,0.121778,0.128285,0.167558,0.313542,0.304065,0.042569,0.094993,0.061116,1
18531,55,0.123849,0.312362,0.256673,0.128292,0.123849,0.221118,0.462464,0.284527,0.055779,0.079277,0.044970,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
7724,26,0.077680,0.245589,0.275148,0.184076,0.188111,0.264570,0.507873,0.266490,0.073154,0.074265,0.041802,1
19592,60,0.095718,0.241691,0.295744,0.119234,0.125683,0.204369,0.500455,0.269795,0.061828,0.085797,0.045316,1
22248,89,0.095902,0.362697,0.246354,0.103419,0.117721,0.210245,0.333671,0.316169,0.038706,0.100769,0.078481,1
8310,27,0.115715,0.335624,0.260169,0.115715,0.118007,0.183685,0.449557,0.283561,0.040912,0.088316,0.052392,1


In [None]:
clf = MLPClassifier(hidden_layer_sizes=(20), max_iter=100, alpha=0.0001,
                     solver='lbfgs', verbose=10,  random_state=21,tol=0.000000001)
clf.fit(train, train_label)
pred = clf.predict(test)
print(accuracy_score(test_label, pred))

0.9631520532741399


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


In [None]:
df_ethnicity = pd.DataFrame({"Predicted":pred, "Actual" :test_label})
df_ethnicity

Unnamed: 0,Predicted,Actual
0,0,0
1,4,4
2,4,4
3,4,4
4,4,4
...,...,...
4500,3,3
4501,4,4
4502,2,3
4503,4,4


In [None]:
#This shows you the best variables to use for the model
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2

bestfeatures = SelectKBest(score_func=chi2, k=13)
fit = bestfeatures.fit(train, train_label)
dfscores = pd.DataFrame(fit.scores_)
dfcolumns = pd.DataFrame(train.columns)

featureScores = pd.concat([dfcolumns,dfscores],axis=1)
featureScores.columns = ['Specs','Score']  #naming the dataframe columns
print(featureScores.nlargest(20,'Score'))

                Specs         Score
12  ethnicity_encoded  16514.477320
0                 age  13684.261331
13     gender_encoded     50.547690
2            lip2chin     15.707238
4     left_eyelid_len     14.606153
5    right_eyelid_len     14.450770
7       size_of_mouth     14.384268
9    bottom_lip_width     12.536209
6          nose_width     11.098648
11     right_eye_size      2.758162
10      left_eye_size      2.377901
1            nose2lip      1.530402
3    len_between_eyes      1.503128
8            nose_len      0.653892
