**Implementing a Neural Net**

In [1]:
import pandas as pd
from sklearn.neural_network import MLPClassifier

from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.feature_extraction import DictVectorizer

from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
import numpy as np

In [2]:
from google.colab import files
uploaded = files.upload()


ModuleNotFoundError: No module named 'google.colab'

In [None]:
import io
face_recog = pd.read_csv(io.BytesIO(uploaded['to_neural_net.csv']))

In [None]:
face_recog['age_bins'] = pd.cut(x=face_recog['age'], 
                                bins=[0, 9, 29, 39, 49, 59, 69, 79, 89, 99, 130])
face_recog['age_encoded'] = pd.cut(x=face_recog['age'], 
                                bins=[0, 9, 29, 39, 49, 59, 69, 79, 89, 99, 130], 
                                labels = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
face_recog.head()

In [None]:
#Split the data into train test

train, test = train_test_split(face_recog, test_size=0.2)

**Use Neural Net to Predict Age**

In [None]:
#Use the encoded age for train and test labels
train_label = train["age_encoded"].values
test_label = test["age_encoded"].values
test_bins = test["age_bins"].values


In [None]:
#These are the bins for age
test_bins

In [None]:
#drop all the unused columns for train and test data
train = train.drop(columns= ["age", "path", "landmarks", "age_bins", "age_encoded"])
test = test.drop(columns= ["age", "path", "landmarks", "age_bins", "age_encoded"])


In [None]:
train

In [None]:
#encode ethnicity and gender variables for train data
encoder_e = LabelEncoder()
encoder_e.fit(train["ethnicity"])
encoder_e.classes_

encoder_g = LabelEncoder()
encoder_g.fit(train["gender"])
encoder_g.classes_

e_train = encoder_e.transform(train["ethnicity"])
g_train = encoder_g.transform(train["gender"])
train.insert(13, "ethnicity_encoded", e_train)
train.insert(14, "gender_encoded", g_train)
train = train.drop(columns= ["ethnicity", "gender"])


In [None]:
#encode ethnicity and gender variables for train data
encoder_e = LabelEncoder()
encoder_e.fit(test["ethnicity"])
encoder_e.classes_

encoder_g = LabelEncoder()
encoder_g.fit(test["gender"])
encoder_g.classes_

e_test = encoder_e.transform(test["ethnicity"])
g_test = encoder_g.transform(test["gender"])
test.insert(13, "ethnicity_encoded", e_test)
test.insert(14, "gender_encoded", g_test)
test = test.drop(columns= ["ethnicity", "gender"])

In [None]:
test

In [None]:
#run a neural net 

clf = MLPClassifier(hidden_layer_sizes=(10, 10), max_iter=100, alpha=0.0001,
                     solver='lbfgs', verbose=10,  random_state=21,tol=0.000000001)
clf.fit(train, train_label)
pred = clf.predict(test)
print(accuracy_score(test_label, pred))

Since the accuracy is really low, we decided to look more into different methods to differentiate age. Furthermore, with the additional features, I think we can run a neural net to predict ethnicity. We can probably find better results from predicting ethnicity. 


In [None]:
#This shows you the best variables to use for the model
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2

bestfeatures = SelectKBest(score_func=chi2, k=13)
fit = bestfeatures.fit(train, train_label)
dfscores = pd.DataFrame(fit.scores_)
dfcolumns = pd.DataFrame(train.columns)

featureScores = pd.concat([dfcolumns,dfscores],axis=1)
featureScores.columns = ['Specs','Score']  #naming the dataframe columns
print(featureScores.nlargest(20,'Score'))

In [None]:
pd.DataFrame({"Predictions":pred, "Actual":test_label, "Bins": test_bins})

**Using Neural Net to Predict ethnicity**

In [None]:
import random
random.seed(20)
train, test = train_test_split(face_recog, test_size=0.2)

In [None]:
train = train.drop(columns= ["path", "landmarks", "age_bins", "age_encoded"])
test = test.drop(columns= ["path", "landmarks", "age_bins", "age_encoded"])


In [None]:
train

In [None]:
encoder_e = LabelEncoder()
encoder_e.fit(train["ethnicity"])
encoder_e.classes_

encoder_g = LabelEncoder()
encoder_g.fit(train["gender"])
encoder_g.classes_

e_train = encoder_e.transform(train["ethnicity"])
g_train = encoder_g.transform(train["gender"])
train.insert(14, "ethnicity_encoded", e_train)
train.insert(15, "gender_encoded", g_train)
train = train.drop(columns= ["ethnicity", "gender"])



In [None]:
encoder_e = LabelEncoder()
encoder_e.fit(test["ethnicity"])
encoder_e.classes_

encoder_g = LabelEncoder()
encoder_g.fit(test["gender"])
encoder_g.classes_

e_test = encoder_e.transform(test["ethnicity"])
g_test = encoder_g.transform(test["gender"])
test.insert(14, "ethnicity_encoded", e_test)
test.insert(15, "gender_encoded", g_test)
test = test.drop(columns= ["ethnicity", "gender"])

In [None]:
train_label = train["ethnicity_encoded"].values
test_label = test["ethnicity_encoded"].values



In [None]:
train_label

In [None]:
train.drop(columns = ["ethnicity_encoded"])
test.drop(columns=["ethnicity_encoded"])

In [None]:
clf = MLPClassifier(hidden_layer_sizes=(20), max_iter=100, alpha=0.0001,
                     solver='lbfgs', verbose=10,  random_state=21,tol=0.000000001)
clf.fit(train, train_label)
pred = clf.predict(test)
print(accuracy_score(test_label, pred))

In [None]:
df_ethnicity = pd.DataFrame({"Predicted":pred, "Actual" :test_label})
df_ethnicity

In [None]:
#This shows you the best variables to use for the model
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2

bestfeatures = SelectKBest(score_func=chi2, k=13)
fit = bestfeatures.fit(train, train_label)
dfscores = pd.DataFrame(fit.scores_)
dfcolumns = pd.DataFrame(train.columns)

featureScores = pd.concat([dfcolumns,dfscores],axis=1)
featureScores.columns = ['Specs','Score']  #naming the dataframe columns
print(featureScores.nlargest(20,'Score'))