In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn import metrics
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.utils import np_utils
from keras.layers import Flatten
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D

In [2]:
# Load the dataset; later cells read its 'pixels' and 'ethnicity' columns.
df = pd.read_csv('age_gender.csv')

In [3]:
# Every 'pixels' entry is one space-separated string; split it into one
# column per value and convert the pieces to floats.
# `n` is passed by keyword because pandas 2.0 made every argument of
# `str.split` after `pat` keyword-only (the positional form raises TypeError).
# 48*48-1 splits yield at most 48*48 columns, matching the 48x48 reshape
# performed further down in the notebook.
X = df['pixels'].str.split(' ', n=48*48-1, expand=True).astype(float)
# Target labels for the classifiers.
y = df['ethnicity']

In [4]:
# Hold out 33% of the samples for testing; the fixed seed pins the split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

In [5]:
# Scale both feature sets by 1/255 (assumes raw values lie in 0-255 - the
# data itself is not visible here).
# NOTE: the names are rebound in place, so running this cell twice scales twice.
X_train, X_test = X_train / 255, X_test / 255
# Number of feature columns; used to size the dense baseline model.
num_pixels = X_test.shape[1]

In [6]:
# One-hot encode the labels for the softmax / categorical cross-entropy models.
# The class count comes from the full label column `y` and is passed
# explicitly: when to_categorical infers it per split, a split that happens
# to lack the highest label ends up with fewer columns than the other one.
num_classes = int(y.max()) + 1
y_train = np_utils.to_categorical(y_train, num_classes=num_classes)
y_test = np_utils.to_categorical(y_test, num_classes=num_classes)

In [7]:
def acrcy(y_predicted, y_actual):
    """Print the confusion matrix and the accuracy of a classifier.

    Both arguments are 2-D, one row per sample and one column per class
    (class scores / probabilities or one-hot vectors). The label of a row is
    the position of its largest entry; ties resolve to the first maximum.

    Parameters
    ----------
    y_predicted : array-like of shape (n_samples, n_classes)
        Model outputs.
    y_actual : array-like of shape (n_samples, n_classes)
        Ground-truth labels in the same layout.

    Returns
    -------
    None
        The confusion matrix and the accuracy are printed, not returned.
    """
    # argmax over the class axis turns each row into a single label index
    predicted_labels = np.argmax(np.asarray(y_predicted), axis=1)
    actual_labels = np.argmax(np.asarray(y_actual), axis=1)

    # named Series give the crosstab its 'Actual' / 'Predicted' axis titles
    y_actl = pd.Series(actual_labels, name='Actual')
    y_pred = pd.Series(predicted_labels, name='Predicted')

    # confusion matrix: rows are actual classes, columns are predicted classes
    print(pd.crosstab(y_actl, y_pred))
    # fraction of samples whose predicted label equals the actual label
    print("accuracy: " + str(metrics.accuracy_score(actual_labels, predicted_labels)))

In [8]:
# define baseline model
def baseline_model():
    """Build and compile the fully connected baseline classifier.

    Reads the module-level ``num_pixels`` (input width and hidden-layer size)
    and ``num_classes`` (width of the softmax output).

    Returns:
        The compiled ``Sequential`` model (categorical cross-entropy loss,
        Adam optimizer, accuracy metric).
    """
    # one hidden ReLU layer as wide as the input, then the softmax classifier
    hidden = Dense(num_pixels, input_dim=num_pixels, kernel_initializer='normal', activation='relu')
    output = Dense(num_classes, kernel_initializer='normal', activation='softmax')
    network = Sequential([hidden, output])
    network.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return network

In [9]:
# Train the baseline network; 25% of the training data is held out for validation.
model = baseline_model()
model.fit(
    X_train,
    y_train,
    validation_split=0.25,
    batch_size=128,
    epochs=8,
    verbose=2,
)

Epoch 1/8
94/94 - 8s - loss: 3.2152 - accuracy: 0.4616 - val_loss: 1.2030 - val_accuracy: 0.5387
Epoch 2/8
94/94 - 4s - loss: 1.0668 - accuracy: 0.6071 - val_loss: 1.1013 - val_accuracy: 0.5820
Epoch 3/8
94/94 - 4s - loss: 1.0481 - accuracy: 0.6199 - val_loss: 1.0299 - val_accuracy: 0.6336
Epoch 4/8
94/94 - 4s - loss: 0.9955 - accuracy: 0.6381 - val_loss: 1.1410 - val_accuracy: 0.5633
Epoch 5/8
94/94 - 4s - loss: 0.9563 - accuracy: 0.6581 - val_loss: 1.0477 - val_accuracy: 0.6356
Epoch 6/8
94/94 - 4s - loss: 0.9598 - accuracy: 0.6633 - val_loss: 1.0811 - val_accuracy: 0.6273
Epoch 7/8
94/94 - 4s - loss: 0.9163 - accuracy: 0.6799 - val_loss: 0.9711 - val_accuracy: 0.6401
Epoch 8/8
94/94 - 4s - loss: 0.8950 - accuracy: 0.6805 - val_loss: 0.9764 - val_accuracy: 0.6547


<keras.callbacks.History at 0x1fcc7ea5f70>

In [10]:
# Model outputs for the held-out test set (one row of class scores per sample).
res = model.predict(x=X_test)

In [11]:
# Confusion matrix and accuracy of the current model on the test set.
acrcy(y_predicted=res, y_actual=y_test)

Predicted     0    1    2    3  4
Actual                           
0          3141   52  116   66  0
1           529  812   60   73  0
2           413   13  684   12  0
3           728   42   56  470  0
4           449   27   37   42  1
acccuracy: 0.6529464399846606


In [12]:
# Reshape the flat feature rows into 48x48 single-channel float32 images for
# the CNN. np.asarray accepts both the DataFrame and an already converted
# array, so re-running this cell no longer fails on a missing `.values`.
X_train = np.asarray(X_train, dtype='float32').reshape(len(X_train), 48, 48, 1)
X_test = np.asarray(X_test, dtype='float32').reshape(len(X_test), 48, 48, 1)

In [13]:
def cnn_model():
    """Build and compile a small convolutional classifier for 48x48x1 inputs.

    Layer stack: a 3x3 convolution with 32 filters, 2x2 max pooling, flatten,
    a 160-unit ReLU dense layer, 30% dropout and a softmax output whose width
    is the module-level ``num_classes``.

    Returns:
        The compiled ``Sequential`` model (categorical cross-entropy loss,
        Adam optimizer, accuracy metric).
    """
    layer_stack = [
        Conv2D(32, (3, 3), input_shape=(48, 48, 1), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Flatten(),
        Dense(160, activation='relu'),
        Dropout(0.3),
        Dense(num_classes, activation='softmax'),
    ]
    network = Sequential(layer_stack)
    network.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return network

In [14]:
# Train the CNN; 20% of the training data is held out for validation.
# Rebinding `model` replaces the baseline network created earlier.
model = cnn_model()
model.fit(
    X_train,
    y_train,
    validation_split=0.2,
    batch_size=128,
    epochs=12,
    verbose=2,
)

Epoch 1/12
100/100 - 11s - loss: 1.2828 - accuracy: 0.5100 - val_loss: 1.0530 - val_accuracy: 0.6062
Epoch 2/12
100/100 - 11s - loss: 0.9680 - accuracy: 0.6544 - val_loss: 0.8403 - val_accuracy: 0.7114
Epoch 3/12
100/100 - 11s - loss: 0.8385 - accuracy: 0.7058 - val_loss: 0.8032 - val_accuracy: 0.7240
Epoch 4/12
100/100 - 11s - loss: 0.7757 - accuracy: 0.7331 - val_loss: 0.7771 - val_accuracy: 0.7343
Epoch 5/12
100/100 - 11s - loss: 0.7267 - accuracy: 0.7498 - val_loss: 0.7198 - val_accuracy: 0.7561
Epoch 6/12
100/100 - 11s - loss: 0.6835 - accuracy: 0.7627 - val_loss: 0.7089 - val_accuracy: 0.7605
Epoch 7/12
100/100 - 11s - loss: 0.6486 - accuracy: 0.7762 - val_loss: 0.6975 - val_accuracy: 0.7624
Epoch 8/12
100/100 - 11s - loss: 0.6252 - accuracy: 0.7843 - val_loss: 0.6989 - val_accuracy: 0.7586
Epoch 9/12
100/100 - 11s - loss: 0.5998 - accuracy: 0.7948 - val_loss: 0.7076 - val_accuracy: 0.7617
Epoch 10/12
100/100 - 12s - loss: 0.5679 - accuracy: 0.8057 - val_loss: 0.6925 - val_accura

<keras.callbacks.History at 0x1fbaccd5b80>

In [15]:
# CNN outputs for the reshaped test images (one row of class scores per sample).
res = model.predict(x=X_test)

In [16]:
# Confusion matrix and accuracy of the CNN on the test set.
acrcy(y_predicted=res, y_actual=y_test)

Predicted     0     1    2    3   4
Actual                             
0          3035    81  101  145  13
1           208  1160   30   64  12
2           188    29  871   25   9
3           285    73   28  887  23
4           350    47   28   96  35
acccuracy: 0.7654352550172568
