Loading Libraries

In [238]:
import pandas as pd
import numpy as np
import tensorflow as tf
import os
import cv2
import sklearn
import keras
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

Loading and preparing training data

In [239]:
# Load the training label table (columns: filename, label).
labels = pd.read_csv('eye_gender_data/Training_set.csv')

# Pair every filename with its location inside the train image folder.
file_paths = []
for fname in labels["filename"]:
    file_paths.append([fname, "eye_gender_data/train/" + fname])

# Tabulate the (filename, filepaths) pairs.
images = pd.DataFrame(file_paths, columns=["filename", "filepaths"])

# Attach the label to each image path by joining the two frames on filename.
train_data = images.merge(labels, how="inner", on="filename")

In [240]:
# Preview the first rows of the merged frame (filename, filepaths, label)
# to confirm the join attached a label to each image path.
train_data.head()

Unnamed: 0,filename,filepaths,label
0,Image_1.jpg,eye_gender_data/train/Image_1.jpg,male
1,Image_2.jpg,eye_gender_data/train/Image_2.jpg,female
2,Image_3.jpg,eye_gender_data/train/Image_3.jpg,female
3,Image_4.jpg,eye_gender_data/train/Image_4.jpg,female
4,Image_5.jpg,eye_gender_data/train/Image_5.jpg,male


In [248]:
#convert labels to 0, 1
from sklearn.preprocessing import LabelEncoder

# Fit on the untouched `labels` frame rather than on train_data['label'].
# train_data['label'] is overwritten below, so refitting on it when this cell
# is re-run would make the encoder learn the integers 0/1 and the later
# le.inverse_transform call would no longer return the original label strings.
le = LabelEncoder()
le.fit(labels['label'])

# Encode only while the column still holds the raw labels, so running this
# cell more than once leaves both the column and the encoder unchanged.
if not pd.api.types.is_integer_dtype(train_data['label']):
    train_data['label'] = le.transform(train_data['label'])

In [296]:
# Preview again to confirm the label column now holds the encoded integers.
train_data.head()

Unnamed: 0,filename,filepaths,label
0,Image_1.jpg,eye_gender_data/train/Image_1.jpg,1
1,Image_2.jpg,eye_gender_data/train/Image_2.jpg,0
2,Image_3.jpg,eye_gender_data/train/Image_3.jpg,0
3,Image_4.jpg,eye_gender_data/train/Image_4.jpg,0
4,Image_5.jpg,eye_gender_data/train/Image_5.jpg,1


Data Pre-processing

In [249]:
#prepare data
# Read every training image as single-channel grayscale, resize it to
# 100x100, and pair the pixel array with its encoded label.
data = []

for i in range(len(train_data)):
    img_path = train_data['filepaths'][i]
    img_array = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    # cv2.imread reports a missing or undecodable file by returning None
    # instead of raising; stop here with the offending path rather than
    # letting cv2.resize fail with an opaque error.
    if img_array is None:
        raise FileNotFoundError(f"Could not read image: {img_path}")
    new_img_array = cv2.resize(img_array, (100,100))
    data.append([new_img_array, train_data["label"][i]])

#check images as numerical values
print(data[:3])

[[array([[188, 188, 189, ..., 176, 175, 175],
       [189, 189, 188, ..., 174, 173, 172],
       [190, 189, 188, ..., 168, 167, 167],
       ...,
       [133, 137, 144, ..., 168, 167, 166],
       [134, 138, 145, ..., 165, 164, 163],
       [135, 139, 146, ..., 163, 162, 162]], dtype=uint8), 1], [array([[167, 169, 173, ..., 194, 195, 195],
       [168, 170, 173, ..., 193, 194, 195],
       [171, 171, 173, ..., 192, 193, 194],
       ...,
       [183, 185, 189, ..., 199, 197, 196],
       [183, 186, 189, ..., 199, 197, 195],
       [184, 186, 190, ..., 199, 196, 195]], dtype=uint8), 0], [array([[181, 179, 177, ..., 131, 134, 136],
       [178, 177, 174, ..., 127, 127, 128],
       [174, 172, 170, ..., 120, 118, 116],
       ...,
       [126, 128, 132, ...,  99,  96,  94],
       [127, 130, 133, ...,  98,  95,  93],
       [128, 131, 135, ...,  98,  94,  92]], dtype=uint8), 0]]


In [250]:
#shuffle data
# Use a seeded generator so the shuffled order is identical after every
# Restart & Run All; an unseeded shuffle would change which samples land in
# the train/validation split even though that split uses a fixed random_state.
np.random.default_rng(42).shuffle(data)

In [251]:
# Each entry of `data` is an [image_array, label] pair: collect the images
# into X and the labels into y, both as NumPy arrays.
X = np.array([pair[0] for pair in data])
y = np.array([pair[1] for pair in data])

In [252]:
# Count the samples per encoded class to check the class balance.
np.unique(y, return_counts=True)

(array([0, 1]), array([4162, 5058]))

In [253]:
# Add a trailing single-channel axis so each 100x100 image becomes
# (100, 100, 1), the input shape the network is declared with.
X = X.reshape((-1, 100, 100, 1))

# Hold out 20% of the samples for validation; the fixed random_state keeps
# the split itself deterministic for a given input order.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    train_size=0.8,
    test_size=0.2,
    random_state=42,
)

Building and Training the Model

In [282]:
#define CNN model architecture
# Three convolutional stages (16, 32, then 64 filters), each made of two 3x3
# ReLU convolutions followed by 2x2 max-pooling, feeding a dense head that
# ends in a 2-unit softmax.

model = tf.keras.models.Sequential()

# Stage 1: takes the 100x100 single-channel input.
model.add(tf.keras.layers.Conv2D(16, (3,3), activation='relu', input_shape=(100,100,1)))
model.add(tf.keras.layers.Conv2D(16, (3,3), activation='relu'))
model.add(tf.keras.layers.MaxPooling2D((2,2)))
model.add(tf.keras.layers.Dropout(rate=0.25))

# Stage 2
model.add(tf.keras.layers.Conv2D(32, (3,3), activation='relu'))
model.add(tf.keras.layers.Conv2D(32, (3,3), activation='relu'))
model.add(tf.keras.layers.MaxPooling2D((2,2)))
model.add(tf.keras.layers.Dropout(rate=0.25))

# Stage 3 (no dropout after pooling)
model.add(tf.keras.layers.Conv2D(64, (3,3), activation='relu'))
model.add(tf.keras.layers.Conv2D(64, (3,3), activation='relu'))
model.add(tf.keras.layers.MaxPooling2D((2,2)))

# Classifier head: two dropout-regularised dense layers, then one softmax
# output per class.
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dropout(rate=0.5))
model.add(tf.keras.layers.Dense(150, activation='relu'))
model.add(tf.keras.layers.Dropout(rate=0.3))
model.add(tf.keras.layers.Dense(2, activation='softmax'))

In [283]:
# The targets are integer class ids (0/1) and the network ends in a 2-unit
# softmax, hence the sparse variant of categorical cross-entropy.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy', #trying categorical instead of binary
    metrics=['accuracy'],
)

In [284]:
# Train on the training split only: 15 epochs in mini-batches of 30.
model.fit(
    X_train,
    y_train,
    batch_size=30,
    epochs=15,
)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x164c61360>

Validate the model

In [285]:
#validate model
# Score the trained network on the held-out validation split; with the
# compile settings used above the result is [loss, accuracy].
model.evaluate(X_val, y_val)



[0.25128212571144104, 0.8975054025650024]

Data Pre-processing on test_data

In [286]:
#load the order of the image's name that has been provided
# Predictions are produced by iterating this frame, so they follow the row
# order of this file.
test_image_order = pd.read_csv('eye_gender_data/Testing_set.csv')
test_image_order.head()

Unnamed: 0,filename
0,Image_1.jpg
1,Image_2.jpg
2,Image_3.jpg
3,Image_4.jpg
4,Image_5.jpg


In [287]:
#get images file path
# Build [filename, path] pairs pointing into the test image folder.
# Note: this rebinds `file_paths`, which earlier held the training paths.
file_paths = [[fname, 'eye_gender_data/test/' + fname] for fname in test_image_order['filename']]

In [288]:
#convert filepaths to dataframe
# Same (filename, filepaths) layout as the training frame, without a label.
test_images = pd.DataFrame(file_paths, columns=['filename', 'filepaths'])
test_images.head()

Unnamed: 0,filename,filepaths
0,Image_1.jpg,eye_gender_data/test/Image_1.jpg
1,Image_2.jpg,eye_gender_data/test/Image_2.jpg
2,Image_3.jpg,eye_gender_data/test/Image_3.jpg
3,Image_4.jpg,eye_gender_data/test/Image_4.jpg
4,Image_5.jpg,eye_gender_data/test/Image_5.jpg


In [289]:
# Read every test image as single-channel grayscale and resize it to 100x100,
# the same preprocessing that is applied to the training images.
test_pixel_data = []

for i in range(len(test_images)):
    img_path = test_images['filepaths'][i]
    img_array = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    # cv2.imread reports a missing or undecodable file by returning None
    # instead of raising; stop here with the offending path rather than
    # letting cv2.resize fail with an opaque error.
    if img_array is None:
        raise FileNotFoundError(f"Could not read image: {img_path}")
    new_img_array = cv2.resize(img_array, (100,100))
    test_pixel_data.append(new_img_array)

In [290]:
# Stack the list of 100x100 images into one array and add the trailing
# single-channel axis the network input expects: (N, 100, 100, 1).
test_pixel_data = np.array(test_pixel_data).reshape((-1, 100, 100, 1))

Make Prediction on Test Dataset

In [291]:
#make predictions on test data
# The network ends in a 2-unit softmax, so each row of `pred` holds two class
# probabilities for one test image.
pred = model.predict(test_pixel_data)

#check probability values
pred[:3]

array([[2.9565985e-04, 9.9970430e-01],
       [7.5970501e-01, 2.4029496e-01],
       [2.7191434e-02, 9.7280854e-01]], dtype=float32)

In [292]:
# Pick, for every row of probabilities, the index of the most likely class.
prediction = [np.argmax(value) for value in pred]

In [293]:
#re-process labels
# Map the integer class ids back to label values using the encoder that was
# fitted on the training labels.
predictions = le.inverse_transform(prediction)

In [295]:
# Write the predicted labels, in test-set order, to submission.csv as a single
# 'label' column without the index.
# NOTE(review): the disabled variant below would also write the 'filename'
# column — confirm which layout the submission expects.
#res = pd.DataFrame({'filename': test_images['filename'], 'label': predictions})
res = pd.DataFrame({'label': predictions})
res.to_csv("submission.csv", index = False)