## Download images into Colab

## Import libraries

In [2]:
import pandas as pd                                     
import numpy as np                                      
import tensorflow as tf                                 
import os                                               
import cv2                                              
import matplotlib.pyplot as plt

ModuleNotFoundError: No module named 'pandas'

## Loading and preparing training data


In [None]:
# Load the label table; the code below reads its 'filename' column, and the
# 'label' column is used later for the targets.
labels = pd.read_csv("/content/content/eye_gender_data/Training_set.csv")
file_paths = [[fname, '/content/content/eye_gender_data/train/' + fname] for fname in labels['filename']]
images = pd.DataFrame(file_paths, columns=['filename', 'filepaths'])
train_data = pd.merge(images, labels, how='inner', on='filename')

data = []
image_size = 32      # side length (in pixels) every image is resized to

for path in train_data['filepaths']:
  # Read as a single-channel grayscale image. cv2.imread does not raise on an
  # unreadable file; it returns None, so fail here with the offending path.
  img_array = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
  if img_array is None:
    raise OSError('cv2.imread could not read training image: ' + path)
  new_img_array = cv2.resize(img_array, (image_size, image_size))      # resize to image_size x image_size
  data.append(new_img_array)

## Data Pre-processing
All images must share the input shape the VGG-19 model is built with: they are resized to 32×32 pixels and the single greyscale channel is repeated three times, giving 32×32×3 inputs. The categorical labels are also converted to numerical values.

In [None]:
# Combine the list of resized grayscale images into a single NumPy array.
arr = np.array(data)

In [None]:
# Display the array as the cell output (quick sanity check of the pixel values).
arr

In [None]:
# Display the array dimensions.
arr.shape

In [None]:
# The model input is declared with 3 channels, so place three copies of the
# grayscale array side by side along a new last axis.
train_images_3ch = np.stack((arr, arr, arr), axis=-1)
print('\nTrain_images.shape: {}, of {}'.format(train_images_3ch.shape, train_images_3ch.dtype))

In [None]:
# Normalisation: divide every pixel by 255 (result is floating point).
# Assumes 8-bit pixel values in 0-255 — TODO confirm.
max_pixel_value = 255.
train_images_scaled = train_images_3ch / max_pixel_value

In [None]:
# encode labels
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
# LabelEncoder expects a 1-D target, so pass the 'label' Series; a
# single-column DataFrame (shape (n, 1)) triggers a DataConversionWarning.
# Integer codes follow the sorted order of the class names (see le.classes_).
train_labels = le.fit_transform(train_data['label'])

In [None]:
# Display the dimensions of the encoded label array.
train_labels.shape

## Building Model & Hyperparameter tuning


In [None]:
# Build VGG19 Model

# Derive the input shape from image_size so it always matches the resized
# images; the trailing 3 is the channel count of the stacked images.
INPUT_SHAPE = (image_size, image_size, 3)

# VGG19 convolutional base with ImageNet weights, without its classifier head
vgg_layers = tf.keras.applications.vgg19.VGG19(weights='imagenet', include_top=False,
                                               input_shape=INPUT_SHAPE)

vgg_layers.summary()

In [None]:
# Fine-tune the whole base: mark each VGG19 layer trainable and report its status
for vgg_layer in vgg_layers.layers:
    vgg_layer.trainable = True
    print(vgg_layer, vgg_layer.trainable)

In [None]:
# Classifier = VGG19 base followed by a small dense head, assembled in one Sequential call
model = tf.keras.models.Sequential([
    vgg_layers,                                        # convolutional base
    tf.keras.layers.Flatten(),                         # flatten the base output to a vector
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dropout(rate=0.3),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dropout(rate=0.3),
    tf.keras.layers.Dense(1, activation='sigmoid'),    # single sigmoid unit for the binary output
])

# Binary cross-entropy matches the single sigmoid output unit
model.compile(
    loss='binary_crossentropy',
    optimizer=tf.keras.optimizers.Adam(learning_rate=2e-5),
    metrics=['accuracy'],
)

# Print the layer overview
model.summary()

In [None]:
EPOCHS = 30

# Stop once val_loss has gone 2 epochs without improving, then restore the best weights
es_callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=2,
    restore_best_weights=True,
    verbose=1,
)

# NOTE(review): validation_split holds out the last 10% of the samples as
# provided (Keras selects them before shuffling) — confirm the training rows
# are not ordered by label.
history = model.fit(
    x=train_images_scaled,
    y=train_labels,
    epochs=EPOCHS,
    batch_size=256,
    validation_split=0.1,
    callbacks=[es_callback],
    verbose=1,
)

## Validate the model


In [None]:
# Plot Learning Curves: loss on the left panel, accuracy on the right
fig, ax = plt.subplots(1, 2, figsize=(10, 4))

# One row per completed epoch, one column per logged metric
history_df = pd.DataFrame(history.history)
history_df[['loss', 'val_loss']].plot(kind='line', ax=ax[0])
history_df[['accuracy', 'val_accuracy']].plot(kind='line', ax=ax[1])

# Label both panels so the figure can be read on its own
ax[0].set(title='Loss', xlabel='epoch', ylabel='binary cross-entropy')
ax[1].set(title='Accuracy', xlabel='epoch', ylabel='accuracy')
fig.tight_layout()

In [None]:
# saving the model
save_dir = "/results/"
model_name = 'eye_model.h5'
model_path = os.path.join(save_dir, model_name)

# Save to the same path that is reported below. Saving under the bare
# model_name would put the file in the working directory while the message
# names save_dir.
os.makedirs(save_dir, exist_ok=True)
model.save(model_path)
print('Saved trained model at %s ' % model_path)

## Predict The Output For Testing Dataset

## Load Test Set

In [None]:
# Load the test file list; the code below reads its 'filename' column.
test_data = pd.read_csv("/content/content/eye_gender_data/Testing_set.csv" )
file_paths_test = [[fname, '/content/content/eye_gender_data/test/' + fname] for fname in test_data['filename']]
images_test = pd.DataFrame(file_paths_test, columns=['filename', 'filepaths'])

new_test_data = []
# image_size is reused from the training-data cell so both sets share one size

for path in images_test['filepaths']:
  # Read as a single-channel grayscale image. cv2.imread does not raise on an
  # unreadable file; it returns None, so fail here with the offending path.
  img_array_test = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
  if img_array_test is None:
    raise OSError('cv2.imread could not read test image: ' + path)
  new_img_array_test = cv2.resize(img_array_test, (image_size, image_size))      # resize to image_size x image_size
  new_test_data.append(new_img_array_test)

## Data Pre-processing on test_data


In [None]:
# Combine the list of resized grayscale test images into a single NumPy array.
test_arr = np.array(new_test_data)

In [None]:
# Display the array as the cell output (quick sanity check of the pixel values).
test_arr

In [None]:
# Display the array dimensions.
test_arr.shape

In [None]:
# The model input is declared with 3 channels, so place three copies of the
# grayscale array side by side along a new last axis.
test_images_3ch = np.stack((test_arr, test_arr, test_arr), axis=-1)
print('\nTest_images.shape: {}, of {}'.format(test_images_3ch.shape, test_images_3ch.dtype))

In [None]:
# Normalisation: divide every pixel by 255, the same scaling applied to the training images.
# Assumes 8-bit pixel values in 0-255 — TODO confirm.
max_pixel_value = 255.
test_images_scaled = test_images_3ch / max_pixel_value

## Make Prediction on Test Dataset

In [None]:
# Run the trained model on the scaled test images; the output layer is a
# single sigmoid unit, so each prediction is a value between 0 and 1.
test_predictions = model.predict(test_images_scaled)

In [None]:
# The model has one output unit, so predictions come back with a single
# column; select that column explicitly to store one scalar per row.
images_test['predictprobability'] = test_predictions[:, 0]

In [None]:
def convert_to_label(row):
  """Map a prediction row to its class name.

  Returns 'male' when row['predictprobability'] is at least 0.5,
  otherwise 'female'.
  """
  is_male = row['predictprobability'] >= 0.5
  return 'male' if is_male else 'female'

# Apply the conversion row by row (axis=1) to fill the label column.
images_test['predictlabel'] = images_test.apply(convert_to_label, axis=1)

In [None]:
# Preview the first rows of the prediction table.
images_test.head()

In [None]:
# Preview the last rows of the prediction table.
images_test.tail()

In [None]:
# Write the prediction table to a CSV file (without the index column)
csv_name = "images_test.csv"
images_test.to_csv(csv_name, index=False)

# Send the file to the local machine; google.colab is available inside a Colab runtime
from google.colab import files
files.download(csv_name)