In [1]:
# import necessary libraries

In [2]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from tqdm.notebook import tqdm
warnings.filterwarnings('ignore')
%matplotlib inline
import cv2
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img
from keras.models import Sequential, Model
from keras.layers import Dense, Conv2D, Dropout, Flatten, MaxPooling2D, Input

ModuleNotFoundError: No module named 'cv2'

In [None]:
# set paths
# Root folder of the image dataset; the "train" and "test" sub-folders used
# further down are resolved relative to it.
data_path = "D:/Permanent"

In [None]:
# define image size
# Target size handed to cv2.resize by clean_images and preprocess_data.
# NOTE(review): the network input defined later is 128x128, not 256x256 --
# confirm which resolution is intended.
IMG_SIZE = (256, 256)

In [None]:
def clean_images(data_dir):
    """Convert every image in ``data_dir`` to an equalized grayscale image.

    Each readable image is converted to grayscale, resized to ``IMG_SIZE``,
    histogram-equalized and written back over the original file.

    Entries that OpenCV cannot decode (sub-directories, non-image files,
    corrupt images) are left untouched instead of aborting the whole run.

    Args:
        data_dir: Directory whose files are processed in place.

    Returns:
        None. The files in ``data_dir`` are overwritten.
    """
    for img_name in os.listdir(data_dir):
        img_path = os.path.join(data_dir, img_name)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread reports failure by returning None rather than raising.
            continue
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        img = cv2.resize(img, IMG_SIZE)
        img = cv2.equalizeHist(img)
        # The equalized image is already 8-bit, so it is saved as-is; the
        # former divide-by-255 / multiply-by-255 round trip only converted it
        # to floats before writing.
        cv2.imwrite(img_path, img)

In [None]:
# preprocess data
def preprocess_data(data_dir):
    """Load grayscale images and their age labels from ``data_dir``.

    File names are expected to look like ``<prefix>_<age>_...``. Each file is
    renamed to the canonical ``<prefix>_<age>_.jpg`` form when that name is
    still free, then read as grayscale, resized to ``IMG_SIZE`` and divided
    by 255.

    Entries that are not regular files, whose name carries no integer age, or
    that OpenCV cannot decode are skipped, so the two returned arrays always
    stay aligned.

    Args:
        data_dir: Directory containing the image files.

    Returns:
        Tuple ``(images, ages)`` of NumPy arrays with one entry per loaded
        image.
    """
    images = []
    ages = []
    for img_name in os.listdir(data_dir):
        old_path = os.path.join(data_dir, img_name)
        if not os.path.isfile(old_path):
            continue

        # extract age and name from image name
        parts = img_name.split("_")
        try:
            age = int(parts[1])
        except (IndexError, ValueError):
            # Not a "<prefix>_<age>_..." file name; nothing to label it with.
            continue
        name = parts[0] + "_" + str(age) + "_.jpg"
        new_path = os.path.join(data_dir, name)

        # rename image file -- but never onto an existing file: several source
        # names can map to the same canonical name, and renaming would then
        # overwrite (POSIX) or raise (Windows).
        if name != img_name and not os.path.exists(new_path):
            os.rename(old_path, new_path)
            img_file = new_path
        else:
            img_file = old_path

        # read, resize, and normalize image
        img = cv2.imread(img_file, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # Unreadable file: skip it without recording an age.
            continue
        img = cv2.resize(img, IMG_SIZE)
        images.append(img / 255.0)
        ages.append(age)

    return np.array(images), np.array(ages)

In [None]:
# split dataset into training and testing sets
def split_dataset(data_dir, test_size=0.2, random_state=42):
    """Move the files of ``data_dir`` into ``train`` and ``test`` sub-folders.

    Only regular files directly inside ``data_dir`` are considered, so the
    ``train``/``test`` folders left by a previous run are never moved into
    themselves. The names are sorted before splitting because ``os.listdir``
    returns them in arbitrary order, which would otherwise make the split
    differ between machines despite the fixed ``random_state``.

    Args:
        data_dir: Directory holding the files to split.
        test_size: Fraction (or absolute number) of files sent to ``test``;
            forwarded to ``train_test_split``.
        random_state: Seed forwarded to ``train_test_split``.

    Returns:
        None. Files are moved on disk.
    """
    # get list of image names (regular files only, in a stable order)
    img_names = sorted(
        name for name in os.listdir(data_dir)
        if os.path.isfile(os.path.join(data_dir, name))
    )
    if not img_names:
        # Nothing left to split, e.g. the function already ran on this folder.
        return

    # split into training and testing sets
    train_names, test_names = train_test_split(
        img_names, test_size=test_size, random_state=random_state
    )

    # create directories for training and testing sets, then move the images
    destinations = (
        (os.path.join(data_dir, "train"), train_names),
        (os.path.join(data_dir, "test"), test_names),
    )
    for target_dir, names in destinations:
        os.makedirs(target_dir, exist_ok=True)
        for name in names:
            os.rename(os.path.join(data_dir, name), os.path.join(target_dir, name))

In [None]:
# split dataset into training and testing sets
#split_dataset(data_path)

In [None]:
# clean training data
train_path = os.path.join(data_path, "train")
clean_images(train_path)

# Read one cleaned sample back. The path is derived from train_path instead of
# repeating the absolute folder, and the file is loaded as a single channel
# because clean_images stores grayscale images.
img_path = os.path.join(train_path, '1_2_.jpg')
img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
if img is None:
    # cv2.imread returns None on failure; fail with a readable message.
    raise FileNotFoundError(f"Could not read sample image: {img_path}")

# Display the image
plt.imshow(img, cmap='gray', vmin=0, vmax=255)
plt.show()

In [None]:
# clean testing data
test_path = os.path.join(data_path, "test")
clean_images(test_path)

# Read one cleaned sample back. The path is derived from test_path instead of
# repeating the absolute folder, and the file is loaded as a single channel
# because clean_images stores grayscale images.
img_path = os.path.join(test_path, '0_6_.jpg')
img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
if img is None:
    # cv2.imread returns None on failure; fail with a readable message.
    raise FileNotFoundError(f"Could not read sample image: {img_path}")

# Display the image
plt.imshow(img, cmap='gray', vmin=0, vmax=255)
plt.show()

In [None]:
# preprocess training data
#train_path = os.path.join(data_path, "train")
#train_images, train_ages = preprocess_data(train_path)

In [None]:
# preprocess testing data
#test_path = os.path.join(data_path, "test")
#test_images, test_ages = preprocess_data(test_path)

In [None]:
# Define the path to the training dataset
#data_dir = 'D:/Permanent/train'

# Define the rotation angles to use for data augmentation
#angles = [-10, -5, 5, 10]

# Loop through the images in the dataset
#for file_name in os.listdir(data_dir):
    # Load the image
    #img_path = os.path.join(data_dir, file_name)
    #img = cv2.imread(img_path)

    # Loop through the rotation angles
    #for angle in angles:
        # Apply rotation to the image
        #rows, cols, _ = img.shape
        #M = cv2.getRotationMatrix2D((cols/2, rows/2), angle, 1)
        #rotated_img = cv2.warpAffine(img, M, (cols, rows))

        # Save the rotated image with new file name
        #new_file_name = file_name.split('.')[0] + '_rotated_' + str(angle) + '_.jpg'
        #new_img_path = os.path.join(data_dir, new_file_name)
        #cv2.imwrite(new_img_path, rotated_img)

In [None]:
# Folder scanned below to build the image/label table.
# NOTE(review): repeats the absolute location already built as train_path from
# data_path -- consider reusing that variable.
train_dir= 'D:/Permanent/train'

In [None]:
# Build parallel lists of image paths and labels from file names of the form
# "<gender>_<age>_...".
image_paths = []
age_labels = []
gender_labels = []
skipped_files = []
for filename in os.listdir(train_dir):
    fields = filename.split('_')
    try:
        gender = int(fields[0])
        age = int(fields[1])
    except (IndexError, ValueError):
        # Entries without integer gender/age fields (OS metadata files,
        # sub-folders, ...) cannot be labelled; record them and move on.
        skipped_files.append(filename)
        continue
    image_paths.append(os.path.join(train_dir, filename))
    age_labels.append(age)
    gender_labels.append(gender)

if skipped_files:
    print(f"Skipped {len(skipped_files)} entries without gender/age labels, e.g. {skipped_files[:5]}")

In [None]:
# Assemble the path and label lists into a single table and preview it.
df = pd.DataFrame({
    'image': image_paths,
    'age': age_labels,
    'gender': gender_labels,
})
df.head()

In [None]:
# map labels for gender
# Keys are the integer gender codes parsed from the file names; values are the
# display names used in plot titles and printed predictions.
gender_dic = {0:'Male', 1:'Female'}

In [None]:
#Exploratory Data Analysis

In [None]:
from PIL import Image

# Preview the first image of the table with the axes hidden.
img = Image.open(df['image'][0])
plt.imshow(img)
plt.axis('off');

In [None]:
# Age distribution of the training images. sns.distplot is deprecated; a
# histogram on a density scale with a KDE overlay is its documented replacement.
sns.histplot(df['age'], kde=True, stat='density');

In [None]:
# to display grid of images

In [None]:
# 5x5 preview of the first 25 table rows, each titled with its age and gender.
plt.figure(figsize=(20,20))
files = df.iloc[0:25]

for row in files.itertuples():
    plt.subplot(5, 5, row.Index + 1)
    plt.imshow(np.array(load_img(row.image)))
    plt.title(f"Age:{row.age} Gender: {gender_dic[row.gender]}")
    plt.axis('off')

In [None]:
def extract_features(images, target_size=(128, 128)):
    """Load images as grayscale arrays shaped for the CNN input.

    Args:
        images: Iterable of image file paths.
        target_size: ``(height, width)`` every image is resized to.

    Returns:
        NumPy array of shape ``(number of images, height, width, 1)`` holding
        the raw pixel values (not normalized).
    """
    features = []
    for image in images:
        # color_mode replaces the deprecated ``grayscale=True`` flag. Resizing
        # is delegated to load_img with Lanczos resampling, which is what
        # ``Image.ANTIALIAS`` stood for before Pillow 10 removed that alias.
        img = load_img(
            image,
            color_mode='grayscale',
            target_size=target_size,
            interpolation='lanczos',
        )
        features.append(np.array(img))

    height, width = target_size
    # Add the trailing channel axis expected by Conv2D.
    return np.array(features).reshape(len(features), height, width, 1)

In [None]:
# Load every image listed in the table into one array for the network.
X= extract_features(df['image'])

In [None]:
# Sanity check: per the reshape in extract_features this should be
# (number of images, 128, 128, 1).
X.shape

In [None]:
# normalize the image
# Guarded so that re-running this cell does not divide by 255 a second time:
# the freshly loaded array holds integer pixels, the normalized one is float.
if not np.issubdtype(X.dtype, np.floating):
    X = X/255.0

In [None]:
# Label vectors for the two model outputs, in the same row order as the table.
y_gender = df['gender'].to_numpy(copy=True)
y_age = df['age'].to_numpy(copy=True)

In [None]:
# Shape of one network input sample: 128x128 pixels with a single channel,
# matching the reshape done in extract_features.
input_shape = (128, 128, 1)

In [None]:
# Model Creation

In [None]:
inputs = Input(input_shape)

# Convolutional feature extractor: four Conv2D + MaxPooling2D stages with
# 32, 64, 128 and 256 filters.
features = inputs
for n_filters in (32, 64, 128, 256):
    features = Conv2D(n_filters, kernel_size=(3, 3), activation='relu')(features)
    features = MaxPooling2D(pool_size=(2, 2))(features)

flatten = Flatten()(features)

# Fully connected layers: one independent head per task on the shared features.
dense_1 = Dense(256, activation='relu')(flatten)
dense_2 = Dense(256, activation='relu')(flatten)

dropout_1 = Dropout(0.3)(dense_1)
dropout_2 = Dropout(0.3)(dense_2)

output_1 = Dense(1, activation='sigmoid', name='gender_out')(dropout_1)
# NOTE(review): a relu on the regression output keeps ages non-negative but can
# get stuck at 0 if the unit goes inactive -- consider a linear activation.
output_2 = Dense(1, activation='relu', name='age_out')(dropout_2)

model = Model(inputs=[inputs], outputs=[output_1, output_2])

# Losses and metrics are listed per output, in the order [gender_out, age_out].
# A single flat ['accuracy'] list would also be attached to the age regression
# output (where accuracy is meaningless), and multi-output models may reject a
# metrics list whose length differs from the number of outputs.
model.compile(
    loss=['binary_crossentropy', 'mae'],
    optimizer='adam',
    metrics=[['accuracy'], ['mae']],
)

In [None]:
# Print the layer-by-layer architecture of the two-output model built above.
model.summary()

In [None]:
# train model
# validation_split holds out the *last* 20% of the arrays, taken before any
# shuffling, and the arrays follow the directory listing order (which may group
# files by their gender prefix). Shuffle once with a fixed seed so the
# validation set is representative and the run is repeatable.
shuffle_idx = np.random.default_rng(42).permutation(len(X))
history = model.fit(
    x=X[shuffle_idx],
    y=[y_gender[shuffle_idx], y_age[shuffle_idx]],
    batch_size=32,
    epochs=30,
    validation_split=0.2,
)

In [None]:
# Plot results for Gender
acc = history.history['gender_out_accuracy']
val_acc = history.history['val_gender_out_accuracy']
loss = history.history['gender_out_loss']
val_loss = history.history['val_gender_out_loss']
epochs = range(len(acc))

# Each curve pair gets its own figure, opened *before* plotting; opening the
# figure after the plot left an empty extra figure at the end of the cell.
plt.figure()
plt.plot(epochs, acc, 'b', label= 'Training Accuracy')
plt.plot(epochs, val_acc, 'r', label= 'Validation Accuracy')
plt.title('Accuracy Graph')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()

plt.figure()
plt.plot(epochs, loss, 'b', label= 'Training Loss')
plt.plot(epochs, val_loss, 'r', label= 'Validation Loss')
plt.title('Loss Graph')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
# Plot for Age
loss = history.history['age_out_loss']
val_loss = history.history['val_age_out_loss']
epochs = range(len(loss))

# Open the figure before plotting; a trailing plt.figure() only produced an
# empty extra figure.
plt.figure()
plt.plot(epochs, loss, 'b', label= 'Training Loss')
plt.plot(epochs, val_loss, 'r', label= 'Validation Loss')
plt.title('Loss Graph')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
image_index=200
print("Original Gender:" ,gender_dic[y_gender[image_index]], "Original Age:", y_age[image_index])
# predict from model
pred = model.predict(X[image_index].reshape(1,128,128,1))
# pred holds one array per output: [gender probability, age]. The probability
# is thresholded explicitly and the age converted through a Python float, so
# both results are plain ints regardless of how round() treats NumPy scalars.
pred_gender = gender_dic[int(pred[0][0][0] > 0.5)]
pred_age = int(round(float(pred[1][0][0])))
print("Predicted Gender:", pred_gender, "Predicted Age:", pred_age)
plt.imshow(X[image_index].reshape(128,128), cmap='gray')
plt.axis('off');