<a href="https://colab.research.google.com/github/karthikreddyi12/Age-and-gender-prediction/blob/main/Age_and_Gender_Detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

CHUNK_SIZE = 40960
DATA_SOURCE_MAPPING = 'utkface-new:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F44109%2F78156%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240819%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240819T051226Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D8fbf366c3ecda89be327b81c4d0c763ef71aee2be7a477f48af398ded836fdfa40f5e93983f82b16974b3ec8d2e936fde774492f4abe1c4a7b29092c3aa3c51f2e6a1bf1b9f0f8e96039a82097d70cc5c694ee3f5beb6c1cb664d8fd1d50d7ac4834e574798c478c6a73b88c05823b70db00e6d80095f4aab11f9f440d514700a3b41de5cd6022fc8840a69b2b2585d1d11b310231fec9df76ec1293547d1becb64f4e104c36261f2d4dcb3228de1c3eb9610b8eedb78911113644b2931540ce720a95b475be440b221f7683e0b302d1e9c07140c90d6a6beabcfe1beabdbcdbe79b15dc7a8a6805b24edc995b0577fc228cb6305237b76fd7fbe2b353e8f788'

KAGGLE_INPUT_PATH='/kaggle/input'
KAGGLE_WORKING_PATH='/kaggle/working'
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null
shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

try:
  os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
  pass
try:
  os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
  pass

# Download and unpack every data source listed in DATA_SOURCE_MAPPING.
# Each comma-separated entry has the form "<directory>:<percent-encoded URL>".
for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    # Split on the first colon only, so the URL part is never cut short.
    directory, download_url_encoded = data_source_mapping.split(':', 1)
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            # The header may be absent; then the progress bar stays empty
            # instead of failing on int(None).
            content_length = fileres.headers['content-length']
            total_length = int(content_length) if content_length else 0
            print(f'Downloading {directory}, {content_length} bytes compressed')
            dl = 0
            # Stream the archive to the temp file chunk by chunk until EOF.
            for data in iter(lambda: fileres.read(CHUNK_SIZE), b''):
                dl += len(data)
                tfile.write(data)
                done = min(50, int(50 * dl / total_length)) if total_length else 0
                sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                sys.stdout.flush()
            # The tar branch reopens the file by name, so buffered bytes must
            # be on disk before extraction starts.
            tfile.flush()
            if filename.endswith('.zip'):
                with ZipFile(tfile) as zfile:
                    zfile.extractall(destination_path)
            else:
                # Bind the archive to its own name: reusing "tarfile" here
                # would replace the module and break any later tar source.
                with tarfile.open(tfile.name) as tar_archive:
                    tar_archive.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError:
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
        continue
    except OSError:
        print(f'Failed to load {download_url} to path {destination_path}')
        continue

print('Data source import complete.')


## Import the Necessary Libraries

In [None]:
import pandas as pd
import numpy as np
import cv2
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Dropout, Flatten, BatchNormalization, Dense, MaxPooling2D, Conv2D
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Activation, Add
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam, Adagrad, Adadelta, Adamax, RMSprop, SGD

## Extract data from the dataset

In [None]:
filedr = '/kaggle/input/utkface-new/UTKFace'

In [None]:
import os

# os.listdir returns entries in arbitrary order. Sorting keeps the positional
# sample indices used later in the notebook stable from run to run.
files = sorted(os.listdir(filedr))

### Create List of Images, Age and Gender from the dataset  

In [None]:
# Parallel lists: images[k], ages[k] and genders[k] describe the same sample.
ages = []
genders = []
images = []
# Files that could not be parsed or decoded; reported once after the loop.
skipped = []

for fl in files:
    # The first two underscore-separated fields of the file name are read as
    # the age and the gender code.
    try:
        age_field, gender_field = fl.split('_')[:2]
        age = int(age_field)
        gender = int(gender_field)
    except ValueError:
        skipped.append(fl)
        continue

    image = cv2.imread(os.path.join(filedr, fl))
    # cv2.imread signals an unreadable file by returning None rather than
    # raising, which would otherwise crash cvtColor below.
    if image is None:
        skipped.append(fl)
        continue

    # OpenCV loads images as BGR; convert to RGB and shrink to 48x48.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, (48, 48))
    images.append(image)
    ages.append(age)
    genders.append(gender)

if skipped:
    print(f'Skipped {len(skipped)} unreadable or unexpectedly named file(s): {skipped[:5]}')

#### Viewing a sample

In [None]:
plt.imshow(images[7])

### Saving the images, ages, and genders arrays to disk

In [None]:
# Turn the three Python lists into NumPy arrays (same order as the lists).
images_f, ages_f, genders_f = (
    np.array(python_list) for python_list in (images, ages, genders)
)

In [None]:
# Persist each array as its own .npy file in the working directory.
for npy_path, array_to_save in (
    ('/kaggle/working/image.npy', images_f),
    ('/kaggle/working/ages.npy', ages_f),
    ('/kaggle/working/genders.npy', genders_f),
):
    np.save(npy_path, array_to_save)

#### Finding the no. of Male and Female samples respectively

In [None]:
np.unique(genders_f,return_counts=True)

#### Plotting the No. of Male and Female Samples

In [None]:
# Derive the bar heights from the data instead of hard-coding them, so the
# chart stays correct if the dataset changes.
# Gender code 0 is labelled 'Male' and 1 'Female', the same mapping the
# sample-prediction helper at the end of the notebook uses.
gender = ['Male', 'Female']
values = [int(np.sum(genders_f == gender_code)) for gender_code in (0, 1)]

fig = plt.figure()
ax = fig.add_axes([0, 0, 1, 1])
ax.bar(gender, values)
plt.show()

#### Finding The no. of samples in each age

In [None]:
# values: the distinct ages present in the data (np.unique returns them sorted);
# count: the number of samples for each of those ages, in the same order.
values, count=np.unique(ages_f,return_counts=True)
print(count)

In [None]:
count = count.tolist()

#### Plotting The Samples Agewise

In [None]:
# Plot the counts against the actual age values. Passing only `count` would
# put list positions on the x-axis, which do not match the ages whenever
# some ages are missing from the data.
plt.plot(values, count)
plt.xlabel("Ages")
plt.ylabel('Distribution')
plt.show()

### Defining Labels for Output

In [None]:
# One [age, gender] pair per sample, in the same order as the images.
labels = [[age_value, gender_value] for age_value, gender_value in zip(ages, genders)]

In [None]:
# Scale pixel intensities by 1/255 (assumes 8-bit images, i.e. values in
# 0-255, so the result lies in [0, 1] - TODO confirm).
# NOTE(review): dividing an integer array by an int yields float64, which
# takes 8 bytes per value; consider float32 if memory becomes a problem.
images_f_2=images_f/255
# Cell output: shape of the scaled image array.
images_f_2.shape

In [None]:
labels_f=np.array(labels)

## Splitting The Dataset into test and train

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 25% of the samples for testing. No random_state is passed, so the
# split is not reproducible between runs.
X_train,X_test,Y_train,Y_test=train_test_split(images_f_2,labels_f,test_size=0.25)

In [None]:
Y_train[0:5]

In [None]:
# Split the label columns into one target array per model output.
# Column 1 holds the gender and column 0 the age; the list order is
# [gender, age] so it lines up with the model outputs [sex_out, age_out].
Y_train_2=[Y_train[:,1],Y_train[:,0]]
Y_test_2=[Y_test[:,1],Y_test[:,0]]

In [None]:
Y_train_2[0][0:5]

In [None]:
Y_train_2[1][0:5]

## Defining the Model

In [None]:
def Convolution(input_tensor, filters):
    """Return `input_tensor` passed through conv -> dropout -> ReLU.

    The convolution uses a 3x3 kernel, stride 1, "same" padding and an L2
    kernel regulariser of 0.001. Dropout (rate 0.1) is applied to the raw
    convolution output, before the ReLU activation.

    Args:
        input_tensor: Keras tensor to convolve.
        filters: number of output channels of the convolution.

    Returns:
        The activated Keras tensor.
    """
    conv_layer = Conv2D(
        filters=filters,
        kernel_size=(3, 3),
        padding="same",
        strides=(1, 1),
        kernel_regularizer=l2(0.001),
    )
    features = conv_layer(input_tensor)
    features = Dropout(0.1)(features)
    return Activation('relu')(features)

In [None]:
def model(input_shape):
    """Build and compile the two-output gender/age CNN.

    Four `Convolution` blocks (32, 64, 128, 256 filters), each followed by
    2x2 max pooling, feed a flattened feature vector into two separate
    heads. Each head is Dense(64, relu) -> Dropout(0.2) -> Dense(1):

    * ``sex_out``: sigmoid output, trained with binary cross-entropy.
    * ``age_out``: ReLU output, trained with mean absolute error.

    Args:
        input_shape: shape of one input image, e.g. ``(48, 48, 3)``.

    Returns:
        The compiled Keras ``Model`` with outputs ``[sex_out, age_out]``.
    """
    inputs = Input(input_shape)

    # Shared feature extractor.
    features = inputs
    for n_filters in (32, 64, 128, 256):
        features = Convolution(features, n_filters)
        features = MaxPooling2D(pool_size=(2, 2))(features)
    flatten = Flatten()(features)

    # Both heads branch off the same flattened features.
    dense_1 = Dense(64, activation='relu')(flatten)
    dense_2 = Dense(64, activation='relu')(flatten)
    drop_1 = Dropout(0.2)(dense_1)
    drop_2 = Dropout(0.2)(dense_2)
    output_1 = Dense(1, activation='sigmoid', name='sex_out')(drop_1)
    output_2 = Dense(1, activation='relu', name='age_out')(drop_2)

    # Named `net` so the local does not shadow this function's own name.
    net = Model(inputs=[inputs], outputs=[output_1, output_2])
    net.compile(
        # Losses are positional: one per output, in output order.
        loss=["binary_crossentropy", "mae"],
        optimizer='rmsprop',
        # Metrics are keyed by output name. A flat list is handled
        # differently across Keras versions (every metric on every output,
        # or one metric per output by position). The mapping always yields
        # accuracy for the classifier and MSE for the regressor, and keeps
        # the `sex_out_accuracy` history key the callbacks monitor.
        metrics={'sex_out': ['accuracy'], 'age_out': ['mse']},
    )
    return net

In [None]:
model_ag=model((48,48,3))

In [None]:
model_ag.summary()

## Setting Up Callbacks and Training the Model

In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint

In [None]:
# Path the best model (by validation gender accuracy) is written to.
fle_s = 'Age_Sex_Detection.keras'
# Save the full model after every epoch in which the monitored metric improves.
checkpoint = ModelCheckpoint(
    fle_s,
    monitor='val_sex_out_accuracy',
    verbose=1,
    save_best_only=True,
    save_weights_only=False,
    mode="max",
    save_freq='epoch',
)
# Stop after 50 epochs without improvement and roll back to the best weights.
# restore_best_weights is a boolean flag; the string 'True' only worked
# because any non-empty string is truthy.
Early_stop = tf.keras.callbacks.EarlyStopping(
    patience=50,
    monitor='val_sex_out_accuracy',
    restore_best_weights=True,
    mode='max',
)
callback_list = [checkpoint, Early_stop]

In [None]:
# Train for up to 150 epochs with the checkpoint / early-stopping callbacks.
# NOTE(review): the held-out test split is passed as validation data, so the
# callbacks select the model on the same data that is later used for
# evaluation - consider carving out a separate validation split.
history_ag=model_ag.fit(X_train,Y_train_2,batch_size=128,validation_data=(X_test,Y_test_2),epochs=150, callbacks=callback_list)

## Model Evaluation

In [None]:
model_ag.evaluate(X_test,Y_test_2)

In [None]:
pred=model_ag.predict(X_test)

In [None]:
pred[1]

## Plotting Loss

In [None]:
plt.plot(history_ag.history['loss'])
plt.plot(history_ag.history['val_loss'])
plt.title('Model Loss')
# Set the labels through the current axes. The previous `plt.xlabel = (...)`
# form assigned a string over the pyplot function instead of calling it, so
# no label was drawn and later plt.xlabel(...) calls broke.
plt.gca().set_xlabel('Epoch')
plt.gca().set_ylabel('Loss')
plt.legend(['Train', 'Validation'], loc='upper left')
plt.subplots_adjust(top=1.0, bottom=0.0, right=0.95, left=0, hspace=0.25, wspace=0.35)

## Plotting Sex Accuracy

In [None]:
plt.plot(history_ag.history['sex_out_accuracy'])
plt.plot(history_ag.history['val_sex_out_accuracy'])
plt.title('Model Accuracy')
# Set the labels through the current axes. The previous `plt.xlabel = (...)`
# form assigned a string over the pyplot function instead of calling it, so
# no label was drawn. Going through the axes also works when plt.xlabel has
# already been overwritten earlier in the session.
plt.gca().set_xlabel('Epoch')
plt.gca().set_ylabel('Accuracy')
plt.legend(['Train', 'Validation'], loc='upper left')
plt.subplots_adjust(top=1.0, bottom=0.0, right=0.95, left=0, hspace=0.25, wspace=0.35)
plt.grid()

## Plotting Actual vs. Predicted Age

In [None]:
# Scatter the predicted ages against the true ages of the test split and
# overlay the dashed y = x line spanning the range of the true ages.
actual_age = Y_test_2[1]
age_span = [actual_age.min(), actual_age.max()]

fig, ax = plt.subplots()
ax.scatter(actual_age, pred[1])
ax.plot(age_span, age_span, 'k--', lw=4)
ax.set(xlabel='Actual Age', ylabel='Predicted Age')
plt.show()

## Making Report of the Model (Gender Only)

In [None]:
# Round the gender-head outputs to class labels (0 or 1) in one vectorised
# step. The result is a flat list of Python ints, one per test sample.
# This also avoids calling int() on 1-element arrays, which recent NumPy
# versions deprecate.
Pred_l = np.round(pred[0]).astype(int).ravel().tolist()

In [None]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

In [None]:
report=classification_report(Y_test_2[0],Pred_l)
print(report)

## Making Heatmap of the Model (Gender only)

In [None]:
results=confusion_matrix(Y_test_2[0],Pred_l)

In [None]:
print(results)

In [None]:
import seaborn as sns
sns.heatmap(results,annot=True, fmt=".0f")

## Testing some sample images

In [None]:
def test_image(ind,images_f,images_f_2,model):
    """Show one sample and print the model's age and gender prediction for it.

    Args:
        ind: index of the sample to inspect.
        images_f: images used for display (indexed with `ind`).
        images_f_2: model-ready images (indexed with `ind`) fed to `model`.
        model: object whose ``predict`` returns ``[gender_pred, age_pred]``
            for a batch of images.

    Returns:
        None. The image is drawn with matplotlib and the results are printed.
    """
    plt.imshow(images_f[ind])
    image_test = images_f_2[ind]
    # predict works on batches, so wrap the single image in a batch of one.
    pred_l = model.predict(np.array([image_test]))
    sex_f = ['Male', 'Female']
    # Pull out plain scalars before rounding: calling int() directly on a
    # 1-element array is deprecated in recent NumPy versions.
    age = int(np.round(np.ravel(pred_l[1])[0]))
    sex = int(np.round(np.ravel(pred_l[0])[0]))
    print("Predicted Age is "+ str(age))
    print("Predicted Gender is "+ sex_f[sex])

In [None]:
test_image(45,images_f,images_f_2,model_ag)

In [None]:
test_image(123,images_f,images_f_2,model_ag)

In [None]:
test_image(543,images_f,images_f_2,model_ag)

In [None]:
test_image(2343,images_f,images_f_2,model_ag)