Topic - Peer Group Prediction

Details about the data:
The dataset consists of human face images that are sorted into folders by age. There are six age groups covering ages 20 to 49 (20-24, 25-29, 30-34, 35-39, 40-44 and 45-49), and each group contains multiple image files.


In [None]:
# importing the libraries
import numpy as np
import os
import cv2
import matplotlib.pyplot as plt
from tensorflow. keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPool2D, Flatten, Dropout
from sklearn.metrics import confusion_matrix
import seaborn as sns
from sklearn.metrics import classification_report
from tensorflow.keras.callbacks import EarlyStopping
import pandas as pd
from pathlib import Path


Read the training images, preprocess them, and split the data into features and targets.

In [None]:
def preprocess_train_data(
    data_dir="/content/drive/MyDrive/AGE DATASET/train_5batch_modified/",
    categories=("2024", "2529", "3034", "3539", "4044", "4549"),
    img_size=50,
):
    """Load the age-group face images and turn them into training arrays.

    Every file inside ``data_dir/<category>`` is read as a grayscale image,
    resized to ``img_size`` x ``img_size`` and labelled with the position of
    its category in ``categories``.

    Parameters
    ----------
    data_dir : str
        Folder that contains one sub-folder per age category.
    categories : sequence of str
        Sub-folder names; the index of a name is used as its class id.
    img_size : int
        Side length, in pixels, that every image is resized to.

    Returns
    -------
    X_train : numpy.ndarray
        Array of shape ``(n_images, img_size, img_size, 1)`` holding the pixel
        values divided by 255 (i.e. in [0, 1] for 8-bit images).
    Y_train : numpy.ndarray
        One-hot targets of shape ``(n_images, len(categories))``.

    Raises
    ------
    ValueError
        If no image could be loaded from any category folder.
    """
    categories = list(categories)
    images = []
    labels = []
    skipped = []
    first_shape_shown = False

    for class_num, category in enumerate(categories):
        # folder that holds the images of this age group
        path = os.path.join(data_dir, category)
        # sorted() keeps the sample order reproducible; os.listdir order is arbitrary
        for file_name in sorted(os.listdir(path)):
            file_path = os.path.join(path, file_name)
            img_array = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
            # cv2.imread signals an unreadable / non-image file by returning None
            if img_array is None:
                skipped.append(file_path)
                continue
            if not first_shape_shown:
                # show the raw size of the first readable image as a sanity check
                print(img_array.shape)
                first_shape_shown = True
            try:
                resized = cv2.resize(img_array, (img_size, img_size))
            except cv2.error:
                # keep loading best-effort, but remember what was dropped
                skipped.append(file_path)
                continue
            images.append(resized)
            labels.append(class_num)

    if not images:
        raise ValueError("No readable images found under " + repr(data_dir))

    # total number of training samples
    print(len(images))
    if skipped:
        # report dropped files instead of discarding them silently
        print("Skipped", len(skipped), "unreadable file(s), first one:", skipped[0])

    # add the single channel axis expected by Conv2D and scale the pixel values
    X_train = np.array(images).reshape(-1, img_size, img_size, 1) / 255
    print(X_train.shape)

    Y_train = np.array(labels)
    print(Y_train.shape)
    # fix the one-hot width to the number of categories so it always matches
    # the model output, even when the last category contributed no images
    Y_train = to_categorical(Y_train, num_classes=len(categories))
    print(Y_train.shape)

    return X_train, Y_train


Building the training model

In [None]:
# build, train and save the CNN classifier
def build_train_model(X_train, y_train, validation_split=0.2, epochs=30,
                      batch_size=10, patience=3,
                      model_path='5batch_age_model_v2_.h5', seed=42):
    """Build a small CNN, train it with early stopping and save it to disk.

    Parameters
    ----------
    X_train : numpy.ndarray
        Image tensor of shape ``(n_samples, height, width, channels)``.
    y_train : numpy.ndarray
        One-hot targets of shape ``(n_samples, n_classes)``.
    validation_split : float
        Fraction of the (shuffled) samples held out to compute ``val_loss``.
        With ``0`` no validation set exists and early stopping watches the
        training ``loss`` instead.
    epochs : int
        Maximum number of training epochs.
    batch_size : int
        Mini-batch size passed to ``fit``.
    patience : int
        Epochs without improvement tolerated before training stops.
    model_path : str
        File the trained model is written to (HDF5 format for ``.h5``).
    seed : int
        Seed of the shuffle performed before the validation split.

    Returns
    -------
    The fitted Keras ``Sequential`` model.
    """
    X_train = np.asarray(X_train)
    y_train = np.asarray(y_train)

    # Keras takes the validation split from the END of the arrays, before any
    # shuffling. If the samples are grouped by class, the held-out part would
    # contain only the last class(es), so shuffle once with a fixed seed first.
    order = np.random.default_rng(seed).permutation(len(X_train))
    X_train = X_train[order]
    y_train = y_train[order]

    # derive the tensor sizes from the data instead of hard-coding them
    input_shape = X_train.shape[1:]
    num_classes = y_train.shape[1]

    CNN_model = Sequential()
    # convolution layer extracts local features
    CNN_model.add(Conv2D(filters=32, kernel_size=(4, 4), input_shape=input_shape, activation='relu'))
    # max pooling keeps the strongest responses and halves the resolution
    CNN_model.add(MaxPool2D(pool_size=(2, 2)))
    # flatten the feature maps for the dense layers
    CNN_model.add(Flatten())
    # dropout to reduce overfitting
    CNN_model.add(Dropout(0.25))
    # fully connected layers
    CNN_model.add(Dense(128, activation='relu'))
    CNN_model.add(Dense(64, activation='relu'))
    # output layer: one probability per class
    CNN_model.add(Dense(num_classes, activation='softmax'))
    print('CNN Model details \n\n\n')
    CNN_model.summary()

    # 'val_loss' only exists when validation data is supplied; without it the
    # callback can never trigger, so fall back to the training loss
    monitor = 'val_loss' if validation_split and validation_split > 0 else 'loss'
    # restore_best_weights makes the saved model the best epoch, not the last one
    early_stop = EarlyStopping(monitor=monitor, patience=patience, restore_best_weights=True)
    CNN_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    print('Training Started \n\n\n')

    CNN_model.fit(
        X_train,
        y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_split=validation_split,
        callbacks=[early_stop],
    )
    print('Training Completed \n\n\n')
    CNN_model.save(model_path)  # creates the model file, by default '5batch_age_model_v2_.h5'
    return CNN_model


In [None]:
# Driver cell: runs the whole training pipeline end to end.
if __name__ == '__main__':
    # load the images from disk; returns the scaled image tensor and the one-hot targets
    X_train, Y_train = preprocess_train_data()
    # build the CNN, train it on the loaded data and save the fitted model to disk
    CNN_model = build_train_model(X_train, Y_train)
  

(128, 128)
26036
(26036, 50, 50, 1)
(26036,)
(26036, 6)
CNN Model details 



Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 47, 47, 32)        544       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 23, 23, 32)       0         
 )                                                               
                                                                 
 flatten (Flatten)           (None, 16928)             0         
                                                                 
 dropout (Dropout)           (None, 16928)             0         
                                                                 
 dense (Dense)               (None, 128)               2166912   
                                                                 
 dense_1 (Dense)             (None, 64)     



Epoch 2/30



Epoch 3/30



Epoch 4/30



Epoch 5/30



Epoch 6/30



Epoch 7/30



Epoch 8/30



Epoch 9/30



Epoch 10/30



Epoch 11/30



Epoch 12/30



Epoch 13/30



Epoch 14/30



Epoch 15/30



Epoch 16/30



Epoch 17/30



Epoch 18/30



Epoch 19/30



Epoch 20/30



Epoch 21/30



Epoch 22/30



Epoch 23/30



Epoch 24/30



Epoch 25/30



Epoch 26/30



Epoch 27/30



Epoch 28/30



Epoch 29/30



Epoch 30/30



Training Completed 



