Topic - Peer Group Prediction

Details about the data:
The dataset consists of human face images, organised into folders by age. There are 6 age groups covering ages 20 to 49 (20–24, 25–29, 30–34, 35–39, 40–44 and 45–49), and each age group contains multiple image files.


In [None]:
# importing the libraries
import numpy as np
import os
import cv2
import matplotlib.pyplot as plt
from tensorflow. keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPool2D, Flatten, Dropout
from sklearn.metrics import confusion_matrix
import seaborn as sns
import tensorflow as tf
from sklearn.metrics import classification_report
from tensorflow.keras.callbacks import EarlyStopping
import pandas as pd
from pathlib import Path
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D, BatchNormalization,MaxPooling2D,BatchNormalization,\
                        Permute, TimeDistributed, Bidirectional,GRU, SimpleRNN, LSTM, GlobalAveragePooling2D, SeparableConv2D,\
ZeroPadding2D, Convolution2D, ZeroPadding2D,AveragePooling2D
from keras.optimizers import RMSprop,Adam,Optimizer,Optimizer, SGD


In [None]:
def preprocess_train_data(
    data_dir="/content/drive/MyDrive/AGE DATASET/train_5batch_modified/",
    categories=None,
    img_size=50,
):
    """Load the age-group face images and build the training arrays.

    Every sub-folder of ``data_dir`` listed in ``categories`` is scanned. Each
    image is read in grayscale, resized to ``img_size`` x ``img_size`` and
    labelled with the position of its folder in ``categories``.

    Parameters
    ----------
    data_dir : str
        Folder that contains one sub-folder per age category.
    categories : list of str, optional
        Sub-folder names; the index in this list is the class label.
        Defaults to the six age groups used in this notebook.
    img_size : int
        Side length (pixels) of the square images returned.

    Returns
    -------
    X_train : numpy.ndarray
        Shape ``(n_images, img_size, img_size, 1)``; pixel values divided by 255.
    Y_train : numpy.ndarray
        One-hot targets of shape ``(n_images, len(categories))``.

    Raises
    ------
    ValueError
        If no readable image is found under ``data_dir``.
    """
    if categories is None:
        categories = ["2024", "2529", "3034", "3539", "4044", "4549"]

    features = []
    labels = []
    skipped = []
    first_shape_shown = False

    # enumerate gives the class index directly (no repeated list.index lookups)
    for class_num, category in enumerate(categories):
        path = os.path.join(data_dir, category)
        # sorted() makes the sample order deterministic across runs/filesystems
        for img_name in sorted(os.listdir(path)):
            img_path = os.path.join(path, img_name)
            img_array = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            # cv2.imread signals an unreadable file by returning None, not by raising
            if img_array is None:
                skipped.append(img_path)
                continue
            if not first_shape_shown:
                # shape of the first raw image, before resizing
                print(img_array.shape)
                first_shape_shown = True
            try:
                resized = cv2.resize(img_array, (img_size, img_size))
            except cv2.error:
                # keep loading best-effort, but remember what was dropped
                skipped.append(img_path)
                continue
            features.append(resized)
            labels.append(class_num)

    # report dropped files instead of hiding them
    if skipped:
        print(f"Skipped {len(skipped)} unreadable file(s), e.g. {skipped[0]}")

    if not features:
        raise ValueError(f"No readable images found under {data_dir!r} for categories {categories!r}")

    # total number of training samples
    print(len(features))

    # add the single grayscale channel axis and normalise the pixel values
    X_train = np.array(features).reshape(-1, img_size, img_size, 1) / 255
    print(X_train.shape)

    Y_train = np.array(labels)
    print(Y_train.shape)

    # num_classes pins the one-hot width to the number of categories, even if
    # the last category contributed no readable image
    Y_train = to_categorical(Y_train, num_classes=len(categories))
    print(Y_train.shape)

    return X_train, Y_train


In [None]:
# Load every training image from disk and build the feature/target arrays.
# The shapes printed below are emitted from inside preprocess_train_data().
X_train, Y_train = preprocess_train_data()

(128, 128)
26036
(26036, 50, 50, 1)
(26036,)
(26036, 6)


In [None]:
# sanity check: number of feature samples, then number of target rows
print(len(X_train), len(Y_train), sep="\n")

26036
26036


In [None]:
# Reproducibility: fix the seeds used for shuffling and weight initialisation
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# input shape of one image: (height, width, channels)
INPUT_SHAPE = X_train.shape[1:]

# The samples are stored class by class, and Keras takes `validation_split`
# from the END of the arrays before shuffling. Shuffle once up front so the
# validation slice contains every age group.
VALIDATION_SPLIT = 0.2
shuffle_order = np.random.permutation(len(X_train))
X_train_shuffled = X_train[shuffle_order]
Y_train_shuffled = Y_train[shuffle_order]

# Early stopping and checkpointing both watch validation metrics, which are
# available because fit() below is given a validation split.
Early_Stopper_fn = tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=5, mode="min",
                                                    restore_best_weights=True)
Checkpoint_Model = tf.keras.callbacks.ModelCheckpoint(monitor="val_accuracy", save_best_only=True,
                                                      save_weights_only=True, filepath="./modelcheck")

# general training settings
# use tf.keras consistently instead of mixing `keras` and `tensorflow.keras` objects
optimizer_ = tf.keras.optimizers.Adam()
# one-hot targets + softmax output => categorical cross-entropy
# (a regression loss such as msle is not appropriate for classification)
loss_value_ = "categorical_crossentropy"
req_metrics = ["accuracy"]
# number of output classes follows the one-hot target width
target_classes = Y_train.shape[1]

# Build a small VGG-style CNN (stacked 3x3 conv + pooling blocks); this is
# not the full VGG16 architecture.
layers = tf.keras.layers
VGG_Model = Sequential([
    # block 1
    layers.Conv2D(32, (3, 3), activation="relu", input_shape=INPUT_SHAPE),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2, 2)),
    # block 2 (dropout to reduce overfitting)
    layers.Conv2D(64, (3, 3), activation="relu"),
    layers.Dropout(0.1),
    layers.MaxPooling2D((2, 2)),
    layers.BatchNormalization(),
    # block 3 (dropout to reduce overfitting)
    layers.Conv2D(128, (3, 3), activation="relu"),
    layers.Dropout(0.1),
    layers.MaxPooling2D((2, 2)),
    layers.BatchNormalization(),
    # classifier head
    layers.Flatten(),
    layers.Dense(128, activation="relu"),
    layers.BatchNormalization(),
    layers.Dropout(0.4),
    layers.Dense(64, activation="relu"),
    layers.BatchNormalization(),
    layers.Dropout(0.3),
    # output layer: one probability per age group
    layers.Dense(target_classes, activation="softmax"),
])

# compiling the model
VGG_Model.compile(optimizer=optimizer_, loss=loss_value_, metrics=req_metrics)

# train the model; VGG_Model_ receives the History object returned by fit()
VGG_Model_ = VGG_Model.fit(X_train_shuffled, Y_train_shuffled,
                           validation_split=VALIDATION_SPLIT,
                           callbacks=[Early_Stopper_fn, Checkpoint_Model],
                           epochs=30)
print('done')


Epoch 1/30



Epoch 2/30



Epoch 3/30



Epoch 4/30



Epoch 5/30



Epoch 6/30



Epoch 7/30



Epoch 8/30



Epoch 9/30



Epoch 10/30



Epoch 11/30



Epoch 12/30



Epoch 13/30



Epoch 14/30



Epoch 15/30



Epoch 16/30



Epoch 17/30



Epoch 18/30



Epoch 19/30



Epoch 20/30



Epoch 21/30



Epoch 22/30



Epoch 23/30



Epoch 24/30



Epoch 25/30



Epoch 26/30



Epoch 27/30



Epoch 28/30



Epoch 29/30



Epoch 30/30



done


In [1]:
# VGG_Model_ is the History object returned by fit() and has no summary;
# the layer summary lives on the model itself and must be called.
VGG_Model.summary()

NameError: ignored

In [None]:
# TODO: persist the trained model once the results are acceptable, e.g.
# print('VGG_Model Training Completed \n\n\n')
# VGG_Model.save('5batch_age_model_VGG16_.h5')  # creates an HDF5 file '5batch_age_model_VGG16_.h5'
    

In [None]:
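# TODO: evaluate on a held-out test set. `Model`, `xTest` and `yTest` are not defined in this
# notebook; the trained network is `VGG_Model` and the test arrays still have to be built.
# Model_Results = VGG_Model.evaluate(xTest, yTest)
# print("LOSS:  " + "%.4f" % Model_Results[0])
# print("ACCURACY:  " + "%.2f" % Model_Results[1])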