In [1]:
import matplotlib
matplotlib.use("Agg")
 
# import the necessary packages
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from keras.models import Sequential
from keras.layers.core import Dense
from keras.optimizers import SGD
from imutils import paths
import matplotlib.pyplot as plt
import numpy as np
import argparse
import random
import pickle
import cv2
from PIL import Image
import os

Using TensorFlow backend.


In [2]:
# start from empty containers: `data` collects pixel arrays, `labels` the
# matching class names (re-run this cell before re-running the loader)
data, labels = [], []

In [3]:
# collect every image path under images/, sort for a stable starting order,
# then shuffle with a fixed seed so the ordering is reproducible across runs
imagePaths = sorted(paths.list_images('images/'))
random.seed(42)
random.shuffle(imagePaths)

In [4]:
# peek at the first five shuffled paths; the parent directory name is the class label
imagePaths[:5]

['images/katrina_kaif/268678a08a4542e78a35414cd6ee4263.jpg',
 'images/akshay_kumar/c653ac1e91d94af1960e41e33f06ae82.jpeg',
 'images/katrina_kaif/82eb7af9a60344c3a4d6bffde6761836.jpg',
 'images/akshay_kumar/fdd6bc1075204110a18ab3adef7a945e.jpg',
 'images/nawazuddin_siddiqui/186587c23f914fdba51e3ed41b33387f.jpg']

## Resize, flatten and append to the data and label lists
### Flattening: 32 x 32 x 3 = 3072 values per image

In [5]:
# Load each image, shrink it to 32x32 and flatten it to a 3072-element
# vector (32 * 32 * 3), recording the class label alongside it.
for imagePath in imagePaths:
    image = cv2.imread(imagePath)

    # cv2.imread signals an unreadable/corrupt file by returning None rather
    # than raising; skip it here so cv2.resize does not crash mid-loop and
    # data/labels stay aligned.
    if image is None:
        print("[WARN] skipping unreadable image: {}".format(imagePath))
        continue

    data.append(cv2.resize(image, (32, 32)).flatten())

    # the class label is the name of the directory that contains the image
    labels.append(imagePath.split(os.path.sep)[-2])

In [6]:
# show the first flattened image vector
data[0]

array([255, 255, 255, ...,  49,  83, 137], dtype=uint8)

## Scale the raw pixel intensities from [0, 255] to the range [0, 1]

In [7]:
# stack the per-image vectors into one float array and rescale the 8-bit
# pixel values into [0, 1]; labels become an array as well
data = np.asarray(data, dtype="float") / 255.0
labels = np.asarray(labels)

## Train/test split (75% train, 25% test)

In [19]:
# hold out 25% of the samples for testing and train on the remaining 75%;
# the fixed random_state keeps the partition reproducible
trainX, testX, trainY, testY = train_test_split(
    data,
    labels,
    test_size=0.25,
    random_state=42,
)

In [20]:
# One-hot encode the string class labels. The binarizer is fitted on the
# training labels and then reused, unchanged, for the test labels.
# NOTE: for a 2-class problem LabelBinarizer does not return a vector per
# sample; use Keras' to_categorical there instead.
lb = LabelBinarizer()
lb.fit(trainY)
trainY = lb.transform(trainY)
testY = lb.transform(testY)

In [32]:
# define a fully connected 3072-1024-512-256-<n_classes> network using Keras:
# the 3072 inputs are the flattened 32x32x3 image, followed by three sigmoid
# layers and a softmax output
model = Sequential()
model.add(Dense(1024, input_shape=(3072,), activation="sigmoid"))
model.add(Dense(512, activation="sigmoid"))  # hidden 1
model.add(Dense(256, activation="sigmoid"))  # hidden 2
# one output unit per class known to the label binarizer, instead of a
# hard-coded count that silently breaks when the image folders change
model.add(Dense(len(lb.classes_), activation="softmax"))

In [37]:
# training hyper-parameters for the fully connected network
INIT_LR = 0.01  # SGD learning rate
EPOCHS = 70     # passes over the training set

# plain SGD with categorical cross-entropy (switch the loss to
# binary_crossentropy for a 2-class problem)
print("[INFO] training network...")
opt = SGD(lr=INIT_LR)
model.compile(
    loss="categorical_crossentropy",
    optimizer=opt,
    metrics=["accuracy"],
)

[INFO] training network...


In [38]:
# fit on the training split in mini-batches of 9, scoring the held-out
# split after every epoch; H keeps the per-epoch metric history
H = model.fit(
    trainX,
    trainY,
    validation_data=(testX, testY),
    epochs=EPOCHS,
    batch_size=9,
)

Train on 84 samples, validate on 29 samples
Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70
Epoch 10/70
Epoch 11/70
Epoch 12/70
Epoch 13/70
Epoch 14/70
Epoch 15/70
Epoch 16/70
Epoch 17/70
Epoch 18/70
Epoch 19/70
Epoch 20/70
Epoch 21/70
Epoch 22/70
Epoch 23/70
Epoch 24/70
Epoch 25/70
Epoch 26/70
Epoch 27/70
Epoch 28/70
Epoch 29/70
Epoch 30/70
Epoch 31/70
Epoch 32/70
Epoch 33/70
Epoch 34/70
Epoch 35/70
Epoch 36/70
Epoch 37/70
Epoch 38/70
Epoch 39/70
Epoch 40/70
Epoch 41/70
Epoch 42/70
Epoch 43/70
Epoch 44/70
Epoch 45/70
Epoch 46/70
Epoch 47/70
Epoch 48/70
Epoch 49/70
Epoch 50/70
Epoch 51/70
Epoch 52/70
Epoch 53/70
Epoch 54/70
Epoch 55/70
Epoch 56/70
Epoch 57/70
Epoch 58/70
Epoch 59/70
Epoch 60/70
Epoch 61/70
Epoch 62/70
Epoch 63/70


Epoch 64/70
Epoch 65/70
Epoch 66/70
Epoch 67/70
Epoch 68/70
Epoch 69/70
Epoch 70/70


In [36]:
# append this run's settings and final-epoch metrics to the observation log
history = H.history
observation_list = [EPOCHS, INIT_LR] + [
    history[key][-1] for key in ('loss', 'val_loss', 'acc', 'val_acc')
]
with open('./observations/observation_model.txt', 'a') as file_out:
    file_out.write("{}\n".format(observation_list))

## CNN


### Conv2D( number_of_filters, (size_of_filter) )

In [25]:
# (Re)create the observations CSV with just its header row.
# The file is opened in a `with` block so the header is flushed and the handle
# closed before any later cell appends to the same file; a dangling buffered
# handle could otherwise write the header over already-appended rows.
headers = 'EPOCH,INITIAL LEARNING RATE,BATCH SIZE,LOSS,VAL_LOSS,ACCURACY,VAL_ACC\n'
with open('./observations/observation.csv', 'w') as out:
    out.write(headers)

70

In [16]:
# import the necessary packages
from keras.models import Sequential
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.layers.core import Activation
from keras.layers.core import Flatten
from keras.layers.core import Dropout
from keras.layers.core import Dense
from keras import backend as K

In [92]:
class SymNet:
    """Small VGG-style convolutional network for image classification."""

    @staticmethod
    def build(width, height, depth, classes):
        """Construct the (uncompiled) Keras model.

        Args:
            width: input image width in pixels.
            height: input image height in pixels.
            depth: number of image channels.
            classes: number of output classes, i.e. the size of the final
                softmax layer.

        Returns:
            A ``Sequential`` model: one CONV block with 32 filters, a double
            CONV block with 64 filters, a 512-unit dense layer and a softmax
            classifier with ``classes`` outputs.
        """
        # default to "channels last" ordering; the channel axis is the last one
        model = Sequential()
        inputShape = (height, width, depth)
        chanDim = -1

        # the backend may be configured for "channels first" instead
        if K.image_data_format() == "channels_first":
            inputShape = (depth, height, width)
            chanDim = 1

        # CONV => RELU => BN => POOL => DROPOUT
        model.add(Conv2D(32, (3, 3), padding="same",
                         input_shape=inputShape))
        model.add(Activation("relu"))
        model.add(BatchNormalization(axis=chanDim))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.25))

        # (CONV => RELU => BN) * 2 => POOL => DROPOUT
        model.add(Conv2D(64, (3, 3), padding="same"))
        model.add(Activation("relu"))
        model.add(BatchNormalization(axis=chanDim))
        model.add(Conv2D(64, (3, 3), padding="same"))
        model.add(Activation("relu"))
        model.add(BatchNormalization(axis=chanDim))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.25))

        # single fully connected block: FC => RELU => BN => DROPOUT
        model.add(Flatten())
        model.add(Dense(512))
        model.add(Activation("relu"))
        model.add(BatchNormalization())
        model.add(Dropout(0.5))

        # softmax classifier sized by the `classes` argument (this was a
        # hard-coded 5, which ignored the parameter)
        model.add(Dense(classes))
        model.add(Activation("softmax"))

        return model

**Note:** Batch normalization normalizes the activations of a given input volume before passing them on to the next layer in the network. It has been shown to be very effective at reducing the number of epochs required to train a CNN, and at stabilizing training itself.

In [93]:
# set the matplotlib backend so figures can be saved in the background
import matplotlib
matplotlib.use("Agg")

# import the necessary packages
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import SGD
from imutils import paths
import matplotlib.pyplot as plt
import numpy as np
import argparse
import random
import pickle
import cv2
import os

# instance used later to call build(); build is a @staticmethod, so
# SymNet.build(...) would work without an instance as well
symnet = SymNet()

##  Load and preprocess our data:

In [94]:
# initialize the data and labels
data = []
labels = []

# grab the image paths and shuffle them with a fixed seed so the ordering is
# reproducible
imagePaths = sorted(list(paths.list_images('images/')))
random.seed(42)
random.shuffle(imagePaths)

# loop over the input images
for imagePath in imagePaths:
    image = cv2.imread(imagePath)

    # cv2.imread signals an unreadable/corrupt file by returning None rather
    # than raising; skip it here so cv2.resize does not crash mid-loop and
    # data/labels stay aligned.
    if image is None:
        print("[WARN] skipping unreadable image: {}".format(imagePath))
        continue

    # resize to 64x64 pixels, the spatial input size the network is built
    # with further down (width=64, height=64); keep the 3-D image (no flatten)
    data.append(cv2.resize(image, (64, 64)))

    # the class label is the name of the directory that contains the image
    labels.append(imagePath.split(os.path.sep)[-2])

# scale the raw pixel intensities to the range [0, 1]
data = np.array(data, dtype="float") / 255.0
labels = np.array(labels)

## Split Data and Binarize Labels

In [101]:
# hold out 25% of the images for testing and train on the remaining 75%;
# the fixed random_state keeps the partition reproducible
trainX, testX, trainY, testY = train_test_split(
    data,
    labels,
    test_size=0.25,
    random_state=42,
)

# One-hot encode the string class labels, fitting the binarizer on the
# training labels and reusing it for the test labels.
# NOTE: for a 2-class problem LabelBinarizer does not return a vector per
# sample; use Keras' to_categorical there instead.
lb = LabelBinarizer()
lb.fit(trainY)
trainY = lb.transform(trainY)
testY = lb.transform(testY)

## Image Augmentation (Recall Lecture 4)

In [100]:
# random rotations, shifts, shears, zooms and horizontal flips applied on the
# fly to the training images
aug = ImageDataGenerator(
    rotation_range=30,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode="nearest",
)

# build the VGG-like CNN for 64x64x3 inputs, passing one class per label
# known to the binarizer
model = symnet.build(
    width=64,
    height=64,
    depth=3,
    classes=len(lb.classes_),
)

## Compile and Train Model

##### Reference: [ML Cheatsheet](https://ml-cheatsheet.readthedocs.io)

In [98]:
# training hyper-parameters for the CNN
INIT_LR = 1e-3  # initial SGD learning rate
EPOCHS = 75     # passes over the training set
BS = 32         # mini-batch size

# SGD with a learning-rate decay of INIT_LR / EPOCHS and categorical
# cross-entropy (switch the loss to binary_crossentropy for a 2-class problem)
print("[INFO] training network...")
opt = SGD(lr=INIT_LR, decay=INIT_LR / EPOCHS)
model.compile(
    loss="categorical_crossentropy",
    optimizer=opt,
    metrics=["accuracy"],
)

# train on augmented batches, scoring the untouched test split each epoch
train_batches = aug.flow(trainX, trainY, batch_size=BS)
H = model.fit_generator(
    train_batches,
    validation_data=(testX, testY),
    steps_per_epoch=len(trainX) // BS,
    epochs=EPOCHS,
)

[INFO] training network...
Epoch 1/75
Epoch 2/75
Epoch 3/75
Epoch 4/75
Epoch 5/75
Epoch 6/75
Epoch 7/75
Epoch 8/75
Epoch 9/75
Epoch 10/75
Epoch 11/75
Epoch 12/75
Epoch 13/75
Epoch 14/75
Epoch 15/75
Epoch 16/75
Epoch 17/75
Epoch 18/75
Epoch 19/75
Epoch 20/75
Epoch 21/75
Epoch 22/75
Epoch 23/75
Epoch 24/75
Epoch 25/75
Epoch 26/75
Epoch 27/75
Epoch 28/75
Epoch 29/75
Epoch 30/75
Epoch 31/75
Epoch 32/75
Epoch 33/75
Epoch 34/75
Epoch 35/75
Epoch 36/75
Epoch 37/75
Epoch 38/75
Epoch 39/75
Epoch 40/75
Epoch 41/75
Epoch 42/75
Epoch 43/75
Epoch 44/75
Epoch 45/75
Epoch 46/75
Epoch 47/75
Epoch 48/75
Epoch 49/75
Epoch 50/75
Epoch 51/75
Epoch 52/75
Epoch 53/75
Epoch 54/75
Epoch 55/75
Epoch 56/75
Epoch 57/75
Epoch 58/75
Epoch 59/75
Epoch 60/75
Epoch 61/75
Epoch 62/75
Epoch 63/75
Epoch 64/75
Epoch 65/75
Epoch 66/75
Epoch 67/75
Epoch 68/75
Epoch 69/75
Epoch 70/75
Epoch 71/75
Epoch 72/75
Epoch 73/75
Epoch 74/75
Epoch 75/75


## Save Observations

In [99]:
# Append this run's hyper-parameters and final-epoch metrics to the
# observations CSV. Values are written in the header's column order:
# EPOCH, INITIAL LEARNING RATE, BATCH SIZE, LOSS, VAL_LOSS, ACCURACY, VAL_ACC
observation_list = [EPOCHS, INIT_LR, BS, H.history['loss'][-1], H.history['val_loss'][-1], H.history['acc'][-1], H.history['val_acc'][-1]]

# write a real comma-separated row; str(list) would embed "[...]" brackets
# and spaces, which does not parse as CSV against the header
with open('./observations/observation.csv', 'a') as file_out:
    file_out.write(','.join(str(value) for value in observation_list) + '\n')

# last expression of the cell, so the notebook displays what was logged
observation_list

## Plot Results

In [21]:
# evaluate the network on the held-out split; argmax turns the one-hot
# targets and the softmax outputs back into class indices
print("[INFO] evaluating network...")
predictions = model.predict(testX, batch_size=32)
print(classification_report(testY.argmax(axis=1),
    predictions.argmax(axis=1), target_names=lb.classes_))

# plot the training loss and accuracy; the x-axis is taken from the recorded
# history itself so it always matches the curves, even if the EPOCHS global
# was changed by another cell since this model was trained
N = np.arange(0, len(H.history["loss"]))
plt.style.use("ggplot")
plt.figure()
plt.plot(N, H.history["loss"], label="train_loss")
plt.plot(N, H.history["val_loss"], label="val_loss")
plt.plot(N, H.history["acc"], label="train_acc")
plt.plot(N, H.history["val_acc"], label="val_acc")
# title names the model actually trained here (the file is symnet_results.png)
plt.title("Training Loss and Accuracy (SymNet)")
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend()
plt.savefig('symnet_results.png')

# save the model and label binarizer to disk; the `with` block guarantees the
# pickle file is closed even if serialization fails
print("[INFO] serializing network and label binarizer...")
model.save('symnet.model')
with open('labels.bin', "wb") as f:
    pickle.dump(lb, f)

[INFO] evaluating network...
                     precision    recall  f1-score   support

       akshay_kumar       0.25      0.50      0.33         4
       katrina_kaif       0.43      0.75      0.55         4
nawazuddin_siddiqui       1.00      0.44      0.62         9
      ranbir_kapoor       0.50      0.20      0.29         5
        zarine_khan       0.50      0.57      0.53         7

          micro avg       0.48      0.48      0.48        29
          macro avg       0.54      0.49      0.46        29
       weighted avg       0.61      0.48      0.49        29

[INFO] serializing network and label binarizer...
