# Importing Required Packages


In [None]:
#@title
import numpy as np
import matplotlib.pyplot as plt
from skimage import feature # This pacakge is used for LBP feature extraction
from sklearn import svm # This pacakge is used for svm classification
from sklearn import metrics
import sys
%matplotlib inline
import cv2
import seaborn as sns # This pacakge is used for better visualization of data (e.g confusion matrix)
import tensorflow as tf
%load_ext tensorboard
from numpy import asarray



```

```

# Mounting the drive and unzipping the dataset

In [None]:
#We mount our google drive to have access to the data files
from google.colab import drive
drive.mount('/content/gdrive')

In [None]:
# Change the directory to the file directory.
%cd filepath


In [None]:
#Unzip the dataset you have
!pwd
!unzip dataset.zip

# Data preprocessing and Splitting


In [None]:
# Build the list of class names ("symbols") from the sub-folder names of the
# dataset directory. The position of a name in `symbols` is later used as the
# integer class label (see getData, which calls symbols.index).
# Change folder_path to the actual path where the dataset was unzipped.
# If the folder names cannot be read this way, refer to the Python notebook for
# SVM, where an alternative way to get the class names is defined.
# NOTE(review): os.listdir returns entries in arbitrary order, so the
# name -> label numbering may differ between machines/runs -- confirm whether
# a fixed (e.g. sorted) order is wanted.
symbols = []
import os
folder_path= #filepath                #Change filepath to the filepath you have.
for file in os.listdir(folder_path):
    folder = os.path.join(folder_path, file)
    # Only directories are classes; loose files in the dataset root are ignored.
    if os.path.isdir(folder):
        symbols.append(os.path.basename(folder))
print (symbols)


In [None]:
#@title
#This section of the program retrieves the individual images from all the class folders into the list 'data'.
path_actual=#filepath                #Change filepath to the filepath you have.
def getData():
    """Load every readable image of every known class as a grayscale array.

    Reads two module-level names: ``path_actual`` (the dataset root) and
    ``symbols`` (the class names; a name's position in the list is its
    integer label).

    A directory entry is treated as a class only when its name is *exactly*
    one of ``symbols``. Substring matching would load a class several times
    whenever its name is contained in another entry's name. Files that
    OpenCV cannot decode are skipped instead of being stored as ``None``.

    Prints the number of images loaded.

    Returns:
        list: ``[image, symbol_class]`` pairs, where ``image`` is the array
        returned by ``cv2.imread(..., cv2.IMREAD_GRAYSCALE)`` and
        ``symbol_class`` is the index of the class name in ``symbols``.
    """
    import os

    data = []
    # Keep the directory-listing order so the seeded shuffle done by
    # split_dataset sees the samples in the same order as before.
    for folder in os.listdir(path_actual):
        if folder not in symbols:
            continue
        path = os.path.join(path_actual, folder)
        if not os.path.isdir(path):
            # A stray file that happens to share a class name.
            continue
        symbol_class = symbols.index(folder)
        for filename in os.listdir(path):
            image = cv2.imread(os.path.join(path, filename),
                               cv2.IMREAD_GRAYSCALE)
            if image is None:
                # cv2.imread signals an unreadable/non-image file with None
                # rather than raising; such entries cannot be trained on.
                continue
            data.append([image, symbol_class])
    print(len(data))
    return data

In [None]:
#This section of the code splits the dataset into a training set, a validation set and a test set. The dataset has a total of 5000 images:
#3000 images are used for training, 1000 for validation and 1000 for testing. The code ensures that the training images are spread evenly across all classes (300 per class), and likewise the validation images (100 per class).
#x_train and y_train are the training data and labels respectively, and x_test and y_test are the test data and labels respectively.
#x_valid and y_valid are the validation data and labels.
def split_dataset(dataset, train_per_class=300, valid_per_class=100):
    """Split ``[image, label]`` pairs into per-class balanced subsets.

    The dataset is shuffled with a fixed seed (128) so the split is
    reproducible. Samples are then assigned in order: the first
    ``train_per_class`` samples of each label go to the training set, the
    next ``valid_per_class`` of each label go to the validation set, and
    everything else goes to the test set.

    Note: ``dataset`` is shuffled in place and the global ``random`` module
    is re-seeded, exactly as callers of the original version observed.

    Args:
        dataset: list of ``[image, label]`` pairs; labels must be hashable.
        train_per_class: maximum number of training samples per label.
        valid_per_class: maximum number of validation samples per label.

    Returns:
        tuple: ``(x_train, y_train, x_valid, y_valid, x_test, y_test)``,
        each a plain Python list.
    """
    import random
    from collections import Counter

    random.seed(128)
    random.shuffle(dataset)

    x_train, y_train = [], []
    x_valid, y_valid = [], []
    x_test, y_test = [], []

    # Running per-label tallies: O(1) per sample instead of re-scanning the
    # label lists with list.count on every iteration.
    train_seen = Counter()
    valid_seen = Counter()

    for image, label in dataset:
        if train_seen[label] < train_per_class:
            train_seen[label] += 1
            x_train.append(image)
            y_train.append(label)
        elif valid_seen[label] < valid_per_class:
            valid_seen[label] += 1
            x_valid.append(image)
            y_valid.append(label)
        else:
            x_test.append(image)
            y_test.append(label)

    return x_train, y_train, x_valid, y_valid, x_test, y_test

In [None]:
#We are initializing 'Data_ANN' by calling the getData() function we created above.
Data_ANN= getData()


In [None]:
#We are using the split_dataset function with the 'Data_ANN' object we created above, which returns our training, validation and testing data and labels.
x_train, y_train,x_valid,y_valid, x_test, y_test = split_dataset(Data_ANN)
print (len(x_train))



# Data Visualization

In [None]:
# View a few training images in a 2x2 grid and print the label of each one.
# One loop replaces four copy-pasted stanzas; the sample indices are the same
# ones used before (20, 30, 112, 400), shown in the same grid positions.
fig = plt.figure()
for position, img_index in enumerate((20, 30, 112, 400), start=1):
    ax = fig.add_subplot(2, 2, position)
    ax.axis('off')
    ax.imshow(x_train[img_index])
    # y_train holds integer class indices; symbols maps them back to names.
    print(symbols[y_train[img_index]])


# Raw pixels features


In [None]:
# Report the shape of every split: images first, then labels.
print(*(np.shape(part)
        for part in (x_train, x_valid, x_test, y_train, y_valid, y_test)),
      sep="\n")

# Separate NumPy copies of the images and labels, reserved for the ANN that is
# trained directly on raw pixels.
x_train_raw, y_train_raw = np.array(x_train), np.array(y_train)
x_valid_raw, y_valid_raw = np.array(x_valid), np.array(y_valid)
x_test_raw, y_test_raw = np.array(x_test), np.array(y_test)

# Creating ANN for raw pixels features.

In [None]:
# Fully connected classifier for raw pixels: the 45x45 image is flattened,
# passed through four 128-unit ReLU layers, and mapped to 10 softmax outputs.
model_raw = tf.keras.models.Sequential(
    [tf.keras.layers.Flatten(input_shape=[45,45])]
    + [tf.keras.layers.Dense(128, activation=tf.nn.relu) for _ in range(4)]
    + [tf.keras.layers.Dense(10, activation=tf.nn.softmax)]
)

In [None]:
# Compile the raw-pixel model with the Adam optimizer. The sparse categorical
# cross-entropy loss is used because the labels are integer class indices
# rather than one-hot vectors.
model_raw.compile(optimizer = tf.optimizers.Adam(),
              loss = 'sparse_categorical_crossentropy',
              metrics=['accuracy'])

H_raw=model_raw.fit(x_train_raw, y_train_raw, epochs=30,validation_data=(x_valid_raw, y_valid_raw))
# Author's observations on the epoch count:
# - Training was first tried with 45 epochs, but the model started overfitting
#   after about 30 epochs (accuracy plateaus and the loss starts to increase),
#   so 30 epochs are used here.
# - The same number of epochs is kept for all the ANN models that follow, so
#   that the different feature types are compared under identical settings.
# - In real life, training would be stopped once the loss has not decreased for
#   5 consecutive epochs.

# Learning curves for ANN classifier with raw features

In [None]:
## Learning curves for the raw-pixel model
import matplotlib.pyplot as plt
import pandas as pd

# Every metric recorded by fit(), one line per metric, clipped to [0, 1].
pd.DataFrame(H_raw.history).plot(figsize=(8, 5))
plt.ylim(0, 1)
plt.grid(True)
plt.show()

## Training loss on its own
plt.plot(H_raw.history['loss'])
plt.title("Cost/Loss Curve")
plt.xlabel('Epochs')
plt.ylabel('cost')
plt.show()

In [None]:
model_raw.summary()

# Evaluation of ANN with raw features

In [None]:
model_raw.evaluate(x_test_raw, y_test_raw)
#The model returns an accuracy of 0.71 which is close to the training model accuracy of 0.80. We can notice the slight signs of overfitting.

# Data Preprocessing for Normalization

# Convert every split from a Python list to a NumPy array. Each image is
# flattened to one row per sample so that it can be normalized.
y_train, y_valid, y_test = np.array(y_train), np.array(y_valid), np.array(y_test)
x_train = np.array(x_train).reshape(len(x_train), -1)
x_valid = np.array(x_valid).reshape(len(x_valid), -1)
x_test = np.array(x_test).reshape(len(x_test), -1)

print(np.shape(x_train))
print(np.shape(y_train))

# Normalization of data


In [None]:
# Normalize the flattened images with scikit-learn's StandardScaler.
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
# The scaler is fitted on the training set only; the validation and test sets
# are transformed with those same training statistics, so no information from
# them leaks into the preprocessing.
x_train = scaler.fit_transform (x_train)
x_valid = scaler.transform(x_valid)
x_test = scaler.transform (x_test)

In [None]:
# Restore the 2-D image layout (the images are known to be 45 x 45) so that the
# normalized data can be fed to the LBP feature extractor.
image_shape = [45,45]
x_train = x_train.reshape(-1, *image_shape)
x_valid = x_valid.reshape(-1, *image_shape)
x_test = x_test.reshape(-1, *image_shape)

# LBP Feature extraction for Neural Networks

In [None]:
#LBP CLASS DEFINITION
class LocalBinaryPatterns:
    """Describe an image by the histogram of its uniform LBP codes."""

    def __init__(self, points, radius):
        """Remember the number of sampling points and the sampling radius."""
        self.points, self.radius = points, radius

    def LBPfeatures(self, image, eps=1e-7):
        """Return the normalized LBP histogram of ``image``.

        The image is converted to its "uniform" Local Binary Pattern
        representation, the codes are counted into ``points + 2`` unit-wide
        bins, and the counts are divided by their total (plus ``eps`` to
        avoid dividing by zero) so they sum to approximately 1.
        """
        pattern_codes = feature.local_binary_pattern(
            image, self.points, self.radius, method="uniform"
        )

        # One bin per integer code in [0, points + 2).
        n_codes = self.points + 2
        counts, _edges = np.histogram(
            pattern_codes.ravel(),
            bins=np.arange(0, n_codes + 1),
            range=(0, n_codes),
        )

        return counts.astype("float") / (counts.sum() + eps)

In [None]:
# LBP descriptor with 24 sampling points on a circle of radius 8.
desc = LocalBinaryPatterns(24, 8)

# NOTE(review): x_train was standardized to floating-point values above;
# skimage's local_binary_pattern warns that floating-point images may give
# unexpected results -- confirm this is intended.
# One LBP histogram per training image, in the same order as x_train.
data_train = [desc.LBPfeatures(image) for image in x_train]
# Labels are carried over unchanged, aligned element-for-element with the data.
labels_train = list(y_train)

print(np.shape(data_train))

In [None]:
# LBP descriptor with 24 sampling points on a circle of radius 8, the same
# settings as used for the training set.
desc = LocalBinaryPatterns(24, 8)

# One LBP histogram per validation image, in the same order as x_valid.
data_valid = [desc.LBPfeatures(image) for image in x_valid]
# Labels are carried over unchanged, aligned element-for-element with the data.
labels_valid = list(y_valid)

print(np.shape(data_valid))

In [None]:
# LBP descriptor with 24 sampling points on a circle of radius 8, the same
# settings as used for the training set.
desc = LocalBinaryPatterns(24, 8)

# One LBP histogram per test image, in the same order as x_test.
data_test = [desc.LBPfeatures(image) for image in x_test]
# Labels are carried over unchanged, aligned element-for-element with the data.
labels_test = list(y_test)

print(np.shape(data_test))
print(np.shape(labels_test))

#Reshaping the data for the model


In [None]:

# Turn the LBP outputs into arrays. Each 26-bin histogram becomes a (26, 1)
# column, matching the input shape declared for the LBP model.
data_train = np.array(data_train).reshape(-1, 26, 1)
data_valid = np.array(data_valid).reshape(-1, 26, 1)
labels_train = np.array(labels_train)
labels_valid = np.array(labels_valid)

# Sanity checks: shapes of the feature arrays, then the type of every array.
print(*(np.shape(arr) for arr in (data_train, data_valid)), sep="\n")
print(*(type(arr) for arr in
        (data_train, data_valid, labels_train, labels_valid)), sep="\n")



```

```

# Creating ANN for LBP feature extraction


In [None]:
# Same architecture as the raw-pixel model, but fed with the (26, 1) LBP
# histogram: flatten, four 128-unit ReLU layers, 10 softmax outputs.
model_LBP = tf.keras.models.Sequential(
    [tf.keras.layers.Flatten(input_shape=[26,1])]
    + [tf.keras.layers.Dense(128, activation=tf.nn.relu) for _ in range(4)]
    + [tf.keras.layers.Dense(10, activation=tf.nn.softmax)]
)

In [None]:
model_LBP.compile(optimizer = tf.optimizers.Adam(),
              loss = 'sparse_categorical_crossentropy',
              metrics=['accuracy'])

H=model_LBP.fit(data_train, labels_train, epochs=30,validation_data=(data_valid, labels_valid))
#Since we are using 30 epochs for raw features, and we are trying to compare the output between different features, it is important to use the same parameters.
#Hence, we are using 30 epochs in this model.
#In real life, since the loss curve is still going down, we should train for more epochs before loss stops decreasing.

# Learning curves for ANN classifier with LBP feature extraction

In [None]:
## Learning curves for the LBP model
import matplotlib.pyplot as plt
import pandas as pd

# Every metric recorded by fit(), one line per metric, clipped to [0, 1].
pd.DataFrame(H.history).plot(figsize=(8, 5))
plt.ylim(0, 1)
plt.grid(True)
plt.show()

## Training loss on its own
plt.plot(H.history['loss'])
plt.title("Cost/Loss Curve")
plt.xlabel('Epochs')
plt.ylabel('cost')
plt.show()

In [None]:
## Use model.summary to create a summary of the model (layers, type, shape, etc.)
model_LBP.summary()



```


#Evaluation of ANN with LBP feature extraction

In [None]:
# Give the LBP test features the same (26, 1) per-sample layout as the
# training data, and turn the test labels into an array.
labels_test = np.array(labels_test)
data_test = np.array(data_test).reshape(-1, 26, 1)
print(np.shape(data_test))

In [None]:
model_LBP.evaluate(data_test, labels_test)

# HOG feature extraction for ANN

In [None]:
# x_train, x_valid and x_test hold the training, validation and testing images
# respectively. This cell computes one HOG feature vector per training image.
print("Extracting features from training dataset...")

# The result is deliberately not stored in a global named `H`: that name holds
# the LBP model's training history, which the LBP plotting cell reads.
# NOTE(review): transform_sqrt is False here although the exercise hint
# suggested True -- presumably because the standardized images contain
# negative values; confirm.
hog_data_train = [
    feature.hog(image, orientations=9, pixels_per_cell=(10,10),
                cells_per_block=(2,2), transform_sqrt=False,
                block_norm="L2-Hys")
    for image in x_train
]
# Labels are carried over unchanged, aligned element-for-element with the data.
hog_labels_train = list(y_train)

print(np.shape(hog_data_train))
print(np.shape(hog_labels_train))

In [None]:
# x_train, x_valid and x_test hold the training, validation and testing images
# respectively. This cell computes one HOG feature vector per validation image.
print("Extracting features from validation dataset...")

# The result is deliberately not stored in a global named `H`: that name holds
# the LBP model's training history, which the LBP plotting cell reads.
hog_data_valid = [
    feature.hog(image, orientations=9, pixels_per_cell=(10,10),
                cells_per_block=(2,2), transform_sqrt=False,
                block_norm="L2-Hys")
    for image in x_valid
]
# Labels are carried over unchanged, aligned element-for-element with the data.
hog_labels_valid = list(y_valid)

print(np.shape(hog_data_valid))
print(np.shape(hog_labels_valid))

In [None]:
# x_train, x_valid and x_test hold the training, validation and testing images
# respectively. This cell computes one HOG feature vector per test image.
print("Extracting features from testing dataset...")

# The result is deliberately not stored in a global named `H`: that name holds
# the LBP model's training history, which the LBP plotting cell reads.
hog_data_test = [
    feature.hog(image, orientations=9, pixels_per_cell=(10,10),
                cells_per_block=(2,2), transform_sqrt=False,
                block_norm="L2-Hys")
    for image in x_test
]
# Labels are carried over unchanged, aligned element-for-element with the data.
hog_labels_test = list(y_test)

print(np.shape(hog_data_test))
print(np.shape(hog_labels_test))

# Data transformation for HOG feature extraction

In [None]:
# Turn the HOG outputs into arrays before feeding them to the model. Each
# 324-value descriptor becomes a (324, 1) column, matching the input shape
# declared for the HOG model.
hog_data_train = np.array(hog_data_train).reshape(-1, 324, 1)
hog_data_valid = np.array(hog_data_valid).reshape(-1, 324, 1)
hog_data_test = np.array(hog_data_test).reshape(-1, 324, 1)
hog_labels_train = np.array(hog_labels_train)
hog_labels_valid = np.array(hog_labels_valid)
hog_labels_test = np.array(hog_labels_test)

# Sanity checks: shapes of the train/validation features, then array types.
print(*(np.shape(arr) for arr in (hog_data_train, hog_data_valid)), sep="\n")
print(*(type(arr) for arr in
        (hog_data_train, hog_data_valid, hog_labels_train, hog_labels_valid)),
      sep="\n")

# Creating ANN for HOG feature extraction

In [None]:
# Same architecture as the other models, but fed with the (324, 1) HOG
# descriptor: flatten, four 128-unit ReLU layers, 10 softmax outputs.
model_hog = tf.keras.models.Sequential(
    [tf.keras.layers.Flatten(input_shape=[324,1])]
    + [tf.keras.layers.Dense(128, activation=tf.nn.relu) for _ in range(4)]
    + [tf.keras.layers.Dense(10, activation=tf.nn.softmax)]
)

In [None]:
model_hog.compile(optimizer = tf.optimizers.Adam(),
              loss = 'sparse_categorical_crossentropy',
              metrics=['accuracy'])

H_hog=model_hog.fit(hog_data_train, hog_labels_train, epochs=30,validation_data=(hog_data_valid, hog_labels_valid))

# Learning curves for ANN classifier with HOG feature extraction

In [None]:
## Learning curves for the HOG model
import matplotlib.pyplot as plt
import pandas as pd

# Every metric recorded by fit(), one line per metric, clipped to [0, 1].
pd.DataFrame(H_hog.history).plot(figsize=(8, 5))
plt.ylim(0, 1)
plt.grid(True)
plt.show()

## Training loss on its own
plt.plot(H_hog.history['loss'])
plt.title("Cost/Loss Curve")
plt.xlabel('Epochs')
plt.ylabel('cost')
plt.show()
#From the learning curve, we can observe that the accuracy stops increasing and the loss stops decreasing after a few epochs.
#In real world, we would reduce the number of epochs to reduce the computation load for this model. The ideal epochs would be around 10 since the loss started increasing after it.
#But, to keep the parameters similar between models for different features, we are going to leave the epochs as it is.

In [None]:
#Evaluating on a test dataset

model_hog.evaluate(hog_data_test, hog_labels_test)
#We get an accuracy of 0.9900 for a test set which is similar to the accuracy we received for training set  which is 0.9970
#This emphasizes the importance of HOG feature extraction since it extracts more features than LBP and has a higher accuracy.
