Firstly, we import the required packages for this task.

# Importing Required Packages

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from skimage import feature # This pacakge is used for LBP feature extraction
from sklearn import svm # This pacakge is used for svm classification
from sklearn import metrics
import sys
%matplotlib inline
import cv2
import seaborn as sns # This pacakge is used for better visualization of data (e.g confusion matrix)
import tensorflow as tf


# Mounting the drive and unzipping the dataset

In [None]:

# Mount Google Drive at /content/gdrive so the data files stored there can be accessed.
from google.colab import drive
drive.mount('/content/gdrive')

In [None]:
# Change the directory
%cd #filepath                #Change filepath to the filepath you have.


In [None]:
#Unzip the dataset
!pwd
!unzip dataset.zip


#  Getting Symbol Names


In [None]:
#We use os to get pathname as well as the folder name which we will be using as labels for our classifiers.
symbols = []
import os
folder_path=#filepath                #Change filepath to the filepath you have.
for file in os.listdir(folder_path):
    folder = os.path.join(folder_path, file)
    if os.path.isdir(folder):
        symbols.append(os.path.basename(folder))
print (symbols)



Please refer to the section below if you have trouble getting the folder names with the section above.

In [None]:
#Alternatively, we can use keras to train a small model and retrieve class names from the trained model.
#This section of the code will create a training set, which will not be used later. Only the class labels created from the data set will be used.
# use this section in case you are unable to get the folder names from the section above.

#train_ds = tf.keras.utils.image_dataset_from_directory(
 # '/content/gdrive/My Drive/42028/Assessment1/',
 # validation_split=0.2,
 # subset="training",
 # seed=123,
 # image_size=(256, 256),
 # batch_size=32)

#symbols = train_ds.class_names
#print(symbols)

# Image retrieval

In [None]:
#This section of the program will retrieve the individual images from all the folders into the variable 'data'.
path_actual=#filepath                #Change filepath to the filepath you have.
def getData():
  """Load every image of every known symbol class as a grayscale array.

  Reads two module-level names: ``path_actual`` (the dataset root folder)
  and ``symbols`` (the list of class names). A sub-folder of ``path_actual``
  is loaded only when its name is exactly one of the entries in ``symbols``.

  Prints the number of images that were loaded.

  Returns:
    A list of ``[image, class_index]`` pairs, where ``class_index`` is the
    position of the folder name in ``symbols``.
  """
  import os
  data = []
  for folder in os.listdir(path_actual):
    # Exact match on the folder name: a substring test would load the same
    # class folder again whenever one symbol name is contained in another
    # folder's name, duplicating its images.
    if folder not in symbols:
      continue
    path = os.path.join(path_actual, folder)
    if not os.path.isdir(path):
      continue
    symbol_class = symbols.index(folder)
    for filename in os.listdir(path):
      image = cv2.imread(os.path.join(path, filename), cv2.IMREAD_GRAYSCALE)
      if image is None:
        # cv2.imread reports an unreadable / non-image file by returning
        # None instead of raising; skip it so no None ends up in the data.
        continue
      data.append([image, symbol_class])
  print(len(data))
  return data

# Splitting data into Training and Test set

In [None]:
#This section of the code splits the dataset into a training set and a test set. The dataset has a total of 5000 images, and each class contains 500 images.
#70% will be used for training and 30% for testing. The code ensures that the 70% of images used for training is proportionate across all classes.
#x_train and y_train are the training data and labels respectively, and x_test and y_test are the test data and labels respectively.
def split_dataset (dataset, train_fraction=0.7, images_per_class=500):
  """Split ``dataset`` into per-class proportionate training and test sets.

  The items are walked in order. For every label, the first
  ``train_fraction * images_per_class`` items go to the training set and all
  later items with that label go to the test set.

  Args:
    dataset: iterable of ``(image, label)`` pairs; labels must be hashable.
    train_fraction: share of each class used for training (default 0.7).
    images_per_class: number of images each class is expected to contain
      (default 500).

  Returns:
    ``(x_train, y_train, x_test, y_test)`` as four lists; the relative order
    of the items inside each list follows ``dataset``.
  """
  from collections import Counter

  train_quota = train_fraction * images_per_class
  # Running per-label count of training items; replaces a full scan of
  # y_train for every item, which made the split quadratic.
  taken = Counter()
  x_train, y_train = [], []
  x_test, y_test = [], []
  for image, label in dataset:
    if taken[label] < train_quota:
      taken[label] += 1
      x_train.append(image)
      y_train.append(label)
    else:
      x_test.append(image)
      y_test.append(label)

  return x_train, y_train, x_test, y_test

In [None]:
#Load the whole dataset by calling the getData() function defined above; the result is stored in 'Data_SVM'.
Data_SVM= getData()



In [None]:
#Pass 'Data_SVM' to split_dataset(), which returns the training and test images and labels.
x_train, y_train, x_test, y_test = split_dataset(Data_SVM)
# Number of training images, followed by the shape of the training image list.
print (len(x_train))
print(np.shape(x_train))

# Data Visualization


In [None]:
# View a few training images and print their corresponding labels.
# One loop fills a 2x5 grid: the n-th sample index is drawn in the n-th cell
# with its axes hidden, and its class name is printed.
# NOTE: the largest index is 3400, so x_train must hold more than 3400 images.
sample_indices = [10, 360, 750, 1100, 1600, 1800, 2100, 2700, 3000, 3400]
fig = plt.figure()
for position, img_index in enumerate(sample_indices, start=1):
  ax = fig.add_subplot(2, 5, position)
  ax.axis('off')
  ax.imshow(x_train[img_index])
  print(symbols[y_train[img_index]])


In [None]:
## Display the shape of each raw split, in the order:
## training images, test images, training labels, test labels.
for split in (x_train, x_test, y_train, y_test):
  print (np.shape(split))


#Section 1: Raw images preprocessing

In [None]:
#Turn the lists of training/testing images and labels into NumPy arrays for the raw-pixel model.
x_train_raw, y_train_raw = np.array(x_train), np.array(y_train)
x_test_raw, y_test_raw = np.array(x_test), np.array(y_test)

In [None]:
# Number of samples in the raw training array.
print (len(x_train_raw))

In [None]:
#Flatten every image into a one-dimensional row (one row per image) to feed into the model.
n_train_rows, n_test_rows = x_train_raw.shape[0], x_test_raw.shape[0]
x_train_raw = x_train_raw.reshape(n_train_rows, -1)
x_test_raw = x_test_raw.reshape(n_test_rows, -1)

# Training SVM for raw images

In [None]:
# Train an SVM classifier on the raw-pixel training data.
# The model uses an RBF kernel, C = 100 and random_state = 42.
# Author's notes on the choice of C: C=100.0 gave a test accuracy of 0.9785, and C=50.0 gave 0.985;
# C=25.0 gave the same accuracy as C=50.
# The subsequent models use C=100 because it gave higher accuracy for LBP and HOG, so C=100 is used here as well for consistency.


model_SVM_RAW = svm.SVC(kernel='rbf', C=100.0, random_state=42)

# Start training the SVM classifier
model_SVM_RAW.fit(x_train_raw, y_train_raw)




In [None]:
# Check the training accuracy of the raw-pixel model (score() on the data it was fitted on).
print("Train set Accuracy: {:.2f}".format(model_SVM_RAW.score(x_train_raw,y_train_raw)))

# Testing SVM with raw images on test data set

In [None]:
#Evaluating the raw-pixel SVM on the test dataset
# No feature extraction is involved here: the flattened pixel rows are
# classified directly. predict() takes the whole sample matrix, so all test
# rows are classified in one call; list() keeps 'raw_predictions' a plain
# list of scalar labels (one per test image).
raw_predictions = list(model_SVM_RAW.predict(x_test_raw))
# Ground-truth labels, in the same order as the predictions.
raw_predict_label = list(y_test)

# Accuracy and confusion matrix for test data set for raw pixels

In [None]:
# Accuracy of the raw-pixel model: true test labels compared with its predictions.
accuracy = metrics.accuracy_score(y_test_raw, raw_predictions)
print("Accuracy on test dataset:",accuracy)

In [None]:
# Compute and print the confusion matrix of the raw-pixel model on the test set
cm_raw  = metrics.confusion_matrix(y_test_raw, raw_predictions)
print(cm_raw)

# Plot the confusion matrix as an annotated heatmap using the seaborn library;
# the figure title shows the accuracy stored in 'accuracy'.
plt.figure(figsize=(9,9))
sns.heatmap(cm_raw, annot=True, fmt=".3f", linewidths=.5, square = True, cmap = 'Blues_r');
plt.ylabel('Actual label');
plt.xlabel('Predicted label');
all_sample_title = 'Accuracy Score: {0}'.format(accuracy)
plt.title(all_sample_title, size = 15);

In [None]:
#Randomly grab a few test images, classify them and draw the predicted label on each one.
images_raw=[]
raw_orig_label = []
# np.random.choice samples with replacement by default, so an index can repeat.
for i in np.random.choice(np.arange(0, len(y_test)), size=(16,)):
  # classify the flattened image with the raw-pixel SVM
  image_raw = x_test_raw[i]
  prediction = model_SVM_RAW.predict(image_raw.reshape(1, -1))
  label = symbols[prediction[0]]
  # append so that one true label is kept per sampled image
  raw_orig_label.append(symbols[y_test[i]])
  image= x_test[i]
  color = (0, 255, 0)
  # stack the grayscale image into 3 channels so the label can be drawn in colour
  image= cv2.merge([image] * 3)
  image = cv2.resize(image, (96, 96), interpolation=cv2.INTER_LINEAR)
  cv2.putText(image, label, (5, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.50,color, 2)
  images_raw.append(image)



In [None]:
## Display the classification results
# Show the first four annotated images (predicted label drawn on each) in a 2x2 grid.
fig = plt.figure()
ax1, ax2, ax3, ax4 = [fig.add_subplot(2, 2, slot) for slot in (1, 2, 3, 4)]
for axis, picked in zip((ax1, ax2, ax3, ax4), (0, 1, 2, 3)):
  axis.imshow(images_raw[picked])

#  Section 2:LBP Class Definition for SVM

In [None]:
#LBP CLASS DEFINITION
class LocalBinaryPatterns:
    """Descriptor that turns an image into a normalized histogram of uniform LBP codes."""

    def __init__(self, points, radius):
        # number of points and radius handed to skimage's local_binary_pattern
        self.radius = radius
        self.points = points

    def LBPfeatures(self, image, eps=1e-7):
        """Return the normalized uniform-LBP histogram of ``image``.

        ``eps`` is added to the histogram total before dividing, so an
        all-zero histogram does not cause a division by zero.
        """
        n_points = self.points
        # Local Binary Pattern representation of the image
        codes = feature.local_binary_pattern(
            image, n_points, self.radius, method="uniform")

        # integer bin edges 0 .. points + 2, i.e. points + 2 bins
        edges = np.arange(0, n_points + 3)
        counts, _ = np.histogram(
            codes.ravel(), bins=edges, range=(0, n_points + 2))

        # normalize so the bins sum to (almost exactly) one
        total = counts.sum() + eps
        return counts.astype("float") / total

In [None]:
# Build the LBP descriptor (24 points, radius 8) and the containers for the
# training histograms and their labels.
desc = LocalBinaryPatterns(24, 8)
data_train = []
labels_train = []

# Walk over the training images, compute the LBP histogram of each one and
# store it together with the label found at the same index of y_train.
for img_index, image in enumerate(x_train):
  hist = desc.LBPfeatures(image)
  labels_train.append(y_train[img_index])
  data_train.append(hist)

#  Creating SVM model with LBP feature extraction



In [None]:
# Train an SVM classifier on the LBP histograms of the training data.
# The model uses an RBF kernel, C = 100 and random_state = 42.
# Author's note: C=50 gave an accuracy of 0.75 whereas C=100 gave an accuracy of 0.78.

model = svm.SVC(kernel='rbf', C=100.0, random_state=42)
# Start training the SVM classifier
model.fit(data_train, labels_train)


# Shapes of the LBP feature list and of the label list used for training
print(np.shape(data_train))
print(np.shape(labels_train))

In [None]:
# Check the training accuracy of the LBP model (score() on the data it was fitted on).
print("Train set Accuracy: {:.2f}".format(model.score(data_train,labels_train)))


# Evaluation on Test dataset after LBP feature extraction


In [None]:
#Evaluating the LBP model on the test dataset
predictions=[]
predict_label=[]
# Extract the LBP features of each test sample and classify it with the trained SVM classifier

for img_index, imag in enumerate(x_test):
  # Extract the LBP histogram with the same descriptor used for training
  histo = desc.LBPfeatures (imag)
  # predict() returns a 1-element array for a single row; keep only the
  # scalar label so 'predictions' is a flat list of class indices
  prediction = model.predict(histo.reshape(1,-1))[0]

  # Store the classification result and the matching true label
  predictions.append(prediction)
  predict_label.append(y_test[img_index])

# Accuracy and evaluation of SVM with LBP extraction

In [None]:
#Calculate the accuracy of the LBP model on the test set
accuracy = metrics.accuracy_score(y_test, predictions)
print("Accuracy on test dataset:",accuracy)
# Compute and print the confusion matrix of the LBP model
cm  = metrics.confusion_matrix(y_test, predictions)
print(cm)

# Plot the confusion matrix as an annotated heatmap using the seaborn library;
# the figure title shows the accuracy stored in 'accuracy'.
plt.figure(figsize=(9,9))
sns.heatmap(cm, annot=True, fmt=".3f", linewidths=.5, square = True, cmap = 'Blues_r');
plt.ylabel('Actual label');
plt.xlabel('Predicted label');
all_sample_title = 'Accuracy Score: {0}'.format(accuracy)
plt.title(all_sample_title, size = 15);

#  Showing some classification results from the SVM model

In [None]:
# Display some classification results on test samples
images = []
LBP_orig_label = []
# randomly select a few test images (np.random.choice samples with
# replacement by default, so an index can repeat)
for i in np.random.choice(np.arange(0, len(y_test)), size=(16,)):
  # classify the image from its LBP histogram
  histog = desc.LBPfeatures(x_test[i])
  prediction = model.predict(histog.reshape(1, -1))
  label = symbols[prediction[0]]
  # append so that one true label is kept per sampled image
  LBP_orig_label.append(symbols[y_test[i]])
  image = x_test[i]
  color = (0, 255, 0)
  # stack the grayscale image into 3 channels so the label can be drawn in colour
  image = cv2.merge([image] * 3)
  image = cv2.resize(image, (96, 96), interpolation=cv2.INTER_LINEAR)
  cv2.putText(image, label, (5, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.50, color, 2)
  images.append(image)

In [None]:
# Shape of one annotated sample image.
np.shape(images[1])

In [None]:
## Display the classification results
# Show four of the annotated images (indices 1, 2, 5 and 6) in a 2x2 grid.
fig = plt.figure()
ax1, ax2, ax3, ax4 = [fig.add_subplot(2, 2, slot) for slot in (1, 2, 3, 4)]
for axis, picked in zip((ax1, ax2, ax3, ax4), (1, 2, 5, 6)):
  axis.imshow(images[picked])

#  HOG feature extraction for SVM


In [None]:
#HOG feature extraction for the training set
# start with empty feature and label containers
print("Extracting features from training dataset...")
datahog_train = []
labelshog_train = []

# walk over the training images and compute the HOG descriptor of each one
for imge_index, image in enumerate(x_train):
  H = feature.hog(
      image,
      orientations=9,
      pixels_per_cell=(10,10),
      cells_per_block=(2,2),
      transform_sqrt=True,
      block_norm="L2-Hys",
  )

  # keep the descriptor and the label found at the same index of y_train
  datahog_train.append(H)
  labelshog_train.append(y_train[imge_index])

print(np.shape(datahog_train))
print(np.shape(labelshog_train))



```


# Creating SVM model with HOG feature extraction

In [None]:
# Create another model, trained on the features extracted with HOG.
# Train an SVM classifier on the training data.
# The model uses an RBF kernel, C = 100 and random_state = 42.

model2 = svm.SVC(kernel='rbf', C=100.0, random_state=42)  #using the same parameters we used with LBP
#Start training the SVM classifier
model2.fit(datahog_train, labelshog_train)

In [None]:
#Report the shapes of the HOG training features and labels, then the training accuracy of the HOG SVM model.

print(np.shape(datahog_train))
print(np.shape(labelshog_train))
print("Train set Accuracy: {:.2f}".format(model2.score(datahog_train,labelshog_train)))

# Evaluation and accuracy of SVM with HOG extraction

In [None]:
#Evaluating the HOG model on the test data set
predicthog_test = []
labelshog_test = []
datahog_test=[]
# walk over the test images
for img_ind, img in enumerate(x_test):
  # compute the HOG descriptor with the same settings used in the training phase
  H = feature.hog(
      img,
      orientations=9,
      pixels_per_cell=(10,10),
      cells_per_block=(2,2),
      transform_sqrt=True,
      block_norm="L2-Hys",
  )

  # classify the single descriptor and keep the scalar label
  pred = model2.predict(H.reshape(1,-1)) [0]
  # store the prediction, the descriptor and the true label
  predicthog_test.append(pred)
  datahog_test.append(H)
  labelshog_test.append(y_test[img_ind])

print(np.shape(predicthog_test))
print(np.shape(labelshog_test))

In [None]:
# Test set accuracy of the HOG model

accuracy = metrics.accuracy_score(y_test, predicthog_test)
print("Accuracy on test dataset:",accuracy)

# Confusion matrix of the HOG model: it is built from the HOG predictions
# ('predicthog_test'), the same predictions the accuracy above is computed
# from, not from the LBP model's 'predictions'.
cm2  = metrics.confusion_matrix(y_test, predicthog_test)
print(cm2)

# Plot the confusion matrix as an annotated heatmap using the seaborn library;
# the figure title shows the accuracy stored in 'accuracy'.
plt.figure(figsize=(9,9))
sns.heatmap(cm2, annot=True, fmt=".3f", linewidths=.5, square = True, cmap = 'Blues_r');
plt.ylabel('Actual label');
plt.xlabel('Predicted label');
all_sample_title = 'Accuracy Score: {0}'.format(accuracy)
plt.title(all_sample_title, size = 15);


# Showing some classification results from the SVM model


In [None]:
# Annotated sample images and their true labels for the HOG model
images = []
orig_labels=[]
# HOG settings, identical to the ones used for training
hog_settings = dict(orientations=9, pixels_per_cell=(10, 10),
                    cells_per_block=(2, 2), transform_sqrt=True, block_norm="L2-Hys")
# randomly select a few symbols from the test set
sample_ids = np.random.choice(np.arange(0, len(y_test)), size=(16,))
for i in sample_ids:
  # classify the image from its HOG descriptor
  test_img = x_test[i]
  H1 = feature.hog(test_img, **hog_settings)
  pred = model2.predict(H1.reshape(1, -1))[0]
  label = symbols[pred]
  orig_labels.append(symbols[y_test[i]])
  color = (0, 255, 0)
  # 3-channel, enlarged copy with the predicted label drawn on it
  test_img = cv2.resize(cv2.merge([test_img] * 3), (96, 96),
                        interpolation=cv2.INTER_LINEAR)
  cv2.putText(test_img, label, (5, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.50, color, 2)
  images.append(test_img)

In [None]:
# True label of the sampled image at index 1.
orig_labels[1]

In [None]:
## Display the classification results
# Show the annotated images at indices 1-4 in a 2x2 grid (the predicted label
# is drawn on each image) and print the true label of each one.
fig = plt.figure()
ax1, ax2, ax3, ax4 = [fig.add_subplot(2, 2, slot) for slot in (1, 2, 3, 4)]
for axis, picked in zip((ax1, ax2, ax3, ax4), (1, 2, 3, 4)):
  axis.imshow(images[picked])
  print(orig_labels[picked])