In [None]:
#importing necessary libraries
import os
import cv2
import numpy as np
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

Image Preprocessing

In [None]:
# Mount Google Drive at /content/drive (uses the google.colab helper module).
from google.colab import drive
drive.mount('/content/drive')

In [None]:
#Declaring directories
train_dir="" #example file path: /content/drive/TrainingImages
test_dir=""

# Target (width, height) every image is resized to before feature extraction
IMAGE_SIZE = (224, 224)


def load_image_folder(root_dir, image_size=IMAGE_SIZE):
    """Load a class-per-subfolder image directory into NumPy arrays.

    Each sub-directory of ``root_dir`` is treated as one class and its name is
    used as the label of every image inside it.

    Parameters
    ----------
    root_dir : str
        Directory whose sub-directories each hold the images of one class.
    image_size : tuple of int
        (width, height) passed to ``cv2.resize``.

    Returns
    -------
    images : numpy.ndarray
        Stack of RGB images, resized to ``image_size`` and divided by 255.
    labels : numpy.ndarray
        Class name (sub-directory name) of each image, in the same order.
    """
    images = []
    labels = []
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore the later seeded split) identical between runs.
    for class_name in sorted(os.listdir(root_dir)):
        class_path = os.path.join(root_dir, class_name)
        # Skip stray files (e.g. .DS_Store) that sit next to the class folders
        if not os.path.isdir(class_path):
            continue
        for img_name in sorted(os.listdir(class_path)):
            img_path = os.path.join(class_path, img_name)
            img = cv2.imread(img_path)
            # cv2.imread returns None for unreadable / non-image files
            if img is None:
                print("Skipping unreadable image:", img_path)
                continue
            # OpenCV decodes to BGR; keep the converted RGB result
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = cv2.resize(img, image_size)
            images.append(img / 255.0)
            labels.append(class_name)
    return np.array(images), np.array(labels)


#loading training images from training directory
x, y = load_image_folder(train_dir)

#loading test images from test directory
x_test, y_test = load_image_folder(test_dir)

Feature extraction

In [None]:
# HOG (histogram of oriented gradients) comes from scikit-image
from skimage.feature import hog

# One shared set of HOG settings so the training and test descriptors are
# computed in exactly the same way.
hog_settings = dict(
    orientations=9,
    pixels_per_cell=(8, 8),
    cells_per_block=(2, 2),
    transform_sqrt=True,
    channel_axis=-1,
)

# One descriptor per image, stacked into a NumPy array for each data set
hog_features = np.array([hog(sample, **hog_settings) for sample in x])
hog_features_test = np.array([hog(sample, **hog_settings) for sample in x_test])

The cell below visualizes how HOG feature extraction works on one sample image from the training set.

In [None]:
from skimage import exposure

# Index of the training image used for the illustration
sample_index = 3
image = x[sample_index]

# Extract HOG features with the same settings as the feature-extraction cell
# (cells_per_block=(2, 2), transform_sqrt=True) so the picture reflects the
# descriptors the classifiers are actually trained on. visualize=True makes
# hog() additionally return an image of the gradient histograms.
fd, hog_image = hog(
    image,
    orientations=9,
    pixels_per_cell=(8, 8),
    cells_per_block=(2, 2),
    transform_sqrt=True,
    visualize=True,
    channel_axis=-1,
)
hog_image_rescaled = exposure.rescale_intensity(hog_image, in_range=(0, 10))

# Plotting the input image and its HOG rendering side by side for comparison
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(8, 4), sharex=True, sharey=True)
ax1.imshow(image)
ax1.set_title(y[sample_index])
ax2.imshow(hog_image_rescaled, cmap=plt.cm.gray)
ax2.set_title('HOG Features')
plt.show()

In [None]:
# Hold out 20% of the HOG descriptors (with their labels) as a validation set.
# random_state is fixed so the shuffle is seeded.
x_train, x_val, y_train, y_val = train_test_split(
    hog_features,
    y,
    test_size=0.2,
    random_state=42,
)

**SVM**

In [None]:
# Two support-vector classifiers that differ only in their kernel settings
svc_common = dict(decision_function_shape='ovr')
rbf = svm.SVC(kernel='rbf', gamma='scale', C=1.0, **svc_common)
poly = svm.SVC(kernel='poly', degree=3, C=1, **svc_common)

# Fit both models on the training split (RBF first, then polynomial)
for classifier in (rbf, poly):
    classifier.fit(x_train, y_train)

In [None]:
# Validation-split predictions and scores for the RBF-kernel SVM
rbf_predict = rbf.predict(x_val)
rbf_accuracy = accuracy_score(y_val, rbf_predict)
rbf_f1 = f1_score(y_val, rbf_predict, average='weighted')

# Test-set predictions and scores for the RBF-kernel SVM
rbf_predict_test = rbf.predict(hog_features_test)
rbf_test_accuracy = accuracy_score(y_test, rbf_predict_test)
rbf_f1_test = f1_score(y_test, rbf_predict_test, average='weighted')

# Report every score as a percentage with two decimals
rbf_report = (
    ('Accuracy (RBF Kernel): ', rbf_accuracy),
    ('F1 (RBF Kernel): ', rbf_f1),
    ('Test Accuracy (RBF Kernel): ', rbf_test_accuracy),
    ('F1 (RBF Kernel) value for test data: ', rbf_f1_test),
)
for caption, score in rbf_report:
    print(caption, "%.2f" % (score*100))

In [None]:
# Validation-split predictions and scores for the polynomial-kernel SVM
poly_predict = poly.predict(x_val)
poly_accuracy = accuracy_score(y_val, poly_predict)
poly_f1 = f1_score(y_val, poly_predict, average='weighted')

# Test-set predictions and scores for the polynomial-kernel SVM
poly_predict_test = poly.predict(hog_features_test)
poly_accuracy_test = accuracy_score(y_test, poly_predict_test)
poly_f1_test = f1_score(y_test, poly_predict_test, average='weighted')

# Report every score as a percentage with two decimals
poly_report = (
    ('Accuracy (Polynomial Kernel): ', poly_accuracy),
    ('F1 (Polynomial Kernel): ', poly_f1),
    ('Test Accuracy (Polynomial Kernel): ', poly_accuracy_test),
    ('F1 (Polynomial Kernel) value for test data: ', poly_f1_test),
)
for caption, score in poly_report:
    print(caption, "%.2f" % (score*100))

In [None]:
#plotting a confusion matrix for the RBF-kernel SVM on the test set
from sklearn.metrics import confusion_matrix

# Pin the label order explicitly so that the rows/columns of the matrix can be
# annotated with the matching class names (sorted, as np.unique returns them).
class_labels = np.unique(np.concatenate([y_test, rbf_predict_test]))

# Compute confusion matrix (rows: true classes, columns: predicted classes)
cm = confusion_matrix(y_test, rbf_predict_test, labels=class_labels)

fig, ax = plt.subplots()
im = ax.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
ax.figure.colorbar(im, ax=ax)
ax.set(xticks=np.arange(cm.shape[1]),
       yticks=np.arange(cm.shape[0]),
       xticklabels=class_labels,
       yticklabels=class_labels,
       xlabel='Predicted label',
       ylabel='True label',
       title='Confusion Matrix for SVM Multiclass Image Classification')
# Rotate the column labels so long class names do not overlap
plt.setp(ax.get_xticklabels(), rotation=45, ha='right', rotation_mode='anchor')

# Write the count into every cell; switch to white text on dark cells
threshold = cm.max() / 2.0
for row in range(cm.shape[0]):
    for col in range(cm.shape[1]):
        ax.text(col, row, format(cm[row, col], 'd'),
                ha='center', va='center',
                color='white' if cm[row, col] > threshold else 'black')

fig.tight_layout()
plt.show()

Using **KNN** for classification

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

In [None]:
#performing dimensionality reduction using PCA
pca = PCA(n_components=2)
# Learn the 2-D projection from the training split only ...
x_train_pca = pca.fit_transform(x_train)
# ... and re-use that same fitted projection for validation and test data.
# Calling fit_transform here would refit PCA on each set and place it in a
# different coordinate system from the one the KNN model is trained in.
x_val_pca = pca.transform(x_val)
x_test_pca = pca.transform(hog_features_test)

In [None]:
#Scatter plot of the training data in the 2-D PCA space, coloured by class
import pandas as pd
columns=['col1','col2']
df=pd.DataFrame(x_train_pca,columns=columns)

# plt.cm.get_cmap was removed from recent Matplotlib releases;
# plt.get_cmap is the supported way to look a colormap up by name.
cmap=plt.get_cmap('coolwarm')

# Encode the class names as integers 0..k-1 so they can drive the colormap
# (colouring by 'col1' would only repeat the x-axis information).
class_names, class_codes = np.unique(y_train, return_inverse=True)

fig, ax = plt.subplots()
points = ax.scatter(df['col1'], df['col2'], c=class_codes, cmap=cmap)
# Label the colorbar ticks with the class each integer code stands for
colorbar = fig.colorbar(points, ax=ax, ticks=np.arange(len(class_names)))
colorbar.ax.set_yticklabels(class_names)
ax.set(xlabel='PCA component 1',
       ylabel='PCA component 2',
       title='Training data projected onto the first two PCA components')
plt.show()

In [None]:
# Search space for the KNN hyper-parameters: neighbourhood size and distance metric
grid_parameters = {
    'n_neighbors': [4, 8, 16, 36, 72],
    'metric': ['euclidean', 'manhattan', 'cosine'],
}

# 5-fold cross-validated grid search on the PCA-reduced training data
knn = KNeighborsClassifier()
grid_search = GridSearchCV(knn, grid_parameters, cv=5)
grid_search.fit(x_train_pca, y_train)

# Pull the winning settings out of the search result
# (the spelling 'best_neigbors' is kept because the KNN training cell reads it)
best_neigbors, best_metric = (
    grid_search.best_params_[key] for key in ('n_neighbors', 'metric')
)

In [None]:
# Build a KNN classifier from the settings chosen by the grid search
# and fit it on the PCA-reduced training data
knn_settings = {'n_neighbors': best_neigbors, 'metric': best_metric}
knn_model = KNeighborsClassifier(**knn_settings)
knn_model.fit(x_train_pca, y_train)

In [None]:
# Predict on the PCA-reduced validation and test data with the fitted KNN model
y_pred = knn_model.predict(x_val_pca)
y_pred_test = knn_model.predict(x_test_pca)

# Fraction of correctly classified samples for each data set
val_accuracy = accuracy_score(y_val, y_pred)
accuracy = accuracy_score(y_pred_test, y_test)

# Report the selected hyper-parameters, then both accuracies as percentages
print("Best hyperparameters: ", grid_search.best_params_)
print("Validation Accuracy: ", val_accuracy*100)
print("Testing Accuracy: ", accuracy*100)

**Random Forest Classifier**

In [None]:
# Random forest implementation from scikit-learn
from sklearn.ensemble import RandomForestClassifier

# Fit a 100-tree forest (seeded via random_state) on the HOG training features
rf = RandomForestClassifier(n_estimators=100, random_state=42).fit(x_train, y_train)

# y_val_pred1 holds the validation predictions, y_val_pred2 the test predictions
y_val_pred1, y_val_pred2 = (
    rf.predict(features) for features in (x_val, hog_features_test)
)

In [None]:
# Accuracy of the random forest on the validation split (ac1) and test set (ac2)
ac1 = accuracy_score(y_val, y_val_pred1)
ac2 = accuracy_score(y_val_pred2, y_test)

# Both values are reported as percentages on a single line
print(
    "Validation Accuracy: ", ac1*100,
    " Test Accuracy: ", ac2*100,
)