# Feature Extraction using HoG

In [25]:
import cv2
import pandas as pd
import numpy as np
import os
from sklearn.cluster import KMeans
import glob

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder,MinMaxScaler
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

In [26]:
input0 = './train/Preprocessed/'
temp = ['Bowlegs', 'Knock Knees', 'Normal Knee']

In [27]:
for j in temp:
    for filename in os.listdir('./train/Preprocessed/' + j):
        img = cv2.resize(cv2.imread('./train/Preprocessed/' + j + '/' + filename, 0), (256, 256))
        #initialise HoG descriptor
        cell_size = (32, 32)  # h x w in pixels
        block_size = (2, 2)  # h x w in cells
        nbins = 9  # number of orientation bins


        # winSize is the size of the image cropped to an multiple of the cell size
        # cell_size is the size of the cells of the img patch over which to calculate the histograms
        # block_size is the number of cells which fit in the patch

        hog = cv2.HOGDescriptor(_winSize=(img.shape[1] // cell_size[1] * cell_size[1],
                                        img.shape[0] // cell_size[0] * cell_size[0]),
                    _blockSize=(block_size[1] * cell_size[1],
                                block_size[0] * cell_size[0]),
                    _blockStride=(cell_size[1], cell_size[0]),
                    _cellSize=(cell_size[1], cell_size[0]),
                    _nbins=nbins)


        descriptor = hog.compute(img)
        out=pd.DataFrame(descriptor)

        #append to the csv file
        out = out.transpose()
        csv_data=out.to_csv('./HoG/HoG_' + j + '.csv', mode='a', header=False, index=False)
        


# K-means

In [28]:
data1 = pd.read_csv('./HoG/HoG_Bowlegs.csv')
data2 = pd.read_csv('./HoG/HoG_Knock Knees.csv')
data3 = pd.read_csv('./HoG/HoG_Normal Knee.csv')

In [29]:
#performing kmeans on each class
#Bowlegs
kmeans1 = KMeans(n_clusters=5)
kmeans1.fit(data1)

#Knock Knees
kmeans2 = KMeans(n_clusters=5)
kmeans2.fit(data2)

#Normal
kmeans3 = KMeans(n_clusters=5)
kmeans3.fit(data3)

KMeans(n_clusters=5)

In [30]:
hist1=np.histogram(kmeans1.labels_,bins=[0,1,2,3,4,5])
hist2=np.histogram(kmeans2.labels_,bins=[0,1,2,3,4,5])
hist3=np.histogram(kmeans3.labels_,bins=[0,1,2,3,4,5])

print('histogram of bowed')
print(hist1,"\n")

print('histogram of knocked')
print(hist2,"\n")

print('histogram of normal')
print(hist3,"\n")

histogram of bowed
(array([28, 14, 12, 35, 21], dtype=int64), array([0, 1, 2, 3, 4, 5])) 

histogram of knocked
(array([56, 50, 33, 57, 52], dtype=int64), array([0, 1, 2, 3, 4, 5])) 

histogram of normal
(array([56, 45, 72, 43, 32], dtype=int64), array([0, 1, 2, 3, 4, 5])) 



In [None]:
#performing kmeans prediction of the entire apple dataset with the pretrained kmeans model
#initialising i=0; as its the first class
i=0
input0 = 'train/Preprocessed/'
for j in temp:
    data=[]
    for filename in os.listdir(input0 + j):
        path = input0 + j + '/' + filename
        a=cv2.imread(path)
        resize=(256,256)
        img=cv2.resize(a,resize)
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        
        #initialise sift descriptor
        cell_size = (32, 32)  # h x w in pixels
        block_size = (2, 2)  # h x w in cells
        nbins = 9  # number of orientation bins


        # winSize is the size of the image cropped to an multiple of the cell size
        # cell_size is the size of the cells of the img patch over which to calculate the histograms
        # block_size is the number of cells which fit in the patch

        hog = cv2.HOGDescriptor(_winSize=(img.shape[1] // cell_size[1] * cell_size[1],
                                        img.shape[0] // cell_size[0] * cell_size[0]),
                    _blockSize=(block_size[1] * cell_size[1],
                                block_size[0] * cell_size[0]),
                    _blockStride=(cell_size[1], cell_size[0]),
                    _cellSize=(cell_size[1], cell_size[0]),
                    _nbins=nbins)


        descriptor = hog.compute(img)
        out=pd.DataFrame(descriptor)

        #drop first coloumn as it's the no of feature detected. Not required.
        #append to the csv file
        out = out.transpose()
        
        #predict values of feature vector with pretrained kmeans
        #ValueError: Buffer dtype mismatch, expected 'float' but got 'double', in order to avoid this dtype=np.double
        array_double = np.array(out, dtype=np.double)

        a=kmeans1.predict(array_double)
        hist=np.histogram(a,bins=[0,1,2,3,4,5])
        #append the dataframe into the array in append mode, the array will only have 5 values which will store the values in a row
        data.append(hist[0])
    
    #convert Array to Dataframe and append to the list
    Output = pd.DataFrame(data)
    #add row class 
    Output["Class"] = i 
    csv_data=Output.to_csv('./HoG/HoG_'+ j + 'Final.csv', mode='a', index=False)
    i += 1

In [32]:
final = pd.read_csv('./HoG/HoG_BowlegsFinal.csv')
temp1 = final.append(pd.read_csv('./HoG/HoG_Knock KneesFinal.csv'))
tc = pd.read_csv('./HoG/HoG_Normal KneeFinal.csv')
tcc = temp1.append(tc)

csv_data = tcc.to_csv('./HoG/HoG_FeatureFinal.csv', mode='a', index=False)
print(tcc)

     0  1  2  3  4  Class
0    0  0  1  0  0      0
1    0  0  0  1  0      0
2    0  0  0  1  0      0
3    0  0  0  1  0      0
4    0  0  0  1  0      0
..  .. .. .. .. ..    ...
244  0  0  0  0  1      2
245  1  0  0  0  0      2
246  1  0  0  0  0      2
247  0  0  0  0  1      2
248  1  0  0  0  0      2

[609 rows x 6 columns]


  temp1 = final.append(pd.read_csv('./HoG/HoG_Knock KneesFinal.csv'))
  tcc = temp1.append(tc)


# Data spliting

In [33]:
df = pd.read_csv('./HoG/HoG_FeatureFinal.csv')
X_train = df.iloc[:, 0:5]
Y_train = df.iloc[:, 5:6]
train_x, valid_x, train_y, valid_y = train_test_split(X_train, Y_train, 
                                                      test_size=0.5, 
                                                      stratify=Y_train, 
                                                      random_state=8)

# KNN

In [34]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, mean_squared_error

knn = KNeighborsClassifier(n_neighbors=7)
  
knn.fit(train_x, train_y)

  return self._fit(X, y)


KNeighborsClassifier(n_neighbors=7)

In [35]:
print('\nResults obtained for KNN')

# accuracy on training data
y_pred_t = knn.predict(train_x)
train_data_accuracy = accuracy_score(y_pred_t, train_y)

print('\nResults obtained on Training Data')
print('Accuracy on Train data : ', train_data_accuracy)
print("Precision: ", precision_score(train_y, y_pred_t, average='macro'))
print("Recall: ", recall_score(train_y, y_pred_t, average='macro'))
print("F-score: ", f1_score(train_y, y_pred_t, average='macro'))

# Accuracy on test data
y_pred_ts = knn.predict(valid_x)
test_data_accuracy = accuracy_score(y_pred_ts, valid_y)

print('\nResults obtained on Testing Data')
print('Accuracy on Test data : ', test_data_accuracy)
print("Precision: ", precision_score(valid_y, y_pred_ts, average='macro'))
print("Recall: ", recall_score(valid_y, y_pred_ts, average='macro'))
print("F-score: ", f1_score(valid_y, y_pred_ts, average='macro'))



Results obtained for KNN

Results obtained on Training Data
Accuracy on Train data :  0.48355263157894735
Precision:  0.3271697207920427
Recall:  0.3951612903225807
F-score:  0.33698366954851106

Results obtained on Testing Data
Accuracy on Test data :  0.46557377049180326
Precision:  0.31510975563839
Recall:  0.37866666666666665
F-score:  0.32176444245409763


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


# XgBoost

In [36]:
import xgboost as xgb

classifier = xgb.XGBClassifier(
    n_estimators=100,
    reg_lambda=1,
    gamma=0,
    max_depth=3
)

classifier.fit(train_x, train_y)



  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
              gamma=0, gpu_id=-1, importance_type=None,
              interaction_constraints='', learning_rate=0.300000012,
              max_delta_step=0, max_depth=3, min_child_weight=1, missing=nan,
              monotone_constraints='()', n_estimators=100, n_jobs=8,
              num_parallel_tree=1, objective='multi:softprob', predictor='auto',
              random_state=0, reg_alpha=0, reg_lambda=1, scale_pos_weight=None,
              subsample=1, tree_method='exact', validate_parameters=1,
              verbosity=None)

In [37]:
print('\nResults obtained for XgBoost')

# accuracy on training data
y_pred_t = classifier.predict(train_x)
train_data_accuracy = accuracy_score(y_pred_t, train_y)

print('\nResults obtained on Training Data')
print('Accuracy on Train data : ', train_data_accuracy)
print("Precision: ", precision_score(train_y, y_pred_t, average='macro'))
print("Recall: ", recall_score(train_y, y_pred_t, average='macro'))
print("F-score: ", f1_score(train_y, y_pred_t, average='macro'))

# Accuracy on test data
y_pred_ts = classifier.predict(valid_x)
test_data_accuracy = accuracy_score(y_pred_ts, valid_y)

print('\nResults obtained on Testing Data')
print('Accuracy on Test data : ', test_data_accuracy)
print("Precision: ", precision_score(valid_y, y_pred_ts, average='macro'))
print("Recall: ", recall_score(valid_y, y_pred_ts, average='macro'))
print("F-score: ", f1_score(valid_y, y_pred_ts, average='macro'))


Results obtained for XgBoost

Results obtained on Training Data
Accuracy on Train data :  0.5526315789473685
Precision:  0.5144132016369863
Recall:  0.4777265745007681
F-score:  0.4705938778389054

Results obtained on Testing Data
Accuracy on Test data :  0.5672131147540984
Precision:  0.49294214380609835
Recall:  0.4783030303030303
F-score:  0.46171155205851183


# SVM

In [38]:
from sklearn.svm import SVC
from sklearn.multiclass import OneVsOneClassifier

subclassifier = SVC(kernel='rbf')
classifier = OneVsOneClassifier(estimator=subclassifier)
classifier.fit(train_x, train_y)

  y = column_or_1d(y, warn=True)


OneVsOneClassifier(estimator=SVC())

In [39]:
print('\nResults obtained for SVM')

# accuracy on training data
y_pred_t = classifier.predict(train_x)
train_data_accuracy = accuracy_score(y_pred_t, train_y)

print('\nResults obtained on Training Data')
print('Accuracy on Train data : ', train_data_accuracy)
print("Precision: ", precision_score(train_y, y_pred_t, average='macro'))
print("Recall: ", recall_score(train_y, y_pred_t, average='macro'))
print("F-score: ", f1_score(train_y, y_pred_t, average='macro'))

# Accuracy on test data
y_pred_ts = classifier.predict(valid_x)
test_data_accuracy = accuracy_score(y_pred_ts, valid_y)

print('\nResults obtained on Testing Data')
print('Accuracy on Test data : ', test_data_accuracy)
print("Precision: ", precision_score(valid_y, y_pred_ts, average='macro'))
print("Recall: ", recall_score(valid_y, y_pred_ts, average='macro'))
print("F-score: ", f1_score(valid_y, y_pred_ts, average='macro'))


Results obtained for SVM

Results obtained on Training Data
Accuracy on Train data :  0.5526315789473685
Precision:  0.5144132016369863
Recall:  0.4777265745007681
F-score:  0.4705938778389054

Results obtained on Testing Data
Accuracy on Test data :  0.5672131147540984
Precision:  0.49294214380609835
Recall:  0.4783030303030303
F-score:  0.46171155205851183


# Naive Bayes

In [40]:
from sklearn.naive_bayes import BernoulliNB
classifier = BernoulliNB()
classifier.fit(train_x, train_y)

  y = column_or_1d(y, warn=True)


BernoulliNB()

In [41]:
print('\nResults obtained for Naive Bayes')

# accuracy on training data
y_pred_t = classifier.predict(train_x)
train_data_accuracy = accuracy_score(y_pred_t, train_y)

print('\nResults obtained on Training Data')
print('Accuracy on Train data : ', train_data_accuracy)
print("Precision: ", precision_score(train_y, y_pred_t, average='macro'))
print("Recall: ", recall_score(train_y, y_pred_t, average='macro'))
print("F-score: ", f1_score(train_y, y_pred_t, average='macro'))

# Accuracy on test data
y_pred_ts = classifier.predict(valid_x)
test_data_accuracy = accuracy_score(y_pred_ts, valid_y)

print('\nResults obtained on Testing Data')
print('Accuracy on Test data : ', test_data_accuracy)
print("Precision: ", precision_score(valid_y, y_pred_ts, average='macro'))
print("Recall: ", recall_score(valid_y, y_pred_ts, average='macro'))
print("F-score: ", f1_score(valid_y, y_pred_ts, average='macro'))


Results obtained for Naive Bayes

Results obtained on Training Data
Accuracy on Train data :  0.5526315789473685
Precision:  0.5144132016369863
Recall:  0.4777265745007681
F-score:  0.4705938778389054

Results obtained on Testing Data
Accuracy on Test data :  0.5672131147540984
Precision:  0.49294214380609835
Recall:  0.4783030303030303
F-score:  0.46171155205851183
