# Feature Extraction using ORB

In [2]:
import cv2
import pandas as pd
import numpy as np
import os
from sklearn.cluster import KMeans
import glob

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder,MinMaxScaler
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
input0 = './train/Preprocessed/'
temp = ['Bowlegs', 'Knock Knees', 'Normal Knee']

In [4]:

count = 0
for i in temp:
    for filename in os.listdir(input0 + i):
        img = cv2.resize(cv2.imread(input0 + i + '/' + filename), (512, 512))#resize image
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        
        #initialise sift descriptor
        orb = cv2.ORB_create()
        keypoints, descriptors = orb.detectAndCompute(gray, None)

        orb_image = cv2.drawKeypoints(gray, keypoints, img)
        
        #convert the descriptor array into a dataframe format
        out=pd.DataFrame(descriptors)
        #append to the csv file
        csv_data=out.to_csv('./ORB/ORB_' + i + '.csv', mode='a', index=False)
        count += 1
    print(i + ": " + str(count))

Bowlegs: 111
Knock Knees: 360
Normal Knee: 609


# K-means

In [5]:
data1 = pd.read_csv('./ORB/ORB_Bowlegs.csv', dtype='uint8')
data2 = pd.read_csv('./ORB/ORB_Knock Knees.csv', dtype='uint8')
data3 = pd.read_csv('./ORB/ORB_Normal Knee.csv', dtype='uint8')

data1 = data1.astype('uint8')
data2 = data2.astype('uint8')
data3 = data3.astype('uint8')

In [6]:
#performing kmeans on each class
#Bowlegs
kmeans1 = KMeans(n_clusters=5)
kmeans1.fit(data1)

#Knock Knees
kmeans2 = KMeans(n_clusters=5)
kmeans2.fit(data2)

#Normal
kmeans3 = KMeans(n_clusters=5)
kmeans3.fit(data3)

KMeans(n_clusters=5)

In [7]:
hist1=np.histogram(kmeans1.labels_,bins=[0,1,2,3,4,5])
hist2=np.histogram(kmeans2.labels_,bins=[0,1,2,3,4,5])
hist3=np.histogram(kmeans3.labels_,bins=[0,1,2,3,4,5])

print('histogram of bowed')
print(hist1,"\n")

print('histogram of knocked')
print(hist2,"\n")

print('histogram of normal')
print(hist3,"\n")

histogram of bowed
(array([4155, 4520, 4919, 4843, 4566], dtype=int64), array([0, 1, 2, 3, 4, 5])) 

histogram of knocked
(array([11872, 12939, 13349, 10295, 10564], dtype=int64), array([0, 1, 2, 3, 4, 5])) 

histogram of normal
(array([12201, 10778, 14383, 13048, 14589], dtype=int64), array([0, 1, 2, 3, 4, 5])) 



In [None]:
#performing kmeans prediction of the entire apple dataset with the pretrained kmeans model
#initialising i=0; as its the first class
i=0
count = 0
for j in temp:
    data=[]
    for filename in os.listdir(input0 + j):
        path = input0 + j + '/' + filename
        a=cv2.imread(path)
        resize=(512,512)
        img=cv2.resize(a,resize)
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        
        #initialise sift descriptor
        orb = cv2.ORB_create()
        keypoints, descriptors = orb.detectAndCompute(gray, None)
        
        #convert the descriptor array into a dataframe format
        out=pd.DataFrame(descriptors)
        
        #predict values of feature vector with pretrained kmeans
        #ValueError: Buffer dtype mismatch, expected 'float' but got 'double', in order to avoid this dtype=np.double
        array_double = np.array(out, dtype=np.double)
        if(array_double.shape == (0, 0)): continue

        a=kmeans1.predict(array_double)
        hist=np.histogram(a,bins=[0,1,2,3,4,5])
        #append the dataframe into the array in append mode, the array will only have 5 values which will store the values in a row
        data.append(hist[0])
    
    #convert Array to Dataframe and append to the list
    Output = pd.DataFrame(data)
    #add row class 
    Output["Class"] = i 
    csv_data=Output.to_csv('./ORB/ORB_'+ j + 'Final.csv', mode='a', index=False)
    i += 1

In [9]:
final = pd.read_csv('./ORB/ORB_BowlegsFinal.csv')
temp1 = final.append(pd.read_csv('./ORB/ORB_Knock KneesFinal.csv'))
tc = pd.read_csv('./ORB/ORB_Normal KneeFinal.csv')
tcc = temp1.append(tc)
csv_data = tcc.to_csv('./ORB/ORB_FeatureFinal.csv', mode='a', index=False)
print(tcc)

      0   1   2    3    4  Class
0    18  20   3    2    6      0
1    12  19   2    7   19      0
2    26  37  28    4    1      0
3    37  45  31  141  117      0
4    48  30  46  131  106      0
..   ..  ..  ..  ...  ...    ...
241  79  41  24   28    5      2
242   8   5   1    3    2      2
243   7   4   2    2    2      2
244   9   7   2    3    3      2
245  28  40  29    6    7      2

[605 rows x 6 columns]


  temp1 = final.append(pd.read_csv('./ORB/ORB_Knock KneesFinal.csv'))
  tcc = temp1.append(tc)


# Data spliting

In [10]:
df = pd.read_csv('./ORB/ORB_FeatureFinal.csv')
X_train = df.iloc[:, 0:5]
Y_train = df.iloc[:, 5:6]
train_x, valid_x, train_y, valid_y = train_test_split(X_train, Y_train, 
                                                      test_size=0.5, 
                                                      stratify=Y_train, 
                                                      random_state=8)

# KNN

In [11]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, mean_squared_error

knn = KNeighborsClassifier(n_neighbors=7)
  
knn.fit(train_x, train_y)

  return self._fit(X, y)


KNeighborsClassifier(n_neighbors=7)

In [19]:
print('\nResults obtained for KNN')

# accuracy on training data
y_pred_t = knn.predict(train_x)
train_data_accuracy = accuracy_score(y_pred_t, train_y)

print('\nResults obtained on Training Data')
print('Accuracy on Train data : ', train_data_accuracy)
print("Precision: ", precision_score(train_y, y_pred_t, average='macro'))
print("Recall: ", recall_score(train_y, y_pred_t, average='macro'))
print("F-score: ", f1_score(train_y, y_pred_t, average='macro'))

# Accuracy on test data
y_pred_ts = knn.predict(valid_x)
test_data_accuracy = accuracy_score(y_pred_ts, valid_y)

print('\nResults obtained on Testing Data')
print('Accuracy on Test data : ', test_data_accuracy)
print("Precision: ", precision_score(valid_y, y_pred_ts, average='macro'))
print("Recall: ", recall_score(valid_y, y_pred_ts, average='macro'))
print("F-score: ", f1_score(valid_y, y_pred_ts, average='macro'))



Results obtained for KNN

Results obtained on Training Data
Accuracy on Train data :  0.6854304635761589
Precision:  0.6541992229841803
Recall:  0.62926153748341
F-score:  0.6358607506282701

Results obtained on Testing Data
Accuracy on Test data :  0.5115511551155115
Precision:  0.484672619047619
Recall:  0.4638954454060669
F-score:  0.4677800066246281


# XgBoost

In [12]:
import xgboost as xgb

classifier = xgb.XGBClassifier(
    n_estimators=100,
    reg_lambda=1,
    gamma=0,
    max_depth=3
)

classifier.fit(train_x, train_y)



  from pandas import MultiIndex, Int64Index
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
              gamma=0, gpu_id=-1, importance_type=None,
              interaction_constraints='', learning_rate=0.300000012,
              max_delta_step=0, max_depth=3, min_child_weight=1, missing=nan,
              monotone_constraints='()', n_estimators=100, n_jobs=8,
              num_parallel_tree=1, objective='multi:softprob', predictor='auto',
              random_state=0, reg_alpha=0, reg_lambda=1, scale_pos_weight=None,
              subsample=1, tree_method='exact', validate_parameters=1,
              verbosity=None)

In [13]:
print('\nResults obtained for XgBoost')

# accuracy on training data
y_pred_t = classifier.predict(train_x)
train_data_accuracy = accuracy_score(y_pred_t, train_y)

print('\nResults obtained on Training Data')
print('Accuracy on Train data : ', train_data_accuracy)
print("Precision: ", precision_score(train_y, y_pred_t, average='macro'))
print("Recall: ", recall_score(train_y, y_pred_t, average='macro'))
print("F-score: ", f1_score(train_y, y_pred_t, average='macro'))

# Accuracy on test data
y_pred_ts = classifier.predict(valid_x)
test_data_accuracy = accuracy_score(y_pred_ts, valid_y)

print('\nResults obtained on Testing Data')
print('Accuracy on Test data : ', test_data_accuracy)
print("Precision: ", precision_score(valid_y, y_pred_ts, average='macro'))
print("Recall: ", recall_score(valid_y, y_pred_ts, average='macro'))
print("F-score: ", f1_score(valid_y, y_pred_ts, average='macro'))


Results obtained for XgBoost

Results obtained on Training Data
Accuracy on Train data :  1.0
Precision:  1.0
Recall:  1.0
F-score:  1.0

Results obtained on Testing Data
Accuracy on Test data :  0.5775577557755776
Precision:  0.5365236864771749
Recall:  0.527757483796036
F-score:  0.5296582147240595


# SVM

In [14]:
from sklearn.svm import SVC
from sklearn.multiclass import OneVsOneClassifier

subclassifier = SVC(kernel='rbf')
classifier = OneVsOneClassifier(estimator=subclassifier)
classifier.fit(train_x, train_y)

  y = column_or_1d(y, warn=True)


OneVsOneClassifier(estimator=SVC())

In [15]:
print('\nResults obtained for SVM')

# accuracy on training data
y_pred_t = classifier.predict(train_x)
train_data_accuracy = accuracy_score(y_pred_t, train_y)

print('\nResults obtained on Training Data')
print('Accuracy on Train data : ', train_data_accuracy)
print("Precision: ", precision_score(train_y, y_pred_t, average='macro'))
print("Recall: ", recall_score(train_y, y_pred_t, average='macro'))
print("F-score: ", f1_score(train_y, y_pred_t, average='macro'))

# Accuracy on test data
y_pred_ts = classifier.predict(valid_x)
test_data_accuracy = accuracy_score(y_pred_ts, valid_y)

print('\nResults obtained on Testing Data')
print('Accuracy on Test data : ', test_data_accuracy)
print("Precision: ", precision_score(valid_y, y_pred_ts, average='macro'))
print("Recall: ", recall_score(valid_y, y_pred_ts, average='macro'))
print("F-score: ", f1_score(valid_y, y_pred_ts, average='macro'))


Results obtained for SVM

Results obtained on Training Data
Accuracy on Train data :  0.6059602649006622
Precision:  0.411020692862125
Recall:  0.4942739749978145
F-score:  0.44210194808116254

Results obtained on Testing Data
Accuracy on Test data :  0.504950495049505
Precision:  0.33654548776032156
Recall:  0.4133228429058484
F-score:  0.3665634508920792


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


# Naive Bayes

In [16]:
from sklearn.naive_bayes import BernoulliNB
classifier = BernoulliNB()
classifier.fit(train_x, train_y)

  y = column_or_1d(y, warn=True)


BernoulliNB()

In [17]:
print('\nResults obtained for Naive Bayes')

# accuracy on training data
y_pred_t = classifier.predict(train_x)
train_data_accuracy = accuracy_score(y_pred_t, train_y)

print('\nResults obtained on Training Data')
print('Accuracy on Train data : ', train_data_accuracy)
print("Precision: ", precision_score(train_y, y_pred_t, average='macro'))
print("Recall: ", recall_score(train_y, y_pred_t, average='macro'))
print("F-score: ", f1_score(train_y, y_pred_t, average='macro'))

# Accuracy on test data
y_pred_ts = classifier.predict(valid_x)
test_data_accuracy = accuracy_score(y_pred_ts, valid_y)

print('\nResults obtained on Testing Data')
print('Accuracy on Test data : ', test_data_accuracy)
print("Precision: ", precision_score(valid_y, y_pred_ts, average='macro'))
print("Recall: ", recall_score(valid_y, y_pred_ts, average='macro'))
print("F-score: ", f1_score(valid_y, y_pred_ts, average='macro'))


Results obtained for Naive Bayes

Results obtained on Training Data
Accuracy on Train data :  0.46688741721854304
Precision:  0.5302847247392372
Recall:  0.3862142272449118
F-score:  0.3120775073612215

Results obtained on Testing Data
Accuracy on Test data :  0.41254125412541254
Precision:  0.40314575506644895
Recall:  0.34285589399672795
F-score:  0.2682887030978634
