In [None]:
import numpy as np
import cv2 as cv
import pandas as pd
import os
import argparse
from skimage import feature
from imutils import paths
from imutils import build_montages
from google.colab.patches import cv2_imshow
from sklearn.preprocessing import LabelEncoder


from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score, precision_score, recall_score

from sklearn.model_selection import RandomizedSearchCV

from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn import svm 
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report 


LOAD DATA

In [None]:
# Root folder of the dataset (a Colab Google Drive style path); each split
# lives in its own sub-folder underneath it.
dataset_dir = '/content/drive/My Drive/numbers'

# Derive both split locations from the same root.
training_path, testing_path = (
    os.path.join(dataset_dir, split_name) for split_name in ('training', 'testing')
)

In [None]:
def preprocess(image, image_size=128):
  """Turn a BGR image into a square, inverted binary image.

  Args:
    image: Colour image in BGR channel order (it is converted with
      ``cv.COLOR_BGR2GRAY``).
    image_size: Side length, in pixels, of the square output (default 128).

  Returns:
    The thresholded image returned by ``cv.threshold``.
  """
  # Step 1: drop colour information.
  grayscale = cv.cvtColor(image, cv.COLOR_BGR2GRAY)

  # Step 2: bring every image to the same square size.
  resized = cv.resize(grayscale, (image_size, image_size))

  # Step 3: inverted binary threshold with Otsu's method. cv.threshold returns
  # (threshold_value, thresholded_image); only the image is kept.
  threshold_flags = cv.THRESH_BINARY_INV | cv.THRESH_OTSU
  _, binary = cv.threshold(resized, 0, 255, threshold_flags)

  return binary

In [None]:
# From: https://www.pyimagesearch.com/2019/04/29/detecting-parkinsons-disease-with-opencv-computer-vision-and-the-spiral-wave-test/
def quantify_image_hog(image):
  """Describe ``image`` with a HOG (histogram of oriented gradients) vector.

  Args:
    image: Image to describe; it is handed to ``skimage.feature.hog`` as is.

  Returns:
    The feature vector returned by ``feature.hog``.
  """
  return feature.hog(
      image,
      orientations=9,
      pixels_per_cell=(10, 10),
      cells_per_block=(2, 2),
      transform_sqrt=True,
      block_norm='L1',
  )

In [None]:
# From: https://www.pyimagesearch.com/2015/12/07/local-binary-patterns-with-python-opencv/
def quantify_image_lbp(image, num_points=24, radius=8):
  """Describe ``image`` with a normalised histogram of uniform LBP codes.

  Args:
    image: Grayscale image passed to ``skimage.feature.local_binary_pattern``.
    num_points: Number of circularly symmetric neighbour points (``P``).
    radius: Radius of the circle the neighbours are sampled on (``R``).

  Returns:
    A float array of length ``num_points + 2`` whose entries sum to
    (approximately) 1: one bin per uniform-LBP label.
  """
  lbp = feature.local_binary_pattern(image, num_points, radius, method='uniform')

  # The 'uniform' method produces num_points + 2 distinct labels
  # (0 .. num_points + 1). One unit-wide bin per label needs num_points + 3
  # edges. With fewer edges the two highest labels fall into the same bin,
  # because np.histogram's last bin is closed on both sides.
  bin_edges = np.arange(0, num_points + 3)
  hist, _ = np.histogram(lbp.ravel(), bins=bin_edges)

  # Normalise to a distribution; the epsilon guards against division by zero.
  hist = hist.astype('float')
  hist /= (hist.sum() + 1e-7)

  return hist

In [None]:
def load_split(path, image_size=200, extraction_method='hog'):
  """Load every image under ``path`` and turn it into a feature vector.

  The class label of an image is the name of the directory that contains it.

  Args:
    path: Root directory of the split; searched with ``paths.list_images``.
    image_size: Side length the images are resized to before feature
      extraction (forwarded to ``preprocess``).
    extraction_method: ``'hog'`` or ``'lbp'``.

  Returns:
    A tuple ``(data, labels)`` of NumPy arrays: one feature vector and one
    label string per image.

  Raises:
    ValueError: If ``extraction_method`` is not supported, or if an image
      file cannot be decoded.
  """
  # Fail fast on a bad method name instead of hitting an unbound ``features``
  # variable in the middle of the loop.
  if extraction_method not in ('hog', 'lbp'):
    raise ValueError(
        "Unknown extraction_method {!r}; expected 'hog' or 'lbp'".format(extraction_method))

  quantify = quantify_image_hog if extraction_method == 'hog' else quantify_image_lbp

  data = []
  labels = []

  for image_path in paths.list_images(path):
    image = cv.imread(image_path)
    if image is None:
      # cv.imread reports an unreadable or corrupt file by returning None;
      # surface the offending path rather than crashing later in preprocess.
      raise ValueError('Could not read image: {}'.format(image_path))

    image = preprocess(image, image_size=image_size)

    data.append(quantify(image))
    # .../<label>/<file name> -> the parent directory is the class label.
    labels.append(image_path.split(os.path.sep)[-2])

  return (np.array(data), np.array(labels))

In [None]:
# HOG features on images resized to 200 x 200 pixels
extraction_method = 'hog'
resize_image_size_200 = 200

In [None]:
# Both splits are loaded with the same image size and feature extractor so
# that their feature vectors have the same layout.
X_train, y_train = load_split(
    training_path,
    image_size=resize_image_size_200,
    extraction_method=extraction_method,
)
X_test, y_test = load_split(
    testing_path,
    image_size=resize_image_size_200,
    extraction_method=extraction_method,
)

# Message (Indonesian): "Data uploaded successfully!"
print('Data berhasil diupload!')

Data berhasil diupload!


In [None]:
# Learn the label -> integer mapping from the training labels only, then
# apply that same mapping to both splits.
le = LabelEncoder()
le.fit(y_train)
y_train, y_test = le.transform(y_train), le.transform(y_test)

In [None]:
# Display the integer-encoded training labels.
y_train

array([2, 2, 2, ..., 0, 0, 0])

In [None]:
# Display the integer-encoded test labels.
y_test

array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1,

In [None]:
# Linear SVM with hinge loss and L2 penalty; random_state is fixed so the fit
# is repeatable.
lin_svc = LinearSVC(penalty='l2', loss='hinge', random_state=10)
lin_svc.fit(X_train, y_train)
prediksi_lin_svc_test = lin_svc.predict(X_test)

cm = confusion_matrix(y_test, prediksi_lin_svc_test)
cr = classification_report(y_test, prediksi_lin_svc_test)

# Macro averaging gives every class the same weight in F1 / precision / recall.
print('Accuracy of SVM on test set: {:.2f}'.format(lin_svc.score(X_test, y_test)))
print('F1 {:.2f}'.format(f1_score(y_test, prediksi_lin_svc_test, average='macro')))
print('Precision {:.2f}'.format(precision_score(y_test, prediksi_lin_svc_test, average='macro')))
print('Recall {:.2f}'.format(recall_score(y_test, prediksi_lin_svc_test, average='macro')))
print()
print(cr)
print(cm)

Accuracy of SVM on test set: 1.00
F1 1.00
Precision1.00
Recall 1.00

              precision    recall  f1-score   support

           0       0.99      1.00      0.99       300
           1       1.00      1.00      1.00       300
           2       1.00      0.99      0.99       300

    accuracy                           1.00       900
   macro avg       1.00      1.00      1.00       900
weighted avg       1.00      1.00      1.00       900

[[299   0   1]
 [  1 299   0]
 [  2   0 298]]


In [None]:
# Logistic regression baseline with scikit-learn's default settings.
logreg = LogisticRegression()
logreg.fit(X_train, y_train)
predictions = logreg.predict(X_test)

# The report has to be printed explicitly: a bare call in the middle of a
# cell computes the text and then throws it away.
print(classification_report(y_test, predictions))
print('Accuracy of logistic regression classifier on test set: {:.2f}'.format(logreg.score(X_test, y_test))) # .score() -> Accuracy
print("F1", f1_score(y_test, predictions, average="macro"))
print("Precision", precision_score(y_test, predictions, average="macro"))
print("Recall", recall_score(y_test, predictions, average="macro"))

Accuracy of logistic regression classifier on test set: 0.99
F1 0.9933405222345085
Precision 0.9933952094039813
Recall 0.9933333333333333


In [None]:
# Random forest with 100 trees; random_state is fixed so the fit is repeatable.
model = RandomForestClassifier(n_estimators=100, random_state=10)
model.fit(X_train, y_train)

predictions = model.predict(X_test)

# Confusion matrix as a cross-tabulation of true vs. predicted labels;
# margins=True adds the "All" totals row and column.
print(pd.crosstab(y_test, predictions, rownames=['True'], colnames=['Predicted'], margins=True))

Predicted    0    1    2  All
True                         
0          297    0    3  300
1            0  297    3  300
2            4    1  295  300
All        301  298  301  900


In [None]:
# Per-class report followed by macro-averaged summary metrics for the
# predictions currently held in `predictions` (scored against `model`).
print(classification_report(y_test, predictions))

print('Accuracy of Random Forest on test set: {:.2f}'.format(model.score(X_test, y_test)))
print("F1", f1_score(y_test, predictions, average="macro"))
print("Precision", precision_score(y_test, predictions, average="macro"))
print("Recall", recall_score(y_test, predictions, average="macro"))

              precision    recall  f1-score   support

           0       0.99      0.99      0.99       300
           1       1.00      0.99      0.99       300
           2       0.98      0.98      0.98       300

    accuracy                           0.99       900
   macro avg       0.99      0.99      0.99       900
weighted avg       0.99      0.99      0.99       900

Accuracy of Random Forest on test set: 0.99
F1 0.9877869845315406
Precision 0.987807234646629
Recall 0.9877777777777778
