## Initialization

In [1]:
import os
import cv2
import glob
import imutils
import argparse
import numpy as np
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
import splitfolders

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from sklearn import preprocessing
from sklearn.model_selection import cross_val_score
from skimage.feature import hog

In [86]:
data_path = r"C:\\Users\SMSK_Narinda\Desktop\Research\dataset2"
train_path = r"C:\\Users\SMSK_Narinda\Desktop\Research\dataset2\train"
test_path = r"C:\\Users\SMSK_Narinda\Desktop\Research\dataset2\test"
val_path = r"C:\\Users\SMSK_Narinda\Desktop\Research\dataset2\val"
img_size = 200
num_classes = 23
EPOCHS = 20
BATCH_SIZE = 64

In [87]:
subfolders= [os.path.join(test_path, d) for d in os.listdir(test_path) if os.path.isdir(os.path.join(test_path, d))]
labels = []

for label in subfolders:
    print(os.path.normcase(label))
    labels.append(label)

c:\\users\smsk_narinda\desktop\research\dataset2\test\acne_and_rosacea_photos
c:\\users\smsk_narinda\desktop\research\dataset2\test\actinic_keratosis_basal_cell_carcinoma_and_other_malignant_lesions
c:\\users\smsk_narinda\desktop\research\dataset2\test\atopic_dermatitis_photos
c:\\users\smsk_narinda\desktop\research\dataset2\test\bullous_disease_photos
c:\\users\smsk_narinda\desktop\research\dataset2\test\cellulitis_impetigo_and_other_bacterial_infections
c:\\users\smsk_narinda\desktop\research\dataset2\test\eczema_photos
c:\\users\smsk_narinda\desktop\research\dataset2\test\exanthems_and_drug_eruptions
c:\\users\smsk_narinda\desktop\research\dataset2\test\hair_loss_photos_alopecia_and_other _hair_diseases
c:\\users\smsk_narinda\desktop\research\dataset2\test\herpes_hpv_other_stds_photos
c:\\users\smsk_narinda\desktop\research\dataset2\test\light_diseases_and_disorders_of_pigmentation
c:\\users\smsk_narinda\desktop\research\dataset2\test\lupus_and_other_connective_tissue_diseases
c:\\u

## KNN Classifier

In [88]:
def image_2_feature_vector(image, dsize=(128,128)):
    ###resize image to be fixed size, flatten image into raw pixel
    return cv2.resize(image, dsize).flatten()

In [89]:
def extract_color_histogram(image, bins=(32,32,32)):
    ###extract 3d color
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    hist = cv2.calcHist([hsv], [0,1,2], None, bins,
                       [0, 180, 0 , 256, 0, 256])
    
    ### handle normalizing the historigram
    if imutils.is_cv2():
        hist = cv2.normalize(hist)
    
    else :
        cv2.normalize(hist,hist)
        
    return hist.flatten()

In [90]:
### initialize the raw pixel intensities matrix
rawImages =[]
features = []
labels = []

In [None]:
for (i, classes) in enumerate(subfolders):
    for classes in subfolders:
        for filename in os.scandir(classes):
            if filename.is_file():
                # Mendapatkan path file dari objek filename
                image_path = filename.path
                # Membaca gambar dari path file
                image = cv2.imread(image_path)
                # Mendapatkan label dari nama subfolder
                label = classes.split(os.path.sep)[-1]

                # Ekstrak fitur dari gambar dan histogram warna
                pixels = image_2_feature_vector(image)
                hist = extract_color_histogram(image)

                # Menambahkan fitur dan label ke dalam list yang sesuai
                rawImages.append(pixels)
                features.append(hist)
                labels.append(label)
            
    if i > 0 and ((i + 1)% 200 == 0 or i ==len(subfolders)-1):
        # Menampilkan pesan jika sudah diproses sejumlah tertentu gambar
        print("[INFO] processed {}/{}".format(len(labels), len(subfolders)))

In [None]:
rawImages = np.array(rawImages)
features = np.array(features)
labels = np.array(labels)

In [None]:
print("[INFO] pixels matrix: {:.2f}MB".format(
	rawImages.nbytes / (1024 * 1000.0)))
print("[INFO] features matrix: {:.2f}MB".format(
	features.nbytes / (1024 * 1000.0)))

In [None]:
(trainRI, testRI, trainRL, testRL) = train_test_split(
	rawImages, labels, test_size=0.15, random_state=42)
(trainFeat, testFeat, trainLabels, testLabels) = train_test_split(
	features, labels, test_size=0.15, random_state=42)

### Cara 1 KNN Classifier

In [None]:
ap = argparse.ArgumentParser()
args = ap.parse_args(args=[])
ap.add_argument("-d", "--dataset",
	help="path to input dataset")
ap.add_argument("-k", "--neighbors", type=int, default=4,
	help="# of nearest neighbors for classification")
args = ap.parse_args(args=[])

In [None]:
# k-NN raw Images
print("\n")
print("[INFO] evaluating raw pixel accuracy...")
modelRI = KNeighborsClassifier(n_neighbors=args.neighbors)
modelRI.fit(trainRI, trainRL)
accRI = modelRI.score(testRI, testRL)
print("[INFO] k-NN classifier: k=%d" % args.neighbors)
print("[INFO] raw pixel accuracy: {:.2f}%".format(accRI * 100))

In [None]:
# Cross Validation

knn = KNeighborsClassifier(n_neighbors=24)
scores = cross_val_score(knn, X_train, y_train, cv=24, scoring='accuracy')
print("Accuracy : %0.2f (+/- %0.2f)" % (scores.mean(), scores.std()*2))

In [15]:
# k-NN Historigram
print("\n")
print("[INFO] evaluating histogram accuracy...")
model = KNeighborsClassifier(n_neighbors=args.neighbors)
model.fit(trainFeat, trainLabels)
acc = model.score(testFeat, testLabels)
print("[INFO] k-NN classifier: k=%d" % args.neighbors)
print("[INFO] histogram accuracy: {:.2f}%".format(acc * 100))




[INFO] evaluating histogram accuracy...
[INFO] k-NN classifier: k=4
[INFO] histogram accuracy: 99.91%


### Cara 2 KNN Classifier

In [30]:
folders = [os.path.join(train_path, d) for d in os.listdir(train_path) if os.path.isdir(os.path.join(train_path, d))]

train_images = []
train_labels = []

for folder in folders:
    folder_path = os.path.join(train_path, folder)
    for filename in os.listdir(folder_path):
        image_path = os.path.join(folder_path, filename)
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        image = cv2.resize(image, (224, 224))
        hog_features = hog(image, orientations=9, pixels_per_cell=(8, 8), cells_per_block=(2, 2), feature_vector=True)
        train_images.append(hog_features)
        train_labels.append(folders.index(folder))

In [31]:
test_images = []
test_labels = []
for folder in folders:
    folder_path = os.path.join(test_path, folder)
    for filename in os.listdir(folder_path):
        image_path = os.path.join(folder_path, filename)
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        image = cv2.resize(image, (224, 224))
        hog_features = hog(image, orientations=9, pixels_per_cell=(8, 8), cells_per_block=(2, 2), feature_vector=True)
        test_images.append(hog_features)
        test_labels.append(folders.index(folder))

In [32]:
X_train = np.array(train_images)
y_train = np.array(train_labels)
X_test = np.array(test_images)
y_test = np.array(test_labels)

## K Fold Cross Validation

Source:
https://www.analyticsvidhya.com/blog/2022/02/k-fold-cross-validation-technique-and-its-essentials/

In [None]:
# Evaluasi KNN cara 1

scores = cross_val_score(model, X_t

In [40]:
knn = KNeighborsClassifier(n_neighbors=24)
scores = cross_val_score(knn, X_train, y_train, cv=24, scoring='accuracy')
print("Accuracy : %0.2f (+/- %0.2f)" % (scores.mean(), scores.std()*2))

Accuracy : 0.09 (+/- 0.04)


## Split Class

In [10]:
splitfolders.ratio(data_path, output="dataset2",
                  seed=42,
                  ratio=(.7, .2, .1),
                  group_prefix = None)

splitfolders.fixed(data_path, output="dataset2",
                   seed=42,
                  fixed=(200, 200, 100),
                  oversample=True,
                  group_prefix=None)


Copying files: 0 files [00:00, ? files/s][A
Copying files: 102 files [00:00, 1014.64 files/s][A
Copying files: 263 files [00:00, 1359.89 files/s][A
Copying files: 425 files [00:00, 1474.78 files/s][A
Copying files: 589 files [00:00, 1536.61 files/s][A
Copying files: 757 files [00:00, 1585.13 files/s][A
Copying files: 916 files [00:00, 1304.13 files/s][A
Copying files: 1072 files [00:00, 1374.88 files/s][A
Copying files: 1219 files [00:00, 1400.21 files/s][A
Copying files: 1369 files [00:00, 1427.12 files/s][A
Copying files: 1515 files [00:01, 1426.39 files/s][A
Copying files: 1660 files [00:01, 1422.96 files/s][A
Copying files: 1804 files [00:01, 1413.59 files/s][A
Copying files: 1958 files [00:01, 1448.50 files/s][A
Copying files: 2107 files [00:01, 1458.49 files/s][A
Copying files: 2267 files [00:01, 1498.09 files/s][A
Copying files: 2418 files [00:01, 1432.89 files/s][A
Copying files: 2583 files [00:01, 1493.21 files/s][A
Copying files: 2742 files [00:01, 1519.08 

ValueError: Using fixed with 3 values together with oversampling is not implemented.