## Initialization

In [1]:
import os
import cv2
import glob
import imutils
import argparse
import numpy as np
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
import splitfolders

In [2]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from sklearn import preprocessing
from sklearn.model_selection import cross_val_score
from skimage.feature import hog

In [3]:
data_path = r"C:\\Users\SMSK_Narinda\Desktop\Research\dataset"
img_size = 128
num_classes = 23
EPOCHS = 20
BATCH_SIZE = 64

In [4]:
subfolders= [os.path.join(data_path, d) for d in os.listdir(data_path) if os.path.isdir(os.path.join(data_path, d))]
labels = []

for label in subfolders:
    print(os.path.normcase(label))
    labels.append(label)

c:\\users\smsk_narinda\desktop\research\dataset\acne and rosacea photos
c:\\users\smsk_narinda\desktop\research\dataset\actinic keratosis basal cell carcinoma and other malignant lesions
c:\\users\smsk_narinda\desktop\research\dataset\atopic dermatitis photos
c:\\users\smsk_narinda\desktop\research\dataset\bullous disease photos
c:\\users\smsk_narinda\desktop\research\dataset\cellulitis impetigo and other bacterial infections
c:\\users\smsk_narinda\desktop\research\dataset\eczema photos
c:\\users\smsk_narinda\desktop\research\dataset\exanthems and drug eruptions
c:\\users\smsk_narinda\desktop\research\dataset\hair loss photos alopecia and other hair diseases
c:\\users\smsk_narinda\desktop\research\dataset\herpes hpv and other stds photos
c:\\users\smsk_narinda\desktop\research\dataset\light diseases and disorders of pigmentation
c:\\users\smsk_narinda\desktop\research\dataset\lupus and other connective tissue diseases
c:\\users\smsk_narinda\desktop\research\dataset\melanoma skin cancer

## KNN Classifier

In [5]:
def extract_color_histogram(image, bins=(32,32,32)):
    ###extract 3d color
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    hist = cv2.calcHist([hsv], [0,1,2], None, bins,
                       [0, 180, 0 , 256, 0, 256])
    
    ### handle normalizing the historigram
    if imutils.is_cv2():
        hist = cv2.normalize(hist)
    
    else :
        cv2.normalize(hist,hist)
        
    return hist.flatten()

In [6]:
### initialize the raw pixel intensities matrix
features = []
labels = []

In [7]:
for i in range (1,8):
    for classes in subfolders:
        print("[INFO] Processing for label " + classes)
        for filename in os.scandir(classes):
            if filename.is_file():
                # Mendapatkan path file dari objek filename
                image_path = filename.path
                # Membaca gambar dari path file
                image = cv2.imread(image_path)
                # Mendapatkan label dari nama subfolder
                label = classes.split(os.path.sep)[-1]
    
                # Ekstrak fitur dari gambar dan histogram warna
                hist = extract_color_histogram(image)
    
                # Menambahkan fitur dan label ke dalam list yang sesuai
                features.append(hist)
                labels.append(label)
    print("[STOP] \n")

[INFO] Processing for label C:\\Users\SMSK_Narinda\Desktop\Research\dataset\Acne and Rosacea Photos
[INFO] Processing for label C:\\Users\SMSK_Narinda\Desktop\Research\dataset\Actinic Keratosis Basal Cell Carcinoma and other Malignant Lesions
[INFO] Processing for label C:\\Users\SMSK_Narinda\Desktop\Research\dataset\Atopic Dermatitis Photos
[INFO] Processing for label C:\\Users\SMSK_Narinda\Desktop\Research\dataset\Bullous Disease Photos
[INFO] Processing for label C:\\Users\SMSK_Narinda\Desktop\Research\dataset\Cellulitis Impetigo and other Bacterial Infections
[INFO] Processing for label C:\\Users\SMSK_Narinda\Desktop\Research\dataset\Eczema Photos
[INFO] Processing for label C:\\Users\SMSK_Narinda\Desktop\Research\dataset\Exanthems and Drug Eruptions
[INFO] Processing for label C:\\Users\SMSK_Narinda\Desktop\Research\dataset\Hair Loss Photos Alopecia and other Hair Diseases
[INFO] Processing for label C:\\Users\SMSK_Narinda\Desktop\Research\dataset\Herpes HPV and other STDs Photos


In [8]:
print("[INFO] processed {}/{}".format(len(labels), len(subfolders)))

[INFO] processed 135023/23


In [10]:
features = np.array(features)
labels = np.array(labels)

In [11]:
labels.shape

(135023,)

In [12]:
print("[INFO] features matrix: {:.2f}MB".format(
	features.nbytes / (1024 * 1000.0)))

[INFO] features matrix: 17282.94MB


In [13]:
(trainFeat, testFeat, trainLabels, testLabels) = train_test_split(
	features, labels, test_size=0.3, random_state=42)

In [14]:
ap = argparse.ArgumentParser()
args = ap.parse_args(args=[])
ap.add_argument("-k", "--neighbors", type=int, default=4,
	help="# of nearest neighbors for classification")
args = ap.parse_args(args=[])

### Elbow Method

In [None]:
# Preprocessing
rawImages = rawImages/255.0
reshaped_rawImages = rawImages.reshape(len(rawImages), -1)
reshaped_rawImages.shape

In [None]:
from sklearn.cluster import KMeans
kmeans = KMeans(n_clusters = 23, random_state = 0)
clusters = kmeans.fit_predict(reshaped_rawImages)
kmeans.cluster_centers_.shape

In [None]:

# Membuat x_data berdasarkan jumlah fitur (49152)
x_data = np.arange(49152)

# Menghasilkan palet warna dengan 23 warna
colors = plt.cm.jet(np.linspace(0, 1, 23))

# Membuat scatter plot untuk setiap cluster
plt.figure(figsize=(10, 6))
for i in range(23):
    plt.scatter(x_data, kmeans.cluster_centers_[i], color=colors[i], alpha=0.01, s=20)  # Mengurangi alpha dan meningkatkan s

plt.xlabel('Features')
plt.ylabel('Cluster Centers')
plt.title('Cluster Centers for KMeans Clustering (n_clusters=23)')
plt.show()

In [None]:
# mapping labels from cluster to original labels
def get_reference_dict(clusters,labels):
    reference_label = {}
    # For loop to run through each label of cluster label
    for i in range(len(np.unique(clusters))):
        index = np.where(clusters == i,1,0)
        num = np.bincount(labels[index==1]).argmax()
        reference_label[i] = num
    return reference_label
    
# Mapping predictions to original labels
def get_labels(clusters,labels):
    temp_labels = np.random.rand(len(clusters))
    for i in range(len(clusters)):
        temp_labels[i] = reference_labels[clusters[i]]
    return temp_labels

In [None]:
reference_labels = get_reference_dict(clusters,labels)
predicted_labels = get_labels(clusters,reference_labels)
print(accuracy_score(predicted_labels,labels))

### KNN Classifier Histogram (ekstraksi warna) K-fold Cross Validation

In [None]:
# k-NN Historigram

print("[INFO] evaluating histogram accuracy...")
modelFeat = KNeighborsClassifier(n_neighbors=args.neighbors)
modelFeat.fit(trainFeat, trainLabels)
acc = modelFeat.score(testFeat, testLabels)
print("[INFO] k-NN classifier: k=%d" % args.neighbors)
print("[INFO] histogram accuracy: {:.2f}%".format(acc * 100))

[INFO] evaluating histogram accuracy...


In [None]:
# Evaluasi KNN cara 2 Histogram K-fold Cross Validation

scores = cross_val_score(modelFeat, trainFeat, trainLabels, cv=5, scoring='accuracy')
print("Accuracy : %0.2f (+/- %0.2f)" % (scores.mean(), scores.std()*2))

In [None]:
print("[RESULT] score mean: " )
print(scores.mean())

# Code Below

## Confussion Matrix

Source : https://medium.com/analytics-vidhya/how-to-create-a-confusion-matrix-with-the-test-result-in-your-training-model-802b1315d8ee

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

## K Fold Cross Validation

Source:
https://www.analyticsvidhya.com/blog/2022/02/k-fold-cross-validation-technique-and-its-essentials/

In [None]:
knn = KNeighborsClassifier(n_neighbors=24)
scores = cross_val_score(knn, X_train, y_train, cv=24, scoring='accuracy')
print("Accuracy : %0.2f (+/- %0.2f)" % (scores.mean(), scores.std()*2))

## Split Class

In [None]:
splitfolders.ratio(data_path, output="dataset2",
                  seed=42,
                  ratio=(.7, .2, .1),
                  group_prefix = None)

splitfolders.fixed(data_path, output="dataset2",
                   seed=42,
                  fixed=(200, 200, 100),
                  oversample=True,
                  group_prefix=None)