# KNN Classifier (Color Extraction)

## Initiation

In [1]:
# Import modules and packages
import os
import cv2
import imutils
import argparse
import timeit
import pickle
import pandas as pd
import numpy as np
import splitfolders
import seaborn as sns
import ipywidgets as widgets
import matplotlib.pyplot as plt

In [2]:
from PIL import Image
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from skimage.feature import hog
from sklearn import metrics
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score

In [3]:
# Define path
data_path = r"C:\\Users\SMSK_Narinda\Desktop\Research\dataset"

In [4]:
# Describe classes in dataset
subfolders= [os.path.join(data_path, d) for d in os.listdir(data_path) if os.path.isdir(os.path.join(data_path, d))]
labels = []

for label in subfolders:
    print(os.path.normcase(label))
    labels.append(label)

c:\\users\smsk_narinda\desktop\research\dataset\acne and rosacea photos
c:\\users\smsk_narinda\desktop\research\dataset\actinic keratosis basal cell carcinoma and other malignant lesions
c:\\users\smsk_narinda\desktop\research\dataset\atopic dermatitis photos
c:\\users\smsk_narinda\desktop\research\dataset\bullous disease photos
c:\\users\smsk_narinda\desktop\research\dataset\cellulitis impetigo and other bacterial infections
c:\\users\smsk_narinda\desktop\research\dataset\eczema photos
c:\\users\smsk_narinda\desktop\research\dataset\exanthems and drug eruptions
c:\\users\smsk_narinda\desktop\research\dataset\hair loss photos alopecia and other hair diseases
c:\\users\smsk_narinda\desktop\research\dataset\herpes hpv and other stds photos
c:\\users\smsk_narinda\desktop\research\dataset\light diseases and disorders of pigmentation
c:\\users\smsk_narinda\desktop\research\dataset\lupus and other connective tissue diseases
c:\\users\smsk_narinda\desktop\research\dataset\melanoma skin cancer

## Preprocessing

In [5]:
# Create extract color from image function.  
def extract_color_histogram(image, bins=(32,32,32)):
    # Extract 3d color by its hue, saturation, and vue (HSV)
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    hist = cv2.calcHist([hsv], [0,1,2], None, bins,
                       [0, 180, 0 , 256, 0, 256])
    
    ### handle normalizing the historigram
    if imutils.is_cv2():
        hist = cv2.normalize(hist)
    else :
        cv2.normalize(hist,hist)
        
    return hist.flatten()

In [6]:
# Initialize the raw pixel intensities matrix
features = []
labels = []

In [None]:
# Load image data with iteration and color extraction function
for i in range (1,8):
    # Inform iteration data
    print("[INFO] Iteration ", i)
    for classes in subfolders:
        # Inform class label
        print("Processing for label " + classes)

        # Iteration for image data
        for filename in os.scandir(classes):
            if filename.is_file():
                # Get image's path from object filename
                image_path = filename.path
                
                # Read image from image_path 
                image = cv2.imread(image_path)
                
                # Get label from the subfolder name 
                label = classes.split(os.path.sep)[-1]
    
                # Use extract_color_histogram function
                # Features extraction from color histogram image 
                hist = extract_color_histogram(image)
    
                # Add to list
                features.append(hist)
                labels.append(label)
                
    print("[STOP] \n")

[INFO] Iteration  1
Processing for label C:\\Users\SMSK_Narinda\Desktop\Research\dataset\Acne and Rosacea Photos
Processing for label C:\\Users\SMSK_Narinda\Desktop\Research\dataset\Actinic Keratosis Basal Cell Carcinoma and other Malignant Lesions
Processing for label C:\\Users\SMSK_Narinda\Desktop\Research\dataset\Atopic Dermatitis Photos
Processing for label C:\\Users\SMSK_Narinda\Desktop\Research\dataset\Bullous Disease Photos
Processing for label C:\\Users\SMSK_Narinda\Desktop\Research\dataset\Cellulitis Impetigo and other Bacterial Infections
Processing for label C:\\Users\SMSK_Narinda\Desktop\Research\dataset\Eczema Photos
Processing for label C:\\Users\SMSK_Narinda\Desktop\Research\dataset\Exanthems and Drug Eruptions
Processing for label C:\\Users\SMSK_Narinda\Desktop\Research\dataset\Hair Loss Photos Alopecia and other Hair Diseases
Processing for label C:\\Users\SMSK_Narinda\Desktop\Research\dataset\Herpes HPV and other STDs Photos
Processing for label C:\\Users\SMSK_Narinda

In [None]:
print("[INFO] Processed {} labels from {} subfolders".format(len(labels), len(subfolders)))

In [None]:
# Convert features and labels into array
features = np.array(features)
labels = np.array(labels)

In [None]:
print("[INFO] Features matrix: {:.2f} MB".format(
	features.nbytes / (1024 * 1000.0)))
print("[INFO] Features shape ", features.shape)
print("[INFO] Labels shape ", labels.shape)

In [None]:
# Split dataset into training and testing set ratio 7 : 3
(trainFeat, testFeat, trainLabels, testLabels) = train_test_split(
	features, labels, test_size=0.3, random_state=42)

# Split dataset into training and validation set ratio 9 : 1
(X_train, x_val, Y_train, y_val) = train_test_split(
	trainFeat, trainLabels, test_size=0.1, random_state=42)

In [None]:
print('Train data shape: ', trainFeat.shape)
print('Train labels shape: ', trainLabels.shape)
print('Validation data shape: ', x_val.shape)
print('Validation labels shape: ', y_val.shape)
print('Test data shape: ', testFeat.shape)
print('Test labels shape: ', testLabels.shape)

## Training Model

In [None]:
knn = KNeighborsClassifier(n_neighbors=args.neighbors)

In [None]:
print("[START] KNN Classifier start training the model")

# Define KNN model
model = knn.fit(trainFeat, trainLabels)

print('[STOP]')

## Evaluating Model; Model Summary

In [None]:
# Model accuracy score
acc = model.score(testFeat, testLabels)

print("[SUMMARY]")
print("KNN classifier : k = %d" % args.neighbors)
print("Model accuracy : {:.2f}%".format(acc * 100))

In [None]:
# Cross Validation for model
scores = cross_val_score(
    model, trainFeat, trainLabels, 
    cv=5, scoring='accuracy')

print("[INFO] Cross Validation Model Score : %0.2f (+/- %0.2f)" 
      % (scores.mean()*100, scores.std()*2))
print("[RESULT] Score mean: ", scores.mean()*100, '%')

In [None]:
# Additional statistic report of the model
# Source report :
# https://www.datasklr.com/select-classification-methods/k-nearest-neighbors

# Reshape the arrays
testLabels_encoded = testLabels_encoded.reshape(-1, 1)
prediction_encoded = prediction_encoded.reshape(-1, 1)

# Now you can use the reshaped arrays with sklearn metrics functions
print('Average Precision : ', metrics.average_precision_score(testLabels_encoded, prediction_encoded)*100, '%') 
print('F1 Score : ', metrics.f1_score(testLabels_encoded, prediction_encoded, average='weighted')*100, '%')
print('Precision : ', metrics.precision_score(testLabels_encoded, prediction_encoded, average='weighted')*100, '%')
print('Recall : ', metrics.recall_score(testLabels_encoded, prediction_encoded, average='weighted')*100, '%')

## Classification Report

In [None]:
# Describe unique classes in dataset
classes = []
for unique_class in subfolders:
    # Get label from the subfolder name 
    label = unique_class.split(os.path.sep)[-1]
    classes.append(label)

# Describe classification report
print(classification_report(testLabels, prediction, target_names=classes))

## Confussion Matrix

In [None]:
prediction = model.predict(testFeat)

In [None]:
cf_matrix = confusion_matrix(testLabels, prediction)

In [None]:
plt.figure(figsize=(10, 7))
sns.heatmap(cf_matrix, annot=True, fmt="d", cmap="Blues")
plt.title("Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("True")
plt.show()

## Save Model

In [None]:
pickle.dump(model,open('KNNClassifier.p','wb'))
print("[INFO] Model is saved successfully")

## Testing Model

C:\Users\SMSK_Narinda\Downloads\melanoma.jpg
Source contoh gambar : "C:\Users\SMSK_Narinda\Downloads\089056200_1612266979-Melanoma.jpg"
https://www.klikdokter.com/info-sehat/kanker/perbedaan-kanker-kulit-melanoma-dan-karsinoma

In [None]:
# Load model
model = pickle.load(open('C:\\Users\\SMSK_Narinda\\Desktop\\Skripsi_Checkpoint_KNN_SVM_Dermnet\\Model\\KNNClassifier.p', 'rb'))
print("[INFO] Model loaded successfully")

In [None]:
# Input example image
path = input('Enter URL of Image')

# Mendapatkan label dari nama subfolder
img_name = os.path.splitext(os.path.basename(path))[0]
print(img_name)

In [None]:
# Fungsi untuk menguji gambar dan mendapatkan prediksi
def test_image(model, image_path):
    img = cv2.imread(image_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    test_image = extract_color_histogram(img)
    test_feature = np.array(test_image).reshape(1, -1)
    predicted_class = model.predict(test_feature)[0]
    return predicted_class

In [None]:
# Menguji gambar dan mendapatkan prediksi
prediction = test_image(model, path)

In [None]:
# Menampilkan hasil prediksi
print(f"Image: {os.path.basename(path)}")
# print(f"True Label: {true_label}")
print(f"Predicted Class: {predicted_class}")

In [None]:
prediction_name_words = []
for char in prediction:
    if char in ('_', ' ', '-'):  # Check for both underscore and space
        # If the character is a separator, add a new word
        prediction_name_words.append('')
    else:
        # If the character is not a separator, append it to the current word
        if prediction_name_words:  # Check if any words have been added yet
            prediction_name_words[-1] += char
        else:
            prediction_name_words.append(char)
print(prediction_name_words)

In [None]:
image_name_words = []
for char in img_name:
    if char in ('_', ' ', '-'):  # Check for both underscore and space
        # If the character is a separator, add a new word
        image_name_words.append('')
    else:
        # If the character is not a separator, append it to the current word
        if image_name_words:  # Check if any words have been added yet
            image_name_words[-1] += char
        else:
            image_name_words.append(char)
print(image_name_words)

In [None]:
def any_keyword_present(image_name_words, prediction_words):
  # Ubah kedua list menjadi huruf kecil agar cocok tanpa memperhatikan huruf besar/kecil
  image_name_words = [word.lower() for word in image_name_words]
  prediction_words = [word.lower() for word in prediction_words]

  # Periksa apakah ada kata dalam prediction_words yang ada dalam image_name_words
  return any(word in image_name_words for word in prediction_words)

In [None]:
is_present = any_keyword_present(image_name_words, prediction_name_words)
accuracy = float(is_present)

In [None]:
# Menghitung dan menampilkan akurasi untuk gambar ini
print(f"Accuracy for this image: {accuracy * 100:.2f}%")

In [None]:
# Menampilkan gambar
img = cv2.imread(path)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
plt.imshow(img)
plt.title(f"Test Image: {os.path.basename(path)}\nTrue Label: {true_label}\nPredicted Class: {predicted_class}\nAccuracy: {accuracy * 100:.2f}%")
plt.show()