# SVM Classifier (Color Extraction)

## Initialization

In [2]:
import os
import cv2
import imutils
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn.model_selection as model_selection

In [3]:
from sklearn import svm, datasets
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import make_pipeline

In [4]:
# Root directory of the image dataset: every immediate subfolder is treated as
# one class and its folder name becomes the label.
# NOTE(review): the path points at the `test` split, which is split again into
# train/test portions further down in this notebook - confirm this is intended.
datadir= r"C:\Users\SMSK_Narinda\Desktop\Research\data_split_augmented\test"

In [5]:
# Class names; filled from the entries of `datadir` in a later cell.
Categories=[]
# NOTE(review): `img_size` is not referenced anywhere in the visible code; the
# images are resized to a hard-coded 150x150 instead.
img_size = 100

In [6]:
# Full paths of the class folders: every directory directly below `datadir`.
subfolders = []
for entry_name in os.listdir(datadir):
    candidate = os.path.join(datadir, entry_name)
    if os.path.isdir(candidate):
        subfolders.append(candidate)

In [7]:
# Rebuild the class list from the class directories only. Plain files that may
# sit next to them (for example desktop.ini) must not receive a label index,
# and assigning in place keeps the mapping stable when this cell is re-run
# (appending again would duplicate every name and shift the indices).
Categories[:] = [
    entry_name
    for entry_name in os.listdir(datadir)
    if os.path.isdir(os.path.join(datadir, entry_name))
]

# Class name -> integer label used as the training target.
label_to_index = {category_name: index for index, category_name in enumerate(Categories)}

## Preprocessing

In [8]:
# Accumulators filled while loading: one flattened pixel vector per image and
# the matching integer class label.
flat_data_arr, target_arr = [], []

In [9]:
# Load every image of every class folder into the feature/label accumulators.
# The former one-pass `range(1, 2)` loop is gone; its banner is still printed.
print("[INFO] ITERATION", 1)
for classes in subfolders:
    # The folder name is the class label.
    label = os.path.basename(classes)
    print("Processing for label " + label)

    # The context manager closes the directory iterator deterministically.
    with os.scandir(classes) as entries:
        for filename in entries:
            if not filename.is_file():
                continue

            image = cv2.imread(filename.path)
            if image is None:
                # cv2.imread signals an unreadable or non-image file by
                # returning None; resizing that would raise, so skip it.
                print("[WARN] Skipping unreadable file " + filename.path)
                continue

            # Bring every image to the same size so all vectors share a length.
            image = cv2.resize(image, (150, 150))

            # Store the flattened pixels together with the numeric label.
            flat_data_arr.append(image.flatten())
            target_arr.append(label_to_index[label])

print("[STOP] \n")

[INFO] ITERATION 1
Processing for label Acne_and_Rosacea_Photos
Processing for label Actinic_Keratosis_Basal_Cell_Carcinoma_and_other_Malignant_Lesions
Processing for label Atopic_Dermatitis_Photos
Processing for label Bullous_Disease_Photos
Processing for label Cellulitis_Impetigo_and_other_Bacterial_Infections
Processing for label Eczema_Photos
Processing for label Exanthems_and_Drug_Eruptions
Processing for label Hair_Loss_Photos_Alopecia_and_other _Hair_Diseases
Processing for label Herpes_HPV_other_STDs_Photos
Processing for label Light_Diseases_and_Disorders_of_Pigmentation
Processing for label Lupus_and_other_Connective_Tissue_diseases
Processing for label Melanoma_Skin_Cancer_Nevi_and_Moles
Processing for label Nail_Fungus_and_other_Nail_Disease
Processing for label Poison_Ivy_Photos_and_other_Contact_Dermatitis
Processing for label Psoriasis_pictures_Lichen_Planus_and_related_diseases
Processing for label Scabies_Lyme_Disease_and_other_Infestations_and_Bites
Processing for lab

In [11]:
# Report the number of loaded images next to the number of class folders.
image_count = len(target_arr)
folder_count = len(subfolders)
print("[INFO] Processed {}/{}".format(image_count, folder_count))

[INFO] Processed 7488/23


In [12]:
# Turn the Python lists into NumPy arrays: one row of pixel values per image
# and one integer label per image.
flat_data = np.array(flat_data_arr)
target = np.array(target_arr)

In [13]:
# Report the memory footprint and the shape of both arrays.
# NOTE(review): the divisor is 1024 * 1000, which is neither a decimal MB
# (1000 ** 2) nor a binary MiB (1024 ** 2).
bytes_per_unit = 1024 * 1000.0
flat_data_size = flat_data.nbytes / bytes_per_unit
target_size = target.nbytes / bytes_per_unit

print("[INFO] flat_data matrix: {:.2f} MB".format(flat_data_size))
print("[INFO] target matrix: {:.2f} MB".format(target_size))
print("flat_data shape ", flat_data.shape)
print("target shape ", target.shape)

[INFO] flat_data matrix: 493.59 MB
[INFO] target matrix: 0.03 MB
flat_data shape  (7488, 67500)
target shape  (7488,)


In [14]:
# Wrap the pixel matrix in a DataFrame: one row per image, one column per
# flattened pixel value.
df=pd.DataFrame(flat_data)

In [15]:
# Add the integer class label as a new column named 'Target'.
df['Target']=target

In [16]:
# Features: every column except the last one.
x=df.iloc[:,:-1]
# Labels: the last column (the 'Target' column added in the previous cell).
y=df.iloc[:,-1]

In [17]:
# Sanity check: x and y must have the same number of rows.
print("[INFO] x shape ", x.shape)
print("[INFO] y shape ", y.shape)

[INFO] x shape  (7488, 67500)
[INFO] y shape  (7488,)


In [18]:
# Hold out 30% of the samples for evaluation. Stratifying on y keeps the class
# proportions equal in both parts; the fixed seed makes the split repeatable.
split_options = {"test_size": 0.30, "random_state": 42, "stratify": y}
x_train, x_test, y_train, y_test = train_test_split(x, y, **split_options)

print('Splitted Successfully')

Splitted Successfully


## Training Model

In [None]:
print("[START] SVM Classifier start training the model")

# Hyper-parameter grid: two kernels combined with two values of C.
parameters = {'kernel': ('linear', 'rbf'), 'C': [1, 10]}

# Exhaustive, cross-validated search over the grid with a plain SVC.
svc = svm.SVC()
clf = GridSearchCV(estimator=svc, param_grid=parameters)
clf.fit(x_train, y_train)

# fit() returns the search object itself, so `model` and `clf` are one object.
model = clf

print('[STOP]')

[START] SVM Classifier start training the model


## Evaluating Model; Model Summary

In [None]:
# Predict a class index for every held-out sample with the fitted grid search.
prediction = clf.predict(x_test)

In [None]:
# Share of held-out samples whose predicted class equals the true class.
acc = accuracy_score(y_true=y_test, y_pred=prediction)
accuracy_percent = acc * 100

print("[SUMMARY]")
print(model)
print("Model accuracy : {:.2f}%".format(accuracy_percent))

In [None]:
# Cross validation for the model on the training split.
# cross_val_score is imported here because no earlier cell imports it.
from sklearn.model_selection import cross_val_score

# NOTE(review): at this point `model` is the GridSearchCV object, so every
# fold repeats the whole grid search (nested cross validation) - this is slow.
scores = cross_val_score(
    model, x_train, y_train,
    cv=5, scoring='accuracy')

# Report mean and spread in the same unit (percent); previously only the mean
# was scaled by 100.
print("[INFO] Cross Validation Model Score : %0.2f (+/- %0.2f)"
      % (scores.mean()*100, scores.std()*2*100))
print("[RESULT] Score mean: ", scores.mean()*100, '%')

In [None]:
# Additional statistic report of the model
# Source report :
# https://www.datasklr.com/select-classification-methods/k-nearest-neighbors
from sklearn import metrics
from sklearn.preprocessing import label_binarize

# Average precision is scored on one-vs-rest indicator matrices (one column
# per class): it cannot compare a vector of multiclass label ids directly
# against hard class predictions.
class_ids = np.unique(y_test)
y_test_indicator = label_binarize(y_test, classes=class_ids)
prediction_indicator = label_binarize(prediction, classes=class_ids)

# The remaining metrics accept the label vectors as they are.
print('Average Precision : ', metrics.average_precision_score(y_test_indicator, prediction_indicator)*100, '%')
print('F1 Score : ', metrics.f1_score(y_test, prediction, average='weighted')*100, '%')
print('Precision : ', metrics.precision_score(y_test, prediction, average='weighted')*100, '%')
print('Recall : ', metrics.recall_score(y_test, prediction, average='weighted')*100, '%')

## Classification Report

In [None]:
# Describe unique classes in dataset
from sklearn.metrics import classification_report

# Label ids present in the evaluation split and their names, looked up in
# `Categories` - the list the ids were assigned from - so that each row of the
# report carries the name of the class it actually describes.
class_ids = np.unique(y_test)
classes = [Categories[class_id] for class_id in class_ids]

# Describe classification report
print(classification_report(y_test, prediction, labels=class_ids, target_names=classes))

## Confusion Matrix

In [None]:
# confusion_matrix is imported here because no earlier cell imports it.
from sklearn.metrics import confusion_matrix

# Rows are the true classes of the held-out samples, columns the predictions.
cf_matrix = confusion_matrix(y_test, prediction)

In [None]:
# seaborn (`sns`) is never imported in this notebook, so the heatmap is drawn
# with scikit-learn's own display helper on top of matplotlib instead.
from sklearn.metrics import ConfusionMatrixDisplay

fig, ax = plt.subplots(figsize=(10, 7))
# Integer cell annotations on a blue colour map, as in the original heatmap.
ConfusionMatrixDisplay(confusion_matrix=cf_matrix).plot(
    ax=ax, cmap="Blues", values_format="d")
ax.set_title("Confusion Matrix")
ax.set_xlabel("Predicted")
ax.set_ylabel("True")
plt.show()

## Save Model

In [None]:
# Persist the fitted model; the context manager guarantees the file is flushed
# and closed even if pickling fails.
# NOTE(review): the file name says KNN although this notebook trains an SVM -
# confirm the name, it may overwrite a KNN checkpoint in the working directory.
with open('KNNClassifier.p', 'wb') as model_file:
    pickle.dump(model, model_file)
print("[INFO] Model is saved successfully")

## Testing Model

C:\Users\SMSK_Narinda\Downloads\melanoma.jpg
Example image source: "C:\Users\SMSK_Narinda\Downloads\089056200_1612266979-Melanoma.jpg"

https://www.klikdokter.com/info-sehat/kanker/perbedaan-kanker-kulit-melanoma-dan-karsinoma

In [None]:
# Load model
# SECURITY: unpickling executes code stored in the file - only load model
# files that come from a trusted source.
# NOTE(review): this reads `KNNClassifier.p` from the checkpoint folder, while
# the save cell writes to the current working directory - confirm that this
# file really holds the SVM model evaluated in this notebook.
with open('C:\\Users\\SMSK_Narinda\\Desktop\\Skripsi_Checkpoint_KNN_SVM_Dermnet\\Model\\KNNClassifier.p', 'rb') as model_file:
    model = pickle.load(model_file)
print("[INFO] Model loaded successfully")

In [None]:
# Input example image
# Paths pasted from a file manager often arrive wrapped in quotes or with
# stray spaces; strip both so the path can be opened as typed.
path = input('Enter URL of Image').strip().strip('"\'')

# The file name without its extension; it is compared with the predicted
# class name further down.
img_name = os.path.splitext(os.path.basename(path))[0]
print(img_name)

In [None]:
# Preprocessing SVM
def transform_labels(labels):
    """Encode labels as integers.

    Each distinct label is numbered by its position in the sorted list of
    unique labels.

    Returns:
        A tuple ``(encoded, label_mapping)``: ``encoded`` is an array holding
        the integer code of every input label in input order, and
        ``label_mapping`` maps each distinct label to its code.
    """
    unique_labels = np.unique(labels)
    label_mapping = dict(zip(unique_labels, range(unique_labels.size)))
    encoded = np.array([label_mapping[name] for name in labels])
    return encoded, label_mapping

def inverse_transform_labels(predictions, label_mapping):
    """Translate numeric predictions back into label names.

    Args:
        predictions: Iterable of numeric predictions; each one is rounded to
            the nearest integer before the lookup.
        label_mapping: Mapping of label name to integer code.

    Returns:
        An array with one label name per prediction. A prediction without a
        matching code is reported on stdout and becomes ``"Unknown"``.
    """
    index_to_label = dict(zip(label_mapping.values(), label_mapping.keys()))
    names = []
    for raw_prediction in predictions:
        code = int(round(raw_prediction))
        try:
            names.append(index_to_label[code])
        except KeyError:
            print(f"Unexpected prediction value: {code}")
            names.append("Unknown")
    return np.array(names)

In [None]:
# Label transformation
# Reuse the exact name -> index mapping the training targets were encoded
# with. Re-deriving it with transform_labels numbers the classes in sorted
# order, which only matches when os.listdir happened to return sorted names.
label_mapping = dict(label_to_index)
labels_train_transformed = np.array([label_mapping[name] for name in Categories])

In [None]:
# Debug output of the raw predictions and the name -> index mapping.
# NOTE(review): `predictions` is first assigned in a later cell
# (model.predict); running the cells top to bottom raises NameError here.
print("Predictions:", predictions)
print("Label Mapping:", label_mapping)

In [None]:
# Read image from path
image = cv2.imread(path)
if image is None:
    # cv2.imread returns None instead of raising when the file is missing or
    # is not a readable image; fail here with a clear message.
    raise FileNotFoundError(f"Could not read an image from: {path}")

# Resize to the size used for the training images, then flatten to a 1-D
# feature vector (flatten already returns a new NumPy array).
image = cv2.resize(image, (150, 150)).flatten()

In [None]:
# Perform prediction
# The single feature vector is wrapped in a list so the model receives a batch
# containing one sample.
predictions = model.predict([image])
# Map the numeric prediction back to its class name.
predicted_labels = inverse_transform_labels(predictions, label_mapping)

In [None]:
# Show the prediction result
print(f"Image: {os.path.basename(path)}")
# print(f"True Label: {true_label}")
print(f"Predicted Class: {predicted_labels}")

In [None]:
import re

# `predicted_labels` is an array with one class name per predicted image, so
# iterating over it yields whole names rather than characters. Split each name
# explicitly on '_', ' ' and '-', and drop the empty pieces that adjacent
# separators would produce.
prediction_name_words = []
for predicted_label in predicted_labels:
    prediction_name_words.extend(
        word for word in re.split(r"[_ \-]", str(predicted_label)) if word
    )
print(prediction_name_words)

In [None]:
import re

# Split the image file name on '_', ' ' and '-'. Empty pieces (from leading,
# trailing or adjacent separators) are dropped so that they cannot count as a
# matching keyword later on.
image_name_words = [word for word in re.split(r"[_ \-]", img_name) if word]
print(image_name_words)

In [None]:
def any_keyword_present(image_name_words, prediction_words):
    """Return True when the two word lists share at least one word.

    The comparison ignores letter case. Empty strings are skipped so that
    leftovers from splitting a name never count as a shared keyword.

    Args:
        image_name_words: Words taken from the image file name.
        prediction_words: Words taken from the predicted class name.

    Returns:
        ``True`` if any non-empty word of ``prediction_words`` also occurs in
        ``image_name_words``, otherwise ``False``.
    """
    # A set gives constant-time membership tests for the lookup below.
    image_vocabulary = {word.lower() for word in image_name_words if word}
    return any(
        word.lower() in image_vocabulary for word in prediction_words if word
    )

In [None]:
# True when the image name and the predicted class name share a word.
is_present = any_keyword_present(image_name_words, prediction_name_words)
# Per-image score: 1.0 for a shared word, 0.0 otherwise.
accuracy = float(is_present)

In [None]:
# Compute and display the accuracy for this image
print(f"Accuracy for this image: {accuracy * 100:.2f}%")

In [None]:
# Show the image
img = cv2.imread(path)
if img is None:
    # cv2.imread returns None when the file cannot be read as an image.
    raise FileNotFoundError(f"Could not read an image from: {path}")

# OpenCV loads images as BGR; matplotlib expects RGB.
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

# No ground-truth label is defined in this notebook. The image file name is
# what the keyword check compares the prediction against, so it is shown as
# the true label; the predicted class is the single predicted name.
true_label = img_name
predicted_class = predicted_labels[0]

plt.imshow(img)
plt.title(f"Test Image: {os.path.basename(path)}\nTrue Label: {true_label}\nPredicted Class: {predicted_class}\nAccuracy: {accuracy * 100:.2f}%")
plt.show()