In [1]:
import os

# Move from the notebook folder up to the project root so that the relative
# "data/..." paths used below resolve.
# Guarded on the presence of the "data" folder so that re-running this cell
# does not keep climbing one directory higher each time.
if not os.path.isdir("data"):
    os.chdir("..")

In [20]:
from skin_lesion_cad.data.BOVW import DenseDescriptor, BagofWords
import random
from tqdm import tqdm
from pathlib import Path
import numpy as np
import cv2
from joblib import Parallel, delayed, parallel_backend
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report


In [21]:
# Folder holding the raw training images (relative to the current working directory).
train_path = Path('data/raw/chall1/train')
# Collect every .jpg below train_path. The list is sorted because rglob order
# depends on the filesystem, which would make the seeded sample below vary
# between machines.
training_names = sorted(train_path.rglob("*.jpg"))
if not training_names:
    raise FileNotFoundError(f"No .jpg images found under {train_path}")

# Currently only sampling a few images for quick testing.
N_SAMPLES = 50
SAMPLE_SEED = 42
# A dedicated, seeded RNG keeps the subset identical across runs without
# touching the global `random` state; the size is clamped so a small dataset
# does not raise "Sample larger than population".
image_paths = random.Random(SAMPLE_SEED).sample(
    training_names, min(N_SAMPLES, len(training_names)))
# Label 0 when "nevus" appears anywhere in the image path, 1 otherwise.
image_classes = [0 if ("nevus" in str(i)) else 1 for i in image_paths]
# Mask file for each image: same folder with "raw" replaced by "processed",
# named "<image stem>_mask_1_0.png".
mask_paths = [Path(str(image_path.parent).replace("raw", "processed")) /
              Path(image_path.stem+"_mask_1_0.png") for image_path in image_paths]

# One empty tuple per sampled image; not used by the cells visible here.
images = [() for i in image_paths]

# BRISK is a good replacement for SIFT. ORB also works, but didn't work well for this example.
brisk = cv2.BRISK_create(thresh=30, octaves=0)
dense_brisk = DenseDescriptor(descriptor=brisk, minKeypoints=20)


In [22]:
def _load_and_extract_des(image_path, mask_path, descriptor):
    im = cv2.imread(str(image_path))
    mask = cv2.imread(str(mask_path))
    kpts, des = descriptor.detectAndCompute(im, mask)
    return des

# Run descriptor extraction for every (image, mask) pair on joblib's
# threading backend with n_jobs=-1; des_list keeps the order of image_paths.
with parallel_backend('threading', n_jobs=-1):
    extraction_jobs = (
        delayed(_load_and_extract_des)(img_path, msk_path, dense_brisk)
        for img_path, msk_path in zip(image_paths, mask_paths)
    )
    des_list = Parallel()(extraction_jobs)


In [None]:
# Bag-of-visual-words transformer with a 20-word vocabulary.
# random_state is fixed (same value as the train/test split) so repeated runs
# give the same features.
# NOTE(review): assumes BagofWords accepts an integer seed, as scikit-learn
# estimators do — confirm against its implementation.
bovw = BagofWords(n_words=20, n_jobs=-1, random_state=42)
# Support vector classifier with the solver capped at 10000 iterations.
classifier = SVC(max_iter=10000)


In [None]:
# Hold out 33% of the per-image descriptor sets for evaluation; the fixed
# random_state keeps the split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    des_list,
    image_classes,
    test_size=0.33,
    random_state=42,
)


In [None]:
# Fit the bag-of-words transformer on the training descriptors only, then
# transform the held-out descriptors with the already-fitted transformer.
X_train_preprocessed = bovw.fit_transform(X_train, y_train)
X_test_preprocessed = bovw.transform(X_test)


In [None]:
# Train the classifier on the training features and predict labels for the
# held-out set (fit returns the fitted estimator, so the calls can be chained).
y_pred = classifier.fit(X_train_preprocessed, y_train).predict(X_test_preprocessed)

In [None]:
# Per-class precision / recall / F1 on the held-out images.
report = classification_report(y_test, y_pred)
print(report)


              precision    recall  f1-score   support

           0       0.75      0.30      0.43        10
           1       0.46      0.86      0.60         7

    accuracy                           0.53        17
   macro avg       0.61      0.58      0.51        17
weighted avg       0.63      0.53      0.50        17

