In [1]:
import os

# Switch the working directory to the parent of the directory the kernel
# started in (presumably the project root, so that relative paths such as
# 'data/raw/...' resolve -- confirm against the repository layout).
# The starting directory is remembered on first execution so that re-running
# this cell does not climb one level higher every time.
if "NOTEBOOK_START_DIR" not in globals():
    NOTEBOOK_START_DIR = os.getcwd()
os.chdir(os.path.join(NOTEBOOK_START_DIR, os.pardir))

In [2]:
from skin_lesion_cad.data.BOVW import DenseDescriptor, BagofWords
import random
from tqdm import tqdm
from pathlib import Path
import numpy as np
import cv2
from joblib import Parallel, delayed, parallel_backend
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report


In [3]:
def find_training_images(root):
    """Return every ``*.jpg`` file below ``root`` as a sorted list of paths.

    ``Path.rglob`` yields entries in whatever order the file system reports
    them, which can differ between machines. Sorting fixes the image order,
    and with it the result of the seeded train/test split applied later.
    """
    return sorted(Path(root).rglob("*.jpg"))


train_path = Path('data/raw/chall1/train')
# A sorted, re-iterable list (previously a one-shot generator).
training_names = find_training_images(train_path)

# Paths of all training images; no sub-sampling is applied.
image_paths = list(training_names)
# Label aligned with image_paths: 0 when "nevus" occurs anywhere in the
# path string, 1 otherwise.
image_classes = [0 if "nevus" in str(path) else 1 for path in image_paths]
# Mask aligned with image_paths: the image's directory with "raw" replaced by
# "processed", and the file name "<image stem>_mask_1_0.png".
mask_paths = [
    Path(str(path.parent).replace("raw", "processed")) / Path(path.stem + "_mask_1_0.png")
    for path in image_paths
]

# One empty tuple per image; not read by any cell visible in this notebook.
images = [() for _ in image_paths]
# BRISK is used as a replacement for SIFT. ORB also runs, but it did not
# work well for this example.
brisk = cv2.BRISK_create(
    thresh=30,
    octaves=0,
)
# Wrap the BRISK extractor in the project's DenseDescriptor.
dense_brisk = DenseDescriptor(
    descriptor=brisk,
    minKeypoints=20,
)


In [4]:
def _load_and_extract_des(image_path, mask_path, descriptor):
    """Read one image and its mask from disk and compute its descriptors.

    Args:
        image_path: Path of the image file.
        mask_path: Path of the mask file belonging to that image.
        descriptor: Object exposing ``detectAndCompute(image, mask)`` that
            returns a ``(keypoints, descriptors)`` pair.

    Returns:
        The descriptors returned by ``descriptor.detectAndCompute``; the
        keypoints are discarded.

    Raises:
        OSError: If the image file cannot be read.
    """
    im = cv2.imread(str(image_path))
    if im is None:
        # cv2.imread reports a missing/unreadable file by returning None
        # rather than raising; fail here with the offending path instead of
        # with an obscure error further down the pipeline.
        raise OSError(f"Could not read image: {image_path}")
    # The mask is read with imread's default flags and handed to the
    # descriptor unchanged; an unreadable mask arrives as None, as before.
    # NOTE(review): confirm the descriptor accepts a mask loaded this way
    # and how it treats a None mask.
    mask = cv2.imread(str(mask_path))
    _, des = descriptor.detectAndCompute(im, mask)
    return des

# Compute descriptors for every (image, mask) pair with joblib's threading
# backend; n_jobs=-1 requests all available workers.
with parallel_backend('threading', n_jobs=-1):
    des_list = Parallel(verbose=10)(
        delayed(_load_and_extract_des)(img_path, msk_path, dense_brisk)
        for img_path, msk_path in zip(image_paths, mask_paths)
    )


[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:    3.5s
[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed:    5.4s
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    8.2s
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:   10.8s
[Parallel(n_jobs=-1)]: Done  45 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:  1.9min
[Parallel(n_jobs=-1)]: Done  69 tasks      | elapsed:  2.6min
[Parallel(n_jobs=-1)]: Done  82 tasks      | elapsed:  3.1min
[Parallel(n_jobs=-1)]: Done  97 tasks      | elapsed:  4.2min
[Parallel(n_jobs=-1)]: Done 112 tasks      | elapsed:  5.1min


In [None]:
# Fixed seed (same value as the train/test split) so the bag-of-words step
# gives the same result on every run.
# NOTE(review): assumes BagofWords uses random_state to seed its stochastic
# steps -- confirm against skin_lesion_cad.data.BOVW.
bovw = BagofWords(n_words=20, n_jobs=-1, random_state=42)
# Support vector classifier with the solver capped at 10000 iterations.
classifier = SVC(max_iter=10000)


In [None]:
# Hold out 33% of the per-image descriptor sets (and their labels) for
# testing; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    des_list,
    image_classes,
    test_size=0.33,
    random_state=42,
)


In [None]:
# Fit the bag-of-words model on the training descriptors only, then apply
# the fitted model to the held-out descriptors.
X_train_preprocessed = bovw.fit_transform(X_train, y_train)
X_test_preprocessed = bovw.transform(X_test)


In [None]:
# Train the SVM on the training features and predict labels for the test
# features (fit returns the fitted estimator, so the calls can be chained).
y_pred = classifier.fit(X_train_preprocessed, y_train).predict(X_test_preprocessed)

In [None]:
# Text summary of per-class metrics on the held-out set.
report = classification_report(y_test, y_pred)
print(report)
