In [1]:
import os
os.chdir("..")


In [2]:
!ls

LICENSE             descriptors.pkl     [1m[36mnotebooks[m[m           [1m[36mskin_lesion_cad[m[m
README.md           descriptors_all.pkl [1m[36mreferences[m[m
bovw.pkl            [1m[36mdocs[m[m                [1m[36mreports[m[m
[1m[36mdata[m[m                [1m[36mmodels[m[m              requirements.txt


In [3]:
from skin_lesion_cad.data.BOVW import DenseDescriptor, BagofWords, LBPDescriptor
import random
from tqdm import tqdm
from pathlib import Path
import numpy as np
import cv2
from joblib import Parallel, delayed, parallel_backend
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

import matplotlib.pyplot as plt


In [46]:
def get_chall2_class(path):
    if "bcc" in str(path):
        return "bcc"
    elif "mel" in str(path):
        return "mel"
    elif "scc" in str(path):
        return "scc"
    else:
        raise ValueError("class needs to be bcc, mel or scc")


In [50]:
# SAMPLE_SIZE = 1000

chall = "chall2"
train_path = Path(f"data/processed/{chall}/train")
training_names = train_path.rglob("*_inpaint_0_5*")

# Get path to all images and save them in a list
image_paths = [i for i in training_names]


# Currently only sampling few images for quick testing
# image_paths = [i for i in training_names]
if chall=="chall1":
    image_classes = [0 if ("nevus" in str(i)) else 1 for i in image_paths]
elif chall=="chall2":
    image_classes = [get_chall2_class(str(i)) for i in image_paths]
mask_paths = [image_path.parent/
              Path(image_path.name.replace("inpaint","mask")) for image_path in image_paths]


# BRISK is a good replacement to SIFT. ORB also works but didn;t work well for this example

brisk = cv2.BRISK_create(thresh=30, octaves=0)
dense_brisk = DenseDescriptor(
    descriptor=brisk, min_keypoints=100, max_keypoints=500, kp_size=25)
lbp = LBPDescriptor(descriptor=brisk, min_keypoints=100, max_keypoints=500,kp_size=25,n_points=8,radius=3)


In [51]:
image_paths[0]

PosixPath('data/processed/chall2/train/bcc/bcc01703_inpaint_0_5.png')

In [52]:
def _load_and_extract_des(image_path, mask_path, descriptor):
    im = cv2.imread(str(image_path))
    im_gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    mask = cv2.imread(str(mask_path))
    kpts, des = descriptor.detectAndCompute(im_gray, mask)
    return des



des_list = [_load_and_extract_des(filename, mask_paths[i], lbp) for i, filename in enumerate(image_paths)]


[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    0.8s
[Parallel(n_jobs=-1)]: Done  45 tasks      | elapsed:    1.0s
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    1.3s
[Parallel(n_jobs=-1)]: Done  69 tasks      | elapsed:    1.6s
[Parallel(n_jobs=-1)]: Done  82 tasks      | elapsed:    1.9s
[Parallel(n_jobs=-1)]: Done  97 tasks      | elapsed:    2.4s
[Parallel(n_jobs=-1)]: Done 112 tasks      | elapsed:    2.8s
[Parallel(n_jobs=-1)]: Done 129 tasks      | elapsed:    3.3s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:    3.6s
[Parallel(n_jobs=-1)]: Done 165 tasks      | elapsed:    4.0s
[Parallel(n_jobs=-1)]: Done 184 tasks      | elapse

In [77]:
from sklearn.preprocessing import LabelEncoder
bovw = BagofWords(n_words=10, n_jobs=-1, random_state=None)
classifier = SVC(max_iter=10000)
X_train, X_test, y_train, y_test = train_test_split(
    des_list, image_classes, test_size=0.33, random_state=42)
le = LabelEncoder()
y_train = le.fit_transform(y_train)
y_test = le.transform(y_test)


In [70]:
X_train_preprocessed = bovw.fit_transform(X_train, y_train)
X_test_preprocessed = bovw.transform(X_test)

In [72]:
classifier.fit(X_train_preprocessed, y_train)
y_pred = classifier.predict(X_test_preprocessed)

In [73]:
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       0.64      0.52      0.57       689
           1       0.64      0.82      0.72       871
           2       0.00      0.00      0.00       118

    accuracy                           0.64      1678
   macro avg       0.42      0.44      0.43      1678
weighted avg       0.59      0.64      0.61      1678



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [81]:
from xgboost import XGBClassifier

xgb_cl = XGBClassifier()

xgb_cl.fit(X_train_preprocessed, y_train)
y_pred = xgb_cl.predict(X_test_preprocessed)

print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       0.61      0.57      0.59       689
           1       0.65      0.77      0.71       871
           2       0.00      0.00      0.00       118

    accuracy                           0.63      1678
   macro avg       0.42      0.45      0.43      1678
weighted avg       0.59      0.63      0.61      1678



# Whole image lbp

In [82]:
from skimage.feature import local_binary_pattern


def lbph(image, n_points=24, radius=8, method="default", eps=1e-7):

    lbp = local_binary_pattern(
        image, n_points, radius, method)
    (hist, _) = np.histogram(lbp.ravel(),
                            bins=np.arange(0, n_points + 3),
                            range=(0, n_points + 2))
    # normalize the histogram
    hist = hist.astype("float")
    hist /= (hist.sum() + eps)
    return hist


In [84]:
def get_lbphfeat(image_path):
    im = cv2.imread(str(image_path))
    im_gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    
    return lbph(im_gray)

lbp_feats = [get_lbphfeat(image_path) for image_path in tqdm(image_paths)]


100%|██████████| 5082/5082 [03:03<00:00, 27.63it/s]


In [85]:
from sklearn.preprocessing import StandardScaler
X_train, X_test, y_train, y_test = train_test_split(
    lbp_feats, image_classes, test_size=0.3, random_state=42)
le = LabelEncoder()
y_train = le.fit_transform(y_train)
y_test = le.transform(y_test)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [87]:
svc = SVC(kernel='rbf', probability=True)
svc.fit(X_train, y_train)

y_pred = svc.predict(X_test)

print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       0.57      0.37      0.45       617
           1       0.59      0.83      0.69       801
           2       0.00      0.00      0.00       107

    accuracy                           0.58      1525
   macro avg       0.39      0.40      0.38      1525
weighted avg       0.54      0.58      0.54      1525



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [89]:
from xgboost import XGBClassifier

xgb_cl = XGBClassifier()

xgb_cl.fit(X_train, y_train)
y_pred = xgb_cl.predict(X_test)

print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       0.52      0.47      0.50       617
           1       0.60      0.72      0.65       801
           2       0.00      0.00      0.00       107

    accuracy                           0.57      1525
   macro avg       0.37      0.40      0.38      1525
weighted avg       0.53      0.57      0.54      1525

