In [1]:
import os
os.chdir("..")


In [2]:
!ls

LICENSE             descriptors.pkl     [1m[36mnotebooks[m[m           [1m[36mskin_lesion_cad[m[m
README.md           descriptors_all.pkl [1m[36mreferences[m[m
bovw.pkl            [1m[36mdocs[m[m                [1m[36mreports[m[m
[1m[36mdata[m[m                [1m[36mmodels[m[m              requirements.txt


In [3]:
from skin_lesion_cad.data.BOVW import DenseDescriptor, BagofWords, LBPDescriptor
import random
from tqdm import tqdm
from pathlib import Path
import numpy as np
import cv2
from joblib import Parallel, delayed, parallel_backend
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

import matplotlib.pyplot as plt


In [4]:
def get_chall2_class(path):
    if "bcc" in str(path):
        return "bcc"
    elif "mel" in str(path):
        return "mel"
    elif "scc" in str(path):
        return "scc"
    else:
        raise ValueError("class needs to be bcc, mel or scc")


In [5]:
# SAMPLE_SIZE = 1000

chall = "chall2"
train_path = Path(f"data/processed/{chall}/train")
training_names = train_path.rglob("*_inpaint_0_5*")

# Get path to all images and save them in a list
image_paths = [i for i in training_names]


# Currently only sampling few images for quick testing
# image_paths = [i for i in training_names]
if chall=="chall1":
    image_classes = [0 if ("nevus" in str(i)) else 1 for i in image_paths]
elif chall=="chall2":
    image_classes = [get_chall2_class(str(i)) for i in image_paths]
mask_paths = [image_path.parent/
              Path(image_path.name.replace("inpaint","mask")) for image_path in image_paths]


# BRISK is a good replacement to SIFT. ORB also works but didn;t work well for this example

brisk = cv2.BRISK_create(thresh=30, octaves=0)
dense_brisk = DenseDescriptor(
    descriptor=brisk, min_keypoints=100, max_keypoints=500, kp_size=25)
lbp = LBPDescriptor(descriptor=brisk, min_keypoints=100, max_keypoints=500, kp_size=25)


In [6]:
image_paths[0]

PosixPath('data/processed/chall2/train/bcc/bcc01703_inpaint_0_5.png')

In [7]:
def _load_and_extract_des(image_path, mask_path, descriptor):
    im = cv2.imread(str(image_path))
    im_gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    mask = cv2.imread(str(mask_path))
    kpts, des = descriptor.detectAndCompute(im_gray, mask)
    return des




In [8]:

with parallel_backend('threading', n_jobs=-1):
    des_list = Parallel(verbose=10)(
        delayed(_load_and_extract_des)(filename, mask_paths[i], lbp) for i, filename in enumerate(image_paths)
    )


[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:    1.2s
[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed:    1.3s
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    2.3s
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    2.9s
[Parallel(n_jobs=-1)]: Done  45 tasks      | elapsed:    3.5s
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    4.4s
[Parallel(n_jobs=-1)]: Done  69 tasks      | elapsed:    5.4s
[Parallel(n_jobs=-1)]: Done  82 tasks      | elapsed:    6.7s
[Parallel(n_jobs=-1)]: Done  97 tasks      | elapsed:    8.2s
[Parallel(n_jobs=-1)]: Done 112 tasks      | elapsed:    9.3s
[Parallel(n_jobs=-1)]: Done 129 tasks      | elapsed:   10.8s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:   11.9s
[Parallel(n_jobs=-1)]: Done 165 tasks      | elapsed:   13.3s
[Parallel(n_jobs=-1)]: Done 184 tasks      | elapse

In [9]:
from sklearn.preprocessing import LabelEncoder
bovw = BagofWords(n_words=10, n_jobs=-1, random_state=None)
classifier = SVC(max_iter=10000, probability=True, class_weight='balanced', C=1.0)
X_train, X_test, y_train, y_test = train_test_split(
    des_list, image_classes, test_size=0.33, random_state=42)
le = LabelEncoder()
y_train = le.fit_transform(y_train)
y_test = le.transform(y_test)


In [10]:
X_train_preprocessed = bovw.fit_transform(X_train, y_train)
X_test_preprocessed = bovw.transform(X_test)

In [11]:
classifier.fit(X_train_preprocessed, y_train)
y_pred = classifier.predict(X_test_preprocessed)

In [12]:
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       0.61      0.53      0.57       689
           1       0.68      0.59      0.63       871
           2       0.10      0.26      0.14       118

    accuracy                           0.54      1678
   macro avg       0.46      0.46      0.45      1678
weighted avg       0.61      0.54      0.57      1678



In [13]:
from xgboost import XGBClassifier

xgb_cl = XGBClassifier()

xgb_cl.fit(X_train_preprocessed, y_train)
y_pred = xgb_cl.predict(X_test_preprocessed)

print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       0.60      0.54      0.57       689
           1       0.64      0.77      0.70       871
           2       0.17      0.03      0.04       118

    accuracy                           0.62      1678
   macro avg       0.47      0.44      0.44      1678
weighted avg       0.59      0.62      0.60      1678



# Whole image lbp

In [14]:
from skimage.feature import local_binary_pattern


def lbph(image, n_points_radius=[(24, 8), (8, 3), (12, 3), (8, 2), (8, 1)], method="default", eps=1e-7):
    hist_concat = np.array([])
    for (n_points, radius) in n_points_radius:

        lbp = local_binary_pattern(
            image, n_points, radius, method)
        (hist, _) = np.histogram(lbp.ravel(),
                                bins=np.arange(0, n_points + 3),
                                range=(0, n_points + 2))
        # normalize the histogram
        hist = hist.astype("float")
        hist /= (hist.sum() + eps)
        hist_concat = np.append(hist_concat,hist)
    return hist_concat


In [15]:
def get_lbphfeat(image_path):
    im = cv2.imread(str(image_path))
    im_gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    
    return lbph(im_gray)


with parallel_backend('threading', n_jobs=-1):
    lbp_feats = Parallel(verbose=10)(
        delayed(get_lbphfeat)(image_path) for image_path in image_paths
    )


[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    0.8s
[Parallel(n_jobs=-1)]: Done  45 tasks      | elapsed:    1.0s
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    1.3s
[Parallel(n_jobs=-1)]: Done  69 tasks      | elapsed:    1.5s
[Parallel(n_jobs=-1)]: Done  82 tasks      | elapsed:    1.8s
[Parallel(n_jobs=-1)]: Done  97 tasks      | elapsed:    2.1s
[Parallel(n_jobs=-1)]: Done 112 tasks      | elapsed:    2.4s
[Parallel(n_jobs=-1)]: Done 129 tasks      | elapsed:    2.7s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:    3.0s
[Parallel(n_jobs=-1)]: Done 165 tasks      | elapsed:    3.4s
[Parallel(n_jobs=-1)]: Done 184 tasks      | elapse

In [25]:
pd.DataFrame(lbp_feats, columns=["lbp"+str(i) for i in range(len(lbp_feats[0]))])

Unnamed: 0,lbp0,lbp1,lbp2,lbp3,lbp4,lbp5,lbp6,lbp7,lbp8,lbp9,...,lbp60,lbp61,lbp62,lbp63,lbp64,lbp65,lbp66,lbp67,lbp68,lbp69
0,0.766444,0.049145,0.022204,0.018023,0.027220,0.003809,0.012728,0.011334,0.013006,0.001579,...,0.339532,0.148314,0.010666,0.049626,0.117162,0.028335,0.046849,0.247082,0.008855,0.003582
1,0.688889,0.078392,0.020324,0.025461,0.028476,0.009269,0.016527,0.024344,0.012507,0.002792,...,0.288031,0.191872,0.002620,0.021358,0.152964,0.034113,0.021951,0.283532,0.002818,0.000742
2,0.789949,0.037473,0.013410,0.017797,0.024063,0.002256,0.015415,0.018925,0.010026,0.000877,...,0.244932,0.128501,0.005868,0.039077,0.119765,0.028341,0.040144,0.384903,0.006735,0.001734
3,0.760518,0.056828,0.014369,0.019935,0.019288,0.006084,0.010744,0.022136,0.012298,0.001165,...,0.396464,0.141456,0.012125,0.042138,0.128664,0.034688,0.043263,0.187004,0.010122,0.004077
4,0.756760,0.038118,0.011893,0.019313,0.020939,0.002948,0.013011,0.020228,0.015654,0.001830,...,0.155842,0.144746,0.002858,0.045279,0.142001,0.029588,0.043486,0.433006,0.002410,0.000785
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5077,0.765340,0.061475,0.014337,0.014222,0.025691,0.003670,0.010552,0.010552,0.012272,0.001032,...,0.364760,0.137591,0.007838,0.043938,0.141354,0.036548,0.041698,0.216016,0.007301,0.002956
5078,0.754593,0.045308,0.014772,0.020233,0.021971,0.003600,0.014772,0.032026,0.010427,0.001241,...,0.290369,0.145851,0.007626,0.041222,0.137639,0.028370,0.045808,0.295648,0.006079,0.001387
5079,0.774743,0.033183,0.011927,0.013754,0.020198,0.002789,0.008175,0.013754,0.016255,0.001250,...,0.345316,0.111167,0.009846,0.058846,0.157425,0.025793,0.057379,0.222488,0.009267,0.002471
5080,0.751670,0.023072,0.010322,0.013965,0.021251,0.003643,0.010322,0.019429,0.015179,0.002429,...,0.442473,0.106433,0.017739,0.063086,0.132792,0.020987,0.058963,0.135415,0.017114,0.004997


In [16]:
from sklearn.preprocessing import StandardScaler
X_train, X_test, y_train, y_test = train_test_split(
    lbp_feats, image_classes, test_size=0.3, random_state=42)
le = LabelEncoder()
y_train = le.fit_transform(y_train)
y_test = le.transform(y_test)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [17]:
svc = SVC(kernel='rbf', probability=True, class_weight='balanced', C=1.0)
svc.fit(X_train, y_train)

y_pred = svc.predict(X_test)

print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       0.63      0.57      0.60       617
           1       0.72      0.64      0.68       801
           2       0.16      0.36      0.22       107

    accuracy                           0.59      1525
   macro avg       0.50      0.53      0.50      1525
weighted avg       0.64      0.59      0.61      1525



In [18]:
from xgboost import XGBClassifier

xgb_cl = XGBClassifier()

xgb_cl.fit(X_train, y_train)
y_pred = xgb_cl.predict(X_test)

print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       0.64      0.59      0.62       617
           1       0.68      0.81      0.74       801
           2       0.43      0.03      0.05       107

    accuracy                           0.67      1525
   macro avg       0.58      0.48      0.47      1525
weighted avg       0.65      0.67      0.64      1525



In [21]:
import pandas as pd
pd.DataFrame(lbp_feats)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,60,61,62,63,64,65,66,67,68,69
0,0.766444,0.049145,0.022204,0.018023,0.027220,0.003809,0.012728,0.011334,0.013006,0.001579,...,0.339532,0.148314,0.010666,0.049626,0.117162,0.028335,0.046849,0.247082,0.008855,0.003582
1,0.688889,0.078392,0.020324,0.025461,0.028476,0.009269,0.016527,0.024344,0.012507,0.002792,...,0.288031,0.191872,0.002620,0.021358,0.152964,0.034113,0.021951,0.283532,0.002818,0.000742
2,0.789949,0.037473,0.013410,0.017797,0.024063,0.002256,0.015415,0.018925,0.010026,0.000877,...,0.244932,0.128501,0.005868,0.039077,0.119765,0.028341,0.040144,0.384903,0.006735,0.001734
3,0.760518,0.056828,0.014369,0.019935,0.019288,0.006084,0.010744,0.022136,0.012298,0.001165,...,0.396464,0.141456,0.012125,0.042138,0.128664,0.034688,0.043263,0.187004,0.010122,0.004077
4,0.756760,0.038118,0.011893,0.019313,0.020939,0.002948,0.013011,0.020228,0.015654,0.001830,...,0.155842,0.144746,0.002858,0.045279,0.142001,0.029588,0.043486,0.433006,0.002410,0.000785
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5077,0.765340,0.061475,0.014337,0.014222,0.025691,0.003670,0.010552,0.010552,0.012272,0.001032,...,0.364760,0.137591,0.007838,0.043938,0.141354,0.036548,0.041698,0.216016,0.007301,0.002956
5078,0.754593,0.045308,0.014772,0.020233,0.021971,0.003600,0.014772,0.032026,0.010427,0.001241,...,0.290369,0.145851,0.007626,0.041222,0.137639,0.028370,0.045808,0.295648,0.006079,0.001387
5079,0.774743,0.033183,0.011927,0.013754,0.020198,0.002789,0.008175,0.013754,0.016255,0.001250,...,0.345316,0.111167,0.009846,0.058846,0.157425,0.025793,0.057379,0.222488,0.009267,0.002471
5080,0.751670,0.023072,0.010322,0.013965,0.021251,0.003643,0.010322,0.019429,0.015179,0.002429,...,0.442473,0.106433,0.017739,0.063086,0.132792,0.020987,0.058963,0.135415,0.017114,0.004997
