In [None]:
import csv
import math

import cv2
import imageio
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
import skimage as ski
import skimage.io as scio
import sklearn
import sklearn.metrics.pairwise as skmetrics
from skimage.filters import threshold_sauvola as sauvola
from sklearn.cluster import KMeans, MiniBatchKMeans
from sklearn.model_selection import train_test_split

%matplotlib inline

In [None]:
from ImageLoader import *
from featureExtractor import *

## Calculate all the AKAZE features

In [None]:
# One descriptor array per image, in the same order as storage.allList.
# The arrays are kept in a plain list: wrapping them in np.array() fails on
# recent NumPy versions as soon as two images yield a different number of
# descriptors (inhomogeneous shape), and every later cell only indexes or
# iterates over `features`.
features = [np.array(image_to_feature(img.content)) for img in storage.allList]

In [None]:
# Peek at the descriptors computed for the first image.
features[0]

Remove **empty** features.

In [None]:
def rejectNAN(features):
    """Split ``features`` into usable entries and the positions of unusable ones.

    An entry is kept when its ``.any()`` is truthy; otherwise its position in
    the input sequence is recorded.

    Returns
    -------
    (kept, dropped) : tuple of lists
        ``kept`` holds the retained entries in their original order,
        ``dropped`` the indices of the rejected ones.
    """
    kept, dropped = [], []
    for position, descriptor in enumerate(features):
        if not descriptor.any():
            dropped.append(position)
            continue
        kept.append(descriptor)
    return kept, dropped

In [None]:
# Keep only the feature arrays that contain at least one truthy value and
# remember the indices of the discarded ones.
# NOTE(review): `features` is rebound to the filtered list, so running this
# cell a second time yields an empty `rejects`.
features, rejects = rejectNAN(features)

In [None]:
# Same peek after filtering: the first retained feature array.
features[0]

## Create lists 
- **ALL**: all images that kept at least one descriptor
- **ALL_CANCER**: all cancerous images
- **ALL_BENIN**: all benign images
- **ALL_AKAZE_CANCER**: AKAZE descriptors of the cancerous images
- **ALL_AKAZE_BENIN**: AKAZE descriptors of the benign images

In [None]:
# Images whose features survived rejectNAN, in the same order as `features`.
# A set makes the membership test O(1) instead of scanning the list each time.
rejected = set(rejects)
ALL = [img for idx, img in enumerate(storage.allList) if idx not in rejected]

# `features` and `ALL` are paired by position below; fail loudly if they ever
# get out of step (e.g. after re-running the filtering cell).
assert len(ALL) == len(features), "ALL and features are no longer aligned"

# Per-class views of the images and of their descriptor arrays.
ALL_CANCER = [img for img in ALL if img.is_cancer]
ALL_BENIN = [img for img in ALL if not img.is_cancer]
ALL_AKAZE_CANCER = [feat for img, feat in zip(ALL, features) if img.is_cancer]
ALL_AKAZE_BENIN = [feat for img, feat in zip(ALL, features) if not img.is_cancer]

In [None]:
# Sizes of the per-class image lists and of the matching descriptor lists.
len(ALL_CANCER), len(ALL_BENIN), len(ALL_AKAZE_CANCER), len(ALL_AKAZE_BENIN)

## Pre-process the AKAZE descriptors

In [None]:
def preprocess_akaze(features, descriptor_size=61, return_mean=False):
    """Stack all descriptors into one matrix and centre each column on zero.

    Parameters
    ----------
    features : sequence of array-like
        Per-image descriptor arrays; they are flattened and re-cut into rows
        of ``descriptor_size`` values.
    descriptor_size : int, default 61
        Number of values per descriptor (row width of the result).
    return_mean : bool, default False
        When True, also return the per-column mean that was subtracted, so
        the same shift can be applied to other descriptors later.

    Returns
    -------
    train_desc : ndarray of shape (n_descriptors, descriptor_size)
        The mean-centred descriptors.
    train_mean : ndarray of shape (descriptor_size,)
        Only returned when ``return_mean`` is True.

    Raises
    ------
    ValueError
        If ``features`` is empty or its total size is not a multiple of
        ``descriptor_size`` (raised by NumPy).
    """
    train_desc = np.concatenate(features, axis=None).reshape(-1, descriptor_size)
    train_mean = np.mean(train_desc, axis=0)
    # Notebook output: show the mean vector and the raw stacked matrix.
    display(train_mean, train_desc)
    train_desc = train_desc - train_mean
    if return_mean:
        return train_desc, train_mean
    return train_desc

In [None]:
# Stack and mean-centre every retained descriptor; this matrix is what the
# k-means model is fitted on.
train_desc = preprocess_akaze(features)

## Kmeans
Cluster the descriptors into 128 centers (the visual words).  
Then compute, for each image, the histogram of the clusters its descriptors fall into.

In [None]:
# Learn the 128-word visual vocabulary from the centred descriptors.
# fit() returns the estimator itself, so construction and training are chained.
kmeans = MiniBatchKMeans(
    n_clusters=128,
    batch_size=128,
    random_state=0,
).fit(train_desc)
kmeans.cluster_centers_

In [None]:
# (number of clusters, descriptor length) of the fitted centres.
kmeans.cluster_centers_.shape

In [None]:
# Vocabulary size and descriptor length are read from the fitted model.
n_words, descr_len = kmeans.cluster_centers_.shape

# k-means was fitted on mean-centred descriptors, so the same shift has to be
# applied before predicting; the mean is recomputed from the same `features`
# that produced the training matrix.
akaze_mean = np.concatenate(features, axis=None).reshape(-1, descr_len).mean(axis=0)


def bow_histogram(desc):
    """Return how many descriptors of one image fall into each cluster.

    ``np.bincount`` with ``minlength`` gives exactly one slot per cluster id,
    so slot ``k`` always means cluster ``k``. ``np.histogram(..., bins=128)``
    instead derives its bin edges from the min/max label of each image, which
    makes histograms of different images incomparable.
    """
    labels = kmeans.predict(desc - akaze_mean)
    return np.bincount(labels, minlength=n_words)


cancer_image_descr = [bow_histogram(desc) for desc in ALL_AKAZE_CANCER]
benin_image_descr = [bow_histogram(desc) for desc in ALL_AKAZE_BENIN]

In [None]:
# NOTE(review): this displays every histogram in the list; consider slicing
# (e.g. the first few entries) to keep the output short.
cancer_image_descr

In [None]:
# Shapes of the per-class histogram collections.
np.shape(cancer_image_descr), np.shape(benin_image_descr)

## Prepare Data for fit

In [None]:
# Balance the classes: draw `minSize` samples from each one.
minSize = min(len(ALL_CANCER), len(ALL_BENIN))

# Seeded generator so the selection is reproducible across runs.
rng = np.random.default_rng(0)

# Sample WITHOUT replacement: duplicated samples could otherwise end up in
# both the training and the validation split. Each class gets its own index
# draw over its full length, so the larger class is not restricted to its
# first `minSize` entries.
cancer_idx = rng.choice(len(ALL_CANCER), size=minSize, replace=False)
benin_idx = rng.choice(len(ALL_BENIN), size=minSize, replace=False)

CANCER = np.array(ALL_CANCER)[cancer_idx]
BENIN = np.array(ALL_BENIN)[benin_idx]
CANCER_AKAZE = np.array(cancer_image_descr)[cancer_idx]
BENIN_AKAZE = np.array(benin_image_descr)[benin_idx]

In [None]:
# Split images and histograms of both classes with one call so that the same
# row permutation is applied to all four arrays (default test share is used).
# random_state pins the shuffle so the reported score is reproducible.
(train_imgs_cancer, val_imgs_cancer,
 train_akaze_cancer, val_akaze_cancer,
 train_imgs_benin, val_imgs_benin,
 train_akaze_benin, val_akaze_benin) = train_test_split(
    CANCER, CANCER_AKAZE, BENIN, BENIN_AKAZE, random_state=0
)
# Sanity check: train/validation sizes per class.
display((len(train_imgs_cancer), len(val_imgs_cancer), len(train_akaze_cancer), len(val_akaze_cancer)))
display((len(train_imgs_benin), len(val_imgs_benin), len(train_akaze_benin), len(val_akaze_benin)))

In [None]:
# Training matrix: cancer histograms first, benin histograms after.
x_train = np.vstack((train_akaze_cancer, train_akaze_benin))
# Labels in the same order: 0 = cancer, 1 = benin.
# concatenate works for any class sizes; vstack + reshape only worked when
# both classes had exactly the same number of samples.
y_train = np.concatenate((np.zeros(len(train_akaze_cancer)), np.ones(len(train_akaze_benin))))
x_train.shape

In [None]:
# Validation matrix: cancer histograms first, benin histograms after.
x_val = np.vstack((val_akaze_cancer, val_akaze_benin))
# Labels in the same order: 0 = cancer, 1 = benin.
# concatenate works for any class sizes; vstack + reshape only worked when
# both classes had exactly the same number of samples.
y_val = np.concatenate((np.zeros(len(val_akaze_cancer)), np.ones(len(val_akaze_benin))))
x_val.shape

## Fit using SVC

In [None]:
# Only SVC is used in the cells visible here; the other estimators are
# imported but not referenced.
from sklearn.svm import SVC, LinearSVC, LinearSVR, NuSVC
# Linear-kernel support vector classifier with a fixed seed.
clf = SVC(kernel='linear', random_state=0, tol=1e-5, C=0.5)

In [None]:
# Train on the stacked histograms (label 0 = cancer rows, 1 = benin rows).
clf.fit(x_train, y_train)

In [None]:
# Mean accuracy of the classifier on the validation set.
result = clf.score(x_val, y_val)
result

## Display Images

In [None]:
from matplotlib.image import imread

nelem = 17   # maximum number of cancer/benin pairs to show


def show_image(img, frame_color, gray=True):
    """Draw ``img`` on the current axes, surrounded by a coloured frame.

    Parameters
    ----------
    img : array exposing ``.shape`` and accepted by ``plt.imshow``.
    frame_color : matplotlib format string for the border (e.g. 'g' or 'r').
    gray : currently unused; kept so existing calls remain valid.
    """
    plt.imshow(img)
    h, w = img.shape[:2]
    # Closed rectangle along the image border.
    plt.plot([0, 0, w, w, 0], [0, h, h, 0, 0], frame_color, linewidth = 2)
    plt.axis('off')


def show_prediction(n_rows, position, image, histogram, expected, classname):
    """Plot one validation image, framed green when the prediction is correct.

    ``position`` is the 1-based subplot slot in an ``n_rows`` x 2 grid;
    ``histogram`` is the feature vector of ``image`` and ``expected`` its
    true label.
    """
    plt.subplot(n_rows, 2, position)
    predicted = clf.predict(histogram.reshape(1, -1))[0]
    correct = predicted == expected
    show_image(image.content, 'g' if correct else 'r')
    plt.title(classname + " " + ("OK" if correct else "ERR"))


# Never ask for more rows than there are validation samples in either class.
n_rows = min(nelem, len(val_imgs_cancer), len(val_imgs_benin))

# Create the figure first, then reduce ITS margins: calling
# plt.subplots_adjust before plt.figure acts on a different, implicitly
# created figure.
fig = plt.figure(figsize=(10,40))
fig.subplots_adjust(wspace = 0, hspace = 0,
                    top = 0.99, bottom = 0.01, left = 0.01, right = 0.99)

# Left column: cancer samples (label 0); right column: benin samples (label 1).
for ii in range(n_rows):
    show_prediction(n_rows, 2*ii+1, val_imgs_cancer[ii], val_akaze_cancer[ii], 0, "CANCER")
    show_prediction(n_rows, 2*ii+2, val_imgs_benin[ii], val_akaze_benin[ii], 1, "BENIN")

plt.show()

## Create output array of the results

In [None]:
# For every validation sample, store the prediction (array of length 1) and
# whether it matches the true label.
results = []
for sample, label in zip(x_val, y_val):
    prediction = clf.predict(sample[np.newaxis, :])
    is_correct = prediction == label
    results.append([prediction, is_correct])
results