In [1]:
%matplotlib inline
import os

import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.model_selection import KFold
import xgboost as xgb
import numpy as np
import cv2

from common import Metrics, get_filenames
from common import DATA_PATH, X_IMG_PATH, Y_IMG_PATH

In [2]:
THRESHOLD_LABELS_POS = .5
THRESHOLD_PRED_POS = 150
MASK_COLOR_RGB = (60, 255, 0)

In [3]:
def load_img(path):
    """ Loads and returns image in RGB color model """
    img = cv2.imread(path)
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    return img_rgb

In [4]:
def process_train(path):
    """ Perform train image processing
    
    Removes background, performs some preprocessing operations
    and applies Canny edge detector.
    """
    img = load_img(path)
    img_gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    
    kernel3 = np.ones((3, 3), np.uint8)
    img_gray = cv2.filter2D(img_gray, -1, kernel3 / 9)
    
    return img_gray

def process_label(path):
    """ Process label image
    
    Loads image and applies threshold on uncertain pixels.
    """
    img = load_img(path)
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    mask = img_gray > 100
    return mask

In [5]:
filenames = os.listdir(X_IMG_PATH)
images_orig = [load_img(X_IMG_PATH + fname) for fname in filenames]

In [6]:
images_prep = [process_train(X_IMG_PATH + x) for x in filenames]
labels_prep = [process_label(Y_IMG_PATH + x) for x in filenames]

In [27]:
def process_sample(X_sample, y_sample=None):
    var_colors = [np.var(X_sample)]
    moments = [*cv2.moments(X_sample).values()]
    hu_moments = list(cv2.HuMoments(moments).flatten())
    X_features = np.array(var_colors) # moments + hu_moments)
    
    if y_sample is not None:
        centre = np.array(y_sample.shape) // 2
        y_bin = y_sample[centre[0], centre[1]]
    else:
        y_bin = None
    
    return X_features, np.array([y_bin])

In [8]:
def create_samples(X, y, n_samples, size=5):
    ''' Create n_samples samples on given image with corresponding labeled image
    Sample sizes are by default of size 5x5
    '''
    
    n_lim, m_lim = np.array(X.shape) - size
    X_samples, y_samples = None, None
    
    for i in range(n_samples):
        n = np.random.randint(0, n_lim)
        m = np.random.randint(0, m_lim)
        X_sample = X[n:n+size, m:m+size]
        y_sample = y[n:n+size, m:m+size]
        
        X_features, y_bin = process_sample(X_sample, y_sample)
        
        if X_samples is None:
            X_samples, y_samples = X_features, y_bin
        else:
            X_samples = np.vstack([X_samples, X_features])
            y_samples = np.vstack([y_samples, y_bin])
        
    return X_samples, y_samples

In [35]:
def classify_img(X_test, clf, chunk=5):
    cnt = 0
    samples = []
    y_pred = np.zeros_like(X_test)
    
    for i in range(chunk, X_test.shape[0]-chunk):
        for j in range(chunk, X_test.shape[1]-chunk):
            X_chunk = process_sample(X_test[i:i-chunk//2, j:j+chunk//2])[0]
            X_chunk = np.expand_dims(X_chunk, axis=1)
            y_pred[i,j] = clf.predict(X_chunk)

In [31]:
kfold = KFold(n_splits=3)

for train_index, test_index in kfold.split(images_prep, labels_prep):
    SAMPLES_PER_IMG = 100
    train_data = [create_samples(images_prep[x], labels_prep[x], SAMPLES_PER_IMG) for x in train_index]
    test_data = [create_samples(images_prep[x], labels_prep[x], SAMPLES_PER_IMG) for x in test_index]
    
    def merge(data):
        X_ret, y_ret = None, None
        for x, y in data:
            if X_ret is None:
                X_ret = x
                y_ret = y
            else:
                X_ret = np.vstack([X_ret, x])
                y_ret = np.vstack([y_ret, y])
                
        return X_ret, y_ret.ravel()
                
    X_train, y_train = merge(train_data)
    X_test, y_test = merge(test_data)
    
    clf = xgb.XGBClassifier()
    clf.fit(X_train, y_train)
    print(clf.score(X_test, y_test))
    
    metrics = Metrics()
    metrics.calculate()

0.9242857142857143


TypeError: calculate() missing 2 required positional arguments: 'y_true' and 'y_pred'

In [36]:
classify_img(images_prep[0], clf)

In [None]:
plt.imshow(process_train(X_IMG_PATH + filenames[0]))