In [3]:
import os
from os.path import isfile, join
import pandas as pd
import numpy as np
from prep_image import prep_im_and_mask
import matplotlib.pyplot as plt

from asymmetry import mean_asymmetry
from color import color_var
from compactness import compactness_score
from convexity import convexity_score

## Extract features

In [2]:
def extract_features(im, im_mask):
    """Build the feature vector for one image and its mask.

    The entries are, in order: asymmetry, the three values returned by
    ``color_var`` (labelled r, g, b in this notebook), compactness and
    convexity.

    Args:
        im: The image; only handed to ``color_var``.
        im_mask: The mask; handed to every feature function.

    Returns:
        A 1-D ``np.float16`` array with six entries.
    """
    # Asymmetry (computed from the mask only)
    asym = mean_asymmetry(im_mask)

    # Color (needs both the image and the mask)
    col_red, col_green, col_blue = color_var(im, im_mask)

    # Compactness (computed from the mask only)
    compact = compactness_score(im_mask)

    # Convexity (computed from the mask only)
    convex = convexity_score(im_mask)

    # NOTE: float16 keeps roughly three significant decimal digits.
    values = (asym, col_red, col_green, col_blue, compact, convex)
    return np.asarray(values, dtype=np.float16)

## Process Images

In [7]:
# Input metadata table and output feature table
file_data = 'metadata_withmasks.csv'
file_features = 'feature_data.csv'

# Image / mask directories; joining with '' appends the trailing separator
image_folder = os.path.join('test_images', '')
mask_folder = os.path.join('test_images_masks', '')

In [4]:
# Load the metadata table (image ids and diagnostic labels)
df = pd.read_csv(file_data)

# Keep only the rows whose 'mask' column equals 1 (images that have a mask)
df_mask = df['mask'].eq(1)
df = df[df_mask]

# Diagnostic label of every remaining image, in row order
labels = df['diagnostic'].tolist()

In [5]:
# Allocate the feature matrix: one row per image in df, one column per feature
feature_names = [
    'assymmetry',
    'color_r', 'color_g', 'color_b',
    'compactness',
    'convexity',
]
features_n = len(feature_names)
features = np.zeros((len(df), features_n), dtype=np.float16)

In [1]:
# Extract features for every image listed in df.
# Row i of `features` receives the feature vector of the i-th image id.
# NOTE: `images` keeps every loaded image in memory; drop the append below
# if the images are not needed after this cell.
images = []
for i, img_id in enumerate(df['img_id']):  # `img_id`, not `id`: avoid shadowing the builtin

    # Load the image and its mask from the configured folders
    im, mask = prep_im_and_mask(img_id, image_folder, mask_folder)
    images.append(im)

    # Store the feature vector directly instead of leaving a stray global `x`
    features[i, :] = extract_features(im, mask)

NameError: name 'df' is not defined

In [None]:
# Show the image ids that remain after filtering
df["img_id"].tolist()


In [None]:
# Save image_ids and features in a file
df_features = pd.DataFrame(features, columns = feature_names)

# Add the image id as the first column so every feature row can be matched
# back to its metadata row. `features` was filled in the row order of `df`,
# so the ids are attached by position (.to_numpy() ignores df's filtered index).
df_features.insert(0, 'img_id', df['img_id'].to_numpy())

df_features.to_csv(file_features, index = False)

## Train classifier

In [None]:
import os
import pandas as pd
import numpy as np

# Default packages for the minimum example
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GroupKFold, train_test_split
from sklearn.metrics import accuracy_score #example for measuring performance


import pickle #for saving/loading trained classifiers

In [None]:
# Train-test split
#X_train, X_test, y_train, y_test = train_test_split(features, y, test_size= 0.2, random_state = 1, stratify = y)

# Grouped cross-validation: samples sharing a group never end up on both
# sides of a split.
folds = 5
group_kfold = GroupKFold(n_splits=folds)

# NOTE(review): `patient_id` is not defined in any cell visible here; it
# presumably holds one patient identifier per row of `features` -- define it
# before running this cell.
# get_n_splits only reports the number of folds; call
# group_kfold.split(features, labels, patient_id) to obtain the actual
# train/test indices.
group_kfold.get_n_splits(features, labels, patient_id)

In [None]:
# k-nearest-neighbours classifier with k = 10
clf = KNeighborsClassifier(n_neighbors = 10)

# Train it --> need to define y first
# NOTE(review): X_train_transformed, y_train, X_test_transformed and y_test
# are not defined in any cell visible here; create them before running this.
clf.fit(X_train_transformed, y_train)

# Predict on the validation dataset once and keep the predictions, so further
# metrics (e.g. f1-score, still TODO) can reuse them without predicting again.
y_pred = clf.predict(X_test_transformed)

# Accuracy: the same value clf.score(X_test_transformed, y_test) reports
print(accuracy_score(y_test, y_pred))

## Evaluate classifier

In [None]:
import pickle #for loading your trained classifier

from extract_features import extract_features #our feature extraction

# The function that should classify new images.
# The image and mask are the same size, and are already loaded using plt.imread
def classify(img, mask):
    """Predict the label and class probabilities for a single image.

    Args:
        img: The image to classify.
        mask: The mask belonging to ``img``.

    Returns:
        A tuple ``(pred_label, pred_prob)`` holding the outputs of the
        classifier's ``predict`` and ``predict_proba`` for this one sample.
    """
    # Resize the image etc, if you did that during training

    # Extract features (the same ones that you used for training)
    x = extract_features(img, mask)

    # scikit-learn estimators expect a 2-D (n_samples, n_features) array;
    # a flat feature vector is rejected, so present it as a single row.
    x = np.asarray(x).reshape(1, -1)

    # Load the trained classifier; the `with` block closes the file handle.
    # SECURITY: pickle.load can execute arbitrary code -- only load a
    # classifier file that you created yourself or otherwise trust.
    with open('groupXY_classifier.sav', 'rb') as model_file:
        classifier = pickle.load(model_file)

    # Use it on this example to predict the label AND posterior probability
    pred_label = classifier.predict(x)
    pred_prob = classifier.predict_proba(x)

    return pred_label, pred_prob

In [None]:
# Call in a loop