In [1]:
from pathlib import Path
import numpy as np 
import xml.etree.ElementTree as ET
from sklearn.svm import SVC
from skimage.io import imread
from skimage import img_as_float
from skimage.transform import resize
# from concurrent.futures import ThreadPoolExecutor as PoolExecutor
# from concurrent.futures import as_completed
from joblib import Parallel, delayed
from time import time

from pillclassification.feature_extraction import feature_extraction
from pillclassification.functions import crop_center

# Directory holding the pill images together with the `images.xml` catalogue.
images_dir = Path('utils/Dataset/merge')

# `Path.iterdir` yields entries in arbitrary order, so the listing is sorted to
# give every image the same position on every run and every machine.
# Sub-directories are skipped: only regular files can be read as images.
filenames = sorted(
    x for x in images_dir.iterdir()
    if x.is_file() and x.suffix != '.xml'
)

samples_num = len(filenames)
# Length of one feature vector.
# NOTE(review): presumably 7 Hu moments + 3 colour values - confirm against
# `feature_extraction`.
feature_number = 10

In [2]:
# Build the sorted list of distinct NDC9 codes found in the XML catalogue;
# a class index is the position of its code in `labels`.
catalogue_path = images_dir / 'images.xml'

try:
    tree = ET.parse(catalogue_path)
except ET.ParseError:
    print('Parse error on {}'.format(catalogue_path))
    exit(-1)

# first child of the document root; its children are the per-image entries
se = tree.getroot()[0]

labels_set = {entry.find('NDC9').text for entry in se}

labels = sorted(labels_set)
class_num = len(labels)


In [3]:
# extracting features
# Zero-filled containers: one feature row and one label slot per listed file.
x_data = np.zeros(shape=(samples_num, feature_number))
y_data = np.zeros(shape=samples_num)

def extract_features(f, width=600, crop_scale=0.65):
    """Load one image and return its feature vector and class index.

    Parameters
    ----------
    f : pathlib.Path
        Path of the image file. ``f.name`` is compared with the
        ``File/Name`` text of each entry in the XML catalogue ``se``.
    width : int, optional
        Width in pixels the cropped image is rescaled to; the height is
        scaled by the same factor.
    crop_scale : float, optional
        Value forwarded to ``crop_center`` as ``crop_scale``.

    Returns
    -------
    tuple or None
        ``(features, label)``, where ``features`` is ``np.append(hu, rgb_val)``
        built from the two values returned by ``feature_extraction``.
        ``label`` is the index in ``labels`` of the NDC9 code of the first
        catalogue entry naming this file, or ``-1`` when no entry names it.
        ``None`` is returned when ``imread`` raises ``ValueError``.
    """
    # loading the image
    try:
        img = imread(f)
    except ValueError:
        return None

    # Drop the alpha channel. The ndim guard keeps a 2-D (single channel)
    # image whose width happens to be 4 from being indexed with three axes.
    if img.ndim == 3 and img.shape[-1] == 4:
        img = img[:, :, :3]

    # cropping in the center
    img = crop_center(img, crop_scale=crop_scale)

    # rescaling with fixed width, preserving the aspect ratio
    height = int(img.shape[0] * (width / img.shape[1]))
    img = resize(img, (height, width), anti_aliasing=True)

    # the img must be in float format
    img = img_as_float(img)

    # feature extraction
    hu, rgb_val = feature_extraction(img)

    # Default to -1 and stop at the first matching entry. Without the break,
    # every later non-matching entry would reset the label to -1.
    label = -1
    for entry in se:
        # findtext returns None (instead of raising) when File/Name is absent
        if entry.findtext('File/Name') == f.name:
            label = labels.index(entry.find('NDC9').text)
            break

    return np.append(hu, rgb_val), label

def test(f):
    return f

# Run the extraction on the first ten files in a pool of ten workers and copy
# each result into the matching row of the pre-allocated arrays.
# NOTE(review): rows that are never written (failed images, or files past the
# first ten) keep their initial zeros - confirm this is intended.
with Parallel(n_jobs=10) as parallel:
    started = time()
    jobs = (delayed(extract_features)(f) for f in filenames[:10])
    for idx, result in enumerate(parallel(jobs)):
        if result is None:
            print('no data for image', idx)
            continue
        print('data from image', idx)
        features, label = result[0], result[1]
        x_data[idx, :] = features
        y_data[idx] = label
    # elapsed wall-clock seconds for the whole batch
    print(time() - started)

# printing and saving the features as npy file
print(x_data)
print(y_data)
# np.save('x_data_saved', x_data)
# np.save('y_data_saved', y_data)

ValueError: attempt to get argmin of an empty sequence

In [None]:
# loading features if already extracted
# (replaces whatever `x_data` / `y_data` currently hold with the saved arrays)
x_data = np.load('x_data_saved.npy')
y_data = np.load('y_data_saved.npy')

# shapes first, then the full label vector
for loaded in (x_data, y_data):
    print(loaded.shape)

print(y_data)

In [None]:
# SVC training
# One binary classifier per class: model k is fitted on the boolean target
# `y_data == k`, i.e. "sample belongs to class k" versus "any other class".
print(y_data)
kernel = 'linear'
max_iteration = 10

models = [
    SVC(kernel=kernel, max_iter=max_iteration, probability=True)
    for _ in range(class_num)
]

for class_idx in range(len(labels)):
    models[class_idx].fit(x_data, y_data == class_idx)  # training
