<a href="https://colab.research.google.com/github/stikice/features/blob/main/Feature.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!python --version

In [None]:
import os
import csv

In [None]:
from __future__ import division
from tqdm import tqdm
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeRegressor
# Import datasets, classifiers and performance metrics
from sklearn import svm
#from sklearn.metrics import compare

from math import ceil
import util
from threading import Thread

In [None]:
# NOTE(review): not referenced in the cells visible here; presumably the image
# side length in pixels -- confirm against the feature-extraction code.
IMG_SIZE = 228
# NOTE(review): not referenced in the cells visible here; presumably the
# percentage of samples held out for testing -- confirm.
Percentage_TESTED = 10
# CSV file read by cv_training_data(). The commented-out assignments below are
# alternative feature files that can be swapped in.
#DATA_FILE = 'all_image_features.csv'
#DATA_FILE = 'all_image_features_norm.csv'
##DATA_FILE = 'top-1-few-features.csv'
DATA_FILE = 'all_new_features_hier_norm.csv'
# NOTE(review): not referenced in the cells visible here; looks like the
# directory holding the validation images -- confirm.
PATH_TO_FILES = '/images/val/images'

In [None]:
def cv_training_data(amount_images, data_file=None):
    """
    Load feature rows and per-level labels from a CSV file.

    The first row of the file is treated as a header and skipped. For every
    following non-empty row, the last seven columns are collected as the
    feature vector, and column 0 is paired with columns 1, 2 and 3 to form
    the first-, second- and third-level entries respectively. All values
    are kept as the strings read from the file.

    Args:
        amount_images: Maximum number of data rows to read. Values <= 0
            yield empty lists.
        data_file: Path of the CSV file. Defaults to the module-level
            DATA_FILE when omitted.
    Return:
        data: list of feature lists (last seven columns of each row)
        first_level: list of (row[0], row[1]) tuples
        second_level: list of (row[0], row[2]) tuples
        third_level: list of (row[0], row[3]) tuples
    """
    if data_file is None:
        data_file = DATA_FILE

    data = []
    first_level = []
    second_level = []
    third_level = []

    # 'utf-8-sig' drops a leading BOM if present; newline='' is what the csv
    # module expects so that it can handle line endings itself.
    with open(data_file, newline='', encoding='utf-8-sig') as csvfile:
        reader = csv.reader(csvfile)
        next(reader, None)  # skip the header row (no-op on an empty file)

        for row in reader:
            if len(data) >= amount_images:
                break
            if not row:
                # csv.reader yields [] for blank lines; nothing to extract.
                continue

            data.append(row[-7:])
            first_level.append((row[0], row[1]))
            second_level.append((row[0], row[2]))
            third_level.append((row[0], row[3]))

    return data, first_level, second_level, third_level

In [None]:
# Quick check of the loader: read the first 3 data rows from DATA_FILE.
cv_training_data (3)

In [None]:
def _predict_level(machine, train_features, train_labels, test_features):
    """
    Run the model selected by `machine` and return its predictions.

    Args:
        machine: 'nn', 'dt16' or 'vc'.
        train_features: features passed as first argument to the model function
        train_labels: labels passed as second argument to the model function
        test_features: samples passed as third argument to the model function
    Raises:
        ValueError: if `machine` is not one of the supported names.
    """
    if machine == 'nn':
        return nearest_neighbour(train_features, train_labels, test_features)
    if machine == 'dt16':
        # decision_tree returns five prediction sets; only the last one
        # (predicted_level_16) is used.
        _, _, _, _, predicted_level_16 = decision_tree(train_features, train_labels, test_features)
        return predicted_level_16
    if machine == 'vc':
        return vecto_classifier(train_features, train_labels, test_features)
    raise ValueError("Unknown machine %r; expected 'nn', 'dt16' or 'vc'" % (machine,))


def _is_accepted(machine, prediction):
    """Return whether `prediction` counts as positive for the given machine."""
    if machine == 'dt16':
        # The decision tree yields a numeric score that is thresholded.
        return prediction > 0.5
    # The other machines yield the class label as a string.
    return prediction == '1'


def CV_fold_worker(test_idx, train_idx, img_data, first_level, second_level, third_level, first_level_machine, second_level_machine, third_level_machine, return_wrapper):
    """
    Worker function for one fold in CV. Runs a three-level cascade: each level
    trains the selected model on the training samples and predicts for the
    test samples that no earlier level accepted. One list of result tuples
    for the whole fold is appended to return_wrapper.

    Each result tuple is
        (image, actual, prediction, predicted_level, model_name)
    where `image` is element 0 of the sample's label tuple, `actual` is the
    level number if that level's true label is '1' and 0 otherwise,
    `prediction` is the raw model output, and `predicted_level`/`model_name`
    are the accepting level and its model name, or 0/'failed' when the third
    level rejects the sample as well.

    Args:
        test_idx: List of indexes where the test data is
        train_idx: List of indexes where the train data is
        img_data: all of the image data
        first_level: (image, label) tuples for the first level
        second_level: (image, label) tuples for the second level
        third_level: (image, label) tuples for the third level
        first_level_machine: 'nn', 'dt16' or 'vc' for the first level
        second_level_machine: 'nn', 'dt16' or 'vc' for the second level
        third_level_machine: 'nn', 'dt16' or 'vc' for the third level
        return_wrapper: The list to add all results
    Raises:
        ValueError: if a machine name that is reached is not supported.
    """
    # NOTE(review): util.list_split is assumed to return a pair whose first
    # element holds the entries selected by the given indexes -- confirm.
    train_features, _ = util.list_split(img_data, train_idx, [0])
    test_features, _ = util.list_split(img_data, test_idx, [0])

    level_sources = (first_level, second_level, third_level)
    machines = (first_level_machine, second_level_machine, third_level_machine)
    model_names = ('tf-mobilenet_v1', 'tf-inception_v4', 'tf-resnet_v1_152')
    last_level = len(level_sources)

    test_pairs_by_level = []
    train_labels_by_level = []
    for source in level_sources:
        test_pairs, _ = util.list_split(source, test_idx, [0])
        train_pairs, _ = util.list_split(source, train_idx, [0])
        test_pairs_by_level.append(test_pairs)
        train_labels_by_level.append([pair[1] for pair in train_pairs])

    # Image names are always read from the first-level pairs.
    image_pairs = test_pairs_by_level[0]

    list_predictions = []
    # Samples still waiting for a decision, and their positions in the full
    # test split so names and true labels can be looked up at any level.
    pending_features = test_features
    pending_positions = list(range(len(test_features)))

    for level, (machine, train_labels, test_pairs, model_name) in enumerate(
            zip(machines, train_labels_by_level, test_pairs_by_level, model_names), start=1):
        predicted = _predict_level(machine, train_features, train_labels, pending_features)

        rejected_features = []
        rejected_positions = []
        for idx, prediction in enumerate(predicted):
            # Map the index within the pending subset back to the test split.
            # The true label must be read at the same position as the name.
            origin = pending_positions[idx]

            if _is_accepted(machine, prediction):
                actual = level if test_pairs[origin][1] == '1' else 0
                list_predictions.append((image_pairs[origin][0], actual, prediction, level, model_name))
            elif level == last_level:
                # No further level to fall back to: record as failed.
                actual = level if test_pairs[origin][1] == '1' else 0
                list_predictions.append((image_pairs[origin][0], actual, prediction, 0, 'failed'))
            else:
                rejected_features.append(pending_features[idx])
                rejected_positions.append(origin)

        # Not necessary to go to the next level
        if not rejected_features:
            break

        pending_features = rejected_features
        pending_positions = rejected_positions

    return_wrapper.append(list_predictions)