In [None]:
import math
import pickle
from collections import Counter
from operator import itemgetter

from sklearn.svm import SVC

# Useful functions

In [1]:
# *****************************************************************************
def save_obj(obj, name, path):
    """
    Pickle an object into the file ``path + name + '.pkl'``.

    Parameters
    ----------
    obj : object
        the object to serialize.
    name : string
        the file name, without the ".pkl" extension.
    path : string
        the directory prefix. It is concatenated as-is with name, so it
        has to end with a path separator.

    Returns
    -------
    None.

    """
    destination = path + name + '.pkl'
    with open(destination, 'wb') as stream:
        pickle.dump(obj, stream)
    
# *****************************************************************************
def load_obj(name, path):
    """
    Read back the object pickled in the file ``path + name + '.pkl'``.

    Parameters
    ----------
    name : string
        the name of the pkl file to load, without the ".pkl" extension.
    path : string
        the directory prefix. It is concatenated as-is with name, so it
        has to end with a path separator.

    Returns
    -------
    object
        whatever object was stored in the file.

    """
    source = path + name + '.pkl'
    # Unpickling can execute arbitrary code: only point this at trusted files.
    with open(source, 'rb') as stream:
        restored = pickle.load(stream)
    return restored

# *****************************************************************************
def construct_objects(objs, objects):
    """
    Turn the objects of one instance into a count vector over a vocabulary.

    Parameters
    ----------
    objs : iterable
        the objects found in one instance (repetitions allowed).
    objects : iterable
        the reference vocabulary; it fixes the length and the order of
        the result.

    Returns
    -------
    list of int
        features[k] is the number of elements of objs equal to the k-th
        element of objects (0 when it does not occur).

    """
    try:
        # Tally objs once instead of rescanning it for every vocabulary entry.
        occurrences = Counter(objs)
        return [occurrences[obj] for obj in objects]
    except TypeError:
        # Unhashable entries cannot be tallied in a Counter: fall back to
        # the pairwise equality comparison.
        return [sum(1 for o in objs if obj == o) for obj in objects]

# *****************************************************************************
def scan_objects(category_examples):
    """
    Collect every distinct object appearing in any instance of any category.

    Parameters
    ----------
    category_examples : dict
        maps each category to an iterable of instances, each instance
        being an iterable of objects.

    Returns
    -------
    list
        the distinct objects, in no particular order.

    """
    distinct = {
        obj
        for category in category_examples
        for instance in category_examples[category]
        for obj in instance
    }
    return list(distinct)

   
# ***************************************************************************** 
def load_dataset(dataset_name, learning_rate = 80, source_path = "/content/"):
    '''
    Load a pickled dataset and split it, category by category, into a
    learning part and a test part.

    Two files are read from source_path: "<dataset_name>_ce.pkl", a mapping
    from each category to its list of instances, and "<dataset_name>_o.pkl",
    the object vocabulary. Every instance is converted into a count vector
    over that vocabulary with construct_objects.

    Within each category the instances are taken in stored order (no
    shuffling): the instance at 1-based position p goes to the learning part
    when p <= len(instances) * learning_rate / 100, otherwise to the test part.

    Parameters
    ----------
    dataset_name : string
        # names : LabelMe /|\\ MITIndoor /|\\  Sun397 /|\\ Sun900
    learning_rate : float, optional
        percentage (0-100) of each category's instances assigned to the
        learning part. The default is 80.
    source_path : string, optional
        directory prefix of the pkl files; it is concatenated as-is with the
        file names, so it has to end with a path separator.
        The default is "/content/".

    Returns
    -------
    I_l : list
        count vectors of the learning instances.
    O_l : list
        category of each learning instance (same order as I_l).
    I_t : list
        count vectors of the test instances.
    O_t : list
        category of each test instance (same order as I_t).

    '''
    category_examples = load_obj(dataset_name + "_ce", source_path)
    objects = load_obj(dataset_name + "_o", source_path)

    I_l, O_l = [], []
    I_t, O_t = [], []

    for category in category_examples:
        instances = category_examples[category]
        # May be fractional; positions are compared against it as-is.
        point_of_division = len(instances) * learning_rate / 100

        for position, objs in enumerate(instances, start=1):
            constructed_objects = construct_objects(objs, objects)

            if position <= point_of_division:
                I_l.append(constructed_objects)
                O_l.append(category)
            else:
                I_t.append(constructed_objects)
                O_t.append(category)

    return (I_l, O_l, I_t, O_t)

# *****************************************************************************
def getAccuracyRate(classifier, X_test, y_test):
    """
    Get accuracy rate of a classifier on test data.

    Parameters:
        classifier (_Classifier): The classification model. Only its
            "predict" method is used; it receives the list of instances and
            must return one prediction per instance.
        X_test (list of [list of int]): List of test instances. Each instance is a list of features.
        y_test (list): List of the classes corresponding to each test's instance.

    Returns:
        accuracyRate (float): Fraction of the instances of "X_test" whose
            prediction equals the matching entry of "y_test"; 0.0 when
            "X_test" is empty.
    """

    nbInstances = len(X_test)
    if nbInstances == 0:
        # Nothing to evaluate: avoid dividing by zero.
        return 0.0

    # One batched call instead of one "predict" call per instance.
    predictions = classifier.predict(X_test)

    cptCorrectPredict = sum(
        1 for outValue, target in zip(predictions, y_test) if outValue == target
    )

    return cptCorrectPredict / nbInstances # Accuracy rate

# *****************************************************************************
def _reweight_training_instance(counts, category, objects, W, NO, FO):
    """Return the (weighted, normalized) versions of one count vector, used to fit classifiers 2 and 3."""
    weighted = []
    normalized = []
    for i, count in enumerate(counts):
        obj = objects[i]

        try:
            value1 = W[obj] * count
        except KeyError:
            # No weight stored for this object.
            value1 = 0

        try:
            value2 = W[obj] * count / (1 + FO[category][obj] * math.log(NO[category][obj] + 2, 2))
        except KeyError:
            # The object has no statistics for this category.
            value2 = 0

        weighted.append(value1)
        normalized.append(value2)

    return weighted, normalized


def _fit_linear_svm(X_fit, y_fit, X_eval, y_eval, file_name, source_path):
    """Fit the fixed linear SVC, print its accuracy on (X_eval, y_eval) and pickle [classifier, accuracy]."""
    # Hyper-parameters are fixed; no search over C / kernel is performed.
    classifier = SVC(C = 0.5, kernel = "linear", gamma = "scale").fit(X_fit, y_fit)
    accuracy = getAccuracyRate(classifier, X_eval, y_eval)
    print(accuracy)
    save_obj([classifier, accuracy], file_name, source_path)
    return classifier


def learning_algorithm(dataset_name, learning_rate = 80, nb_max_of_objects_to_keep1 = 10, nb_max_of_objects_to_keep2 = 10, source_path = "/content/"):
    """
    Train the three SVM classifiers of the pipeline and pickle each of them.

    Steps:
      1. split the dataset with load_dataset;
      2. run prepare_prerequisites, then load the NO / FO / W pickles it wrote;
      3. build two re-weighted copies of every learning vector:
         value1 = W[obj] * count
         value2 = W[obj] * count / (1 + FO[cat][obj] * log2(NO[cat][obj] + 2))
         where cat is the instance's own category (a missing key gives 0);
      4. fit one linear SVC on the raw counts (classifier 1), one on the
         value1 vectors (classifier 2) and one on the value2 vectors
         (classifier 3).

    The accuracy printed and saved for classifier 1 is measured on the test
    split; for classifiers 2 and 3 it is measured on the very data they were
    fitted on, i.e. it is a training accuracy.

    NOTE(review): the code this replaces sorted the non-zero values of each
    vector, kept the nb_max_of_objects_to_keep* largest and wrote them back
    to positions that already held exactly those values, so no entry was ever
    zeroed. That no-op has been dropped; the generated features are unchanged.
    If the intent was to keep only the top-N objects per instance, it has to
    be implemented here and in testing_algorithm together.

    Parameters
    ----------
    dataset_name : string
        prefix of the dataset pkl files.
    learning_rate : float, optional
        percentage of each category used for learning. The default is 80.
    nb_max_of_objects_to_keep1 : int, optional
        currently only used in the file name of classifier 2. The default is 10.
    nb_max_of_objects_to_keep2 : int, optional
        currently only used in the file name of classifier 3. The default is 10.
    source_path : string, optional
        directory prefix (ending with a separator) of every file read or
        written. The default is "/content/".

    Returns
    -------
    list
        [classifier 1, classifier 2, classifier 3].

    """
    I_l, O_l, I_t, O_t  = load_dataset(dataset_name, learning_rate = learning_rate, source_path = source_path)

    # prepare prerequisites (writes the NO_ / FO_ / W_ / categories_ pickles)
    print("prepare prerequisites :")
    prepare_prerequisites(dataset_name, learning_rate = learning_rate, source_path = source_path)

    objects = load_obj(dataset_name + "_o", source_path)
    NO = load_obj("NO_" + dataset_name, source_path)
    FO = load_obj("FO_" + dataset_name, source_path)
    W = load_obj("W_" + dataset_name, source_path)

    I_l1 = []
    I_l2 = []
    for counts, category in zip(I_l, O_l):
        weighted, normalized = _reweight_training_instance(counts, category, objects, W, NO, FO)
        I_l1.append(weighted)
        I_l2.append(normalized)

    classifiers = []

    print("Classifier 1 :") # ------ SVM on raw counts, scored on the test split ------
    classifiers.append(_fit_linear_svm(
        I_l, O_l, I_t, O_t,
        "classifier1_" + dataset_name + "_" + str(learning_rate),
        source_path))

    print("Classifier 2 :") # ------ SVM on weighted counts, scored on its training data ------
    classifiers.append(_fit_linear_svm(
        I_l1, O_l, I_l1, O_l,
        "classifier2_" + dataset_name + "_" + str(nb_max_of_objects_to_keep1) + "_" + str(learning_rate),
        source_path))

    print("Classifier 3 :") # ------ SVM on normalized weighted counts, scored on its training data ------
    classifiers.append(_fit_linear_svm(
        I_l2, O_l, I_l2, O_l,
        "classifier3_" + dataset_name + "_" + str(nb_max_of_objects_to_keep2) + "_" + str(learning_rate),
        source_path))

    return classifiers

# *****************************************************************************
def _weighted_test_vector(counts, objects, W):
    """Return the count vector with every entry multiplied by its object's weight (0 when no weight exists)."""
    vector = []
    for i, count in enumerate(counts):
        try:
            value = W[objects[i]] * count
        except KeyError:
            value = 0
        vector.append(value)
    return vector


def _normalized_test_vector(counts, category, objects, W, NO, FO):
    """Return the weighted count vector normalized by the statistics of one candidate category."""
    vector = []
    for i, count in enumerate(counts):
        obj = objects[i]
        try:
            value = W[obj] * count / (1 + FO[category][obj] * math.log(NO[category][obj] + 2, 2))
        except KeyError:
            # The object has no statistics for this candidate category.
            value = 0
        vector.append(value)
    return vector


def testing_algorithm(dataset_name, classifiers, learning_rate = 80, nb_max_of_objects_to_keep1 = 10, nb_max_of_objects_to_keep2 = 10, source_path = "/content/"):
    """
    Evaluate the three classifiers as a cascade on the test split and print
    the resulting accuracy rate.

    For every test instance:
      * classifier 1 predicts from the raw counts;
      * classifier 2 predicts from the weighted counts (W[obj] * count). The
        instance is counted as correct when exactly one known category equals
        that prediction and it is the true category;
      * otherwise classifier 3 is asked once per candidate category, each
        time with the vector normalized by that category's FO / NO
        statistics; candidates equal to their own prediction are collected.
        The instance is counted as correct when there is exactly one such
        candidate and it is the true category;
      * otherwise the instance is counted as correct when classifier 1's
        prediction is the true category.

    NOTE(review): the true label decides whether the cascade falls through to
    the next classifier, so a wrong answer from classifier 2 or 3 is never
    final. The printed rate can therefore only be equal to or higher than the
    rate of a cascade that has to commit without seeing the label - confirm
    this is the intended metric.

    NOTE(review): the code this replaces sorted the non-zero values, kept the
    nb_max_of_objects_to_keep* largest and wrote them back to positions that
    already held them - a no-op that has been dropped. The two parameters are
    kept for interface compatibility and are not used.

    Parameters
    ----------
    dataset_name : string
        prefix of the dataset pkl files.
    classifiers : list
        [classifier 1, classifier 2, classifier 3] as returned by
        learning_algorithm.
    learning_rate : float, optional
        percentage of each category used for learning; the remainder is the
        test split evaluated here. The default is 80.
    nb_max_of_objects_to_keep1 : int, optional
        unused (see the note above). The default is 10.
    nb_max_of_objects_to_keep2 : int, optional
        unused (see the note above). The default is 10.
    source_path : string, optional
        directory prefix (ending with a separator) of the pkl files.
        The default is "/content/".

    Returns
    -------
    None.

    """
    _, _, I_t, O_t  = load_dataset(dataset_name, learning_rate = learning_rate, source_path = source_path)

    objects = load_obj(dataset_name + "_o", source_path)
    NO = load_obj("NO_" + dataset_name, source_path)
    FO = load_obj("FO_" + dataset_name, source_path)
    dataset_categories = load_obj("categories_" + dataset_name, source_path)
    W = load_obj("W_" + dataset_name, source_path)

    if not I_t:
        # e.g. learning_rate = 100: there is nothing to score.
        print("no test instance : accuracy rate is undefined")
        return

    cptCorrectPredict = 0
    # Never incremented; only feeds the progress line printed below.
    not_null_objects = 0

    for l, (counts, target) in enumerate(zip(I_t, O_t)):

        # classifier 1
        outValue_without_normalization1 = classifiers[0].predict([counts])[0]

        # classifier 2: its input does not depend on the candidate category,
        # so one prediction is enough for all candidates.
        outValue_with_normalization1 = classifiers[1].predict([_weighted_test_vector(counts, objects, W)])[0]
        categories = [category for category in dataset_categories
                      if outValue_with_normalization1 == category]

        if len(categories) == 1 and categories[0] == target:
            cptCorrectPredict += 1
            continue

        # classifier 3: one prediction per candidate category.
        categories = []
        for category in dataset_categories:
            candidate = _normalized_test_vector(counts, category, objects, W, NO, FO)
            outValue_with_normalization2 = classifiers[2].predict([candidate])[0]

            if outValue_with_normalization2 == category:
                categories.append(category)

        print(l, " => ", not_null_objects)

        if len(categories) == 1 and categories[0] == target:
            cptCorrectPredict += 1
            continue

        # fall back on classifier 1
        if outValue_without_normalization1 == target:
            cptCorrectPredict += 1

    print(cptCorrectPredict / len(I_t)) # Accuracy rate
    


# *****************************************************************************
def prepare_prerequisites(dataset_name, learning_rate = 80, source_path = "/content/"):
    '''
    Compute object statistics and weights from the learning split and pickle
    them for the learning and testing steps.

    From the learning instances returned by load_dataset it builds:

    NO[category][obj] : number of learning instances of the category whose
        count for obj is > 0.
    FO[category][obj] : sum of the counts of obj over those instances.
    W[obj] : 0 when obj never occurs in the learning split, otherwise
        max_FO * log10(max_NO + 1) - log10((NOall + 1) * FOall)
        where max_NO / max_FO are the largest NO / FO values of obj over all
        categories (each maximum taken independently) and NOall / FOall are
        the sums of NO / FO over all categories.

    Files written in source_path: "NO_<dataset_name>.pkl",
    "FO_<dataset_name>.pkl", "categories_<dataset_name>.pkl" (list of the
    categories present in the learning split, in order of first appearance)
    and "W_<dataset_name>.pkl".

    Parameters
    ----------
    dataset_name : string
        prefix of the dataset pkl files.
    learning_rate : float, optional
        percentage of each category used for learning. The default is 80.
    source_path : string, optional
        directory prefix (ending with a separator) of every file read or
        written. The default is "/content/".

    Returns
    -------
    None.

    '''
    # **************************************************
    objects = load_obj(dataset_name + "_o", source_path)
    I_l, O_l, _, _  = load_dataset(dataset_name, learning_rate = learning_rate, source_path = source_path)

    # **************************************************
    # group the learning vectors by category
    category_examples = {}
    for category, instance in zip(O_l, I_l):
        category_examples.setdefault(category, []).append(instance)

    # **************************************************
    # per-category presence (NO) and total count (FO) of every object
    NO = {}
    FO = {}

    for category, instances in category_examples.items():
        presence = NO[category] = {}
        totals = FO[category] = {}

        for instance in instances:
            for i, count in enumerate(instance):
                if count > 0:
                    obj = objects[i]
                    presence[obj] = presence.get(obj, 0) + 1
                    totals[obj] = totals.get(obj, 0) + count

    # **************************************************
    # the same statistics summed over all categories
    NOall = dict.fromkeys(objects, 0)
    FOall = dict.fromkeys(objects, 0)

    for category in NO:
        for obj in NO[category]:
            NOall[obj] = NOall[obj] + NO[category][obj]
            FOall[obj] = FOall[obj] + FO[category][obj]

    # NOTE(review): an "emission_probabilities" table used to be computed at
    # this point but was never saved nor read; it is no longer computed.

    # **************************************************
    # weights = { Oi:value }
    W = {}

    for obj in objects:
        W[obj] = 0
        if NOall[obj] != 0:
            max_NO = 0
            max_FO = 0
            for category in NO:
                # NO and FO hold the same objects for a given category.
                if obj in NO[category]:
                    max_NO = max(max_NO, NO[category][obj])
                    max_FO = max(max_FO, FO[category][obj])

            W[obj] = max_FO * math.log(max_NO + 1, 10) - math.log((NOall[obj] + 1) * FOall[obj], 10)

    # **************************************************
    save_obj(NO, "NO_" + dataset_name, source_path)
    save_obj(FO, "FO_" + dataset_name, source_path)
    save_obj(list(category_examples.keys()), "categories_" + dataset_name, source_path)
    save_obj(W, "W_" + dataset_name, source_path)

ModuleNotFoundError: No module named 'tensorflow'

# Run

In [None]:
# *****************************************************************************
# names : LabelMe /|\ MITIndoor /|\ Sun397 /|\ Sun900
# Directory prefix of the dataset pickles; every result file is written there too.
source_path = "/content/"
dataset_name = "Sun397"

# params
learning_rate = 80
nb_max_of_objects_to_keep1 = 10
nb_max_of_objects_to_keep2 = 10

# Keyword arguments shared by the learning and the testing step.
run_options = dict(
    learning_rate = learning_rate,
    nb_max_of_objects_to_keep1 = nb_max_of_objects_to_keep1,
    nb_max_of_objects_to_keep2 = nb_max_of_objects_to_keep2,
    source_path = source_path,
)
classifiers_file = dataset_name + "_classifiers_" + str(learning_rate) + "%"

print("learn :")
# To reuse the classifiers of a previous run instead of training again:
# classifiers = load_obj(classifiers_file, source_path)
classifiers = learning_algorithm(dataset_name, **run_options)
save_obj(classifiers, classifiers_file, source_path)
print("test :")
testing_algorithm(dataset_name, classifiers, **run_options)