In [None]:
import numpy as np
import pandas
from tqdm import tqdm
from classes.loe import LoE
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, average_precision_score
#logging.basicConfig(level=logging.INFO)

import import_ipynb
import data_analysis

In [None]:
def results_summary_clf(ytrain_true, yval_true, ytest_true, ytrain_pred, yval_pred, ytest_pred):
    """Tabulate classification metrics for the train / validation / test splits.

    Parameters
    ----------
    ytrain_true, yval_true, ytest_true :
        Ground-truth labels of each split.
    ytrain_pred, yval_pred, ytest_pred :
        Predicted labels of each split, aligned with the matching ``*_true``.

    Returns
    -------
    pandas.DataFrame
        One row per split ('Training set', 'Validation set', 'Testing set') and
        one column per metric ('Accuracy', 'F1_score', 'Precision_score').
        Every cell is the metric expressed as a percentage rounded to two
        decimals.

    Notes
    -----
    The 'Precision_score' column is filled with ``average_precision_score``
    evaluated on the predicted labels, not with ``precision_score``.
    """
    row_labels = ['Training set', 'Validation set', 'Testing set']
    col_labels = ['Accuracy', 'F1_score', 'Precision_score']
    # Metric callables, listed in the same order as `col_labels`.
    metric_fns = (accuracy_score, f1_score, average_precision_score)

    # Start from an empty frame and fill it cell by cell.
    table = pandas.DataFrame(index=row_labels, columns=col_labels)

    # (truth, prediction) pairs, listed in the same order as `row_labels`.
    split_pairs = (
        (ytrain_true, ytrain_pred),
        (yval_true, yval_pred),
        (ytest_true, ytest_pred),
    )

    for row, (truth, guess) in zip(row_labels, split_pairs):
        for col, metric in zip(col_labels, metric_fns):
            table.loc[row, col] = round(metric(truth, guess) * 100, 2)

    return table

In [None]:
def C_validation_clf(X_train, X_val, y_train, y_val, n_experts):
    """Pick the SVM regularisation strength ``C`` that maximises validation accuracy.

    For every candidate ``C`` a League of Experts (``LoE``) made of ``n_experts``
    identical ``CustomSvm`` experts is fitted on the training split and scored
    with ``accuracy_score`` on the validation split.

    Parameters
    ----------
    X_train, X_val :
        Feature matrices; ``X_train.shape[0]`` is used as the number of samples.
    y_train, y_val :
        Labels matching ``X_train`` / ``X_val``.
    n_experts : int
        Number of experts in the pool. Must be a positive integer.

    Returns
    -------
    The candidate ``C`` (an element of the candidate array) with the highest
    validation accuracy. On ties the smallest such ``C`` wins, because
    ``np.argmax`` returns the first maximum.

    Raises
    ------
    ValueError
        If ``n_experts`` is not a positive integer.
    """
    # Accept integral floats such as 2.0, but reject anything that cannot
    # describe a pool size, instead of failing later on an unbound `loe`.
    pool_size = int(n_experts)
    if pool_size != n_experts or pool_size < 1:
        raise ValueError(f"n_experts must be a positive integer, got {n_experts!r}")

    n_samples = X_train.shape[0]
    # Candidate grid: 0.5, 1.0, ..., 9.5, each scaled by the training-set size.
    C_params = np.arange(0.5, 10.0, 0.5) * n_samples
    scores = []

    for C in C_params:
        # A fresh pool for every candidate so that no fitted state is shared.
        loe = LoE(pool_classifiers=[CustomSvm(C=C, random_state=40) for _ in range(pool_size)],
                  step_size=2, iterations=20, maximum_selected_features=None, step_callback=None, random_state=40)
        loe.fit(X=X_train, y=y_train)
        scores.append(accuracy_score(y_val, loe.predict(X_val)))

    return C_params[np.argmax(scores)]
                

In [None]:
class CustomSvm:
    """``LinearSVC`` wrapper that tolerates training sets without exactly two classes.

    The wrapped ``LinearSVC`` is only trained when the labels passed to ``fit``
    contain exactly two distinct values. For any other label set the wrapper
    skips training and ``predict`` returns the first (smallest) observed label
    for every sample.

    All constructor arguments are stored on the instance and forwarded
    unchanged to ``LinearSVC``.
    """

    def __init__(self, penalty='l2', loss='squared_hinge', dual='auto', tol=0.0001, C=1.0, multi_class='ovr',
                 fit_intercept=True, intercept_scaling=1, class_weight=None, verbose=0, random_state=None,
                 max_iter=1000):
        self.penalty = penalty
        self.loss = loss
        self.dual = dual
        self.tol = tol
        self.C = C
        self.multi_class = multi_class
        self.fit_intercept = fit_intercept
        self.intercept_scaling = intercept_scaling
        self.class_weight = class_weight
        self.verbose = verbose
        self.random_state = random_state
        self.max_iter = max_iter

        # Underlying estimator; only fitted when two classes are present.
        self.Svm = LinearSVC(penalty=self.penalty, loss=self.loss, dual=self.dual, tol=self.tol, C=self.C,
                             multi_class=self.multi_class, fit_intercept=self.fit_intercept,
                             intercept_scaling=self.intercept_scaling, class_weight=self.class_weight,
                             verbose=self.verbose, random_state=self.random_state, max_iter=self.max_iter)
        # Sorted distinct labels seen by the last `fit` call (None before fitting).
        self.unique_class_value = None

    def fit(self, X, y):
        """Record the distinct labels of ``y`` and train the SVM if there are exactly two.

        Returns
        -------
        CustomSvm
            ``self``, so that calls can be chained as with scikit-learn estimators.
        """
        self.unique_class_value = np.unique(y)
        if self.unique_class_value.size == 2:
            self.Svm.fit(X, y)
        return self

    def predict(self, X):
        """Predict a label for every row of ``X``.

        Uses the trained SVM when ``fit`` saw exactly two classes; otherwise
        returns an array of length ``X.shape[0]`` filled with the first observed
        label. ``fit`` must have been called first.
        """
        if self.unique_class_value.size == 2:
            return self.Svm.predict(X)
        # np.full keeps the label's own dtype, so string labels work and
        # integer labels are not silently turned into floats.
        return np.full(X.shape[0], self.unique_class_value[0])

    def class_weight_(self):
        """Return the SVM's ``class_weight_`` when trained on two classes, else ``np.ones(2)``.

        This is a method, not an attribute. ``fit`` must have been called first.
        """
        # NOTE(review): this reads `class_weight_` from the wrapped LinearSVC;
        # confirm that the installed scikit-learn exposes that attribute on
        # LinearSVC (it is documented for SVC).
        return self.Svm.class_weight_ if self.unique_class_value.size == 2 else np.ones(2)

    def get_params(self, deep=True):
        """Return the parameters of the wrapped ``LinearSVC``.

        ``deep`` is accepted for compatibility with the scikit-learn estimator
        protocol and forwarded to ``LinearSVC.get_params``.
        """
        return self.Svm.get_params(deep=deep)
        

In [None]:
def classifier_loe(data, target_name, train_size=0.7, n_experts=2, times=1):
    """Train and evaluate a League of Experts of linear SVMs over repeated random splits.

    For each of ``times`` repetitions the data is split (stratified) into a
    training part of relative size ``train_size`` and a hold-out part that is
    halved into validation and test sets. Features are passed through
    ``data_analysis.data_processing`` with ``enc_type='OneHotEncoder'`` and
    ``scale_type='Standardscaler'``, ``C`` is selected with ``C_validation_clf``,
    a ``LoE`` of ``n_experts`` ``CustomSvm`` experts is fitted on the training
    set, and the metrics of ``results_summary_clf`` are collected.

    Parameters
    ----------
    data :
        Dataset holding features and target; it is copied before being handed to
        ``data_analysis.uncouping_x_y`` (presumably a pandas DataFrame - confirm
        against that helper).
    target_name :
        Identifier of the target passed to ``data_analysis.uncouping_x_y``.
    train_size : float, default 0.7
        Fraction of the data used for training.
    n_experts : int, default 2
        Number of ``CustomSvm`` experts in the pool; must be a positive integer.
        The same value is forwarded to ``C_validation_clf``.
    times : int, default 1
        Number of random splits to average over; must be at least 1. The
        repetition index is used as ``random_state`` of the first split.

    Returns
    -------
    pandas.DataFrame
        Metrics averaged over the ``times`` repetitions, as ``float64`` rounded
        to two decimals.

    Raises
    ------
    ValueError
        If ``n_experts`` is not a positive integer or ``times`` is below 1.
    """
    # Validate up front: previously a bad `n_experts` left `loe` unbound and
    # `times < 1` left the accumulated summary unbound.
    pool_size = int(n_experts)
    if pool_size != n_experts or pool_size < 1:
        raise ValueError(f"n_experts must be a positive integer, got {n_experts!r}")
    if times < 1:
        raise ValueError(f"times must be at least 1, got {times!r}")

    # separate the features X from the target y
    X, y = data_analysis.uncouping_x_y(data.copy(), target_name)

    summary_total = None
    for i in tqdm(np.arange(times), desc="For Random Data Split = "+str(times)+" …", total=times, position=0):
        # split the dataset X into the training set X_train and temporary set X_temp
        X_train, X_temp, y_train, y_temp = train_test_split(X, y, train_size=train_size, stratify=y, random_state=i)
        # split the temporary set evenly into the validation set and the testing set
        X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, train_size=0.5, stratify=y_temp, random_state=0)
        X_train, X_val, X_test = data_analysis.reset_index_data(data_1=X_train, data_2=X_val, data_3=X_test, data_4=None)

        # encode and scale the features, then switch to plain numpy arrays
        X_train_enc, X_val_enc, X_test_enc = data_analysis.data_processing(
            xtrain=X_train.copy(), ytrain=y_train.copy(), xtest_1=X_val.copy(), xtest_2=X_test.copy(), xtest_3=None,
            enc_type='OneHotEncoder', scale_type='Standardscaler')
        X_train_enc, X_val_enc, X_test_enc = X_train_enc.values.copy(), X_val_enc.values.copy(), X_test_enc.values.copy()

        # print the run configuration once, on the first repetition
        if i == 0:
            print('*********************************************** The League of Experts ***********************************************')
            print(f'Training_set = {round((train_size * 100))}%, Validation_set = {round(((1 - train_size)/2) * 100)}%, Test_set = {round(((1 - train_size)/2) * 100)}%, , n_experts = {n_experts}, kernel = linear, class_weights = None, C_validation = True, times = {times}')

        # choose C on the validation set, then fit the final pool with it
        C_param = C_validation_clf(X_train_enc.copy(), X_val_enc.copy(), y_train.copy(), y_val.copy(), n_experts)
        loe = LoE(pool_classifiers=[CustomSvm(C=C_param, random_state=40) for _ in range(pool_size)],
                  step_size=2, iterations=20, maximum_selected_features=None, step_callback=None, random_state=40)
        loe.fit(X=X_train_enc, y=y_train)

        # predictions
        y_train_preds = loe.predict(X_train_enc)
        y_val_preds = loe.predict(X_val_enc)
        y_test_preds = loe.predict(X_test_enc)

        # metrics of this repetition, added to the running total
        summary_random = results_summary_clf(y_train, y_val, y_test, y_train_preds, y_val_preds, y_test_preds)
        summary_total = summary_random if summary_total is None else summary_total + summary_random

    # average over the repetitions
    summary = (summary_total / times).astype('float64').round(2)
    return summary
