### Libraries

In [None]:
import time
import joblib
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from collections import Counter
from scipy.stats import ks_2samp
from IPython.display import Image

import shap
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import RepeatedStratifiedKFold, LeaveOneOut
from sklearn.preprocessing import StandardScaler
from sklearn.tree import export_graphviz

from imblearn.over_sampling import SMOTE, ADASYN

import xgboost as xgb
from plot_learning import *
from sklearn.svm import SVC
from lightgbm import LGBMClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier, ExtraTreesClassifier, VotingClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.model_selection import GridSearchCV, learning_curve, RandomizedSearchCV
from sklearn.metrics  import average_precision_score, make_scorer, roc_curve,f1_score, precision_score, recall_score, fbeta_score, auc, roc_auc_score, accuracy_score, confusion_matrix, classification_report,precision_recall_curve
# from skopt import BayesSearchCV
# from skopt.space import Real, Categorical, Integer

import skfuzzy as fuzz
from skfuzzy import control as ctrl

warnings.filterwarnings('ignore')

### Read 

In [None]:
warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', None)

X_train = pd.read_csv("X_train.csv", sep = ";")
X_test = pd.read_csv("X_test.csv", sep = ";")

y_train = pd.read_csv("y_train.csv", sep = ";")
y_test = pd.read_csv("y_test.csv", sep = ";")

X_train.head()

In [None]:
## Removing variables (the same columns are dropped from the train and test sets)
X_train = X_train.drop([
'H1RFV', 'H1LFV', 'H1RRO'
], axis=1)

X_test = X_test.drop([
'H1RFV', 'H1LFV', 'H1RRO'
], axis=1)

In [None]:
X_train.columns

In [None]:
y_train.GR.unique()

In [None]:
y_train.GR.value_counts()

## Dataprep

In [None]:
def prepare_targets(y_train, y_test):
    """Encode the train and test targets with a single LabelEncoder.

    The encoder is fitted on ``y_train`` only and then applied to both
    ``y_train`` and ``y_test``, so both outputs share the same mapping.

    Returns:
        A ``(y_train_enc, y_test_enc)`` tuple with the encoded targets.
    """
    encoder = LabelEncoder().fit(y_train)
    return encoder.transform(y_train), encoder.transform(y_test)

In [None]:
y_train_enc, y_test_enc = prepare_targets(y_train, y_test)

In [None]:
print(Counter(y_train_enc).keys())
print(Counter(y_train_enc).values())

In [None]:
print(Counter(y_test_enc).keys())
print(Counter(y_test_enc).values())

In [None]:
file_std = "./models/std.pickle"
# file_smote = "./models/smote.pickle"

std = joblib.load(file_std)
# smote= joblib.load(file_smote)

In [None]:
file_std_smote = "./models/std_smote.pickle"
file_smote = "./models/smote.pickle"

std_smote = joblib.load(file_std_smote)
smote= joblib.load(file_smote)

In [None]:
X_train_std = std.transform(X_train)
X_test_std = std.transform(X_test)

In [None]:
X_train_std_smote = std_smote.transform(X_train)
X_test_std_smote = std_smote.transform(X_test)

In [None]:
X_resampled, y_resampled = smote.fit_resample(X_train_std_smote, y_train_enc)

In [None]:
X_resampled.shape

In [None]:
y_resampled.shape

## Predict

In [None]:
def fit_and_print(model, X_test, y_test):
    """Plot a confusion-matrix heatmap and print a classification report.

    Despite the name, the model is not fitted here: it is only used to
    predict on ``X_test``. The predictions are compared against ``y_test``.

    Args:
        model: Estimator exposing ``predict``.
        X_test: Feature matrix passed to ``model.predict``.
        y_test: True labels for ``X_test``.
    """
    predictions = model.predict(X_test)

    # Rows and columns of the heatmap both use the same A/B/C labels.
    class_labels = ["A", "B", "C"]
    matrix_frame = pd.DataFrame(
        confusion_matrix(y_test, predictions),
        index=class_labels,
        columns=class_labels,
    )

    plt.figure(figsize=(8,6))
    sns.heatmap(matrix_frame, annot=True)

    report = classification_report(y_test, predictions)
    print("Classification Report: \n", report)

### Models

In [None]:
filename_1 = './models/RF_best_bayes.sav'
filename_2 = './models/SVM_best_bayes.sav'
filename_3 = './models/LR_best_bayes.sav'
filename_4 = './models/KNN_best_bayes.sav'
filename_5 = './models/GBM_best_bayes.sav'


RF_best = joblib.load(filename_1)
SVM_best = joblib.load(filename_2)
LR_best = joblib.load(filename_3)
KNN_best = joblib.load(filename_4)
GBM_best = joblib.load(filename_5)

### Voting Classifier

In [None]:
model_RF = pd.Series(RF_best.predict(X_test_std), name="RF")
model_LR = pd.Series(LR_best.predict(X_test_std), name= "LR")
model_SVM = pd.Series(SVM_best.predict(X_test_std), name="SVM")
model_GBM = pd.Series(GBM_best.predict(X_test_std), name="GBM")
model_KNN = pd.Series(KNN_best.predict(X_test_std), name="KNN")

In [None]:
import scipy.stats as stats

tau, p_value = stats.kendalltau(model_RF, model_GBM)
p_value

In [None]:
def fit_and_print_v2(model, X_test):
    """Plot a confusion-matrix heatmap and print a classification report.

    Same as ``fit_and_print`` but the true labels are read from the
    module-level ``y_test_enc`` instead of being passed in. The model is not
    fitted here; it is only used to predict on ``X_test``.

    Args:
        model: Estimator exposing ``predict``.
        X_test: Feature matrix passed to ``model.predict``.
    """
    predictions = model.predict(X_test)

    # Rows and columns of the heatmap both use the same A/B/C labels.
    class_labels = ["A", "B", "C"]
    matrix_frame = pd.DataFrame(
        confusion_matrix(y_test_enc, predictions),
        index=class_labels,
        columns=class_labels,
    )

    plt.figure(figsize=(8,6))
    sns.heatmap(matrix_frame, annot=True)

    report = classification_report(y_test_enc, predictions)
    print("Classification Report: \n", report)


In [None]:
fit_and_print_v2(RF_best, X_test_std)

## Ensemble 

In [None]:
# Defining the models used in the ensemble
models = [
    # ('rf', RF_best),
    # ('svm', SVM_best),
    ('lr', LR_best),
    # ('knn', KNN_best),
    ('gbm', GBM_best)

]

In [None]:
from sklearn.ensemble import StackingClassifier

In [None]:
# Creating the soft-voting ensemble
ensemble_model = VotingClassifier(estimators=models, voting='soft')

ensemble_model.fit(X_train_std, y_train_enc)

In [None]:
# Get the ensemble's predicted class labels on the training set (predict, not predict_proba)
ensemble_proba_train = ensemble_model.predict(X_train_std)
print("Classification Report: \n", classification_report(y_train_enc, ensemble_proba_train))  

In [None]:
# Get the ensemble's class-probability predictions on the test set
ensemble_proba = ensemble_model.predict_proba(X_test_std)
fit_and_print_v2(ensemble_model, X_test_std)

In [None]:
data_train=pd.DataFrame()
data_train['y'] = y_train.GR
# data_train["prob_rf"] = RF_best.predict_proba(X_train_std)[:,0]
data_train["prob_lr"] = LR_best.predict_proba(X_train_std)[:,0]
data_train["prob_gbm"] = GBM_best.predict_proba(X_train_std)[:,0]

print("A", data_train[data_train['y']=='A'][['prob_lr', 'prob_gbm']].mean(axis=1).describe())
print("B", data_train[data_train['y']=='B'][['prob_lr', 'prob_gbm']].mean(axis=1).describe())
print("C", data_train[data_train['y']=='C'][['prob_lr', 'prob_gbm']].mean(axis=1).describe())

In [None]:
high = [0.99, 0.004]
medium = [0.28, 0.08]
low = [0.21, 0.04]

In [None]:
data_train=pd.DataFrame()
data_train['y'] = y_train.GR
data_train["prob_rf"] = RF_best.predict_proba(X_train_std)[:,1]
data_train["prob_svm"] = SVM_best.predict_proba(X_train_std)[:,1]
data_train["prob_gbm"] = GBM_best.predict_proba(X_train_std)[:,1]

print("A", data_train[data_train['y']=='A'][['prob_rf', 'prob_svm', 'prob_gbm']].mean(axis=1).describe())
print("B", data_train[data_train['y']=='B'][['prob_rf', 'prob_svm', 'prob_gbm']].mean(axis=1).describe())
print("C", data_train[data_train['y']=='C'][['prob_rf', 'prob_svm', 'prob_gbm']].mean(axis=1).describe())

In [None]:
high = [0.89, 0.03]
medium = [0.35, 0.10]
low = [0.30, 0.13]

In [None]:
data_train=pd.DataFrame()
data_train['y'] = y_train.GR
data_train["prob_rf"] = RF_best.predict_proba(X_train_std)[:,2]
data_train["prob_svm"] = SVM_best.predict_proba(X_train_std)[:,2]
data_train["prob_gbm"] = GBM_best.predict_proba(X_train_std)[:,2]


print("A", data_train[data_train['y']=='A'][['prob_rf', 'prob_svm', 'prob_gbm']].mean(axis=1).describe())
print("B", data_train[data_train['y']=='B'][['prob_rf', 'prob_svm', 'prob_gbm']].mean(axis=1).describe())
print("C", data_train[data_train['y']=='C'][['prob_rf', 'prob_svm', 'prob_gbm']].mean(axis=1).describe())

In [None]:
high = [0.82, 0.07]
medium = [0.12, 0.05]
low = [0.07, 0.01]


In [None]:
########## A ############
high = [0.99, 0.004]
medium = [0.28, 0.08]
low = [0.21, 0.04]
########## B ############
high = [0.89, 0.03]
medium = [0.35, 0.10]
low = [0.30, 0.13]
########## C ############
high = [0.82, 0.07]
medium = [0.12, 0.05]
low = [0.07, 0.01]


### Estimation of mean and standard deviation

In [None]:
def classify_by_rule(data, class_column, id, prob_columns):
    """Summarise, per true class, the row-wise mean of the probability columns.

    For every distinct value in ``data[class_column]`` (in order of first
    appearance) the rows of that class are selected, the mean across
    ``prob_columns`` is taken for each row, and the mean and standard
    deviation of those row means are recorded.

    Args:
        data: DataFrame holding the class column and the probability columns.
        class_column: Name of the column with the true class labels.
        id: Value stored unchanged under the ``"class"`` key of every entry.
        prob_columns: Names of the probability columns to average per row.

    Returns:
        Dict mapping each class label to
        ``{"class": id, "mean": <float>, "std": <float>}``.
    """
    # Averaging across columns is row-local, so it can be done once up front
    # and then sliced per class.
    row_means = data[prob_columns].mean(axis=1)
    labels = data[class_column]

    summary = {}
    for label in labels.unique():
        selected = row_means[labels == label]
        summary[label] = {
            "class": id,
            "mean": selected.mean(),
            "std": selected.std(),
        }
    return summary


def prob_class(data_train, train_set, id_class):
    """Add one probability column per model to ``data_train``.

    For the i-th array in ``train_set`` (1-based) the column
    ``prob_model_<i>`` is written into ``data_train`` with that model's
    probabilities for class index ``id_class``.

    Args:
        data_train: DataFrame that receives the new columns (modified in place).
        train_set: Iterable of 2-D arrays indexed as ``[sample, class]``,
            one per model.
        id_class: Column index of the class whose probabilities are extracted.

    Returns:
        List with the names of all columns written, in model order
        (an empty list when ``train_set`` is empty).
    """
    prob_columns = []
    for position, model_probs in enumerate(train_set, start=1):
        column_name = f"prob_model_{position}"
        data_train[column_name] = model_probs[:, id_class]
        prob_columns.append(column_name)

    # The return must sit after the loop: returning from inside it stopped
    # after the first model and silently ignored every other entry.
    return prob_columns


def return_means_std(data_train, train_set):
    """Collect the per-class mean/std tables used to build the fuzzy terms.

    For each class index 0..2 the probability columns are written into
    ``data_train`` via ``prob_class`` and summarised per true label via
    ``classify_by_rule``. The summaries are then arranged as
    ``[high, medium, low]`` for each class index, using a fixed label order.

    Args:
        data_train: DataFrame with the true labels in column ``"y"``
            (labels ``'A'``, ``'B'`` and ``'C'`` are looked up); it is
            modified in place by ``prob_class``.
        train_set: Iterable of per-model probability arrays.

    Returns:
        ``(mean_values, std_values)``: two 3x3 nested lists indexed as
        ``[class_idx][level_idx]`` with level 0=high, 1=medium, 2=low.
    """
    # Which true label supplies the (high, medium, low) statistics for the
    # probability of class index 0, 1 and 2 respectively.
    level_labels = (
        ('A', 'B', 'C'),
        ('B', 'C', 'A'),
        ('C', 'B', 'A'),
    )

    per_class_stats = []
    for class_idx in range(3):
        columns = prob_class(data_train, train_set, class_idx)
        per_class_stats.append(
            classify_by_rule(data_train, class_column="y", id=class_idx, prob_columns=columns)
        )

    mean_values = [
        [stats[label]['mean'] for label in labels]
        for stats, labels in zip(per_class_stats, level_labels)
    ]
    std_values = [
        [stats[label]['std'] for label in labels]
        for stats, labels in zip(per_class_stats, level_labels)
    ]

    return mean_values, std_values

## Fuzzy System

In [None]:
import numpy as np
import skfuzzy as fuzz
from skfuzzy import control as ctrl

class FuzzyClassifier:
    """Three-category fuzzy classifier built on a scikit-fuzzy control system.

    Three antecedents (one per category probability) each carry Gaussian
    'low' / 'medium' / 'high' terms. A single consequent carries triangular
    'Category A' / 'Category B' / 'Category C' terms and is defuzzified with
    the centroid method.
    """

    def __init__(self, mean_values, std_values):
        """Create the fuzzy variables, terms, rules and simulation.

        Args:
            mean_values: Nested sequence indexed as ``[class_idx][level_idx]``
                (class_idx 0..2; level_idx 0=high, 1=medium, 2=low) giving the
                centre of each Gaussian term.
            std_values: Same layout as ``mean_values``, giving the sigma of
                each Gaussian term.
        """
        self.prob_classe1 = ctrl.Antecedent(np.arange(0, 1.1, 0.01), 'Probability for category A')
        self.prob_classe2 = ctrl.Antecedent(np.arange(0, 1.1, 0.01), 'Probability for category B')
        self.prob_classe3 = ctrl.Antecedent(np.arange(0, 1.1, 0.01), 'Probability for category C')
        self.classe_final = ctrl.Consequent(np.arange(1, 4, 1), 'Final classification')

        # Flatten both tables class by class, each in (high, medium, low)
        # order, which is the positional order the helper expects.
        cells = [(class_idx, level_idx) for class_idx in range(3) for level_idx in range(3)]
        centres = [mean_values[class_idx][level_idx] for class_idx, level_idx in cells]
        sigmas = [std_values[class_idx][level_idx] for class_idx, level_idx in cells]
        self._define_membership_functions(*centres, *sigmas)

        # One triangle per category, centred on the values 1, 2 and 3.
        output_terms = (
            ('Category A', [0.5, 1, 1.5]),
            ('Category B', [1.5, 2, 2.5]),
            ('Category C', [2.5, 3, 3.5]),
        )
        for term_name, corners in output_terms:
            self.classe_final[term_name] = fuzz.trimf(self.classe_final.universe, corners)

        self.classe_final.defuzzify_method = 'centroid'#'mom'

        self.rules = self._define_rules()

        self.classification_ctrl = ctrl.ControlSystem(self.rules)
        self.classification = ctrl.ControlSystemSimulation(self.classification_ctrl)

    def _define_membership_functions(self, m_c1_high, m_c1_medium, m_c1_low, m_c2_high, m_c2_medium, m_c2_low, m_c3_high, 
                                          m_c3_medium, m_c3_low, s_c1_high, s_c1_medium, s_c1_low, s_c2_high, s_c2_medium,
                                          s_c2_low, s_c3_high, s_c3_medium, s_c3_low):
        """Attach Gaussian 'low', 'medium' and 'high' terms to each antecedent.

        ``m_*`` arguments are the Gaussian centres and ``s_*`` the sigmas;
        ``c1``/``c2``/``c3`` refer to ``prob_classe1``/``2``/``3``.
        """
        term_table = (
            (self.prob_classe1, (
                ('low', m_c1_low, s_c1_low),
                ('medium', m_c1_medium, s_c1_medium),
                ('high', m_c1_high, s_c1_high),
            )),
            (self.prob_classe2, (
                ('low', m_c2_low, s_c2_low),
                ('medium', m_c2_medium, s_c2_medium),
                ('high', m_c2_high, s_c2_high),
            )),
            (self.prob_classe3, (
                ('low', m_c3_low, s_c3_low),
                ('medium', m_c3_medium, s_c3_medium),
                ('high', m_c3_high, s_c3_high),
            )),
        )
        for antecedent, terms in term_table:
            for term_name, centre, sigma in terms:
                antecedent[term_name] = fuzz.gaussmf(antecedent.universe, centre, sigma)

    def _define_rules(self):
        """Build the rule base mapping (A, B, C) probability levels to a category.

        Returns:
            List of ``ctrl.Rule`` objects, one per row of the table below.
        """
        cat_a, cat_b, cat_c = 'Category A', 'Category B', 'Category C'

        # (level of prob. A, level of prob. B, level of prob. C) -> category.
        # NOTE(review): 25 of the 27 level combinations are listed; there is
        # no rule for (low, low, low) or (low, low, high) -- confirm whether
        # that omission is intentional.
        rule_table = (
            ('low', 'medium', 'high', cat_c),
            ('low', 'low', 'medium', cat_c),
            ('low', 'medium', 'medium', cat_b),
            ('low', 'medium', 'low', cat_b),
            ('low', 'high', 'low', cat_b),
            ('low', 'high', 'medium', cat_b),
            ('low', 'high', 'high', cat_c),

            ('medium', 'low', 'low', cat_a),
            ('medium', 'low', 'medium', cat_c),
            ('medium', 'low', 'high', cat_c),
            ('medium', 'medium', 'low', cat_a),
            ('medium', 'medium', 'medium', cat_b),
            ('medium', 'medium', 'high', cat_c),
            ('medium', 'high', 'low', cat_b),
            ('medium', 'high', 'medium', cat_b),
            ('medium', 'high', 'high', cat_c),

            ('high', 'low', 'low', cat_a),
            ('high', 'low', 'medium', cat_a),
            ('high', 'low', 'high', cat_c),
            ('high', 'medium', 'low', cat_a),
            ('high', 'medium', 'medium', cat_a),
            ('high', 'medium', 'high', cat_c),
            ('high', 'high', 'low', cat_a),
            ('high', 'high', 'medium', cat_a),
            ('high', 'high', 'high', cat_c),
        )

        rules = []
        for level_a, level_b, level_c, category in rule_table:
            condition = (
                self.prob_classe1[level_a]
                & self.prob_classe2[level_b]
                & self.prob_classe3[level_c]
            )
            rules.append(ctrl.Rule(condition, self.classe_final[category]))
        return rules

    def classify(self, dataset):
        """Classify every sample from the models' class probabilities.

        Args:
            dataset: Sequence of per-model containers indexed as
                ``[sample][class]``; the number of samples is taken from the
                first entry. For each sample and class, the maximum
                probability across the models is fed to the fuzzy system.

        Returns:
            List with one integer per sample: 0 when the rounded fuzzy output
            is 1, 1 when it is 2, and 2 otherwise.
        """
        input_names = (
            'Probability for category A',
            'Probability for category B',
            'Probability for category C',
        )

        rounded_outputs = []
        for sample_idx in range(len(dataset[0])):
            per_model = [model[sample_idx] for model in dataset]

            # Compute all three maxima before touching the simulation inputs.
            peaks = [np.max([probs[class_idx] for probs in per_model]) for class_idx in range(3)]
            for input_name, peak in zip(input_names, peaks):
                self.classification.input[input_name] = peak

            self.classification.compute()
            crisp = self.classification.output['Final classification']
            rounded_outputs.append(int(round(crisp)))

        # Fuzzy output 1/2/3 -> encoded label 0/1/2; anything else falls to 2.
        to_label = {1: 0, 2: 1}
        return [to_label.get(value, 2) for value in rounded_outputs]


## Results

#### RF + GBM

In [None]:

models = [
    ('rf', RF_best),
    ('gbm', GBM_best),
]

ensemble_model = VotingClassifier(estimators=models, voting='soft')
ensemble_model.fit(X_train_std, y_train_enc)

ensemble_proba_train_rf_gbm = ensemble_model.predict_proba(X_train_std)

In [None]:
train_set = [ensemble_proba_train_rf_gbm]

data_train = pd.DataFrame()
data_train['y'] = y_train.GR

means_, std_ = return_means_std(data_train, train_set)

classifier = FuzzyClassifier(means_, std_)

In [None]:
resultados_train = classifier.classify(train_set)

print(f'Resultados da classificação: {resultados_train}')
print("Classification Report: \n", classification_report(y_train_enc,  resultados_train))
print("confusion matrixt: \n",confusion_matrix(y_train_enc, resultados_train))

In [None]:
ensemble_proba_test_rf_gbm = ensemble_model.predict_proba(X_test_std)

test_set = [ensemble_proba_test_rf_gbm]

resultados_test= classifier.classify(test_set)

print(f'Resultados da classificação: {resultados_test}')
print("Classification Report: \n", classification_report(y_test_enc,  resultados_test))
print("confusion matrixt: \n",confusion_matrix(y_test_enc, resultados_test))

#### RF + GBM + LR

In [None]:

models = [
    ('rf', RF_best),
    ('gbm', GBM_best),
    ('lr', LR_best),
]

ensemble_model = VotingClassifier(estimators=models, voting='soft')
ensemble_model.fit(X_train_std, y_train_enc)

ensemble_proba_train_rf_gbm_lr = ensemble_model.predict_proba(X_train_std)

In [None]:
train_set = [ensemble_proba_train_rf_gbm_lr]

data_train = pd.DataFrame()
data_train['y'] = y_train.GR

means_, std_ = return_means_std(data_train, train_set)

classifier = FuzzyClassifier(means_, std_)

resultados_train = classifier.classify(train_set)

print(f'Resultados da classificação: {resultados_train}')
print("Classification Report: \n", classification_report(y_train_enc,  resultados_train))
print("confusion matrixt: \n",confusion_matrix(y_train_enc, resultados_train))

In [None]:
ensemble_proba_test_rf_gbm_lr = ensemble_model.predict_proba(X_test_std)

In [None]:
test_set = [ensemble_proba_test_rf_gbm_lr]

resultados_test= classifier.classify(test_set)

print(f'Resultados da classificação: {resultados_test}')
print("Classification Report: \n", classification_report(y_test_enc,  resultados_test))
print("confusion matrixt: \n",confusion_matrix(y_test_enc, resultados_test))

#### RF + LR

In [None]:
models = [
    ('rf', RF_best),
    ('lr', LR_best),
]

ensemble_model = VotingClassifier(estimators=models, voting='soft')
ensemble_model.fit(X_train_std, y_train_enc)

ensemble_proba_train_rf_lr = ensemble_model.predict_proba(X_train_std)
ensemble_train_rf_lr = ensemble_model.predict(X_train_std)

In [None]:
train_set = [ensemble_proba_train_rf_lr]

data_train = pd.DataFrame()
data_train['y'] = y_train.GR

means_, std_ = return_means_std(data_train, train_set)

classifier = FuzzyClassifier(means_, std_)
resultados_train = classifier.classify(train_set)

print(f'Resultados da classificação: {resultados_train}')
print("Classification Report: \n", classification_report(y_train_enc,  resultados_train))
print("confusion matrixt: \n",confusion_matrix(y_train_enc, resultados_train))

In [None]:
ensemble_proba_test_rf_lr = ensemble_model.predict_proba(X_test_std)

ensemble_test_rf_lr = ensemble_model.predict(X_test_std)

In [None]:
test_set = [ensemble_proba_test_rf_lr]

resultados_test= classifier.classify(test_set)

print(f'Resultados da classificação: {resultados_test}')
print("Classification Report: \n", classification_report(y_test_enc,  resultados_test))
print("confusion matrixt: \n",confusion_matrix(y_test_enc, resultados_test))

#### GBM + LR

In [None]:
# GBM + LR soft-voting ensemble. The estimators must be GBM and LR here:
# pairing RF with GBM would just repeat the "RF + GBM" experiment above.
models = [
    ('gbm', GBM_best),
    ('lr', LR_best),
]

ensemble_model = VotingClassifier(estimators=models, voting='soft')
ensemble_model.fit(X_train_std, y_train_enc)

# Class probabilities and hard predictions of the ensemble on the training set.
ensemble_proba_train_lr_gbm = ensemble_model.predict_proba(X_train_std)

ensemble_train_lr_gbm = ensemble_model.predict(X_train_std)

In [None]:
train_set = [ensemble_proba_train_lr_gbm]

data_train = pd.DataFrame()
data_train['y'] = y_train.GR

means_, std_ = return_means_std(data_train, train_set)

classifier = FuzzyClassifier(means_, std_)
resultados_train = classifier.classify(train_set)

print(f'Resultados da classificação: {resultados_train}')
print("Classification Report: \n", classification_report(y_train_enc,  resultados_train))
print("confusion matrixt: \n",confusion_matrix(y_train_enc, resultados_train))

In [None]:
ensemble_proba_test_lr_gbm = ensemble_model.predict_proba(X_test_std)

ensemble_test_lr_gbm = ensemble_model.predict(X_test_std)

In [None]:
test_set = [ensemble_proba_test_lr_gbm]

resultados_test= classifier.classify(test_set)

print(f'Resultados da classificação: {resultados_test}')
print("Classification Report: \n", classification_report(y_test_enc,  resultados_test))
print("confusion matrixt: \n",confusion_matrix(y_test_enc, resultados_test))

### RF + GBM + SVM

In [None]:
models = [
    ('rf', RF_best),
    ('gbm', GBM_best),
    ('svm', SVM_best)
]

ensemble_model = VotingClassifier(estimators=models, voting='soft')
ensemble_model.fit(X_train_std, y_train_enc)

ensemble_proba_train_lr_gbm_svm = ensemble_model.predict_proba(X_train_std)

train_set = [ensemble_proba_train_lr_gbm_svm]

data_train = pd.DataFrame()
data_train['y'] = y_train.GR

means_, std_ = return_means_std(data_train, train_set)

classifier = FuzzyClassifier(means_, std_)
resultados_train = classifier.classify(train_set)

print(f'Resultados da classificação: {resultados_train}')
print("Classification Report: \n", classification_report(y_train_enc,  resultados_train))
print("confusion matrixt: \n",confusion_matrix(y_train_enc, resultados_train))

In [None]:
ensemble_proba_test_lr_gbm_svm = ensemble_model.predict_proba(X_test_std)

test_set = [ensemble_proba_test_lr_gbm_svm]

resultados_test= classifier.classify(test_set)

print(f'Resultados da classificação: {resultados_test}')
print("Classification Report: \n", classification_report(y_test_enc,  resultados_test))
print("confusion matrixt: \n",confusion_matrix(y_test_enc, resultados_test))

### RF + GBM + KNN

In [None]:
models = [
    ('rf', RF_best),
    ('gbm', GBM_best),
    ('knn', KNN_best)
]

ensemble_model = VotingClassifier(estimators=models, voting='soft')
ensemble_model.fit(X_train_std, y_train_enc)

ensemble_proba_train_lr_gbm_knn = ensemble_model.predict_proba(X_train_std)

train_set = [ensemble_proba_train_lr_gbm_knn]

data_train = pd.DataFrame()
data_train['y'] = y_train.GR

means_, std_ = return_means_std(data_train, train_set)

classifier = FuzzyClassifier(means_, std_)
resultados_train = classifier.classify(train_set)

print(f'Resultados da classificação: {resultados_train}')
print("Classification Report: \n", classification_report(y_train_enc,  resultados_train))
print("confusion matrixt: \n",confusion_matrix(y_train_enc, resultados_train))

In [None]:
ensemble_proba_test_lr_gbm_knn = ensemble_model.predict_proba(X_test_std)

test_set = [ensemble_proba_test_lr_gbm_knn]

resultados_test= classifier.classify(test_set)

print(f'Resultados da classificação: {resultados_test}')
print("Classification Report: \n", classification_report(y_test_enc,  resultados_test))
print("confusion matrixt: \n",confusion_matrix(y_test_enc, resultados_test))

### RF + KNN

In [None]:
models = [
    ('rf', RF_best),
    ('knn', KNN_best)
]

ensemble_model = VotingClassifier(estimators=models, voting='soft')
ensemble_model.fit(X_train_std, y_train_enc)

ensemble_proba_train_rf_knn = ensemble_model.predict_proba(X_train_std)

train_set = [ensemble_proba_train_rf_knn]

data_train = pd.DataFrame()
data_train['y'] = y_train.GR

means_, std_ = return_means_std(data_train, train_set)

classifier = FuzzyClassifier(means_, std_)
resultados_train = classifier.classify(train_set)

print(f'Resultados da classificação: {resultados_train}')
print("Classification Report: \n", classification_report(y_train_enc,  resultados_train))
print("confusion matrixt: \n",confusion_matrix(y_train_enc, resultados_train))

In [None]:
ensemble_proba_test_rf_knn = ensemble_model.predict_proba(X_test_std)

test_set = [ensemble_proba_test_rf_knn]

resultados_test= classifier.classify(test_set)

print(f'Resultados da classificação: {resultados_test}')
print("Classification Report: \n", classification_report(y_test_enc,  resultados_test))
print("confusion matrixt: \n",confusion_matrix(y_test_enc, resultados_test))

### GBM + SVM

In [None]:
models = [
    ('gbm', GBM_best),
    ('svm', SVM_best)
]

ensemble_model = VotingClassifier(estimators=models, voting='soft')
ensemble_model.fit(X_train_std, y_train_enc)

ensemble_proba_train_gbm_svm = ensemble_model.predict_proba(X_train_std)

train_set = [ensemble_proba_train_gbm_svm]

data_train = pd.DataFrame()
data_train['y'] = y_train.GR

means_, std_ = return_means_std(data_train, train_set)

classifier = FuzzyClassifier(means_, std_)
resultados_train = classifier.classify(train_set)

print(f'Resultados da classificação: {resultados_train}')
print("Classification Report: \n", classification_report(y_train_enc,  resultados_train))
print("confusion matrixt: \n",confusion_matrix(y_train_enc, resultados_train))

In [None]:
ensemble_proba_test_gbm_svm = ensemble_model.predict_proba(X_test_std)

test_set = [ensemble_proba_test_gbm_svm]

resultados_test= classifier.classify(test_set)

print(f'Resultados da classificação: {resultados_test}')
print("Classification Report: \n", classification_report(y_test_enc,  resultados_test))
print("confusion matrixt: \n",confusion_matrix(y_test_enc, resultados_test))

### GBM + KNN

In [None]:
models = [
    ('gbm', GBM_best),
    ('knn', KNN_best)
]

ensemble_model = VotingClassifier(estimators=models, voting='soft')
ensemble_model.fit(X_train_std, y_train_enc)

ensemble_proba_train_gbm_knn = ensemble_model.predict_proba(X_train_std)

train_set = [ensemble_proba_train_gbm_knn]

data_train = pd.DataFrame()
data_train['y'] = y_train.GR

means_, std_ = return_means_std(data_train, train_set)

classifier = FuzzyClassifier(means_, std_)
resultados_train = classifier.classify(train_set)

print(f'Resultados da classificação: {resultados_train}')
print("Classification Report: \n", classification_report(y_train_enc,  resultados_train))
print("confusion matrixt: \n",confusion_matrix(y_train_enc, resultados_train))

In [None]:
ensemble_proba_test_gbm_knn = ensemble_model.predict_proba(X_test_std)

test_set = [ensemble_proba_test_gbm_knn]

resultados_test= classifier.classify(test_set)

print(f'Resultados da classificação: {resultados_test}')
print("Classification Report: \n", classification_report(y_test_enc,  resultados_test))
print("confusion matrixt: \n",confusion_matrix(y_test_enc, resultados_test))

### RF + SVM

In [None]:
models = [
    ('rf', RF_best),
    ('svm', SVM_best)
]

ensemble_model = VotingClassifier(estimators=models, voting='soft')
ensemble_model.fit(X_train_std, y_train_enc)

ensemble_proba_train_rf_svm = ensemble_model.predict_proba(X_train_std)

train_set = [ensemble_proba_train_rf_svm]

data_train = pd.DataFrame()
data_train['y'] = y_train.GR

means_, std_ = return_means_std(data_train, train_set)

classifier = FuzzyClassifier(means_, std_)
resultados_train = classifier.classify(train_set)

print(f'Resultados da classificação: {resultados_train}')
print("Classification Report: \n", classification_report(y_train_enc,  resultados_train))
print("confusion matrixt: \n",confusion_matrix(y_train_enc, resultados_train))

In [None]:
ensemble_proba_test_rf_svm = ensemble_model.predict_proba(X_test_std)

test_set = [ensemble_proba_test_rf_svm]

resultados_test= classifier.classify(test_set)

print(f'Resultados da classificação: {resultados_test}')
print("Classification Report: \n", classification_report(y_test_enc,  resultados_test))
print("confusion matrixt: \n",confusion_matrix(y_test_enc, resultados_test))

### Representation of rules with graphs

In [None]:
import numpy as np
import networkx as nx
import plotly.graph_objects as go

# Create the graph
G = nx.DiGraph()

# Add nodes for the fuzzy variables
G.add_node('Prob. A', type='input A')
G.add_node('Prob. B', type='input B')
G.add_node('Prob. C', type='input C')
G.add_node('Final classification', type='output')

# Add edges representing the rules
for idx, rule in enumerate(classifier.rules):
    rule_name = f'Rule {idx + 1}'
    G.add_node(rule_name, type='rule')

    # Connect the antecedent variables to the rule
    G.add_edge('Prob. A', rule_name)
    G.add_edge('Prob. B', rule_name)
    G.add_edge('Prob. C', rule_name)

    # Connect the rule to the consequent variable
    G.add_edge(rule_name, 'Final classification')

# Generate starting positions for nodes
pos = nx.spring_layout(G, dim=3, seed=42, k=0.5)  

# Manually adjust positions for "Prob. A", "Prob. B" and "Prob. C"
pos['Prob. A'] = np.array([-1.5, 0, 0.5])  
pos['Prob. B'] = np.array([0, 1.5, -0.5])  
pos['Prob. C'] = np.array([1.5, 0, 0.5]) 

x_nodes = [pos[node][0] for node in G.nodes]
y_nodes = [pos[node][1] for node in G.nodes]
z_nodes = [pos[node][2] for node in G.nodes]

edge_x = []
edge_y = []
edge_z = []

for edge in G.edges:
    x0, y0, z0 = pos[edge[0]]
    x1, y1, z1 = pos[edge[1]]
    edge_x.extend([x0, x1, None])
    edge_y.extend([y0, y1, None])
    edge_z.extend([z0, z1, None])

node_colors = []
for node in G.nodes:
    if G.nodes[node]['type'] == 'input A':
        node_colors.append('green')  # Prob. A
    elif G.nodes[node]['type'] == 'input B':
        node_colors.append('orange')  # Prob. B
    elif G.nodes[node]['type'] == 'input C':
        node_colors.append('red')  # Prob. C
    elif G.nodes[node]['type'] == 'output':
        node_colors.append('blue')  # Output
    else:
        node_colors.append('lightblue')  # Rules

fig = go.Figure()

fig.add_trace(go.Scatter3d(
    x=edge_x, y=edge_y, z=edge_z,
    mode='lines',
    line=dict(color='gray', width=2),
    hoverinfo='none'
))

fig.add_trace(go.Scatter3d(
    x=x_nodes, y=y_nodes, z=z_nodes,
    mode='markers+text',
    marker=dict(size=10, color=node_colors, line=dict(width=0.1, color='black')),
    text=list(G.nodes),
    textposition='top center',
    hoverinfo='text'
))

fig.update_layout(
    title='Grafo de Regras Fuzzy (3D)',
    scene=dict(
        xaxis=dict(visible=False),
        yaxis=dict(visible=False),
        zaxis=dict(visible=False)
    ),
    margin=dict(l=0, r=0, t=10, b=0),
    paper_bgcolor='white'  # Fundo geral branco
)

fig.show()


In [None]:
prob_classe1 = ctrl.Antecedent(np.arange(0, 1.1, 0.01), 'Probability for category A')
prob_classe2 = ctrl.Antecedent(np.arange(0, 1.1, 0.01), 'Probability for category B')
prob_classe3 = ctrl.Antecedent(np.arange(0, 1.1, 0.01), 'Probability for category C')
classe_final = ctrl.Consequent(np.arange(1, 4, 1), 'Final classification')


In [None]:
prob_classe1['low'] = fuzz.gaussmf(prob_classe1.universe, 0.09, 0.03)
prob_classe1['medium'] = fuzz.gaussmf(prob_classe1.universe, 0.14, 0.06)
prob_classe1['high'] = fuzz.gaussmf(prob_classe1.universe, 0.81, 0.09)

prob_classe1.view()

In [None]:
prob_classe2['low'] = fuzz.gaussmf(prob_classe2.universe, 0.15, 0.08)
prob_classe2['medium'] = fuzz.gaussmf(prob_classe2.universe, 0.25, 0.07)
prob_classe2['high'] = fuzz.gaussmf(prob_classe2.universe, 0.76, 0.06)
prob_classe2.view()

In [None]:
prob_classe3['low'] =  fuzz.gaussmf(prob_classe3.universe, 0.03, 0.01)
prob_classe3['medium'] = fuzz.gaussmf(prob_classe3.universe, 0.09, 0.03)
prob_classe3['high'] = fuzz.gaussmf(prob_classe3.universe, 0.65, 0.09)
prob_classe3.view()

In [None]:
# prob_classe3 = ctrl.Antecedent(np.arange(0, 1.1, 0.01), 'prob_classe3')
classe_final = ctrl.Consequent(np.arange(1, 4, 1), 'Final classification')

classe_final['category A'] = fuzz.trimf(classe_final.universe, [0.5, 1, 1.8]) 
classe_final['category B'] = fuzz.trimf(classe_final.universe, [1.4, 2, 2.5]) 
classe_final['category C'] = fuzz.trimf(classe_final.universe, [2.2, 3, 3.5])

In [None]:
classe_final.view()