In [17]:
# Imports
import anfis
import os
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from pyfume.Clustering import Clusterer
from pyfume.EstimateAntecendentSet import AntecedentEstimator
from pyfume.EstimateConsequentParameters import ConsequentEstimator
from pyfume.SimpfulModelBuilder import SugenoFISBuilder
from pyfume.Tester import SugenoFISTester
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, cohen_kappa_score
from numpy import clip, column_stack, argmax, vectorize

In [18]:
# Import and clean the data
# Column labels are supplied explicitly via `names=`; the last label ("Cancer")
# is the class column and the remaining nine are used as predictors.
# NOTE(review): "Something" looks like a placeholder label - confirm the actual
# column order of wbco.csv and rename it accordingly.
column_names = ["Clump_Thickness", "Uniformity_of_Cell_Size", "Uniformity_of_Cell_Shape", "Marginal_Adhesion", "Single_Epithelial_Cell_Size", "Bland_Chromatin", "Normal_Nucleoli", "Mitoses", "Something", "Cancer"]
# Predictor labels only (everything except the class column); kept as its own
# list because the model builder/tester cells use it as variable names.
var_names = column_names[:-1]
data = pd.read_csv(os.path.abspath('wbco.csv'),names=column_names)

# Rows containing a "?" entry are dropped so that the whole frame (including the
# 'Cancer' column) can be cast to integers in a single step.
rows_with_question_mark = data[data.isin(["?"]).any(axis=1)].index
data = data.drop(rows_with_question_mark)
data = data.astype('int')
column_names = list(var_names)

# Set seed
seed = 7
# Actually apply the seed: previously it was defined but never used, so nothing
# was seeded. This fixes NumPy's global RNG for every later cell that draws from
# it (presumably including the fuzzy c-means initialisation - TODO confirm).
np.random.seed(seed)

# Normalize the dataset
scaler_input = MinMaxScaler()
scaler_output = MinMaxScaler()

# Choose predictors and output
# Both predictors and the class column are min-max scaled; the class column is
# reshaped to a single-column 2-D array because the scaler is fitted on it.
X = scaler_input.fit_transform(data[column_names].values)
y = scaler_output.fit_transform(data["Cancer"].values.reshape(-1, 1))

In [19]:
# Hold out 20% of the samples as a test set; the fixed random_state keeps the
# split repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
# Only the training targets are flattened (singleton axis removed); y_test
# keeps its single-column 2-D shape.
y_train = y_train.squeeze()

In [20]:
## Train 0 vs all model

# Create 0-vs-all target vector: 1 where the label equals 0, otherwise 0.
# An element-wise comparison replaces the previous np.vectorize(dict.get) lookup,
# which looped in Python and produced None for any label missing from the dict.
y_train_0_vs_all = (y_train == 0).astype(int)
# Cluster the input-output space (fuzzy c-means, 10 clusters -> 10 rules)
cl = Clusterer(x_train=X_train, y_train=y_train_0_vs_all, nr_clus=10)
clust_centers, part_matrix, _ = cl.cluster(method='fcm')
# Estimate membership functions parameters from the partition matrix
ae = AntecedentEstimator(X_train, part_matrix)
antecedent_params = ae.determineMF()
# Estimate consequent parameters
ce = ConsequentEstimator(X_train, y_train_0_vs_all, part_matrix)
conseq_params_0_vs_all = ce.suglms()
# Build first-order Takagi-Sugeno model (no generated Simpful code is saved)
modbuilder = SugenoFISBuilder(antecedent_params, conseq_params_0_vs_all, var_names, save_simpful_code=False)
ts_0_vs_all = modbuilder.get_model()

 * Detected 10 rules / clusters
 * Detected Sugeno model type


In [21]:
## Train 1 vs all model

# Create 1-vs-all target vector: 1 where the label equals 1, otherwise 0.
# An element-wise comparison replaces the previous np.vectorize(dict.get) lookup,
# which looped in Python and produced None for any label missing from the dict.
y_train_1_vs_all = (y_train == 1).astype(int)
# Cluster the input-output space (fuzzy c-means, 10 clusters -> 10 rules)
cl = Clusterer(x_train=X_train, y_train=y_train_1_vs_all, nr_clus=10)
clust_centers, part_matrix, _ = cl.cluster(method='fcm')
# Estimate membership functions parameters from the partition matrix
ae = AntecedentEstimator(X_train, part_matrix)
antecedent_params = ae.determineMF()
# Estimate consequent parameters
ce = ConsequentEstimator(X_train, y_train_1_vs_all, part_matrix)
conseq_params_1_vs_all = ce.suglms()
# Build first-order Takagi-Sugeno model (no generated Simpful code is saved)
modbuilder = SugenoFISBuilder(antecedent_params, conseq_params_1_vs_all, var_names, save_simpful_code=False)
ts_1_vs_all = modbuilder.get_model()

 * Detected 10 rules / clusters
 * Detected Sugeno model type


In [22]:
## Train 2 vs all model

# Create 2-vs-all target vector: 1 where the label equals 2, otherwise 0.
# An element-wise comparison replaces the previous np.vectorize(dict.get) lookup,
# which looped in Python and produced None for any label missing from the dict.
# NOTE(review): the labels were min-max scaled in the data-preparation cell, so
# they should lie in [0, 1] and never equal 2; this target is then all zeros.
# Confirm whether a third one-vs-all model is intended for this dataset.
y_train_2_vs_all = (y_train == 2).astype(int)
# Cluster the input-output space (fuzzy c-means, 10 clusters -> 10 rules)
cl = Clusterer(x_train=X_train, y_train=y_train_2_vs_all, nr_clus=10)
clust_centers, part_matrix, _ = cl.cluster(method='fcm')
# Estimate membership functions parameters from the partition matrix
ae = AntecedentEstimator(X_train, part_matrix)
antecedent_params = ae.determineMF()
# Estimate consequent parameters
ce = ConsequentEstimator(X_train, y_train_2_vs_all, part_matrix)
conseq_params_2_vs_all = ce.suglms()
# Build first-order Takagi-Sugeno model (no generated Simpful code is saved)
modbuilder = SugenoFISBuilder(antecedent_params, conseq_params_2_vs_all, var_names, save_simpful_code=False)
ts_2_vs_all = modbuilder.get_model()

 * Detected 10 rules / clusters
 * Detected Sugeno model type


In [23]:
## One-vs-all inference on the test set with the initial Takagi-Sugeno models
#
# For every model the first element returned by predict() is clipped to [0, 1]
# and used as the score of the "positive" class; its complement is the score of
# "all the rest". Each *_vs_all array is laid out as [rest, positive].

# 0 vs all
modtester = SugenoFISTester(ts_0_vs_all, X_test, var_names)
score_is_0 = clip(modtester.predict()[0], 0, 1)
y_pred_probs_0_vs_all = column_stack((1 - score_is_0, score_is_0))

# 1 vs all
modtester = SugenoFISTester(ts_1_vs_all, X_test, var_names)
score_is_1 = clip(modtester.predict()[0], 0, 1)
y_pred_probs_1_vs_all = column_stack((1 - score_is_1, score_is_1))

# 2 vs all
modtester = SugenoFISTester(ts_2_vs_all, X_test, var_names)
score_is_2 = clip(modtester.predict()[0], 0, 1)
y_pred_probs_2_vs_all = column_stack((1 - score_is_2, score_is_2))

# Each model casts a three-column "vote": its positive score goes to its own
# class column and its rest score to the two other class columns.
votes_from_0 = column_stack((
    y_pred_probs_0_vs_all[:, 1],
    y_pred_probs_0_vs_all[:, 0],
    y_pred_probs_0_vs_all[:, 0],
))
votes_from_1 = column_stack((
    y_pred_probs_1_vs_all[:, 0],
    y_pred_probs_1_vs_all[:, 1],
    y_pred_probs_1_vs_all[:, 0],
))
votes_from_2 = column_stack((
    y_pred_probs_2_vs_all[:, 0],
    y_pred_probs_2_vs_all[:, 0],
    y_pred_probs_2_vs_all[:, 1],
))

# Sum the votes, renormalise every row to sum to one, and pick the best class.
y_pred_probs = votes_from_0 + votes_from_1 + votes_from_2
row_totals = y_pred_probs.sum(axis=1, keepdims=True)
y_pred_probs = y_pred_probs / row_totals
y_pred = argmax(y_pred_probs, axis=1)

In [24]:
# Score the predictions of the initial models against the held-out labels.
def _report(template, value):
    """Print ``value`` rendered through ``template`` and return it unchanged."""
    print(template.format(value))
    return value

# Each metric is computed, printed with three decimals, and kept in its own
# variable.
acc_score = _report("Accuracy: {:.3f}", accuracy_score(y_test, y_pred))
rec_score = _report("Recall: {:.3f}", recall_score(y_test, y_pred))
prec_score = _report("Precision Score: {:.3f}", precision_score(y_test, y_pred))
F1_score = _report("F1-Score: {:.3f}", f1_score(y_test, y_pred))
kappa = _report("Kappa Score: {:.3f}", cohen_kappa_score(y_test, y_pred))

Accuracy: 0.985
Recall: 0.983
Precision Score: 0.983
F1-Score: 0.983
Kappa Score: 0.970


In [25]:
## Optimize models
# For each one-vs-all problem an ANFIS object is built from the training inputs,
# the matching binary target, the initial Takagi-Sugeno model and its consequent
# parameters, and then trained for 20 epochs.
# NOTE(review): the recorded output of this cell is
#   AttributeError: module 'anfis' has no attribute 'ANFIS'
# so the cell does not currently run. The class may live in a submodule of the
# installed package, or a different `anfis` module may be intended - TODO confirm
# the import and the constructor signature before relying on the cells below.

# Optimize 0-vs-all ANFIS model
anf_0_vs_all = anfis.ANFIS(X_train, y_train_0_vs_all, ts_0_vs_all, conseq_params_0_vs_all)
anf_0_vs_all.trainHybridJangOffLine(epochs=20)

# Optimize 1-vs-all ANFIS model
anf_1_vs_all = anfis.ANFIS(X_train, y_train_1_vs_all, ts_1_vs_all, conseq_params_1_vs_all)
anf_1_vs_all.trainHybridJangOffLine(epochs=20)

# Optimize 2-vs-all ANFIS model
anf_2_vs_all = anfis.ANFIS(X_train, y_train_2_vs_all, ts_2_vs_all, conseq_params_2_vs_all)
anf_2_vs_all.trainHybridJangOffLine(epochs=20)

AttributeError: module 'anfis' has no attribute 'ANFIS'

In [None]:
## One-vs-all inference on the test set with the optimized ANFIS models
#
# The output of anfis.predict() for each model is clipped to [0, 1] and used as
# the score of the "positive" class; its complement is the score of "all the
# rest". Each *_vs_all array is laid out as [rest, positive].

# 0 vs all
anfis_score_is_0 = clip(anfis.predict(anf_0_vs_all, X_test), 0, 1)
y_pred_probs_0_vs_all = column_stack((1 - anfis_score_is_0, anfis_score_is_0))

# 1 vs all
anfis_score_is_1 = clip(anfis.predict(anf_1_vs_all, X_test), 0, 1)
y_pred_probs_1_vs_all = column_stack((1 - anfis_score_is_1, anfis_score_is_1))

# 2 vs all
anfis_score_is_2 = clip(anfis.predict(anf_2_vs_all, X_test), 0, 1)
y_pred_probs_2_vs_all = column_stack((1 - anfis_score_is_2, anfis_score_is_2))

# Each model casts a three-column "vote": its positive score goes to its own
# class column and its rest score to the two other class columns.
anfis_votes_from_0 = column_stack((
    y_pred_probs_0_vs_all[:, 1],
    y_pred_probs_0_vs_all[:, 0],
    y_pred_probs_0_vs_all[:, 0],
))
anfis_votes_from_1 = column_stack((
    y_pred_probs_1_vs_all[:, 0],
    y_pred_probs_1_vs_all[:, 1],
    y_pred_probs_1_vs_all[:, 0],
))
anfis_votes_from_2 = column_stack((
    y_pred_probs_2_vs_all[:, 0],
    y_pred_probs_2_vs_all[:, 0],
    y_pred_probs_2_vs_all[:, 1],
))

# Sum the votes, renormalise every row to sum to one, and pick the best class.
y_pred_probs = anfis_votes_from_0 + anfis_votes_from_1 + anfis_votes_from_2
anfis_row_totals = y_pred_probs.sum(axis=1, keepdims=True)
y_pred_probs = y_pred_probs / anfis_row_totals
y_pred = argmax(y_pred_probs, axis=1)

In [None]:
# Compute classification metrics for the optimized models
# Each score is stored in a *_final variable and that same variable is printed.
# Previously the print statements formatted the scores from the earlier metrics
# cell (acc_score, rec_score, ...), so this report repeated the old numbers.
acc_score_final = accuracy_score(y_test, y_pred)
print("Accuracy: {:.3f}".format(acc_score_final))
rec_score_final = recall_score(y_test, y_pred)
print("Recall: {:.3f}".format(rec_score_final))
prec_score_final = precision_score(y_test, y_pred)
print("Precision Score: {:.3f}".format(prec_score_final))
F1_score_final = f1_score(y_test, y_pred)
print("F1-Score: {:.3f}".format(F1_score_final))
kappa_final = cohen_kappa_score(y_test, y_pred)
print("Kappa Score: {:.3f}".format(kappa_final))