In [28]:
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from pyfume.Clustering import Clusterer
from pyfume.EstimateAntecendentSet import AntecedentEstimator
from pyfume.EstimateConsequentParameters import ConsequentEstimator
from pyfume.SimpfulModelBuilder import SugenoFISBuilder
from pyfume.Tester import SugenoFISTester
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, cohen_kappa_score
from numpy import clip, column_stack, argmax

In [29]:
column_names = ["Clump_Thickness", "Uniformity_of_Cell_Size", "Uniformity_of_Cell_Shape", "Marginal_Adhesion", "Single_Epithelial_Cell_Size", "Bland_Chromatin", "Normal_Nucleoli", "Mitoses", "Something", "Cancer"]
data = pd.read_csv(os.path.abspath('wbco.csv'),names=column_names)
data.head()
rows_with_question_mark = data[data.isin(["?"]).any(axis=1)].index
data = data.drop(rows_with_question_mark)

data = data.astype('int')
data['Cancer'] = data['Cancer'].astype('int')

column_names = column_names[:-1]

X = data[column_names].values
y = data["Cancer"].values.reshape(-1, 1)

In [30]:
# Train test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
y_train = np.squeeze(y_train) 


In [31]:
# Cluster the input-output space
cl = Clusterer(x_train=X_train, y_train=y_train, nr_clus=10)
clust_centers, part_matrix, _ = cl.cluster(method='fcm')

In [32]:
#Estimate membership functions parameters
ae = AntecedentEstimator(X_train, part_matrix)
antecedent_params = ae.determineMF()

In [33]:
#Estimate consequent parameters
ce = ConsequentEstimator(X_train, y_train, part_matrix)
conseq_params = ce.suglms()

In [34]:
modbuilder = SugenoFISBuilder(antecedent_params, conseq_params, column_names, save_simpful_code=False)
model = modbuilder.get_model()

 * Detected 10 rules / clusters
 * Detected Sugeno model type


In [35]:
modtester = SugenoFISTester(model, X_test, column_names)
y_pred = modtester.predict()[0]
y_pred = y_pred.astype(int)

In [36]:
# %% Compute classification metrics
acc_score = accuracy_score(y_test, y_pred)
print("Accuracy: {:.3f}".format(acc_score))
rec_score = recall_score(y_test, y_pred)
print("Recall: {:.3f}".format(rec_score))
prec_score = precision_score(y_test, y_pred)
print("Precision Score: {:.3f}".format(prec_score))
F1_score = f1_score(y_test, y_pred)
print("F1-Score: {:.3f}".format(F1_score))
kappa = cohen_kappa_score(y_test, y_pred)
print("Kappa Score: {:.3f}".format(kappa))

Accuracy: 0.723
Recall: 0.345
Precision Score: 1.000
F1-Score: 0.513
Kappa Score: 0.378
