In [1]:
import json

import joblib
import pandas as pd
from sklearn.model_selection import train_test_split

from FuzzySystemHandler import FuzzySystemHandler
from GeneticAlgorithmHandler import GeneticAlgorithmHandler

In [2]:
# Load the full dataset, carve out stratified train / validation / test
# splits, and persist each split next to the source spreadsheet.
#
# Paths use forward slashes: the previous "..\dataset\..." literals relied on
# "\d" being an unrecognised escape sequence (a SyntaxWarning on recent Python
# versions) and only resolved to the intended directory on Windows.
DATA_DIR = "../dataset"
SPLIT_SEED = 42  # fixed seed so a Restart & Run All reproduces the same splits

dataset = pd.read_excel(f"{DATA_DIR}/dadoscancer_2classes.xlsx")

# Hold out 20% of the rows as the test split, stratified on 'Class'.
train, test = train_test_split(dataset, test_size=0.2, shuffle=True,
                               stratify=dataset[['Class']], random_state=SPLIT_SEED)
# Hold out 20% of the remaining rows as the validation split, again stratified.
train, valid = train_test_split(train, test_size=0.2, shuffle=True,
                                stratify=train[['Class']], random_state=SPLIT_SEED)

train.to_csv(f"{DATA_DIR}/dadoscancer_2classes-train.csv")
valid.to_csv(f"{DATA_DIR}/dadoscancer_2classes-valid.csv")
test.to_csv(f"{DATA_DIR}/dadoscancer_2classes-test.csv")

In [3]:
# Features listed here are paired with 2 below; every other feature is paired with 3.
binary_features = [
    'In PanCan Pathway Analysis',
    'Post Initial Therapy',
    'Prior Diagnosis',
    'Radiation Therapy',
    'Tissue Prospective Indicator',
    'Ethnicity Category',
    'Sex',
]

# (column name, count) pairs for every column of `train` except 'index' and
# the 'Class' target; passed to FuzzySystemHandler as feature_columns_sets.
feature_sets = [
    (column, 2 if column in binary_features else 3)
    for column in train.columns.tolist()
    if column not in ['index', 'Class']
]

In [4]:
# Fuzzy system built on the training split. Its scaler (get_scaler()) is
# reused when the validation and test systems are constructed further down.
fuzz_sys = FuzzySystemHandler(
    train,
    feature_columns_sets=feature_sets,
    target_column='Class',
    target_nsets=2,
)

In [5]:
# Genetic-algorithm handler bound to the training fuzzy system.
ga = GeneticAlgorithmHandler(
    fuzzy_system=fuzz_sys,
    max_rules=20,
)

In [6]:
# Run the genetic algorithm N_RUNS times, score each run's result on the
# validation split, and keep the result with the highest validation accuracy.
N_RUNS = 20

best_hof, best_acc = None, 0

# One record per run, keyed by run number.
# NOTE: despite their names, 'acc_test' and 'fail_test' hold the metrics
# measured on `valid` (not on `test`). The key names are left unchanged so
# that anything reading the saved records keeps working.
aval_best_gen = {}

for n in range(N_RUNS):
    # `hof` is whatever ga.run returns first — presumably the best individual
    # of the run (hall of fame); confirm against GeneticAlgorithmHandler.
    hof, pop = ga.run(ngen=300, npop=300, cxpb=0.65, mutpb=0.05)
    i_unpack = ga.unpack_individual(hof)

    # Evaluate on the validation split, reusing the scaler of the training system.
    fuzz_valid_select = FuzzySystemHandler(valid, feature_columns_sets=feature_sets,
                                target_column='Class', target_nsets=2, scaler=fuzz_sys.get_scaler())

    fuzz_valid_select.set_new_sets(i_unpack)
    fuzz_valid_select.set_rules_system(i_unpack)

    acc, fail = fuzz_valid_select.evaluate_system()
    # The `is None` check guarantees a selection even if every run scores 0;
    # otherwise best_hof would stay None and later cells would fail on it.
    if best_hof is None or best_acc < acc:
        best_hof = hof
        best_acc = acc
    print(f"Best at {n}: {acc}")
    ga.objective_function(hof, print_all=True)

    aval_best_gen[n] = {
        'id': n,
        'hof': hof,
        'acc_test': acc,
        'fail_test': fail
    }

gen	nevals	avg    	std       	min   	max    
0  	300   	1.61449	0.00896097	1.5267	1.62988
1  	174   	1.59633	0.0528295 	0.89637	1.61992
2  	203   	1.55933	0.106352  	0.860627	1.61175
3  	207   	1.48007	0.181517  	0.710855	1.60945
4  	191   	1.36655	0.231712  	0.612078	1.60716
5  	208   	1.20827	0.292215  	0.607884	1.59601
6  	212   	1.06288	0.312028  	0.403891	1.58958
7  	203   	0.893877	0.29931   	0.334495	1.56777
8  	210   	0.738991	0.237827  	0.334495	1.56019
9  	199   	0.641861	0.176321  	0.308335	1.49284
10 	188   	0.581402	0.13429   	0.308335	1.28011
11 	197   	0.552291	0.129197  	0.22617 	1.44615
12 	206   	0.50282 	0.112382  	0.246998	0.925335
13 	202   	0.4833  	0.167362  	0.150552	1.48557 
14 	206   	0.447264	0.151917  	0.150552	1.38666 
15 	191   	0.41059 	0.152048  	0.150552	0.957185
16 	196   	0.382298	0.168976  	0.14075 	0.927486
17 	186   	0.329082	0.156472  	0.136752	0.884629
18 	207   	0.293551	0.150077  	0.136752	0.987668
19 	210   	0.276962	0.148998  	0.133148	0.7272

In [7]:
# Unpack the run result selected on the validation split; `individual` is what
# gets printed, evaluated on the test split and saved to JSON below.
individual = ga.unpack_individual(best_hof)
# Print the objective breakdown for the selected result; as the cell's last
# expression, the returned value is also displayed.
ga.objective_function(best_hof, print_all=True)

acc=0.95 enable_rules_perc=0.55 rules_per_class=[10, 1] fail_to_predict=0.0 invalid_perc=0.0 repeated_rules_perc=0.0 slots_used_perc=0.11616161616161616 perc_invalid_pair=0.0


(0.03266758494031224,)

In [8]:
# Display the GA handler's feasibility verdict for the selected result.
# NOTE(review): the feasibility criteria live in GeneticAlgorithmHandler and
# are not visible in this notebook.
ga.feasible(best_hof)

True

In [9]:
# Display the GA handler's distance value for the selected result.
# NOTE(review): in the recorded run this equals the slots_used_perc printed by
# objective_function — confirm the relationship in GeneticAlgorithmHandler.
ga.distance_func(best_hof)

0.11616161616161616

In [10]:
# Pretty-print the selected individual as JSON indented by four spaces.
individual_json = json.dumps(individual, indent=4)
print(individual_json)

{
    "rules": [
        {
            "Enable": 1,
            "Consequent": 0,
            "Antecedents": [
                0,
                1,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                1,
                0,
                0,
                0,
                0,
                0,
                0,
                1
            ]
        },
        {
            "Enable": 1,
            "Consequent": 0,
            "Antecedents": [
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                2,
                1,
                0,
                0,
                0
            ]
        },
        {
            "Enable": 1,
            "Consequent": 0,
 

In [11]:
# Evaluate the selected individual on the held-out test split. The fuzzy
# system is built on `test` but reuses the scaler of the training system.
train_scaler = fuzz_sys.get_scaler()
fuzz_test = FuzzySystemHandler(
    test,
    feature_columns_sets=feature_sets,
    target_column='Class',
    target_nsets=2,
    scaler=train_scaler,
)

# Load the individual's sets and rules into the test system.
fuzz_test.set_new_sets(individual)
fuzz_test.set_rules_system(individual)

acc, fail = fuzz_test.evaluate_system()

print(f"acc={acc} fail={fail}")

acc=0.9130434782608695 fail=0.0


In [12]:
# Persist the selected individual and the per-run records as JSON.
# The context managers ensure each file is flushed and closed as soon as it is
# written; the bare open(...) calls used before left both handles open.
with open('cancer-sys.json', 'w', encoding='utf-8') as individual_file:
    json.dump(individual, individual_file)

with open('cancer-sys-all-best.json', 'w', encoding='utf-8') as runs_file:
    json.dump(aval_best_gen, runs_file)

In [13]:
# Persist the training system's scaler with joblib (imported in the first
# cell alongside the other imports). As the cell's last expression, the value
# returned by joblib.dump is displayed.
joblib.dump(fuzz_sys.get_scaler(), "cancer-sys-scaler.save")

['cancer-sys-scaler.save']