In [1]:
import pandas as pd
import matplotlib.pyplot as plt
pd.options.display.float_format = "{:,.2f}".format

from experiments import *

import sys

sys.path.insert(1, '../fuzzylearn/')

from fuzzylearn import *
from fuzzylearn.fuzzifiers import LinearFuzzifier, CrispFuzzifier,ExponentialFuzzifier,QuantileLinearPiecewiseFuzzifier, QuantileConstantPiecewiseFuzzifier
from fuzzylearn.kernel import GaussianKernel, LinearKernel, HyperbolicKernel, PolynomialKernel, HomogeneousPolynomialKernel
from fuzzylearn import solve_optimization_gurobi

from sklearn.preprocessing import StandardScaler, MinMaxScaler, MaxAbsScaler, QuantileTransformer, RobustScaler, PowerTransformer, Normalizer
from sklearn.decomposition import PCA
from sklearn.model_selection import GridSearchCV, cross_val_score

dataset = pd.read_excel("dataset/IncidentiModificato.xlsx")
dataset = dataset.set_index("VERBALE")

In [2]:
import datetime as dt

dataset.DATA = dataset.DATA.apply(lambda d: (d - dt.datetime(1970,1,1)).days)

dataset.head()

Unnamed: 0_level_0,DATA,SESSO,ANNI,PESO,ALTEZZA,BMI,Mezzo,Testa:Neurocranio,Testa:Splancnocranio,Testa:Telencefalo,...,II raggio sx.1,III raggio sx.1,IV raggio sx.1,V raggio sx.1,Art. coxo-femorale dx,Art. coxo-femorale sx,Rotula o Ginocchio dx,Rotula o Ginocchio sx,Caviglia dx,Caviglia sx
VERBALE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
85567,10893,0,81,84.0,1.75,27.43,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
85829,10970,1,69,69.0,1.62,26.29,1,4,4,4,...,0,0,0,0,0,0,0,0,0,0
85977,11026,1,71,67.0,1.55,27.89,1,2,0,1,...,0,0,0,0,0,0,0,0,0,0
86220,11122,1,54,60.0,1.59,23.73,1,4,0,0,...,0,0,0,0,0,0,0,0,0,0
86247,11130,1,78,69.0,1.67,24.74,1,2,0,0,...,0,0,0,0,0,0,0,0,0,0


In [3]:
import logging

# create logger
f_logger = logging.getLogger(__name__)
f_logger.setLevel(logging.INFO)

# create console handler and set level to debug
file = logging.FileHandler('esperimenti.log')
file.setLevel(logging.INFO)

# create formatter
formatter = logging.Formatter('%(asctime)s -%(message)s')

# add formatter to ch
file.setFormatter(formatter)

# add ch to logger
f_logger.addHandler(file)

In [4]:
import json

"""
with open('esperimenti.json', "w") as write_file:
    json.dump({}, write_file)
with open('fuzzifiers.json',"w") as write_file:
    json.dump({},write_file)
with open('esperimenti_kernels.json',"w") as write_file:
    json.dump({},write_file)
with open('fuzzifiers_kernels.json',"w") as write_file:
    json.dump({},write_file)
with open('fuzzifiers_tsne.json',"w") as write_file:
    json.dump({},write_file)
"""

'\nwith open(\'esperimenti.json\', "w") as write_file:\n    json.dump({}, write_file)\nwith open(\'fuzzifiers.json\',"w") as write_file:\n    json.dump({},write_file)\nwith open(\'esperimenti_kernels.json\',"w") as write_file:\n    json.dump({},write_file)\nwith open(\'fuzzifiers_kernels.json\',"w") as write_file:\n    json.dump({},write_file)\nwith open(\'fuzzifiers_tsne.json\',"w") as write_file:\n    json.dump({},write_file)\n'

In [18]:
risultati = pd.read_json('esperimenti20var.json')

print(risultati[list(risultati.columns)[4:]].to_latex())

\begin{tabular}{lllll}
\toprule
{} &               LesioniPCA10 &        LesioniPCA10\_0 &               LesioniPCA20 &   LesioniPCA20\_0 \\
\midrule
c         &                        0.1 &                   0.1 &                        0.1 &              0.1 \\
fuzzifier &            LinearFuzzifier &  ExponentialFuzzifier &       ExponentialFuzzifier &  LinearFuzzifier \\
k         &  HyperbolicKernel(1, 0.01) &        LinearKernel() &  HyperbolicKernel(1, 0.01) &   LinearKernel() \\
score     &                      -0.30 &                 -0.25 &                      -0.29 &            -0.25 \\
\bottomrule
\end{tabular}



In [5]:
def addestra(model_class, X, y, model_selection_grid, num_fold_grid_search, num_fold_cross_val, logger = None, scaling=StandardScaler(), dim_reduction=None):
        
    if logger is not None:
        logger.info('Inizio Addestramento')
        
    X_std = scaling.fit_transform(X)  if scaling is not None else X
    
    X_std = dim_reduction.fit_transform(X_std) if dim_reduction is not None else X_std
    
    clf = GridSearchCV(estimator=model_class(), param_grid=model_selection_grid, cv=num_fold_grid_search, n_jobs=-1)
    clf.fit(X_std,y)
    
    grid = clf.best_params_
    for i, j in grid.items():
        grid[i] = str(j)

    val = cross_val_score(clf, X_std, y, cv=num_fold_cross_val)
    
    if logger is not None:
        logger.info('%s',str(clf.best_params_))
        logger.info('Scaler: %s', str(scaling))
        logger.info('Scores: %s',str(val))
        logger.info("Fine addestramento")
        
    return val, grid

In [6]:
def esperimento(dataset,columns,model_class, y, model_selection_grid, num_fold_grid_search, num_fold_cross_val, logger = None, scaling=StandardScaler(), dim_reduction=None,label=None):
    grid = {}
    if logger is not None:
        name = label if label is not None else str(columns)
        logger.info('ESPERIMENTO CON %s',name)
        
    dataset_values = dataset[columns].values
    result, grid = addestra(FuzzyInductor,dataset_values,y,model_selection_grid,num_fold_grid_search,num_fold_cross_val,dim_reduction=dim_reduction,scaling=scaling,logger = logger)
    
    if logger is not None:
        logger.info('Score: %s',str(result.mean()))
        logger.info("FINE ESPERIMENTO\n")
    
    return result.mean(), grid

In [7]:
def esperimento_registrato(dataset,columns,model_class, y, model_selection_grid, num_fold_grid_search, num_fold_cross_val, logger = None, file_json = None, scaling=StandardScaler(), dim_reduction=None,label=None):
    grid = {}
    sd_index = label if label is not None else str(columns)
    if file_json is not None:
        with open(file_json, "r") as read_file:
            esperimenti = json.load(read_file)
    else:
        esperimenti = {}
    if sd_index not in esperimenti.keys():
        score, grid = esperimento(dataset,columns,model_class, y, model_selection_grid, num_fold_grid_search, num_fold_cross_val, logger = logger, scaling=scaling, dim_reduction=dim_reduction,label=sd_index)
            
        esperimenti[sd_index] = grid
        esperimenti[sd_index]['score'] = score
        with open(file_json, "w") as write_file:
            json.dump(esperimenti, write_file)
    col = pd.DataFrame(index=esperimenti[sd_index].keys())
    col[sd_index] = esperimenti[sd_index].values()
    return col

In [8]:
sigmas = [.1,.255,.3,.4,.5,.6,.7,.8,.9]
c_space = np.logspace(-4,3,10)
params_grid = { 'c':c_space,
               'k':[GaussianKernel(sigma) for sigma in sigmas],
               'fuzzifier':[LinearFuzzifier,ExponentialFuzzifier, CrispFuzzifier, 
              QuantileConstantPiecewiseFuzzifier, QuantileLinearPiecewiseFuzzifier]
}
dataset_labels = dataset['Mezzo'].values

# Esperimenti

Esperimenti con i totali

In [9]:
columns_noPCA = [['Totale'],['Tot Testa','Tot Torace','Tot Addome','Tot Scheletro'],['Totale','Tot Testa','Tot Torace','Tot Addome','Tot Scheletro']]
labels = ['Totale','Totali distretti','Totali']

In [10]:
[esperimento_registrato(dataset,cs,FuzzyInductor,dataset_labels,params_grid,3,3,logger = f_logger,file_json = 'esperimenti.json',label=l) for cs,l in zip(columns_noPCA,labels)]

[                                                      Totale
 c                                       0.021544346900318846
 fuzzifier  <class 'fuzzylearn.fuzzifiers.ExponentialFuzzi...
 k                                        GaussianKernel(0.1)
 score                                                  -0.31,
                                             Totali distretti
 c                                       0.021544346900318846
 fuzzifier  <class 'fuzzylearn.fuzzifiers.ExponentialFuzzi...
 k                                        GaussianKernel(0.7)
 score                                                  -0.30,
                                                       Totali
 c                                       0.021544346900318846
 fuzzifier  <class 'fuzzylearn.fuzzifiers.ExponentialFuzzi...
 k                                        GaussianKernel(0.7)
 score                                                  -0.30]

Esperimenti con le lesioni

In [11]:
labels_lesioni = ['Lesioni 5 componenti','Lesioni 10 componenti','Lesioni 15 componenti','Lesioni 20 componenti']
columns_lesioni = list(dataset.columns[7:27])

In [12]:
for c,l in zip([5,10,15,20],labels_lesioni):
    dim_red = PCA(n_components=c)
    esperimento_registrato(dataset,columns_lesioni,FuzzyInductor,dataset_labels,params_grid,3,3,logger = f_logger,file_json = 'esperimenti.json',dim_reduction=dim_red,label=l)

In [13]:
labels_totali_lesioni = ['LesioniTotali5','LesioniTotali10','LesioniTotali15','LesioniTotali20']
columns_totali_lesioni = columns_lesioni + ['Totale','Tot Testa','Tot Torace','Tot Addome','Tot Scheletro']

In [14]:
for c,l in zip([5,10,15,20],labels_totali_lesioni):
    dim_red = PCA(n_components=c)
    esperimento_registrato(dataset,columns_totali_lesioni,FuzzyInductor,dataset_labels,params_grid,3,3,logger = f_logger,file_json = 'esperimenti.json',dim_reduction=dim_red,label=l)

Esperimenti con anagrafica

In [15]:
columns_anagrafica = list(dataset.columns[1:5])

In [16]:
columns_anagrafica_totale = columns_anagrafica + ['Totale']
columns_anagrafica_totali = columns_anagrafica + ['Totale','Tot Testa','Tot Torace','Tot Addome','Tot Scheletro']
columns_anagrafica_all = columns_anagrafica + columns_totali_lesioni

In [17]:
label_anagrafica_totale = 'Anagrafica_Totale'
label_anagrafica_totali = 'Anagrafica_Totali'
labels_anagrafica_all = ['Anagrafica_Lesioni_Totali5','Anagrafica_Lesioni_Totali10','Anagrafica_Lesioni_Totali15','Anagrafica_Lesioni_Totali20']

In [18]:
[esperimento_registrato(dataset,cs,FuzzyInductor,dataset_labels,params_grid,3,3,logger = f_logger,file_json = 'esperimenti.json',label=l) for cs,l in zip([columns_anagrafica_totale,columns_anagrafica_totali],[label_anagrafica_totale,label_anagrafica_totali])]

[                                           Anagrafica_Totale
 c                                       0.021544346900318846
 fuzzifier  <class 'fuzzylearn.fuzzifiers.ExponentialFuzzi...
 k                                        GaussianKernel(0.9)
 score                                                  -0.32,
                                            Anagrafica_Totali
 c                                       0.021544346900318846
 fuzzifier  <class 'fuzzylearn.fuzzifiers.QuantileConstant...
 k                                        GaussianKernel(0.1)
 score                                                  -0.33]

In [19]:
for c,l in zip([5,10,15,20],labels_anagrafica_all):
    dim_red = PCA(n_components=c)
    esperimento_registrato(dataset,columns_anagrafica_all,FuzzyInductor,dataset_labels,params_grid,3,3,logger = f_logger,file_json = 'esperimenti.json',dim_reduction=dim_red,label=l)

Esperimenti con BMI

In [20]:
BMI = ['BMI']

In [21]:
columns_BMI_totale = columns_anagrafica_totale + BMI
columns_BMI_totali = columns_anagrafica_totali + BMI
columns_BMI_all = columns_anagrafica_all + BMI

In [22]:
label_BMI_totale = "BMI_Totale"
label_BMI_totali = "BMI_Totali"
labels_BMI_all=['BMI_Lesioni_Totali5','BMI_Lesioni_Totali10','BMI_Lesioni_Totali15','BMI_Lesioni_Totali20','BMI_Lesioni_Totali25','BMI_Lesioni_Totali30']

In [23]:
[esperimento_registrato(dataset,cs,FuzzyInductor,dataset_labels,params_grid,3,3,logger = f_logger,file_json = 'esperimenti.json',label=l) for cs,l in zip([columns_BMI_totale,columns_BMI_totali],[label_BMI_totale,label_BMI_totali])]

[                                                BMI_Totale
 c                                     0.021544346900318846
 fuzzifier  <class 'fuzzylearn.fuzzifiers.LinearFuzzifier'>
 k                                      GaussianKernel(0.9)
 score                                                -0.36,
                                                   BMI_Totali
 c                                       0.021544346900318846
 fuzzifier  <class 'fuzzylearn.fuzzifiers.ExponentialFuzzi...
 k                                        GaussianKernel(0.1)
 score                                                  -0.46]

In [24]:
for c,l in zip([5,10,15,20,25,30],labels_BMI_all):
    dim_red = PCA(n_components=c)
    esperimento_registrato(dataset,columns_BMI_all,FuzzyInductor,dataset_labels,params_grid,3,3,logger = f_logger,file_json = 'esperimenti.json',dim_reduction=dim_red,label=l)

Esperimenti con data

In [25]:
data = ['DATA']

In [26]:
columns_data_totale = columns_anagrafica_totale + data
columns_data_totali = columns_anagrafica_totali + data
columns_data_all = columns_anagrafica_all + data

In [27]:
label_data_totale = "Data_Totale"
label_data_totali = "Data_Totali"
labels_data_all=['Data_Lesioni_Totali5','Data_Lesioni_Totali10','Data_Lesioni_Totali15','Data_Lesioni_Totali20','Data_Lesioni_Totali25','Data_Lesioni_Totali30']

In [28]:
[esperimento_registrato(dataset,cs,FuzzyInductor,dataset_labels,params_grid,3,3,logger = f_logger,file_json = 'esperimenti.json',label=l) for cs,l in zip([columns_data_totale,columns_data_totali],[label_data_totale,label_data_totali])]

[                                                 Data_Totale
 c                                       0.021544346900318846
 fuzzifier  <class 'fuzzylearn.fuzzifiers.ExponentialFuzzi...
 k                                        GaussianKernel(0.9)
 score                                                  -0.38,
                                                  Data_Totali
 c                                       0.021544346900318846
 fuzzifier  <class 'fuzzylearn.fuzzifiers.QuantileLinearPi...
 k                                        GaussianKernel(0.9)
 score                                                  -0.45]

In [29]:
for c,l in zip([5,10,15,20,25,30],labels_data_all):
    dim_red = PCA(n_components=c)
    esperimento_registrato(dataset,columns_data_all,FuzzyInductor,dataset_labels,params_grid,3,3,logger = f_logger,file_json = 'esperimenti.json',dim_reduction=dim_red,label=l)

esperimenti data e bmi

In [30]:
data_bmi = ['DATA','BMI']

In [31]:
columns_db_totale = columns_anagrafica_totale + BMI
columns_db_totali = columns_anagrafica_totali + BMI
columns_all = columns_anagrafica_all + BMI

In [32]:
label_db_totale = "DataBMI_Totale"
label_db_totali = "DataBMI_Totali"
labels_all=['All5','All10','All15','All20','All25','All30']

In [33]:
[esperimento_registrato(dataset,cs,FuzzyInductor,dataset_labels,params_grid,3,3,logger = f_logger,file_json = 'esperimenti.json',label=l) for cs,l in zip([columns_db_totale,columns_db_totali],[label_db_totale,label_db_totali])]

[                                              DataBMI_Totale
 c                                       0.021544346900318846
 fuzzifier  <class 'fuzzylearn.fuzzifiers.ExponentialFuzzi...
 k                                        GaussianKernel(0.9)
 score                                                  -0.33,
                                               DataBMI_Totali
 c                                       0.021544346900318846
 fuzzifier  <class 'fuzzylearn.fuzzifiers.QuantileLinearPi...
 k                                        GaussianKernel(0.1)
 score                                                  -0.33]

In [34]:
for c,l in zip([5,10,15,20,25,30],labels_all):
    dim_red = PCA(n_components=c)
    esperimento_registrato(dataset,columns_all,FuzzyInductor,dataset_labels,params_grid,3,3,logger = f_logger,file_json = 'esperimenti.json',dim_reduction=dim_red,label=l)


In [35]:
risultati_gaussian = pd.read_json("esperimenti.json")
risultati_gaussian

Unnamed: 0,Totale,Totali distretti,Totali,Lesioni 5 componenti,Lesioni 10 componenti,Lesioni 15 componenti,Lesioni 20 componenti,LesioniTotali5,LesioniTotali10,LesioniTotali15,...,Data_Lesioni_Totali25,Data_Lesioni_Totali30,DataBMI_Totale,DataBMI_Totali,All5,All10,All15,All20,All25,All30
c,0.021544346900318846,0.021544346900318846,0.021544346900318846,0.021544346900318846,0.021544346900318846,0.021544346900318846,0.021544346900318846,0.021544346900318846,0.021544346900318846,0.021544346900318846,...,0.021544346900318846,0.021544346900318846,0.021544346900318846,0.021544346900318846,0.021544346900318846,0.021544346900318846,0.021544346900318846,0.021544346900318846,0.021544346900318846,0.021544346900318846
fuzzifier,<class 'fuzzylearn.fuzzifiers.ExponentialFuzzi...,<class 'fuzzylearn.fuzzifiers.ExponentialFuzzi...,<class 'fuzzylearn.fuzzifiers.ExponentialFuzzi...,<class 'fuzzylearn.fuzzifiers.ExponentialFuzzi...,<class 'fuzzylearn.fuzzifiers.ExponentialFuzzi...,<class 'fuzzylearn.fuzzifiers.ExponentialFuzzi...,<class 'fuzzylearn.fuzzifiers.QuantileConstant...,<class 'fuzzylearn.fuzzifiers.ExponentialFuzzi...,<class 'fuzzylearn.fuzzifiers.LinearFuzzifier'>,<class 'fuzzylearn.fuzzifiers.QuantileConstant...,...,<class 'fuzzylearn.fuzzifiers.QuantileLinearPi...,<class 'fuzzylearn.fuzzifiers.QuantileLinearPi...,<class 'fuzzylearn.fuzzifiers.ExponentialFuzzi...,<class 'fuzzylearn.fuzzifiers.QuantileLinearPi...,<class 'fuzzylearn.fuzzifiers.QuantileLinearPi...,<class 'fuzzylearn.fuzzifiers.QuantileLinearPi...,<class 'fuzzylearn.fuzzifiers.QuantileLinearPi...,<class 'fuzzylearn.fuzzifiers.QuantileConstant...,<class 'fuzzylearn.fuzzifiers.QuantileConstant...,<class 'fuzzylearn.fuzzifiers.QuantileConstant...
k,GaussianKernel(0.1),GaussianKernel(0.7),GaussianKernel(0.7),GaussianKernel(0.9),GaussianKernel(0.9),GaussianKernel(0.5),GaussianKernel(0.3),GaussianKernel(0.9),GaussianKernel(0.9),GaussianKernel(0.3),...,GaussianKernel(0.6),GaussianKernel(0.6),GaussianKernel(0.9),GaussianKernel(0.1),GaussianKernel(0.255),GaussianKernel(0.5),GaussianKernel(0.8),GaussianKernel(0.4),GaussianKernel(0.4),GaussianKernel(0.4)
score,-0.31,-0.30,-0.30,-0.31,-0.47,-0.46,-0.32,-0.33,-0.33,-0.38,...,-0.45,-0.38,-0.33,-0.33,-0.37,-0.44,-0.44,-0.32,-0.33,-0.33


### Proviamo ad aggiungere Kernel diversi da quello Gaussiano

Eseguiamo solo gli esperimenti che ci hanno dato i risultati migliori

In [36]:
degrees = list(range(1,11))
offsets = list(range(0,6))
params_grid_kernels = { 'c':c_space,
               'k': [LinearKernel()] + [PolynomialKernel(d) for d in degrees] + 
                    [HomogeneousPolynomialKernel(d) for d in degrees] + [HyperbolicKernel(1,o) for o in offsets],
               'fuzzifier':[LinearFuzzifier,ExponentialFuzzifier, CrispFuzzifier, 
              QuantileConstantPiecewiseFuzzifier, QuantileLinearPiecewiseFuzzifier]
}

In [37]:
for c,l in zip([columns_BMI_all,columns_all,columns_lesioni,columns_totali_lesioni],['BMI_Lesioni_Totali20K','All20K','Lesioni20K','LesioniTotali20K']):
    dim_red = PCA(n_components=20)
    esperimento_registrato(dataset,c,FuzzyInductor,dataset_labels,params_grid_kernels,3,3,logger = f_logger,file_json = 'esperimenti_kernels.json',dim_reduction=dim_red,label=l)

In [38]:
for c,l in zip([['Totale'],['Tot Testa','Tot Torace','Tot Addome','Tot Scheletro'],['Totale','Tot Testa','Tot Torace','Tot Addome','Tot Scheletro']],['TotaleK','TotaliDistrettiK','TotaliK']):
    esperimento_registrato(dataset,c,FuzzyInductor,dataset_labels,params_grid_kernels,3,3,logger = f_logger,file_json = 'esperimenti_kernels.json',label=l)

In [39]:
risultati_other_kernels = pd.read_json("esperimenti_kernels.json")
risultati_other_kernels

Unnamed: 0,TotaleK,TotaliK,TotaliDistrettiK,LesioniTotali20K,Lesioni20K,All20K,BMI_Lesioni_Totali20K
c,0.021544346900318846,0.021544346900318846,0.021544346900318846,0.021544346900318846,0.021544346900318846,0.021544346900318846,0.021544346900318846
fuzzifier,<class 'fuzzylearn.fuzzifiers.ExponentialFuzzi...,<class 'fuzzylearn.fuzzifiers.ExponentialFuzzi...,<class 'fuzzylearn.fuzzifiers.LinearFuzzifier'>,<class 'fuzzylearn.fuzzifiers.LinearFuzzifier'>,<class 'fuzzylearn.fuzzifiers.ExponentialFuzzi...,<class 'fuzzylearn.fuzzifiers.LinearFuzzifier'>,<class 'fuzzylearn.fuzzifiers.LinearFuzzifier'>
k,HomogeneousPolynomialKernel(4),"HyperbolicKernel(1, 1)","HyperbolicKernel(1, 1)","HyperbolicKernel(1, 0)","HyperbolicKernel(1, 0)","HyperbolicKernel(1, 0)","HyperbolicKernel(1, 0)"
score,-0.31,-0.31,-0.32,-0.29,-0.29,-0.29,-0.29


### Proviamo T-SNE

In [40]:
from sklearn.manifold import TSNE

In [41]:
"""
with open('esperimenti_tsne.json',"w") as write_file:
    json.dump({},write_file)
"""

'\nwith open(\'esperimenti_tsne.json\',"w") as write_file:\n    json.dump({},write_file)\n'

Prova tsne con la riduzione a 5,10,15,20,25,30 componenti sul miglior sottodataset degli esperimenti gaussiani

In [42]:
best_exp=[col for col in risultati_gaussian.columns if risultati_gaussian[col]['score'] == risultati_gaussian.loc['score'].max()]
best_exp

['BMI_Lesioni_Totali20']

In [43]:
for c,l in zip([5,10,15,20,25,30],list(map(lambda x: str(x)+"tsne",labels_BMI_all))):
    dim_red = TSNE(n_components=c,method='exact')
    esperimento_registrato(dataset,columns_BMI_all,FuzzyInductor,dataset_labels,params_grid,3,3,logger = f_logger,file_json = 'esperimenti_tsne.json',dim_reduction=dim_red,label=l)

In [44]:
risultati_tsne = pd.read_json('esperimenti_tsne.json')
risultati_tsne

Unnamed: 0,BMI_Lesioni_Totali5tsne,BMI_Lesioni_Totali10tsne,BMI_Lesioni_Totali15tsne,BMI_Lesioni_Totali20tsne,BMI_Lesioni_Totali25tsne,BMI_Lesioni_Totali30tsne
c,0.021544346900318846,0.021544346900318846,0.021544346900318846,0.021544346900318846,0.021544346900318846,0.021544346900318846
fuzzifier,<class 'fuzzylearn.fuzzifiers.QuantileConstant...,<class 'fuzzylearn.fuzzifiers.QuantileConstant...,<class 'fuzzylearn.fuzzifiers.QuantileConstant...,<class 'fuzzylearn.fuzzifiers.QuantileConstant...,<class 'fuzzylearn.fuzzifiers.QuantileConstant...,<class 'fuzzylearn.fuzzifiers.ExponentialFuzzi...
k,GaussianKernel(0.1),GaussianKernel(0.1),GaussianKernel(0.1),GaussianKernel(0.1),GaussianKernel(0.1),GaussianKernel(0.9)
score,-0.33,-0.33,-0.33,-0.33,-0.33,-0.46


Compariamoli con i risultati con kernel gaussian della PCA

In [45]:
risultati_gaussian[labels_BMI_all]

Unnamed: 0,BMI_Lesioni_Totali5,BMI_Lesioni_Totali10,BMI_Lesioni_Totali15,BMI_Lesioni_Totali20,BMI_Lesioni_Totali25,BMI_Lesioni_Totali30
c,0.021544346900318846,0.021544346900318846,0.021544346900318846,0.021544346900318846,0.021544346900318846,0.021544346900318846
fuzzifier,<class 'fuzzylearn.fuzzifiers.QuantileConstant...,<class 'fuzzylearn.fuzzifiers.QuantileLinearPi...,<class 'fuzzylearn.fuzzifiers.QuantileLinearPi...,<class 'fuzzylearn.fuzzifiers.QuantileLinearPi...,<class 'fuzzylearn.fuzzifiers.QuantileLinearPi...,<class 'fuzzylearn.fuzzifiers.QuantileLinearPi...
k,GaussianKernel(0.255),GaussianKernel(0.5),GaussianKernel(0.8),GaussianKernel(0.4),GaussianKernel(0.4),GaussianKernel(0.4)
score,-0.38,-0.44,-0.44,-0.29,-0.33,-0.33


## Classe 0

In [46]:
# create logger
f0_logger = logging.getLogger(__name__)
f0_logger.setLevel(logging.INFO)

# create console handler and set level to debug
file0 = logging.FileHandler('esperimenti0.log')
file0.setLevel(logging.INFO)

# create formatter
formatter = logging.Formatter('%(asctime)s -%(message)s')

# add formatter to ch
file0.setFormatter(formatter)

# add ch to logger
f0_logger.addHandler(file0)

In [47]:
params_grid_0 = { 'c':c_space,
               'k': [LinearKernel()] + [PolynomialKernel(d) for d in degrees] + 
                    [HomogeneousPolynomialKernel(d) for d in degrees] + [HyperbolicKernel(1,o) for o in offsets] + 
                    [GaussianKernel(sigma) for sigma in sigmas],
                'fuzzifier': [LinearFuzzifier,ExponentialFuzzifier, CrispFuzzifier, 
              QuantileConstantPiecewiseFuzzifier, QuantileLinearPiecewiseFuzzifier]
}

In [48]:
mu0 = list(map(lambda x: 0 if x else 1,dataset_labels))

Effettuiamo solo gli esperimenti più ragionevoli con l'altra classe.

In [49]:
risultati = risultati_gaussian.join(risultati_other_kernels)
[col for col in risultati.columns if risultati[col]['score']>= -0.33]

['Totale',
 'Totali distretti',
 'Totali',
 'Lesioni 5 componenti',
 'Lesioni 20 componenti',
 'Anagrafica_Totale',
 'BMI_Lesioni_Totali20',
 'All20',
 'TotaleK',
 'TotaliK',
 'TotaliDistrettiK',
 'LesioniTotali20K',
 'Lesioni20K',
 'All20K',
 'BMI_Lesioni_Totali20K']

In [50]:
for c,l in zip([columns_BMI_all,columns_all,columns_lesioni,columns_totali_lesioni],['BMI_Lesioni_Totali20_0','All20_0','Lesioni20_0','LesioniTotali20_0']):
    dim_red = PCA(n_components=20)
    esperimento_registrato(dataset,c,FuzzyInductor,mu0,params_grid_0,3,3,logger = f0_logger,file_json = 'esperimenti0.json',dim_reduction=dim_red,label=l)

In [51]:
for c,l in zip([['Totale'],['Tot Testa','Tot Torace','Tot Addome','Tot Scheletro'],['Totale','Tot Testa','Tot Torace','Tot Addome','Tot Scheletro']],['Totale_0','TotaliDistretti_0','Totali_0']):
    esperimento_registrato(dataset,c,FuzzyInductor,mu0,params_grid_0,3,3,logger = f0_logger,file_json = 'esperimenti0.json',label=l)

In [52]:
risultati0 = pd.read_json("esperimenti0.json")
risultati0

Unnamed: 0,BMI_Lesioni_Totali5_0,BMI_Lesioni_Totali10_0,BMI_Lesioni_Totali15_0,BMI_Lesioni_Totali20_0,Anagrafica_Totale_0,Lesioni5_0,Lesioni10_0,Lesioni15_0,Lesioni20_0,Totale_0,TotaliDistretti_0,Totali_0,All5_0,All10_0,All15_0,All20_0,LesioniTotali5_0,LesioniTotali10_0,LesioniTotali15_0,LesioniTotali20_0
c,0.021544346900318846,0.021544346900318846,0.021544346900318846,0.021544346900318846,0.021544346900318846,0.021544346900318846,0.021544346900318846,0.021544346900318846,0.021544346900318846,0.021544346900318846,0.021544346900318846,0.021544346900318846,0.021544346900318846,0.021544346900318846,0.021544346900318846,0.021544346900318846,0.021544346900318846,0.021544346900318846,0.021544346900318846,0.021544346900318846
fuzzifier,<class 'fuzzylearn.fuzzifiers.LinearFuzzifier'>,<class 'fuzzylearn.fuzzifiers.LinearFuzzifier'>,<class 'fuzzylearn.fuzzifiers.ExponentialFuzzi...,<class 'fuzzylearn.fuzzifiers.LinearFuzzifier'>,<class 'fuzzylearn.fuzzifiers.LinearFuzzifier'>,<class 'fuzzylearn.fuzzifiers.LinearFuzzifier'>,<class 'fuzzylearn.fuzzifiers.ExponentialFuzzi...,<class 'fuzzylearn.fuzzifiers.LinearFuzzifier'>,<class 'fuzzylearn.fuzzifiers.LinearFuzzifier'>,<class 'fuzzylearn.fuzzifiers.ExponentialFuzzi...,<class 'fuzzylearn.fuzzifiers.ExponentialFuzzi...,<class 'fuzzylearn.fuzzifiers.ExponentialFuzzi...,<class 'fuzzylearn.fuzzifiers.LinearFuzzifier'>,<class 'fuzzylearn.fuzzifiers.LinearFuzzifier'>,<class 'fuzzylearn.fuzzifiers.ExponentialFuzzi...,<class 'fuzzylearn.fuzzifiers.LinearFuzzifier'>,<class 'fuzzylearn.fuzzifiers.LinearFuzzifier'>,<class 'fuzzylearn.fuzzifiers.LinearFuzzifier'>,<class 'fuzzylearn.fuzzifiers.LinearFuzzifier'>,<class 'fuzzylearn.fuzzifiers.LinearFuzzifier'>
k,"HyperbolicKernel(1, 2)","HyperbolicKernel(1, 2)",LinearKernel(),LinearKernel(),"HyperbolicKernel(1, 0)","HyperbolicKernel(1, 1)",LinearKernel(),LinearKernel(),LinearKernel(),GaussianKernel(0.9),"HyperbolicKernel(1, 1)","HyperbolicKernel(1, 1)","HyperbolicKernel(1, 2)","HyperbolicKernel(1, 2)",LinearKernel(),LinearKernel(),"HyperbolicKernel(1, 1)","HyperbolicKernel(1, 2)",LinearKernel(),LinearKernel()
score,-0.25,-0.26,-0.26,-0.25,-0.28,-0.25,-0.25,-0.25,-0.24,-0.27,-0.26,-0.25,-0.25,-0.26,-0.26,-0.25,-0.25,-0.26,-0.25,-0.25


#### Proviamo scaler diversi sul subdataset che ha dato i risultati migliori

In [53]:
best_exp = [col for col in risultati.columns if risultati[col]['score'] == risultati.loc['score'].max()]
best_exp0 = [col for col in risultati0.columns if risultati0[col]['score'] == risultati0.loc['score'].max()]
best_exp,best_exp0

(['Lesioni20K'], ['Lesioni20_0'])

In [54]:
dim_red=PCA(n_components=20)
scalers = [StandardScaler(),MinMaxScaler(), MaxAbsScaler(), QuantileTransformer(output_distribution='normal'), 
           QuantileTransformer(output_distribution='uniform'), Normalizer(),
           RobustScaler(quantile_range=(25, 75)), PowerTransformer(method='yeo-johnson')]

In [55]:
"""
scalers_table_1 = {}
scalers_table_0 = {}

"""

'\nscalers_table_1 = {}\nscalers_table_0 = {}\n\n'

In [56]:
X = dataset[columns_lesioni].values

In [57]:
best_params_esp1 = {
    'c': [0.021544346900318846],
    'k': [HyperbolicKernel(1,0)],
    'fuzzifier': [ExponentialFuzzifier]
}

In [58]:
best_params_esp0 = {
    'c': [0.021544346900318846],
    'k': [LinearKernel()],
    'fuzzifier': [LinearFuzzifier]
}

In [59]:
"""
for scaler in scalers:
    scores, _ = addestra(FuzzyInductor, X, dataset_labels, best_params_esp1, 3, 3, logger = f_logger, scaling=scaler, dim_reduction=dim_red)
    scalers_table_1[str(scaler)] = scores.mean()
"""

'\nfor scaler in scalers:\n    scores, _ = addestra(FuzzyInductor, X, dataset_labels, best_params_esp1, 3, 3, logger = f_logger, scaling=scaler, dim_reduction=dim_red)\n    scalers_table_1[str(scaler)] = scores.mean()\n'

In [60]:
"""
for scaler in scalers:
    scores , _ = addestra(FuzzyInductor, X, mu0, best_params_esp0, 3, 3, logger = f0_logger, scaling=scaler, dim_reduction=dim_red)
    scalers_table_0[str(scaler)] = scores.mean()
"""

'\nfor scaler in scalers:\n    scores , _ = addestra(FuzzyInductor, X, mu0, best_params_esp0, 3, 3, logger = f0_logger, scaling=scaler, dim_reduction=dim_red)\n    scalers_table_0[str(scaler)] = scores.mean()\n'

In [61]:
#scalers_table_best1 = pd.DataFrame.from_dict(scalers_table_1,orient='index')

In [62]:
#scalers_table_best0 = pd.DataFrame.from_dict(scalers_table_0,orient='index')

In [63]:
#scalers_table_best1 = scalers_table_best1.rename(columns={0:best_exp[0]})

In [64]:
#scalers_table_best0 = scalers_table_best0.rename(columns={0:best_exp0[0]})

In [65]:
#scalers_table = scalers_table_best1.join(scalers_table_best0)
#scalers_table.to_json("scalers.json")

In [66]:
scalers_table = pd.read_json("scalers.json")
scalers_table

Unnamed: 0,Lesioni20K,Lesioni20_0
"StandardScaler(copy=True, with_mean=True, with_std=True)",-0.29,-0.24
"MinMaxScaler(copy=True, feature_range=(0, 1))",-0.32,-0.24
MaxAbsScaler(copy=True),-0.32,-0.24
"QuantileTransformer(copy=True, ignore_implicit_zeros=False, n_quantiles=1000,\n output_distribution='normal', random_state=None,\n subsample=100000)",-0.28,-0.24
"QuantileTransformer(copy=True, ignore_implicit_zeros=False, n_quantiles=1000,\n output_distribution='uniform', random_state=None,\n subsample=100000)",-0.29,-0.25
"Normalizer(copy=True, norm='l2')",-0.54,-0.34
"RobustScaler(copy=True, quantile_range=(25, 75), with_centering=True,\n with_scaling=True)",-0.29,-0.24
"PowerTransformer(copy=True, method='yeo-johnson', standardize=True)",-0.28,-0.26


# Fuzzifier table

In [67]:
def fuzzifier_table(dataset,fuzzifiers,columns,model_class, y, model_selection_grid, num_fold_grid_search, num_fold_cross_val, logger = None, scaling=StandardScaler(), dim_reduction=None,label=None):
    sd_index = label if label is not None else str(columns)
    table = pd.DataFrame({sd_index:[None for f in fuzzifiers]},index=[fuzzifiers_to_string[f] for f in fuzzifiers])

    X = dataset[columns].values
    
    X_std = scaling.fit_transform(X)  if scaling is not None else X
    
    X_std = dim_reduction.fit_transform(X_std) if dim_reduction is not None else X_std
    
    clf = GridSearchCV(estimator=model_class(), param_grid=model_selection_grid, cv=num_fold_grid_search, n_jobs=-1)
    clf.fit(X_std,y)
    
    bs_grid = clf.best_params_
    
    for i,j in bs_grid.items():
        bs_grid[i] = [j]
    
    for fuzzifier in fuzzifiers:
        params_fuzzifier = bs_grid
        params_fuzzifier['fuzzifier'] = [fuzzifier]
        score, _ = esperimento(dataset,columns,model_class, y, params_fuzzifier, num_fold_grid_search, num_fold_cross_val, logger = logger, scaling=scaling, dim_reduction=dim_reduction,label=sd_index)
        table.loc[fuzzifiers_to_string[fuzzifier]][sd_index] = score
    return table

In [68]:
def incidenti_fuzzifier_table(dataset,subdatasets,fuzzifiers,model_class, y, model_selection_grid, num_fold_grid_search, num_fold_cross_val, logger = None, scaling=StandardScaler(), dim_reduction=None, file_json=None,labels=None):
    table = pd.read_json(file_json) if file_json is not None else pd.DataFrame(index=[fuzzifiers_to_string[f] for f in fuzzifiers])
    if len(table.index) == 0:
        table = pd.DataFrame(index=[fuzzifiers_to_string[f] for f in fuzzifiers])
    df_cols = labels if labels is not None else subdatasets
    for col,sd in zip(df_cols,subdatasets):
        if str(col) not in table.columns:
            table[str(col)] = fuzzifier_table(dataset,fuzzifiers,sd,model_class, y, model_selection_grid, num_fold_grid_search, num_fold_cross_val, logger = logger, scaling = scaling,dim_reduction=dim_reduction,label=str(col))
            if file_json is not None:
                 with open(file_json, "w") as write_file:
                    json.dump(table.to_dict(), write_file)
    return table

In [69]:
fuzzifiers_class = [LinearFuzzifier,ExponentialFuzzifier, CrispFuzzifier, QuantileConstantPiecewiseFuzzifier, QuantileLinearPiecewiseFuzzifier]
fuzzifiers_to_string = {LinearFuzzifier: "LinearFuzzifier",
                        ExponentialFuzzifier:"ExponentialFuzzifier",
                        CrispFuzzifier: "CrispFuzzifier",
                        QuantileConstantPiecewiseFuzzifier: " QuantileConstantPiecewiseFuzzifier",
                        QuantileLinearPiecewiseFuzzifier: "QuantileLinearPiecewiseFuzzifier"}

params_grid_noFuzzifier = {'c':c_space,
       'k':[GaussianKernel(sigma) for sigma in sigmas],   
}

Totali

In [70]:
ft = incidenti_fuzzifier_table(dataset,columns_noPCA,fuzzifiers_class,FuzzyInductor,dataset_labels,params_grid_noFuzzifier,3,3,logger=f_logger,file_json='fuzzifiers.json',labels=labels)

In [71]:
ft.to_json('fuzzifiers.json')

Lesioni

In [72]:
for i,l in zip([5,10,15,20],labels_lesioni):
    dim_red=PCA(n_components=i)
    t = incidenti_fuzzifier_table(dataset,[columns_lesioni],fuzzifiers_class,FuzzyInductor,dataset_labels,params_grid_noFuzzifier,3,3,logger=f_logger,dim_reduction=dim_red,file_json='fuzzifiers.json',labels=[l])
    t.to_json('fuzzifiers.json')

In [73]:
for i,l in zip([5,10,15,20],labels_totali_lesioni):
    dim_red=PCA(n_components=i)
    t = incidenti_fuzzifier_table(dataset,[columns_totali_lesioni],fuzzifiers_class,FuzzyInductor,dataset_labels,params_grid_noFuzzifier,3,3,logger=f_logger,dim_reduction=dim_red,file_json='fuzzifiers.json',labels=[l])
    t.to_json('fuzzifiers.json')

Anagrafica

In [74]:
for cs,l in zip([columns_anagrafica_totale,columns_anagrafica_totali],[label_anagrafica_totale,label_anagrafica_totali]):
    t = incidenti_fuzzifier_table(dataset,[cs],fuzzifiers_class,FuzzyInductor,dataset_labels,params_grid_noFuzzifier,3,3,logger=f_logger,file_json='fuzzifiers.json',labels=[l])
    t.to_json('fuzzifiers.json')

In [75]:
for i,l in zip([5,10,15,20],labels_anagrafica_all):
    dim_red=PCA(n_components=i)
    t = incidenti_fuzzifier_table(dataset,[columns_anagrafica_all],fuzzifiers_class,FuzzyInductor,dataset_labels,params_grid_noFuzzifier,3,3,logger=f_logger,dim_reduction=dim_red,file_json='fuzzifiers.json',labels=[l])
    t.to_json('fuzzifiers.json')

BMI

In [76]:
for cs,l in zip([columns_BMI_totale,columns_BMI_totali],[label_BMI_totale,label_BMI_totali]):
    t = incidenti_fuzzifier_table(dataset,[cs],fuzzifiers_class,FuzzyInductor,dataset_labels,params_grid_noFuzzifier,3,3,logger=f_logger,file_json='fuzzifiers.json',labels=[l])
    t.to_json('fuzzifiers.json')

In [77]:
for i,l in zip([5,10,15,20,25,30],labels_BMI_all):
    dim_red=PCA(n_components=i)
    t = incidenti_fuzzifier_table(dataset,[columns_BMI_all],fuzzifiers_class,FuzzyInductor,dataset_labels,params_grid_noFuzzifier,3,3,logger=f_logger,dim_reduction=dim_red,file_json='fuzzifiers.json',labels=[l])
    t.to_json('fuzzifiers.json')

Data

In [78]:
for cs,l in zip([columns_data_totale,columns_data_totali],[label_data_totale,label_data_totali]):
    t = incidenti_fuzzifier_table(dataset,[cs],fuzzifiers_class,FuzzyInductor,dataset_labels,params_grid_noFuzzifier,3,3,logger=f_logger,file_json='fuzzifiers.json',labels=[l])
    t.to_json('fuzzifiers.json')

In [79]:
for i,l in zip([5,10,15,20,25,30],labels_data_all):
    dim_red=PCA(n_components=i)
    t = incidenti_fuzzifier_table(dataset,[columns_data_all],fuzzifiers_class,FuzzyInductor,dataset_labels,params_grid_noFuzzifier,3,3,logger=f_logger,dim_reduction=dim_red,file_json='fuzzifiers.json',labels=[l])
    t.to_json('fuzzifiers.json')

All

In [80]:
for cs,l in zip([columns_db_totale,columns_db_totali],[label_db_totale,label_db_totali]):
    t = incidenti_fuzzifier_table(dataset,[cs],fuzzifiers_class,FuzzyInductor,dataset_labels,params_grid_noFuzzifier,3,3,logger=f_logger,file_json='fuzzifiers.json',labels=[l])
    t.to_json('fuzzifiers.json')

In [81]:
for i,l in zip([5,10,15,20,25,30],labels_all):
    dim_red=PCA(n_components=i)
    t = incidenti_fuzzifier_table(dataset,[columns_all],fuzzifiers_class,FuzzyInductor,dataset_labels,params_grid_noFuzzifier,3,3,logger=f_logger,dim_reduction=dim_red,file_json='fuzzifiers.json',labels=[l])
    t.to_json('fuzzifiers.json')

In [82]:
t['Anagrafica_Lesioni_Totali20']

 QuantileConstantPiecewiseFuzzifier   -0.38
CrispFuzzifier                        -0.46
ExponentialFuzzifier                  -0.46
LinearFuzzifier                       -0.46
QuantileLinearPiecewiseFuzzifier      -0.37
Name: Anagrafica_Lesioni_Totali20, dtype: float64

In [83]:
t

Unnamed: 0,Totale,Totali distretti,Totali,Lesioni 5 componenti,Lesioni 10 componenti,Lesioni 15 componenti,Lesioni 20 componenti,LesioniTotali5,LesioniTotali10,LesioniTotali15,...,All5,All10,All15,All20,All25,All30,PesoLesioni20,AltezzaLesioni20,BMILesioniNokgh20,TotaliDistretti_MSOrdini
QuantileConstantPiecewiseFuzzifier,-0.53,-0.54,-0.53,-0.57,-0.51,-0.45,-0.32,-0.53,-0.45,-0.45,...,-0.38,-0.44,-0.45,-0.32,-0.33,-0.33,-0.37,-0.35,-0.37,-0.49
CrispFuzzifier,-0.55,-0.61,-0.6,-0.62,-0.63,-0.48,-0.46,-0.65,-0.5,-0.48,...,-0.46,-0.46,-0.46,-0.46,-0.46,-0.46,-0.45,-0.41,-0.41,-0.54
ExponentialFuzzifier,-0.31,-0.3,-0.3,-0.31,-0.42,-0.46,-0.46,-0.33,-0.45,-0.46,...,-0.46,-0.46,-0.46,-0.46,-0.46,-0.46,-0.29,-0.29,-0.29,-0.31
LinearFuzzifier,-0.31,-0.31,-0.31,-0.32,-0.57,-0.45,-0.46,-0.36,-0.46,-0.45,...,-0.46,-0.46,-0.46,-0.46,-0.46,-0.46,-0.29,-0.29,-0.29,-0.32
QuantileLinearPiecewiseFuzzifier,-0.53,-0.58,-0.56,-0.57,-0.53,-0.46,-0.32,-0.55,-0.46,-0.45,...,-0.37,-0.44,-0.44,-0.32,-0.33,-0.33,-0.37,-0.32,-0.35,-0.48


Altri kernel

In [84]:
params_grid_kernels_noFuzzifier = { 'c':c_space,
               'k': [LinearKernel()] + [PolynomialKernel(d) for d in degrees] + 
                    [HomogeneousPolynomialKernel(d) for d in degrees] + [HyperbolicKernel(1,o) for o in offsets],
}

In [85]:
for c,l in zip([['Totale'],['Tot Testa','Tot Torace','Tot Addome','Tot Scheletro'],['Totale','Tot Testa','Tot Torace','Tot Addome','Tot Scheletro']],['TotaleK','TotaliDistrettiK','TotaliK']):
    tk= incidenti_fuzzifier_table(dataset,[c],fuzzifiers_class,FuzzyInductor,dataset_labels,params_grid_kernels_noFuzzifier,3,3,logger=f_logger,file_json='fuzzifiers_kernels.json',labels=[l])
    tk.to_json('fuzzifiers_kernels.json')

In [86]:
for c,l in zip([columns_BMI_all,columns_all,columns_lesioni,columns_totali_lesioni],['BMI_Lesioni_Totali20K','All20K','Lesioni20K','LesioniTotali20K']):
    dim_red = PCA(n_components=20)
    tk= incidenti_fuzzifier_table(dataset,[c],fuzzifiers_class,FuzzyInductor,dataset_labels,params_grid_kernels_noFuzzifier,3,3,logger=f_logger,dim_reduction=dim_red,file_json='fuzzifiers_kernels.json',labels=[l])
    tk.to_json('fuzzifiers_kernels.json')

In [87]:
tk = pd.read_json("fuzzifiers_kernels.json")
tk

Unnamed: 0,TotaleK,BMI_Lesioni_Totali20K,All20K,Lesioni20K,LesioniTotali20K,TotaliDistrettiK,TotaliK
QuantileConstantPiecewiseFuzzifier,-0.56,-0.43,-0.44,-0.35,-0.41,-0.49,-0.46
CrispFuzzifier,-0.43,-0.43,-0.43,-0.43,-0.45,-0.54,-0.49
ExponentialFuzzifier,-0.31,-0.29,-0.29,-0.29,-0.29,-0.31,-0.31
LinearFuzzifier,-0.58,-0.29,-0.29,-0.29,-0.29,-0.32,-0.31
QuantileLinearPiecewiseFuzzifier,-0.56,-0.42,-0.41,-0.36,-0.39,-0.49,-0.46


t-SNE

In [88]:
labels_BMI_all_tsne = list(map(lambda x: str(x)+"tsne",labels_BMI_all))

for i,l in zip([5,10,15,20,25,30],labels_BMI_all_tsne):
    dim_red=TSNE(n_components=i,method='exact')
    t = incidenti_fuzzifier_table(dataset,[columns_BMI_all],fuzzifiers_class,FuzzyInductor,dataset_labels,params_grid_noFuzzifier,3,3,logger=f_logger,dim_reduction=dim_red,file_json='fuzzifiers_tsne.json',labels=[l])
    t.to_json('fuzzifiers_tsne.json')

In [89]:
ftsample_tsne = pd.read_json('fuzzifiers_tsne.json')
ftsample_tsne

Unnamed: 0,BMI_Lesioni_Totali5tsne,BMI_Lesioni_Totali10tsne,BMI_Lesioni_Totali15tsne,BMI_Lesioni_Totali20tsne,BMI_Lesioni_Totali25tsne,BMI_Lesioni_Totali30tsne
QuantileConstantPiecewiseFuzzifier,-0.33,-0.33,-0.33,-0.33,-0.33,-0.46
CrispFuzzifier,-0.46,-0.46,-0.46,-0.46,-0.46,-0.46
ExponentialFuzzifier,-0.46,-0.46,-0.46,-0.46,-0.46,-0.46
LinearFuzzifier,-0.46,-0.46,-0.46,-0.46,-0.46,-0.46
QuantileLinearPiecewiseFuzzifier,-0.33,-0.33,-0.33,-0.33,-0.33,-0.46


Classe 0

In [90]:
mu0 = list(map(lambda x: 0 if x else 1,dataset_labels))

In [91]:
params_grid_0_noFuzzifier = params_grid_kernels_noFuzzifier = { 'c':c_space,
               'k': [LinearKernel()] + [PolynomialKernel(d) for d in degrees] + 
                    [HomogeneousPolynomialKernel(d) for d in degrees] + [HyperbolicKernel(1,o) for o in offsets] + 
                    [GaussianKernel(sigma) for sigma in sigmas]
}

In [92]:
"""
with open('fuzzifiers0.json',"w") as write_file:
    json.dump({},write_file)
"""

'\nwith open(\'fuzzifiers0.json\',"w") as write_file:\n    json.dump({},write_file)\n'

In [93]:
for c,l in zip([['Totale'],['Tot Testa','Tot Torace','Tot Addome','Tot Scheletro'],['Totale','Tot Testa','Tot Torace','Tot Addome','Tot Scheletro']],['Totale_0','TotaliDistretti_0','Totali_0']):
    tk= incidenti_fuzzifier_table(dataset,[c],fuzzifiers_class,FuzzyInductor,mu0,params_grid_0_noFuzzifier,3,3,logger=f0_logger,file_json='fuzzifiers0.json',labels=[l])
    tk.to_json('fuzzifiers0.json')

In [94]:
for c,l in zip([columns_BMI_all,columns_all,columns_lesioni,columns_totali_lesioni],['BMI_Lesioni_Totali20_0','All20_0','Lesioni20_0','LesioniTotali20_0']):
    dim_red = PCA(n_components=20)
    t0= incidenti_fuzzifier_table(dataset,[c],fuzzifiers_class,FuzzyInductor,mu0,params_grid_0_noFuzzifier,3,3,logger=f0_logger,dim_reduction=dim_red,file_json='fuzzifiers0.json',labels=[l])
    t0.to_json('fuzzifiers0.json')

In [95]:
dim_red = PCA(n_components=5)
l='Lesioni5_0'
t0= incidenti_fuzzifier_table(dataset,[columns_lesioni],fuzzifiers_class,FuzzyInductor,mu0,params_grid_0_noFuzzifier,3,3,logger=f0_logger,dim_reduction=dim_red,file_json='fuzzifiers0.json',labels=[l])
t0.to_json('fuzzifiers0.json')

In [96]:
l='Anagrafica_Totale_0'
t0= incidenti_fuzzifier_table(dataset,[columns_anagrafica_totale],fuzzifiers_class,FuzzyInductor,mu0,params_grid_0_noFuzzifier,3,3,logger=f0_logger,file_json='fuzzifiers0.json',labels=[l])
t0.to_json('fuzzifiers0.json')

In [97]:
for c,l in zip([10,15],['Lesioni10_0','Lesioni15_0']):
    dim_red=PCA(n_components=c)
    t0 = incidenti_fuzzifier_table(dataset,[columns_lesioni],fuzzifiers_class,FuzzyInductor,mu0,params_grid_0_noFuzzifier,3,3,logger=f0_logger,dim_reduction=dim_red,file_json='fuzzifiers0.json',labels=[l])
    t0.to_json('fuzzifiers0.json')

In [98]:
for c,l in zip([5,10,15],['All5_0','All10_0','All15_0']):
    dim_red=PCA(n_components=c)
    t0 = incidenti_fuzzifier_table(dataset,[columns_all],fuzzifiers_class,FuzzyInductor,mu0,params_grid_0_noFuzzifier,3,3,logger=f0_logger,dim_reduction=dim_red,file_json='fuzzifiers0.json',labels=[l])
    t0.to_json('fuzzifiers0.json')

In [99]:
for c,l in zip([5,10,15],['LesioniTotali5_0','LesioniTotali10_0','LesioniTotali15_0']):
    dim_red=PCA(n_components=c)
    t0 = incidenti_fuzzifier_table(dataset,[columns_totali_lesioni],fuzzifiers_class,FuzzyInductor,mu0,params_grid_0_noFuzzifier,3,3,logger=f0_logger,dim_reduction=dim_red,file_json='fuzzifiers0.json',labels=[l])
    t0.to_json('fuzzifiers0.json')

In [100]:
for c,l in zip([5,10,15],['BMI_Lesioni_Totali5_0','BMI_Lesioni_Totali110_0','BMI_Lesioni_Totali15_0']):
    dim_red=PCA(n_components=c)
    t0 = incidenti_fuzzifier_table(dataset,[columns_BMI_all],fuzzifiers_class,FuzzyInductor,mu0,params_grid_0_noFuzzifier,3,3,logger=f0_logger,dim_reduction=dim_red,file_json='fuzzifiers0.json',labels=[l])
    t0.to_json('fuzzifiers0.json')

In [101]:
ft0 = pd.read_json("fuzzifiers0.json")
ft0

Unnamed: 0,BMI_Lesioni_Totali20_0,Totale_0,TotaliDistretti_0,Totali_0,All20_0,Lesioni20_0,LesioniTotali20_0,Lesioni5_0,Anagrafica_Totale_0,Lesioni10_0,...,LesioniTotali5_0,LesioniTotali10_0,LesioniTotali15_0,BMI_Lesioni_Totali5_0,BMI_Lesioni_Totali110_0,BMI_Lesioni_Totali15_0,PesoLesioni20_0,AltezzaLesioni20_0,BMILesioniNokgh20_0,TotaliDistretti_MSOrdine_0
QuantileConstantPiecewiseFuzzifier,-0.38,-0.31,-0.29,-0.29,-0.38,-0.36,-0.34,-0.31,-0.29,-0.35,...,-0.28,-0.32,-0.34,-0.32,-0.34,-0.38,-0.37,-0.37,-0.32,-0.29
CrispFuzzifier,-0.54,-0.38,-0.33,-0.31,-0.54,-0.54,-0.54,-0.31,-0.34,-0.53,...,-0.32,-0.31,-0.54,-0.31,-0.32,-0.54,-0.54,-0.54,-0.34,-0.33
ExponentialFuzzifier,-0.26,-0.27,-0.26,-0.25,-0.26,-0.25,-0.25,-0.26,-0.28,-0.25,...,-0.25,-0.26,-0.25,-0.26,-0.26,-0.26,-0.25,-0.26,-0.25,-0.26
LinearFuzzifier,-0.25,-0.27,-0.26,-0.26,-0.25,-0.24,-0.25,-0.25,-0.28,-0.26,...,-0.25,-0.26,-0.25,-0.25,-0.26,-0.27,-0.25,-0.25,-0.25,-0.26
QuantileLinearPiecewiseFuzzifier,-0.38,-0.32,-0.3,-0.3,-0.38,-0.36,-0.34,-0.3,-0.3,-0.35,...,-0.28,-0.31,-0.34,-0.3,-0.32,-0.38,-0.37,-0.37,-0.33,-0.3


### Introduciamo generator diversi

In [102]:
sigma_space = [i for i in range (1,11)]

params_grid_generator = {
    'c': c_space,
    'k': [GaussianKernel(sigma) for sigma in sigma_space] + [LinearKernel()] + [PolynomialKernel(d) for d in degrees] + 
         [HomogeneousPolynomialKernel(d) for d in degrees] + [HyperbolicKernel(1,o) for o in offsets] + [GaussianKernel(sigma) for sigma in sigmas],             
    'sample_generator': [None, lambda x: -8+x*16, lambda x: -16+x*32]
}
l = 'Lesioni20Gen'

In [103]:
#tgen = incidenti_fuzzifier_table(dataset,[columns_lesioni],fuzzifiers_class,FuzzyInductor,dataset_labels,params_grid_generator,3,3,logger=f_logger,dim_reduction=PCA(n_components=20),labels=[l])

In [104]:
pd.read_json("sample_generators_example.json")

Unnamed: 0,Lesioni20Gen
QuantileConstantPiecewiseFuzzifier,-0.36
CrispFuzzifier,-0.43
ExponentialFuzzifier,-0.29
LinearFuzzifier,-0.29
QuantileLinearPiecewiseFuzzifier,-0.36


Generatore migliore è l'identità

### MODEL SELECTION MIRATA

In [105]:
ordini_grandezza=[.01,.1,1,10,100,1000]
params_grid_ordini_grandezza = {
    'c': ordini_grandezza,
    'k': [GaussianKernel(sigma) for sigma in ordini_grandezza] + [LinearKernel()]
}

In [106]:
#l = 'Lesioni20OrdineGrandezza'
#ftog = incidenti_fuzzifier_table(dataset,[columns_lesioni],fuzzifiers_class,FuzzyInductor,dataset_labels,params_grid_ordini_grandezza,3,3,logger=f_logger,dim_reduction=PCA(n_components=20),labels=[l])
#ftog

In [107]:
#l = 'Lesioni20OrdineGrandezza_0'
#ftog0 = incidenti_fuzzifier_table(dataset,[columns_lesioni],fuzzifiers_class,FuzzyInductor,mu0,params_grid_ordini_grandezza,3,3,logger=f0_logger,dim_reduction=PCA(n_components=20),labels=[l])
#ftog0

#### Dai log apprendo che l'ordine di grandezza sia per c che per sigma è .1

In [108]:
valori_mirati = np.arange(.1,1,.1)
params_grid_mirata = {
    'c': valori_mirati,
    'k': [GaussianKernel(sigma) for sigma in valori_mirati] + [LinearKernel()]
}

In [109]:
#l = 'Lesioni20Grandezza.1'
#ft01 = incidenti_fuzzifier_table(dataset,[columns_lesioni],fuzzifiers_class,FuzzyInductor,dataset_labels,params_grid_mirata,3,3,logger=f_logger,dim_reduction=PCA(n_components=20),labels=[l])
#ft01

In [110]:
#l = 'Lesioni20Grandezza.1_0'
#ft01_0 = incidenti_fuzzifier_table(dataset,[columns_lesioni],fuzzifiers_class,FuzzyInductor,mu0,params_grid_mirata,3,3,logger=f0_logger,dim_reduction=PCA(n_components=20),labels=[l])
#ft01_0

#### Riconcentriamoci su totali distretti

In [133]:
totali_distretti = ['Tot Testa','Tot Torace','Tot Addome','Tot Scheletro']
X = dataset[totali_distretti].values
y = dataset['Mezzo'].values

In [134]:
ordini = [.001,.1,1,10,100,1000]
pg_ordini = {
    'c' : ordini,
    'k' : [LinearKernel()]+\
        [GaussianKernel(sigma) for sigma in ordini]+\
        [HyperbolicKernel(1,offset) for offset in ordini]
}

In [135]:
l = 'TotaliDistretti_MSOrdini'
ft = incidenti_fuzzifier_table(dataset,[totali_distretti],fuzzifiers_class,FuzzyInductor,\
                                dataset_labels,pg_ordini,3,3,logger=f_logger,\
                                file_json='fuzzifiers.json',labels=[l])
ft.to_json('fuzzifiers.json')

In [136]:
l= 'TotaliDistretti_MSOrdine_0'

In [137]:
ftb0 = incidenti_fuzzifier_table(dataset,[totali_distretti],fuzzifiers_class,FuzzyInductor,mu0,pg_ordini,3,3,\
                                 logger=f0_logger,file_json='fuzzifiers0.json',labels=[l])
ftb0.to_json('fuzzifiers0.json')