In [1]:
import pandas as pd
import matplotlib.pyplot as plt
pd.options.display.float_format = "{:,.2f}".format

import json
import logging

import sys

sys.path.insert(1, '../fuzzylearn/')

from fuzzylearn import *
from fuzzylearn.fuzzifiers import LinearFuzzifier, CrispFuzzifier,ExponentialFuzzifier,QuantileLinearPiecewiseFuzzifier, QuantileConstantPiecewiseFuzzifier
from fuzzylearn.kernel import GaussianKernel, LinearKernel, HyperbolicKernel, PolynomialKernel, HomogeneousPolynomialKernel
from fuzzylearn import solve_optimization_gurobi

from sklearn.preprocessing import StandardScaler, MinMaxScaler, MaxAbsScaler, QuantileTransformer, RobustScaler, PowerTransformer, Normalizer
from sklearn.decomposition import PCA
from sklearn.model_selection import GridSearchCV, cross_val_score

from sklearn.manifold import TSNE

In [2]:
# Load the raw spreadsheet and key every row by its "VERBALE" identifier.
dfo = pd.read_excel("dataset/DATABASE_UNITO_modificato.xlsx").set_index("VERBALE")

In [3]:
import datetime as dt

# Express DATA as whole days elapsed since 1970-01-01.
# The dtype guard makes the cell safe to re-run: once the column already holds
# day counts, subtracting a datetime from them again would raise a TypeError.
if not pd.api.types.is_numeric_dtype(dfo["DATA"]):
    dfo["DATA"] = dfo["DATA"].apply(lambda d: (d - dt.datetime(1970,1,1)).days)

dfo.head()

Unnamed: 0_level_0,DATA,SESSO,ANNI,PESO,ALTEZZA,BMI,Mezzo,Testa:Neurocranio,Testa:Splancnocranio,Testa:Telencefalo,...,II raggio sx.1,III raggio sx.1,IV raggio sx.1,V raggio sx.1,Art. coxo-femorale dx,Art. coxo-femorale sx,Rotula o Ginocchio dx,Rotula o Ginocchio sx,Caviglia dx,Caviglia sx
VERBALE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
85567,10893,0,81,84.0,1.75,27.43,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
85829,10970,1,69,69.0,1.62,26.29,1,4,4,4,...,0,0,0,0,0,0,0,0,0,0
85977,11026,1,71,67.0,1.55,27.89,1,2,0,1,...,0,0,0,0,0,0,0,0,0,0
86220,11122,1,54,60.0,1.59,23.73,1,4,0,0,...,0,0,0,0,0,0,0,0,0,0
86247,11130,1,78,69.0,1.67,24.74,1,2,0,0,...,0,0,0,0,0,0,0,0,0,0


# Dataset con PCA di gruppi sensati

## Estrazione valori

In [43]:
def get_reduction_columns(df,columns,ratio=.95,dim_reduction=PCA):
    """Project a group of columns onto the fewest components reaching ``ratio``.

    Candidate sizes are tried in increasing order, starting from 2 (or from 1
    when only one component is possible). For each size a fresh
    ``dim_reduction(n_components=size)`` is fitted on ``df[columns].values``.
    The search stops at the first size whose ``explained_variance_ratio_``
    sums to at least ``ratio``; if no size reaches it, the result for the
    largest size is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the data.
    columns : list
        Labels of the columns to reduce together.
    ratio : float, default .95
        Minimum cumulative explained-variance ratio to reach.
    dim_reduction : callable, default PCA
        Factory accepting ``n_components`` and returning an object exposing
        ``fit_transform`` and ``explained_variance_ratio_``.

    Returns
    -------
    The array returned by ``fit_transform`` of the selected reducer.

    Raises
    ------
    ValueError
        If ``df`` has no rows or ``columns`` is empty.
    """
    values = df[columns].values
    max_components = min(len(df),len(columns))
    if max_components < 1:
        raise ValueError("get_reduction_columns needs at least one row and one column")
    # Starting unconditionally at 2 leaves the result undefined when only one
    # component is possible (the range is empty), so fall back to 1 then.
    first_size = min(2,max_components)
    values_d = None
    for n_components in range(first_size,max_components+1):
        dim_red = dim_reduction(n_components=n_components)
        values_d = dim_red.fit_transform(values)
        if sum(dim_red.explained_variance_ratio_) >= ratio:
            break
    return values_d

### Cranio

In [44]:
# Skull ("cranio") group: positional slice 32..49 of dfo's columns, so the
# selection depends on the spreadsheet's column order.
columns_cranio = dfo.columns[32:50].tolist()

In [45]:
values_cranio = get_reduction_columns(dfo,columns_cranio)

### Cervicale

In [46]:
# Cervical-spine group: positional slice 55..61 of dfo's columns.
columns_cervicale = dfo.columns[55:62].tolist()

In [47]:
values_cervicale = get_reduction_columns(dfo,columns_cervicale)

### Toracico

In [48]:
# Thoracic-spine group: positional slice 62..73 of dfo's columns.
columns_toracico = dfo.columns[62:74].tolist()

In [49]:
values_toracico = get_reduction_columns(dfo,columns_toracico)

### Lombare

In [50]:
# Lumbar-spine group: positional slice 74..78 of dfo's columns.
columns_lombare = dfo.columns[74:79].tolist()

In [51]:
values_lombare = get_reduction_columns(dfo,columns_lombare)

### Gabbia Toracica

#### Emilato dx

In [52]:
# Right half of the rib cage: columns 85..182, dropping every column whose
# name contains "costa" (case-insensitive).
columns_torace_emidx = [name for name in dfo.columns[85:183] if 'costa' not in name.lower()]

In [53]:
values_torace_emidx = get_reduction_columns(dfo,columns_torace_emidx)

#### Emilato sx

In [54]:
# Left half of the rib cage: columns 183..280, dropping every column whose
# name contains "costa" (case-insensitive).
columns_torace_emisx = [name for name in dfo.columns[183:281] if 'costa' not in name.lower()]

In [55]:
values_torace_emisx = get_reduction_columns(dfo,columns_torace_emisx)

#### Linea mediana

In [56]:
# Rib-cage midline group: positional slice 282..283 of dfo's columns.
# NOTE(review): column 281 is in neither this slice nor the left-side one —
# confirm that the gap is intentional.
columns_torace_mediana = dfo.columns[282:284].tolist()

In [57]:
values_torace_mediana = get_reduction_columns(dfo,columns_torace_mediana)

### Bacino

In [58]:
# Pelvis ("bacino") group: positional slice 285..294 of dfo's columns.
columns_bacino = dfo.columns[285:295].tolist()

In [59]:
values_bacino = get_reduction_columns(dfo,columns_bacino)

### Arti Superiori

In [60]:
# Upper-limbs group: positional slice 296..329 of dfo's columns.
columns_artisup = dfo.columns[296:330].tolist()

In [61]:
values_artisup = get_reduction_columns(dfo,columns_artisup)

### Arti inferiori

In [62]:
# Lower-limbs group: every column of dfo from position 330 to the end.
columns_artinf = dfo.columns[330:].tolist()

In [63]:
values_artinf = get_reduction_columns(dfo,columns_artinf)

## Costruzione dataset

In [64]:
def create_subdf(values,label,index=None):
    """Wrap a 2-D block of values in a DataFrame with ``label``-prefixed columns.

    Parameters
    ----------
    values : 2-D array-like
        One row per observation, one column per component.
    label : str
        Prefix of the generated column names (``label0``, ``label1``, ...).
    index : optional
        Row index of the result. When omitted, the global ``dfo.index`` is
        used, which is the behaviour existing callers rely on.

    Returns
    -------
    pandas.DataFrame
        Frame with as many columns as ``values`` has, named after ``label``.
    """
    if index is None:
        index = dfo.index
    frame = pd.DataFrame(values,index=index)
    # Take the column count from the frame itself so that an input with zero
    # rows does not fail on ``values[0]``.
    frame.columns = [label+str(i) for i in range(frame.shape[1])]
    return frame

In [65]:
# One labelled frame per anatomical group, built from the reduced values.
df_cranio = create_subdf(values=values_cranio, label='cranio')
df_cervicale = create_subdf(values=values_cervicale, label='rachide_cervicale')
df_toracico = create_subdf(values=values_toracico, label='rachide_toracico')
df_lombare = create_subdf(values=values_lombare, label='rachide_lombare')
df_gt_emidx = create_subdf(values=values_torace_emidx, label='torace_emidx')
df_gt_emisx = create_subdf(values=values_torace_emisx, label='torace_emisx')
df_gt_mediana = create_subdf(values=values_torace_mediana, label='torace_mediana')
df_bacino = create_subdf(values=values_bacino, label='bacino')
df_artsup = create_subdf(values=values_artisup, label='arti_superiori')
df_artinf = create_subdf(values=values_artinf, label='arti_inferiori')

In [66]:
# Assemble the reduced feature table by placing the per-group frames side by
# side. A single column-wise concat replaces the chain of pairwise left joins:
# it builds no intermediate frames and, unlike a join, cannot multiply rows
# when an index label is repeated.
df_pca = pd.concat(
    [
        df_cranio,
        df_cervicale,
        df_toracico,
        df_lombare,
        df_gt_emidx,
        df_gt_emisx,
        df_gt_mediana,
        df_bacino,
        df_artsup,
        df_artinf,
    ],
    axis=1,
)

In [67]:
len(dfo.columns), len(df_pca.columns)

(366, 105)

In [68]:
df_pca.to_csv('dataset_pca.csv')

## Esperimenti

In [4]:
from experiments import *

### Overfitting

In [70]:
# Sanity check: fit with default hyper-parameters on the whole dataset and
# score on the very same samples (training score, no held-out data).
f = FuzzyInductor()
f.fit(df_pca.values,dfo['Mezzo'].values)
score = f.score(df_pca.values,dfo['Mezzo'].values)

100%|██████████| 100/100 [00:51<00:00,  1.93it/s]


In [71]:
'{:.6f}'.format(score)

'-0.000024'

In [3]:
# Shared inputs for the hold-out experiments.
classes = (1,0)
index, values = df_pca.index, df_pca.values
labels = dfo['Mezzo'].values
# mu0 flips the labels: 1 where 'Mezzo' is falsy, 0 otherwise.
mu0 = [0 if label else 1 for label in labels]

NameError: name 'df_pca' is not defined

### Proviamo i migliori classificatori ottenuti fino ad adesso

In [3]:
# Two candidate estimators sharing the same c but differing in fuzzifier and
# kernel. NOTE(review): presumably be1 models class 1 and be0 class 0, in the
# order of `classes` — confirm against best_estimator_holdout.
# NOTE(review): c = 0.0215443... equals 10**(-5/3), so it looks like a point
# taken from a logarithmic grid — confirm where it came from.
be1 = FuzzyInductor(fuzzifier=ExponentialFuzzifier,c=0.021544346900318846,k=HyperbolicKernel(1,0))
be0 = FuzzyInductor(fuzzifier=LinearFuzzifier,c=0.021544346900318846,k=LinearKernel())
bes = [be1,be0]

In [74]:
perf_train, perf_test =best_estimator_holdout(bes,index,values, labels,
                                              classes,0.7,classify,5)

100%|██████████| 100/100 [00:23<00:00,  4.27it/s]
100%|██████████| 100/100 [00:40<00:00,  2.47it/s]
100%|██████████| 100/100 [00:32<00:00,  3.10it/s]
100%|██████████| 100/100 [00:33<00:00,  2.97it/s]
100%|██████████| 100/100 [00:29<00:00,  3.34it/s]
100%|██████████| 100/100 [00:26<00:00,  3.79it/s]
100%|██████████| 100/100 [00:28<00:00,  3.53it/s]
100%|██████████| 100/100 [00:32<00:00,  3.10it/s]
100%|██████████| 100/100 [00:24<00:00,  4.05it/s]
100%|██████████| 100/100 [00:22<00:00,  4.38it/s]


In [75]:
perf_train, perf_test

(0.28571428571428575, 0.3794871794871795)

### Model selection Ordini Grandezza

In [5]:
# Coarse search: c and the kernel parameters sweep the same orders of magnitude.
ordini = [.01,.1,1,10,100,1000]
params_grid = {
    'c' : ordini,
    'k' : [
        *(GaussianKernel(sigma) for sigma in ordini),
        LinearKernel(),
        *(HyperbolicKernel(1,o) for o in ordini),
    ],
}

In [5]:
import logging

# Logger used by the experiment helpers; INFO and above are recorded.
f_logger = logging.getLogger(__name__)
f_logger.setLevel(logging.INFO)

# Line format: timestamp followed by the message.
formatter = logging.Formatter('%(asctime)s -%(message)s')

# Reuse the file handler when this cell has already attached one: adding a new
# handler on every re-run would write each record to the log several times.
file = next(
    (handler for handler in f_logger.handlers
     if isinstance(handler, logging.FileHandler)
     and handler.baseFilename.endswith('esperimenti_autoencoder.log')),
    None,
)
if file is None:
    file = logging.FileHandler('esperimenti_autoencoder.log')
    f_logger.addHandler(file)

# The handler writes to a file (not the console) at INFO level.
file.setLevel(logging.INFO)
file.setFormatter(formatter)

In [6]:
filejson = 'json_result/fuzzifiers_autoencoder.json'

In [6]:
l = 'df_pca_MS_OrdineGrandezza'

In [80]:
#ft = incidenti_fuzzifier_table(df_pca,[list(df_pca.columns)],fuzzifiers_class,FuzzyInductor,labels,params_grid, \
                               #3,3,logger=f_logger,scaling=None,labels=[l])

I migliori risultati si ottengono con c dell'ordine di 0.1 e con il parametro del kernel dell'ordine delle unità.

### Model Selection fine

In [81]:
# Fine search: integer kernel offsets 0..9 and c from .1 up to (excluding) 1.
# NOTE(review): `np` is not imported explicitly in this notebook; it presumably
# arrives through one of the star imports — confirm or import numpy directly.
valori_offset = range(10)
c_space = np.arange(.1,1,.1)
pg_fine = dict(
    c=c_space,
    k=[HyperbolicKernel(1,o) for o in valori_offset],
)

In [82]:
l = 'df_pca_MS_fine'

In [83]:
#ft = incidenti_fuzzifier_table(df_pca,[list(df_pca.columns)],fuzzifiers_class,FuzzyInductor,labels,params_grid, \
                                #3,3,logger=f_logger,scaling=None,labels=[l])

## Classe 0

In [84]:
# Class-0 search: c is fixed to .1, kernels sweep `ordini`; polynomial kernels
# are built for every value of `ordini` except .01 and .1.
params_grid0 = {
    'c': [.1],
    'k': [
        *(GaussianKernel(sigma) for sigma in ordini),
        LinearKernel(),
        *(HyperbolicKernel(1,o) for o in ordini),
        *(PolynomialKernel(d) for d in ordini if d not in (.01,.1)),
    ],
}

In [85]:
l = 'df_pca_MS_0'

In [86]:
# Run the class-0 model selection on all df_pca columns, without scaling,
# using mu0 as target and `l` as the label of this experiment.
# NOTE(review): the table displayed afterwards also lists columns from other
# experiments, so incidenti_fuzzifier_table presumably merges new results into
# file_json — confirm in experiments.py. The two positional 3s are passed
# through unchanged; their meaning is not visible here.
ft = incidenti_fuzzifier_table(df_pca,[list(df_pca.columns)],fuzzifiers_class,FuzzyInductor,mu0,params_grid0, \
                               3,3,logger=f_logger,scaling=None,labels=[l],file_json=filejson)

In [87]:
ft

Unnamed: 0,df_autoencoder_MS,df_autoencoder_MSfine,df_autoencoder_MS_0,df_autoencoder_MSfine_0,df_pca_MS_0,df_pca_MSfine_0,TSNE2_MSOrdine,TSNE2_MSFine,TSNE2_MSOrdine_0
QuantileConstantPiecewiseFuzzifier,-0.38,-0.38,-0.31,-0.31,-0.3,-0.29,-0.43,-0.43,-0.36
CrispFuzzifier,-0.43,-0.43,-0.37,-0.37,-0.31,-0.31,-0.44,-0.44,-0.32
ExponentialFuzzifier,-0.29,-0.29,-0.26,-0.26,-0.23,-0.23,-0.29,-0.29,-0.28
LinearFuzzifier,-0.29,-0.29,-0.26,-0.26,-0.24,-0.24,-0.3,-0.3,-0.28
QuantileLinearPiecewiseFuzzifier,-0.38,-0.38,-0.31,-0.31,-0.33,-0.33,-0.44,-0.44,-0.35


In [88]:
# Fine class-0 search: c fixed to .1, hyperbolic kernel offsets 0..9.
params_grid_fine0 = dict(
    c=[.1],
    k=[HyperbolicKernel(1,o) for o in range(10)],
)
l = 'df_pca_MSfine_0'

In [89]:
ft = incidenti_fuzzifier_table(df_pca,[list(df_pca.columns)],fuzzifiers_class,FuzzyInductor,mu0,params_grid_fine0, \
                               3,3,logger=f_logger,scaling=None,labels=[l],file_json=filejson)

In [90]:
ft

Unnamed: 0,df_autoencoder_MS,df_autoencoder_MSfine,df_autoencoder_MS_0,df_autoencoder_MSfine_0,df_pca_MS_0,df_pca_MSfine_0,TSNE2_MSOrdine,TSNE2_MSFine,TSNE2_MSOrdine_0
QuantileConstantPiecewiseFuzzifier,-0.38,-0.38,-0.31,-0.31,-0.3,-0.29,-0.43,-0.43,-0.36
CrispFuzzifier,-0.43,-0.43,-0.37,-0.37,-0.31,-0.31,-0.44,-0.44,-0.32
ExponentialFuzzifier,-0.29,-0.29,-0.26,-0.26,-0.23,-0.23,-0.29,-0.29,-0.28
LinearFuzzifier,-0.29,-0.29,-0.26,-0.26,-0.24,-0.24,-0.3,-0.3,-0.28
QuantileLinearPiecewiseFuzzifier,-0.38,-0.38,-0.31,-0.31,-0.33,-0.33,-0.44,-0.44,-0.35


In [91]:
# Preprocessing transformers to try, in this order.
scalers = [
    StandardScaler(),
    MinMaxScaler(),
    MaxAbsScaler(),
    QuantileTransformer(output_distribution='normal'),
    QuantileTransformer(output_distribution='uniform'),
    Normalizer(),
    RobustScaler(quantile_range=(25, 75)),
    PowerTransformer(method='yeo-johnson'),
]

In [92]:
grid = {
    'c': [.1],
    'k': [HyperbolicKernel(1,1)]
}

In [93]:
"""
scalers_exp, _ = [esperimento(df_pca,list(df_pca.columns),FuzzyInductor,labels,grid,3,3,\
                logger = f_logger,scaling=scaler,label=l)\
                for scaler,l in zip(scalers,[repr(s) for s in scalers])]
"""

'\nscalers_exp, _ = [esperimento(df_pca,list(df_pca.columns),FuzzyInductor,labels,grid,3,3,                logger = f_logger,scaling=scaler,label=l)                for scaler,l in zip(scalers,[repr(s) for s in scalers])]\n'

In [94]:
"""[esperimento(df_pca,list(df_pca.columns),FuzzyInductor,mu0,grid,3,3,\
                logger = f_logger,scaling=scaler,label=l)\
                for scaler,l in zip(scalers,[repr(s) for s in scalers])]"""

'[esperimento(df_pca,list(df_pca.columns),FuzzyInductor,mu0,grid,3,3,                logger = f_logger,scaling=scaler,label=l)                for scaler,l in zip(scalers,[repr(s) for s in scalers])]'

## Proviamo una diversa defuzzification

In [8]:
from defuzzification import *
from functools import partial

# Single estimator configuration reused for both classes.
# NOTE(review): both entries of `bes` are the SAME FuzzyInductor instance; if
# best_estimator_holdout fits the estimators in place (rather than cloning
# them), the second fit overwrites the first — confirm, or build two instances.
be = FuzzyInductor(c=0.1,k=HyperbolicKernel(1,1))
bes = [be,be]

In [96]:
"""
results_alpha_cuts = pd.DataFrame()
for alpha in np.arange(0.,1.05,.05):
    perf_train, perf_test = best_estimator_holdout(bes,df_pca.index,df_pca.values, labels,
                                              classes,0.7,partial(alpha_cut,alpha,0),5)
    results_alpha_cuts = \
    results_alpha_cuts.append({'alpha_cut': alpha,'train_err': perf_train,'test_err': perf_test},ignore_index=True)
"""

"\nresults_alpha_cuts = pd.DataFrame()\nfor alpha in np.arange(0.,1.05,.05):\n    perf_train, perf_test = best_estimator_holdout(bes,df_pca.index,df_pca.values, labels,\n                                              classes,0.7,partial(alpha_cut,alpha,0),5)\n    results_alpha_cuts =     results_alpha_cuts.append({'alpha_cut': alpha,'train_err': perf_train,'test_err': perf_test},ignore_index=True)\n"

In [26]:
results_alpha_cuts = pd.read_json("alpha_cuts_defuzzification.json")
results_alpha_cuts

Unnamed: 0,alpha_cut,test_err,train_err
0,0.0,0.42,0.48
1,0.05,0.48,0.45
2,0.1,0.41,0.43
3,0.15,0.36,0.39
4,0.2,0.36,0.33
5,0.25,0.33,0.33
6,0.3,0.31,0.32
7,0.35,0.28,0.31
8,0.4,0.29,0.28
9,0.45,0.26,0.29


L'alpha cut 0.45 è il migliore: nella tabella qui sopra ha il test_err più basso (0.26).

# Dataset PCA 1 componente 

In [10]:
import datetime as dt

# Load the second spreadsheet and key every row by its "VERBALE" identifier.
dataset = pd.read_excel("dataset/IncidentiModificato.xlsx").set_index("VERBALE")

# Express DATA as whole days elapsed since 1970-01-01.
dataset["DATA"] = dataset["DATA"].apply(lambda d: (d - dt.datetime(1970,1,1)).days)

dataset.head()

Unnamed: 0_level_0,DATA,SESSO,ANNI,PESO,ALTEZZA,BMI,Mezzo,Testa:Neurocranio,Testa:Splancnocranio,Testa:Telencefalo,...,Scheletro:Rachide-cervicale,Scheletro:Rachide-toracico,Scheletro:Rachide-lombare,Scheletro:Bacino-e-sacro,Scheletro:Complesso-sterno/claveo/costale,Tot Testa,Tot Torace,Tot Addome,Tot Scheletro,Totale
VERBALE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
85567,10893,0,81,84.0,1.75,27.43,0,1,0,0,...,0,3,0,3,3,2,0,3,9,14
85829,10970,1,69,69.0,1.62,26.29,1,4,4,4,...,0,0,0,0,4,20,7,1,4,32
85977,11026,1,71,67.0,1.55,27.89,1,2,0,1,...,0,0,0,0,4,6,0,0,4,10
86220,11122,1,54,60.0,1.59,23.73,1,4,0,0,...,0,0,0,0,4,5,3,2,4,14
86247,11130,1,78,69.0,1.67,24.74,1,2,0,0,...,0,0,0,0,4,2,0,2,4,8


In [11]:
# Column groups selected by position: columns 1..5 form the personal data
# ("anagrafica"), 'Mezzo' is the target, columns 7..26 are the injury scores.
anagrafica = dataset[dataset.columns[1:6].tolist()]
labels = dataset['Mezzo']
lesioni = dataset[dataset.columns[7:27].tolist()]

In [12]:
labels_zone = ['Testa','Torace','Addome','Scheletro']

In [10]:
# Compress each body zone (5 consecutive injury columns) into its first
# principal component and append it to the personal data.
# The loop variables are deliberately not called `i`/`l`: `l` is the
# notebook-wide experiment label and must not be overwritten by this loop.
df_compresso = anagrafica.copy()
for zone_start,zone_label in zip(range(0,len(lesioni.columns),5),labels_zone):
    zona = lesioni.iloc[:, zone_start:zone_start+5]
    component = PCA(n_components=1).fit_transform(zona.values)
    # fit_transform returns one column per component; take the only one.
    df_compresso[zone_label] = pd.Series(component[:, 0],index=zona.index)

In [11]:
df_compresso.tail()

Unnamed: 0_level_0,SESSO,ANNI,PESO,ALTEZZA,BMI,Testa,Torace,Addome,Scheletro
VERBALE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
101618,1,82,80.2,1.62,30.56,-1.64,-0.92,-1.29,-3.13
101288E,1,70,75.6,1.57,30.67,-1.11,-1.5,-1.29,-1.02
E-97586,0,45,45.0,1.77,14.36,-2.17,-0.91,-1.29,1.65
X,0,54,89.6,1.68,31.75,-1.38,-0.6,-0.15,0.66
X1,1,86,50.0,1.55,20.81,-0.9,-0.6,1.03,1.65


### Esperimenti

#### Tutte le colonne

In [19]:
# Coarse search: c and the kernel parameters sweep the same orders of magnitude.
ordini = [.01,.1,1,10,100,1000]
pg_ordini = {
    'c': ordini,
    'k': [
        *(GaussianKernel(sigma) for sigma in ordini),
        LinearKernel(),
        *(HyperbolicKernel(1,o) for o in ordini),
    ],
}

In [20]:
l='dfPCA1_MSOrdine'

In [21]:
ft = incidenti_fuzzifier_table(df_compresso,[list(df_compresso.columns)],fuzzifiers_class,FuzzyInductor,labels.values,\
                               pg_ordini, 3,3,logger=f_logger,scaling=StandardScaler(),labels=[l],\
                               file_json=filejson)
ft.to_json(filejson)

In [22]:
# Flipped labels: 1 where the label is falsy, 0 otherwise.
mu0 = [0 if label else 1 for label in labels.values]

In [23]:
l = 'dfPCA1_MSOrdine_0'

In [24]:
ft = incidenti_fuzzifier_table(df_compresso,[list(df_compresso.columns)],fuzzifiers_class,FuzzyInductor,mu0,\
                               pg_ordini, 3,3,logger=f_logger,scaling=StandardScaler(),labels=[l],\
                               file_json=filejson)
ft.to_json(filejson)

In [25]:
l = 'dfPCA1_MSFine_0'
pg_fine0 = {
    'c': [.1],
    'k': [HyperbolicKernel(1,o) for o in [.01,.02,.03,.04,.05,.06,.07,.08,.09]]
}

In [26]:
ft = incidenti_fuzzifier_table(df_compresso,[list(df_compresso.columns)],fuzzifiers_class,FuzzyInductor,mu0,\
                               pg_fine0, 3,3,logger=f_logger,scaling=StandardScaler(),labels=[l],\
                               file_json=filejson)
ft.to_json(filejson)

In [27]:
# Build the fuzzifier-aware grid as a separate dict. Plain assignment would
# only alias pg_ordini, so adding 'fuzzifier' here would also change pg_ordini
# for every experiment that uses it afterwards.
pg_ordinif = dict(pg_ordini)
pg_ordinif['fuzzifier'] = [ExponentialFuzzifier,LinearFuzzifier,CrispFuzzifier,\
                           QuantileLinearPiecewiseFuzzifier,QuantileConstantPiecewiseFuzzifier]
l = 'dfPCA1_MSOrdineF'

In [16]:
esperimento_registrato(df_compresso,list(df_compresso.columns),FuzzyInductor, labels.values, pg_ordinif, 3, 3, logger = f_logger, scaling=StandardScaler(), dim_reduction=None,label=l)

  array_means[:, np.newaxis]) ** 2,
100%|██████████| 100/100 [00:29<00:00,  3.41it/s]
  array_means[:, np.newaxis]) ** 2,
100%|██████████| 100/100 [00:15<00:00,  6.52it/s]
  array_means[:, np.newaxis]) ** 2,
100%|██████████| 100/100 [00:15<00:00,  6.39it/s]
  array_means[:, np.newaxis]) ** 2,
100%|██████████| 100/100 [00:15<00:00,  6.41it/s]


Unnamed: 0,dfPCA1_MSOrdineF
c,0.1
fuzzifier,<class 'fuzzylearn.fuzzifiers.LinearFuzzifier'>
k,"HyperbolicKernel(1, 1)"
score,-0.38


#### Lesioni

In [19]:
columns_lesioni = list(df_compresso.columns)[5:]

In [33]:
l= 'dfPCA1Lesioni_MSOrdine'

In [36]:
ft = incidenti_fuzzifier_table(df_compresso,[columns_lesioni],fuzzifiers_class,FuzzyInductor,labels.values,\
                               pg_ordini, 3,3,logger=f_logger,scaling=None,labels=[l],\
                               file_json=filejson)
ft.to_json(filejson)

Risultati simili

### PCA 2 COMPONENTI

In [13]:
# Pair every zone name with the name of its second component ("<zone>2").
# Both halves are derived from labels_zone so the two lists cannot drift apart.
labels_zone2 = [(zone, zone+"2") for zone in labels_zone]
labels_zone2

[('Testa', 'Testa2'),
 ('Torace', 'Torace2'),
 ('Addome', 'Addome2'),
 ('Scheletro', 'Scheletro2')]

In [14]:
# Compress each body zone (5 consecutive injury columns) into its first two
# principal components and append both to the personal data.
# The loop variables are deliberately not called `i`/`l`: `l` is the
# notebook-wide experiment label and must not be overwritten by this loop.
df_compresso2 = anagrafica.copy()
for zone_start,(first_label,second_label) in zip(range(0,len(lesioni.columns),5),labels_zone2):
    zona = lesioni.iloc[:, zone_start:zone_start+5]
    component = PCA(n_components=2).fit_transform(zona.values)
    # Column 0 is the first component, column 1 the second.
    df_compresso2[first_label] = pd.Series(component[:, 0],index=zona.index)
    df_compresso2[second_label] = pd.Series(component[:, 1],index=zona.index)
df_compresso2.tail()

Unnamed: 0_level_0,SESSO,ANNI,PESO,ALTEZZA,BMI,Testa,Testa2,Torace,Torace2,Addome,Addome2,Scheletro,Scheletro2
VERBALE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
101618,1,82,80.2,1.62,30.56,-1.64,-0.01,-0.92,0.18,-1.29,0.14,-3.13,0.75
101288E,1,70,75.6,1.57,30.67,-1.11,0.37,-1.5,-0.37,-1.29,0.14,-1.02,-1.19
E-97586,0,45,45.0,1.77,14.36,-2.17,-0.39,-0.91,0.19,-1.29,0.14,1.65,1.02
X,0,54,89.6,1.68,31.75,-1.38,-0.88,-0.6,0.57,-0.15,-0.35,0.66,-0.41
X1,1,86,50.0,1.55,20.81,-0.9,-0.95,-0.6,0.57,1.03,-1.68,1.65,0.44


In [30]:
l = 'dfPCA2_MSOrdine'

In [31]:
ft = incidenti_fuzzifier_table(df_compresso2,[list(df_compresso2.columns)],fuzzifiers_class,FuzzyInductor,labels.values,\
                               pg_ordini, 3,3,logger=f_logger,scaling=StandardScaler(),labels=[l],\
                               file_json=filejson)
ft.to_json(filejson)

In [32]:
l = 'dfPCA2_MSOrdine_0'

In [33]:
ft = incidenti_fuzzifier_table(df_compresso2,[list(df_compresso2.columns)],fuzzifiers_class,FuzzyInductor,mu0,\
                               pg_ordini, 3,3,logger=f_logger,scaling=StandardScaler(),labels=[l],\
                               file_json=filejson)
ft.to_json(filejson)

In [17]:
l = 'dfPCA2Lesioni_0'
# Rebinds pg_ordini: c is fixed to 0.1 and only two fuzzifiers are searched.
pg_ordini = {
    'c': [0.1],
    'k': [
        *(GaussianKernel(sigma) for sigma in (0.01, 0.1, 1, 10, 100, 1000)),
        LinearKernel(),
        *(HyperbolicKernel(1, offset) for offset in (0.01, 0.1, 1, 10, 100, 1000)),
    ],
    'fuzzifier': [ExponentialFuzzifier, LinearFuzzifier],
}

In [18]:
columns_lesioni2 = list(df_compresso2.columns)[5:]

In [44]:
ft = incidenti_fuzzifier_table(df_compresso2,[columns_lesioni2],fuzzifiers_class,FuzzyInductor,mu0,\
                               pg_ordini, 3,3,logger=f_logger,scaling=StandardScaler(),labels=[l],\
                               file_json=filejson)
ft.to_json(filejson)

100%|██████████| 100/100 [00:49<00:00,  2.03it/s]
100%|██████████| 100/100 [00:42<00:00,  2.36it/s]
100%|██████████| 100/100 [00:26<00:00,  3.83it/s]
100%|██████████| 100/100 [00:26<00:00,  3.78it/s]
100%|██████████| 100/100 [00:26<00:00,  3.77it/s]
100%|██████████| 100/100 [00:38<00:00,  2.60it/s]
100%|██████████| 100/100 [00:36<00:00,  2.75it/s]
100%|██████████| 100/100 [00:31<00:00,  3.21it/s]
100%|██████████| 100/100 [00:34<00:00,  2.91it/s]
100%|██████████| 100/100 [00:52<00:00,  1.89it/s]
100%|██████████| 100/100 [00:34<00:00,  2.93it/s]
100%|██████████| 100/100 [00:34<00:00,  2.93it/s]
100%|██████████| 100/100 [00:33<00:00,  3.00it/s]
100%|██████████| 100/100 [00:45<00:00,  2.21it/s]
100%|██████████| 100/100 [00:25<00:00,  3.89it/s]
100%|██████████| 100/100 [00:26<00:00,  3.76it/s]
100%|██████████| 100/100 [00:26<00:00,  3.82it/s]
100%|██████████| 100/100 [00:38<00:00,  2.58it/s]
100%|██████████| 100/100 [00:26<00:00,  3.84it/s]
100%|██████████| 100/100 [00:27<00:00,  3.61it/s]


In [20]:
l = 'dfPCA2Lesioni'
ft = incidenti_fuzzifier_table(df_compresso2,[columns_lesioni2],fuzzifiers_class,FuzzyInductor,labels.values,\
                               pg_ordini, 3,3,logger=f_logger,scaling=StandardScaler(),labels=[l],\
                               file_json=filejson)
ft.to_json(filejson)

100%|██████████| 100/100 [00:31<00:00,  3.19it/s]
100%|██████████| 100/100 [00:31<00:00,  3.14it/s]
100%|██████████| 100/100 [00:21<00:00,  4.63it/s]
100%|██████████| 100/100 [00:21<00:00,  4.57it/s]
100%|██████████| 100/100 [00:22<00:00,  4.51it/s]
100%|██████████| 100/100 [00:31<00:00,  3.18it/s]
100%|██████████| 100/100 [00:21<00:00,  4.62it/s]
100%|██████████| 100/100 [00:22<00:00,  4.53it/s]
100%|██████████| 100/100 [00:21<00:00,  4.59it/s]
100%|██████████| 100/100 [00:31<00:00,  3.20it/s]
100%|██████████| 100/100 [00:21<00:00,  4.64it/s]
100%|██████████| 100/100 [00:22<00:00,  4.51it/s]
100%|██████████| 100/100 [00:21<00:00,  4.63it/s]
100%|██████████| 100/100 [00:31<00:00,  3.20it/s]
100%|██████████| 100/100 [00:22<00:00,  4.54it/s]
100%|██████████| 100/100 [00:22<00:00,  4.50it/s]
100%|██████████| 100/100 [00:22<00:00,  4.54it/s]
100%|██████████| 100/100 [00:31<00:00,  3.14it/s]
100%|██████████| 100/100 [00:21<00:00,  4.67it/s]
100%|██████████| 100/100 [00:22<00:00,  4.45it/s]


In [48]:
# Fine class-0 search on the injury components only: c fixed to 0.1, linear
# fuzzifier, hyperbolic kernel offsets stepping by 0.01 from 0.01 towards 0.1.
# NOTE(review): np.arange with a non-integer step can include or drop the last
# value depending on rounding (see the numpy docs); the earlier pg_fine0 grid
# spells the offsets out explicitly — consider doing the same here.
l='dfPCA2LesioniFine_0'
pg_fine = {'c': [0.1],
 'k': [HyperbolicKernel(1, o) for o in np.arange(0.01,0.1,0.01)],
 'fuzzifier': [LinearFuzzifier]
}

# Results are returned as a table and then written to the shared JSON file.
ft = incidenti_fuzzifier_table(df_compresso2,[columns_lesioni2],fuzzifiers_class,FuzzyInductor,mu0,\
                               pg_fine, 3,3,logger=f_logger,scaling=StandardScaler(),labels=[l],\
                               file_json=filejson)
ft.to_json(filejson)

100%|██████████| 100/100 [00:43<00:00,  2.32it/s]
100%|██████████| 100/100 [00:43<00:00,  2.31it/s]
100%|██████████| 100/100 [00:29<00:00,  3.34it/s]
100%|██████████| 100/100 [00:30<00:00,  3.31it/s]
100%|██████████| 100/100 [00:31<00:00,  3.13it/s]
100%|██████████| 100/100 [00:45<00:00,  2.21it/s]
100%|██████████| 100/100 [00:30<00:00,  3.31it/s]
100%|██████████| 100/100 [00:22<00:00,  4.47it/s]
100%|██████████| 100/100 [00:22<00:00,  4.42it/s]
100%|██████████| 100/100 [00:31<00:00,  3.22it/s]
100%|██████████| 100/100 [00:21<00:00,  4.63it/s]
100%|██████████| 100/100 [00:22<00:00,  4.44it/s]
100%|██████████| 100/100 [00:22<00:00,  4.54it/s]
100%|██████████| 100/100 [00:31<00:00,  3.13it/s]
100%|██████████| 100/100 [00:22<00:00,  4.47it/s]
100%|██████████| 100/100 [00:21<00:00,  4.57it/s]
100%|██████████| 100/100 [00:22<00:00,  4.55it/s]
100%|██████████| 100/100 [00:31<00:00,  3.20it/s]
100%|██████████| 100/100 [00:21<00:00,  4.63it/s]
100%|██████████| 100/100 [00:22<00:00,  4.52it/s]


In [49]:
ft

Unnamed: 0,df_autoencoder_MS,df_autoencoder_MSfine,df_autoencoder_MS_0,df_autoencoder_MSfine_0,df_pca_MS_0,df_pca_MSfine_0,TSNE2_MSOrdine,TSNE2_MSFine,TSNE2_MSOrdine_0,dfPCA1_MSOrdine,dfPCA1_MSOrdine_0,dfPCA1_MSFine_0,dfPCA1Lesioni_MSOrdine,dfPCA1_MSOrdineF,dfPCA2_MSOrdine,dfPCA2_MSOrdine_0,dfPCA2Lesioni_0,dfPCA2LesioniFine_0
QuantileConstantPiecewiseFuzzifier,-0.38,-0.38,-0.31,-0.31,-0.3,-0.29,-0.43,-0.43,-0.36,-0.41,-0.3,-0.29,-0.45,-0.41,-0.42,-0.26,-0.25,-0.25
CrispFuzzifier,-0.43,-0.43,-0.37,-0.37,-0.31,-0.31,-0.44,-0.44,-0.32,-0.46,-0.32,-0.32,-0.45,-0.46,-0.42,-0.34,-0.28,-0.28
ExponentialFuzzifier,-0.29,-0.29,-0.26,-0.26,-0.23,-0.23,-0.29,-0.29,-0.28,-0.28,-0.26,-0.26,-0.29,-0.28,-0.27,-0.24,-0.23,-0.23
LinearFuzzifier,-0.29,-0.29,-0.26,-0.26,-0.24,-0.24,-0.3,-0.3,-0.28,-0.28,-0.26,-0.26,-0.3,-0.28,-0.27,-0.24,-0.23,-0.23
QuantileLinearPiecewiseFuzzifier,-0.38,-0.38,-0.31,-0.31,-0.33,-0.33,-0.44,-0.44,-0.35,-0.41,-0.33,-0.33,-0.45,-0.41,-0.39,-0.28,-0.27,-0.27


In [32]:
be1 = FuzzyInductor(c=.1,k=HyperbolicKernel(1,.01),fuzzifier=LinearFuzzifier)
be0 = FuzzyInductor(c=.1,k=HyperbolicKernel(1,.01),fuzzifier=ExponentialFuzzifier)
bes = [be1,be0]
classes = (1,0)

In [39]:
# Inputs for the hold-out run on the two-component dataset.
# NOTE(review): the scaler is fitted on ALL rows before best_estimator_holdout
# splits them, so the test rows presumably influence the scaling statistics —
# confirm whether fitting the scaler on the training split only is preferable.
values = StandardScaler().fit_transform(df_compresso2.values)
index = df_compresso2.index
y = labels.values

In [40]:
perf_train, perf_test =best_estimator_holdout(bes,index,values, y,
                                              classes,0.7,classify,5)

100%|██████████| 100/100 [00:25<00:00,  3.87it/s]
100%|██████████| 100/100 [00:24<00:00,  4.08it/s]
100%|██████████| 100/100 [00:24<00:00,  4.08it/s]
100%|██████████| 100/100 [00:24<00:00,  4.13it/s]
100%|██████████| 100/100 [00:23<00:00,  4.20it/s]
100%|██████████| 100/100 [00:23<00:00,  4.20it/s]
100%|██████████| 100/100 [00:24<00:00,  4.05it/s]
100%|██████████| 100/100 [00:24<00:00,  4.06it/s]
100%|██████████| 100/100 [00:24<00:00,  4.01it/s]
100%|██████████| 100/100 [00:24<00:00,  4.09it/s]


In [41]:
perf_train, perf_test

(0.31868131868131866, 0.3282051282051282)

In [44]:
"""
results_alpha_cuts = pd.DataFrame()
for alpha in np.arange(.1,.95,.05):
    perf_train, perf_test = best_estimator_holdout(bes,index,values, y,
                                              classes,0.7,partial(alpha_cut,alpha,0),2)
    results_alpha_cuts = \
    results_alpha_cuts.append({'alpha_cut': alpha,'train_err': perf_train,'test_err': perf_test},ignore_index=True)
"""

"\nresults_alpha_cuts = pd.DataFrame()\nfor alpha in np.arange(.1,.95,.05):\n    perf_train, perf_test = best_estimator_holdout(bes,index,values, y,\n                                              classes,0.7,partial(alpha_cut,alpha,0),2)\n    results_alpha_cuts =     results_alpha_cuts.append({'alpha_cut': alpha,'train_err': perf_train,'test_err': perf_test},ignore_index=True)\n"

In [43]:
results_alpha_cuts

Unnamed: 0,alpha_cut,test_err,train_err
0,0.1,0.51,0.44
1,0.15,0.46,0.46
2,0.2,0.42,0.46
3,0.25,0.46,0.42
4,0.3,0.45,0.37
5,0.35,0.35,0.4
6,0.4,0.29,0.32
7,0.45,0.36,0.26
8,0.5,0.32,0.31
9,0.55,0.33,0.3
