In [None]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:85% !important; }</style>"))
import warnings
warnings.filterwarnings('ignore')

import sys,os
sys.path.append('../')
from collections import Counter

from copy import copy,deepcopy

import numpy as np
import pandas as pd


import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('bmh')

from tqdm import tqdm_notebook as tqdm

from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

from sklearn.preprocessing import StandardScaler,PowerTransformer,MinMaxScaler,RobustScaler
from sklearn import preprocessing

from efcm.cluster import EFCM_LS1
from metrics import FMeasure, MPC, PE
from sklearn.metrics import adjusted_rand_score
from sklearn.metrics.cluster import pair_confusion_matrix

### Load data

In [None]:
# Raw test split of each dataset, read from ../input/<name>.csv
dataset_names = ['dataset_01_raw_test', 'dataset_02_raw_test', 'dataset_03_raw_test']

data_01, data_02, data_03 = (
    pd.read_csv(os.path.join('../input/', stem + '.csv'))
    for stem in dataset_names
)

### View data

In [None]:
# Preview the first rows of dataset 1.
data_01.head()

In [None]:
# Preview the first rows of dataset 2.
data_02.head()

In [None]:
# Preview the first rows of dataset 3.
data_03.head()

In [None]:
# Report the (rows, columns) shape of each loaded frame on one line.
print('Shape', *(frame.shape for frame in (data_01, data_02, data_03)))

In [None]:
# Separate every frame into its features (all columns except 'Target') and its
# labels (the 'Target' column); each piece is an independent copy of the values.
X_data_01, y_data_01 = (
    data_01.drop(columns=['Target']).values.copy(),
    data_01['Target'].values.copy(),
)
X_data_02, y_data_02 = (
    data_02.drop(columns=['Target']).values.copy(),
    data_02['Target'].values.copy(),
)
X_data_03, y_data_03 = (
    data_03.drop(columns=['Target']).values.copy(),
    data_03['Target'].values.copy(),
)

In [None]:
# Check that the three datasets carry identical label vectors and, if so,
# expose them as `y`.
# The comparison is element-wise via np.array_equal: `arr.all()` only reduces
# an array to a single truthiness flag, so comparing those flags would pass
# for any label vectors that are all non-zero, whatever their contents.
# np.array_equal also returns False (instead of raising) on a shape mismatch.
if np.array_equal(y_data_01, y_data_02) and np.array_equal(y_data_02, y_data_03):
    y = y_data_01
else:
    # Non-fatal notice; `y` is left undefined when the targets disagree.
    print('Target invalid')

### T_u and T_v parameters (taken from the directory ../run_best_params)

 - Dataset_01 : T_u=0.9  T_v=30.0
 - Dataset_02 : T_u=1.0  T_v=1000.0
 - Dataset_03 : T_u=5.0  T_v=10.0

### Evaluate

#### Dataset 1

In [None]:
# Fit EFCM_LS1 `epoch` times on dataset 1, each run on its own deep copy of the
# configured estimator, and record every run's loss in `u1` so the lowest-loss
# run can be picked afterwards.
# NOTE(review): no random seed is set; if the 'dirichlet' initialisation draws
# from a global RNG the runs will differ between executions — confirm.
clf1 = EFCM_LS1(n_cluster=7, theta_u=.9, theta_v=30.0, epsilon=1e-10, tol_iter=150, init='dirichlet')
epoch = 50

# A plain comprehension builds the copies directly; appending from inside a
# throwaway comprehension also rebinds `_` (IPython's last-output name).
model1 = [deepcopy(clf1) for _ in range(epoch)]

u1 = []
for j, run_model in enumerate(model1):
    run_model.fit(X_data_01)
    print('run:{} loss: {} '.format(j, run_model.loss))
    u1.append(run_model.loss)

In [None]:
# Convert the per-run losses to an array and display (index, value) of the minimum.
u1 = np.array(u1)
best_run1 = np.argmin(u1)
best_run1, u1[best_run1]

In [None]:
# Pull `fuzzy_matrix` and `labels_` from the dataset-1 run with the lowest loss.
best_model1 = model1[np.argmin(u1)]
fuzzy1, crisp1 = best_model1.fuzzy_matrix, best_model1.labels_

In [None]:
# Summarise the best dataset-1 run: MPC and PE of its fuzzy matrix, adjusted
# Rand score and FMeasure of its labels against 'Target', then the number of
# samples assigned to each predicted label.
print("Dataset 1:")
fuzzy_scores1 = (MPC(fuzzy1), PE(fuzzy1))
print("MPC: {}\tPE: {}".format(*fuzzy_scores1))
label_scores1 = (adjusted_rand_score(y_data_01, crisp1), FMeasure(y_data_01, crisp1))
print("ARS: {}\tFM: {}".format(*label_scores1))
print(Counter(crisp1))

In [None]:
# Feature importance: heatmap of row 6 only of the best run's fuzzy_weight_matrix.
# NOTE(review): datasets 1 and 2 plot row 6 while dataset 3 plots row 0 —
# confirm which row is intended (rows presumably index clusters).
best_weights1 = model1[np.argmin(u1)].fuzzy_weight_matrix
ax = sns.heatmap(best_weights1[6:7])
plt.show()

In [None]:
# Heatmap of the fuzzy matrix taken from the lowest-loss run on dataset 1.
ax = sns.heatmap(fuzzy1)
plt.show()

In [None]:
# Display the full fuzzy_weight_matrix of the lowest-loss run on dataset 1.
model1[np.argmin(u1)].fuzzy_weight_matrix

#### Dataset 2

In [None]:
# Fit EFCM_LS1 `epoch` times on dataset 2, each run on its own deep copy of the
# configured estimator, and record every run's loss in `u2` so the lowest-loss
# run can be picked afterwards.
# NOTE(review): no random seed is set; if the 'dirichlet' initialisation draws
# from a global RNG the runs will differ between executions — confirm.
clf2 = EFCM_LS1(n_cluster=7, theta_u=1.0, theta_v=1000.0, epsilon=1e-10, tol_iter=150, init='dirichlet')
epoch = 50

# A plain comprehension builds the copies directly; appending from inside a
# throwaway comprehension also rebinds `_` (IPython's last-output name).
model2 = [deepcopy(clf2) for _ in range(epoch)]

u2 = []
for j, run_model in enumerate(model2):
    run_model.fit(X_data_02)
    print('run:{} loss: {} '.format(j, run_model.loss))
    u2.append(run_model.loss)

In [None]:
# Pull `fuzzy_matrix` and `labels_` from the dataset-2 run with the lowest loss.
best_model2 = model2[np.argmin(u2)]
fuzzy2, crisp2 = best_model2.fuzzy_matrix, best_model2.labels_

In [None]:
# Summarise the best dataset-2 run: MPC and PE of its fuzzy matrix, adjusted
# Rand score and FMeasure of its labels against 'Target', then the number of
# samples assigned to each predicted label.
print("Dataset 2:")
fuzzy_scores2 = (MPC(fuzzy2), PE(fuzzy2))
print("MPC: {}\tPE: {}".format(*fuzzy_scores2))
label_scores2 = (adjusted_rand_score(y_data_02, crisp2), FMeasure(y_data_02, crisp2))
print("ARS: {}\tFM: {}".format(*label_scores2))
print(Counter(crisp2))

In [None]:
# Heatmap of the fuzzy matrix taken from the lowest-loss run on dataset 2.
ax = sns.heatmap(fuzzy2)
plt.show()

In [None]:
# Display the full fuzzy_weight_matrix of the lowest-loss run on dataset 2.
model2[np.argmin(u2)].fuzzy_weight_matrix

In [None]:
# Feature importance: heatmap of row 6 only of the best run's fuzzy_weight_matrix.
# NOTE(review): datasets 1 and 2 plot row 6 while dataset 3 plots row 0 —
# confirm which row is intended (rows presumably index clusters).
best_weights2 = model2[np.argmin(u2)].fuzzy_weight_matrix
ax = sns.heatmap(best_weights2[6:7])
plt.show()

#### Dataset 3

In [None]:
# Fit EFCM_LS1 `epoch` times on dataset 3, each run on its own deep copy of the
# configured estimator, and record every run's loss in `u3` so the lowest-loss
# run can be picked afterwards.
# NOTE(review): no random seed is set; if the 'dirichlet' initialisation draws
# from a global RNG the runs will differ between executions — confirm.
clf3 = EFCM_LS1(n_cluster=7, theta_u=5.0, theta_v=10.0, epsilon=1e-10, tol_iter=150, init='dirichlet')
epoch = 50

# A plain comprehension builds the copies directly; appending from inside a
# throwaway comprehension also rebinds `_` (IPython's last-output name).
model3 = [deepcopy(clf3) for _ in range(epoch)]

u3 = []
for j, run_model in enumerate(model3):
    run_model.fit(X_data_03)
    print('run:{} loss: {} '.format(j, run_model.loss))
    u3.append(run_model.loss)

In [None]:
# Pull `fuzzy_matrix` and `labels_` from the dataset-3 run with the lowest loss.
best_model3 = model3[np.argmin(u3)]
fuzzy3, crisp3 = best_model3.fuzzy_matrix, best_model3.labels_

In [None]:
# Summarise the best dataset-3 run: MPC and PE of its fuzzy matrix, adjusted
# Rand score and FMeasure of its labels against 'Target', then the number of
# samples assigned to each predicted label.
print("Dataset 3:")
fuzzy_scores3 = (MPC(fuzzy3), PE(fuzzy3))
print("MPC: {}\tPE: {}".format(*fuzzy_scores3))
label_scores3 = (adjusted_rand_score(y_data_03, crisp3), FMeasure(y_data_03, crisp3))
print("ARS: {}\tFM: {}".format(*label_scores3))
print(Counter(crisp3))

In [None]:
# Heatmap of the fuzzy matrix taken from the lowest-loss run on dataset 3.
ax = sns.heatmap(fuzzy3)
plt.show()

In [None]:
# Display the full fuzzy_weight_matrix of the lowest-loss run on dataset 3.
model3[np.argmin(u3)].fuzzy_weight_matrix

In [None]:
# Feature importance: heatmap of row 0 only of the best run's fuzzy_weight_matrix.
# NOTE(review): datasets 1 and 2 plot row 6 while this cell plots row 0 —
# confirm which row is intended (rows presumably index clusters).
best_weights3 = model3[np.argmin(u3)].fuzzy_weight_matrix
ax = sns.heatmap(best_weights3[0:1])
plt.show()