In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sb
from sklearn.cluster import KMeans
from sklearn.metrics import pairwise_distances_argmin_min
from kmodes.kmodes import KModes
from matplotlib.pyplot import xticks
from sklearn import preprocessing
import ipywidgets as widgets
from ipywidgets import VBox,Box,HBox
import qviz as qv
from qviz import qb1,log


In [None]:
%matplotlib inline
# setting up input source
ds=qb1.Source(keyspace="caixa",table_name="bizum")
#Qviz visualizer
qviz = qv.Qviz(source=ds)



# UI controls.
# Button that starts a clustering run; its click handler is registered further down.
l = widgets.Button(
    description="Run clustering",
    # The trait is spelled `button_style`; the previous misspelling was not a
    # Button trait, so the "success" styling was never applied.
    button_style='success',
    icon='running'
)


# Range of cluster counts to evaluate. An integer range slider is used because
# a cluster count is a whole number; the float variant hands back values such
# as 2.0, which then leak into titles and progress-bar bounds.
n_cluster = widgets.IntRangeSlider(
    value=[2, 10],
    min=1,
    max=20,
    step=1,
    description='Number of clusters to use:',
    disabled=False,
    continuous_update=False,
    orientation='horizontal',
    readout=True,
    readout_format='d',
    # Let the label take its natural width; with the default fixed width a
    # description this long is cut off.
    style={'description_width': 'initial'},
)



# The function to call when clicking the button.

def _plot_cluster_counts(frame, columns, hue='cluster_predicted'):
    """Draw one count plot per column, side by side, split by the `hue` column.

    frame   -- DataFrame containing every name in `columns` plus the `hue` column.
    columns -- column names to plot; one subplot each, left to right.
    hue     -- name of the column used to colour the bars.
    """
    # squeeze=False keeps `axs` two-dimensional even for a single column.
    _, axs = plt.subplots(1, len(columns), figsize=(15, 5), squeeze=False)
    for ax, col in zip(axs[0], columns):
        sb.countplot(x=frame[col], order=frame[col].value_counts().index,
                     hue=frame[hue], ax=ax)
    plt.tight_layout()
    plt.show()


def run(b):
    """Button callback: cluster the current selection and show the results in a new tab.

    Reads the current selection from `qviz.selected` and the range of cluster
    counts from the `n_cluster` slider, appends a new "Sample <i>" tab to
    `results_tab`, fits one KModes model per cluster count in the selected
    range to build the cost curve, then fits a 2-cluster model and draws
    per-column count plots split by predicted cluster, plus a pair plot.

    b -- the widget that triggered the callback (unused).
    """
    # Snapshot the selection once so every tab below describes the same rows.
    data = qviz.selected
    n_samples = 0 if data is None else len(data)

    # Slider bounds are inclusive; int() also covers a float-valued slider.
    lo, hi = (int(v) for v in n_cluster.value)
    cluster_counts = list(range(lo, hi + 1))

    results = Box()
    i = len(results_tab.children)
    title = widgets.HTML(value=f"<h2>Running on {n_samples} samples with [{lo},{hi}] clusters</h2>")
    results_tab.children = tuple(list(results_tab.children)+[VBox([title,results])])
    results_tab.set_title(i,"Sample "+str(i))

    if n_samples == 0:
        # Nothing to cluster: say so instead of failing inside the model fit.
        results.children = (widgets.HTML(value="<p>No samples selected.</p>"),)
        return

    # One progress step per model fitted for the cost curve.
    progress = widgets.IntProgress(
        value=0,
        min=0,
        max=len(cluster_counts),
        step=1,
        description='Running:',
        bar_style='info', # 'success', 'info', 'warning', 'danger' or ''
        orientation='horizontal',
    )
    tab = widgets.Tab()
    results.children = (widgets.VBox([progress,tab]),)

    try:
        categorical_cols = ['ind_autenticacion_reforzada',
                            'tipo_autenticacion',
                            'autenticacion_esperada',
                            'estado_op_dec',
                            'estado_op_dec_ltx',
                            'motivo_estado',
                            'estado_op_trf',
                            'num_particion',
                            'cod_tipo_terminal',
                            'empresa',
                            'motivo_anulacion']

        # Work on an explicit copy so adding a column never writes into (or
        # warns about) a slice of the selection.
        data_cat = data[categorical_cols].copy()
        # Bucket the operation amount so it can be treated as a category.
        data_cat['importe'] = pd.cut(data['importe_operacion'], [0, 100, 200, 300, 400, 500],
                                     labels=['0-100', '100-200', '200-300', '300-400','400-500'])

        # Integer-encode every column for KModes; `data_cat` keeps the
        # readable values for plotting.
        le = preprocessing.LabelEncoder()
        data_encoded = data_cat.apply(le.fit_transform)

        # Cost curve over the cluster counts chosen on the slider.
        cost = []
        for num_clusters in cluster_counts:
            kmode = KModes(n_clusters=num_clusters, init = "Cao", n_init = 1)
            kmode.fit_predict(data_encoded)
            cost.append(kmode.cost_)
            progress.value += 1

        tab_contents = ['Centroids', 'Type of authentication', 'Operation State', 'Partitions', 'Company','Pairplot']
        children = [widgets.Output() for _ in tab_contents]
        # Children must be set before titles: set_title indexes into the
        # existing tabs on ipywidgets 8 and fails on an empty Tab.
        tab.children = children
        for idx, name in enumerate(tab_contents):
            tab.set_title(idx, name)

        with children[0]:
            plt.plot(cluster_counts, cost)
            plt.xlabel('Number of clusters')
            plt.ylabel('Cost')
            plt.show()

        with children[1]:
            # NOTE(review): the detail tabs always use 2 clusters, independent
            # of the slider -- confirm this is intended.
            km_cao = KModes(n_clusters=2, init = "Cao", n_init = 1)
            fitClusters_cao = km_cao.fit_predict(data_encoded)
            # drop=True discards the old index instead of turning it into
            # columns, so this works whatever the selection's index is called.
            combinedDf = data_cat.reset_index(drop=True)
            combinedDf['cluster_predicted'] = fitClusters_cao

            _plot_cluster_counts(combinedDf, ['ind_autenticacion_reforzada'])
            _plot_cluster_counts(combinedDf, ['tipo_autenticacion'])

        with children[2]:
            _plot_cluster_counts(combinedDf, ['estado_op_dec', 'estado_op_dec_ltx', 'motivo_estado'])

        with children[3]:
            _plot_cluster_counts(combinedDf, ['estado_op_trf', 'num_particion', 'cod_tipo_terminal'])

        with children[4]:
            _plot_cluster_counts(combinedDf, ['empresa', 'motivo_anulacion', 'importe'])

        with children[5]:
            # Only columns with more than one distinct value are plotted.
            nu = data.nunique()
            high_card = data[nu[nu>1].index]
            sb.pairplot(high_card)
            plt.show()
    finally:
        # Remove the progress bar whether the run finished or failed.
        progress.close()



# Results area: starts out with one "Preview" tab holding qviz.selected_sheet.
results_tab = widgets.Tab(children=[qviz.selected_sheet])
results_tab.set_title(0, "Preview")

# Hook the click handler up to the button.
l.on_click(run)

# Overall layout; as the last expression it is what the cell displays.
VBox(
    children=[
        ds,
        qviz,
        HBox(children=[n_cluster, l]),
        results_tab,
    ]
)