<center><h1>Processing results</h1></center>

## Summary:


1. [Regional LSSVM](#r-lssvm)

    1.1 [Accuracy in train and test sets](#r-lssvm_accuracy)

    1.2 [Eigenvalues analysis](#r-lssvm_eigenvalues)
    
    1.3 [Clustering metrics analysis](#r-lssvm_metrics)


# 1. Comparing Global, Local and Regional LSSVM <a class="anchor" id="r-lssvm"></a>

Loading results:

In [7]:
import pandas as pd
import numpy as np

from utils import load_csv_as_pandas
from evaluation import cm2f1, cm2acc, cm2sen, cm2esp

df = {
    'global'    : load_csv_as_pandas(path="results/local-results/cbic/temp_glssvm_cbic"),
    'local'     : load_csv_as_pandas(path="results/local-results/cbic/temp_llssvm_cbic/results"),
    'regional'  : load_csv_as_pandas(path="results/regional-results/temp_rlssvm_somfix/results")
}

In [8]:
datasets = np.unique(df['global']['dataset_name'].values).tolist()
datasets

['pk', 'vc2c', 'vc3c', 'wf24f', 'wf2f', 'wf4f']

In [9]:
# loading simulation results
df_results = pd.read_csv("results/regional-results/ROLS - all - n_res=100 - 2019-07-10.csv")
df_results.head()

Unnamed: 0,dataset_name,random_state,alpha0,sigma0,nEpochs,cm_tr,cm_ts
0,vc2c,127815836,0.1,3.0,100,[156 13 15 64],[32 9 7 14]
1,vc2c,127815836,0.1,10.0,100,[153 16 16 63],[32 9 6 15]
2,vc2c,127815836,0.1,6.5,100,[162 7 21 58],[34 7 9 12]
3,vc2c,127815836,0.1,10.0,300,[157 12 21 58],[32 9 8 13]
4,vc2c,127815836,0.1,6.5,300,[155 14 19 60],[33 8 7 14]


In [10]:
# TODO: vou ter que separar os resultados por parâmetros do SOM porque
# fiz a otimização de hiper-parâmetros erroneamente
# hyperparameters grid search:
num = 3
alphas = np.linspace(0.1, 0.5,  num=num).tolist()
sigmas = np.linspace(3,    10,   num=num).tolist()
epochs = np.linspace(100,  500, num=num, dtype='int').tolist()

som_params = [
    {
     "alpha0"  : alpha0
    ,"sigma0"  : sigma0
    ,"nEpochs" : nEpochs
    }
    for alpha0       in alphas
    for sigma0       in sigmas
    for nEpochs      in epochs
]

# header = list(som_params[0].keys()) + ['Minimum', 'Maximum', 'Median', 'Mean', 'Std. Deviation']
header = list(som_params[0].keys()) +\
        ["acc2filter", "Accuracy", "Sens.", "Spec.", "F1"]

df_ds = {dataset_name: {'tr': None, 'ts': None} for dataset_name in datasets}

for dataset_name in datasets: # For this specific dataset
    print(dataset_name)
    df = df_results.loc[df_results['dataset_name'] == dataset_name] # get simulation results
    
    
    
    # matriz que guardará resultados numéricos
    df_data = {
        'tr': np.empty((len(som_params), len(header)), dtype=object),
        'ts': np.empty((len(som_params), len(header)), dtype=object)
    }
    for set_ in ['tr', 'ts']:
        print(set_)
        count = 0
        for params in som_params:
            df_case = df.loc[(df['alpha0']  == params['alpha0']) & 
                             (df['sigma0']  == params['sigma0']) &
                             (df['nEpochs'] == params['nEpochs'])]


            # converting confusion matrix from string to numpy array
            cm = np.array(
                [
                    [int(x) for x in result[1:-1].split()] 
                    for result in df_case[f'cm_{set_}'].values
                ]
            )
                        
            if len(cm.shape) < 2:
#                 display(cm)
#                 display(cm.shape)
                df_case
                continue

            length = cm.shape[1]
            cm_side = int(np.sqrt(length))

            acc  = [0]*len(cm)
            sens = [0]*len(cm)
            spec = [0]*len(cm)
            f1   = [0]*len(cm)
            for i in range(len(cm)):
                cm_temp = np.reshape(cm[i], (cm_side,cm_side))

                acc[i]  = cm2acc(cm_temp)*100
                sens[i] = cm2sen(cm_temp)*100
                spec[i] = cm2esp(cm_temp)*100
                f1[i]   = cm2f1(cm_temp)*100

            df_data[set_][count,:] = np.matrix([
                params['alpha0'], params['sigma0'], params['nEpochs'], np.mean(acc), 
                "{:.2f} \$\pm\$ {:.2f}".format(np.mean(acc), np.std(acc)), 
                "{:.2f}".format(np.mean(sens)),
                "{:.2f}".format(np.mean(spec)), 
                "{:.2f}".format(np.mean(f1))
            ], dtype=object)

            count+=1

        df_ds[dataset_name][set_] = pd.DataFrame(df_data[set_], columns=header)
        #display(df_ds[dataset_name][set_].head())
    print('-'*100,'\n'*2)

pk
tr
ts
---------------------------------------------------------------------------------------------------- 


vc2c
tr
ts
---------------------------------------------------------------------------------------------------- 


vc3c
tr
ts
---------------------------------------------------------------------------------------------------- 


wf24f
tr
ts
---------------------------------------------------------------------------------------------------- 


wf2f
tr
ts
---------------------------------------------------------------------------------------------------- 


wf4f
tr
ts
---------------------------------------------------------------------------------------------------- 




In [11]:
for set_ in ['tr', 'ts']:
    print(set_)
    df_ds.keys()
    df_temp = {}
    for dataset_name, df_ts_ts in df_ds.items():
        df_temp[dataset_name] = df_ts_ts[set_]
        
    data = np.array([df.sort_values(
        'acc2filter', ascending=False).iloc[0,:].values for df in df_temp.values()
    ])
    idx_label = list(df_ds.keys())
    df_rols = pd.DataFrame(data, columns=header, index=[idx_label])
    display(df_rols)

tr


Unnamed: 0,alpha0,sigma0,nEpochs,acc2filter,Accuracy,Sens.,Spec.,F1
pk,0.1,10.0,500,94.205128,94.21 \$\pm\$ 4.10,97.13,85.22,96.2
vc2c,0.5,10.0,500,86.858871,86.86 \$\pm\$ 1.78,76.07,91.9,78.7
vc3c,0.5,10.0,100,85.741935,85.74 \$\pm\$ 1.57,92.85,82.47,92.77
wf24f,0.5,10.0,300,89.248167,89.25 \$\pm\$ 0.86,95.59,86.6,95.68
wf2f,0.1,10.0,300,91.23648,91.24 \$\pm\$ 1.58,96.57,88.78,96.63
wf4f,0.5,10.0,100,88.637718,88.64 \$\pm\$ 2.29,95.25,85.14,95.47


ts


Unnamed: 0,alpha0,sigma0,nEpochs,acc2filter,Accuracy,Sens.,Spec.,F1
pk,0.1,3.0,300,87.051282,87.05 \$\pm\$ 5.67,91.29,74.53,91.29
vc2c,0.1,10.0,100,83.241935,83.24 \$\pm\$ 4.50,71.37,89.43,73.54
vc3c,0.1,6.5,300,81.822581,81.82 \$\pm\$ 4.46,90.52,78.53,90.35
wf24f,0.5,10.0,300,86.001832,86.00 \$\pm\$ 1.22,94.21,82.96,94.28
wf2f,0.1,10.0,300,91.186813,91.19 \$\pm\$ 1.75,96.55,88.64,96.62
wf4f,0.5,6.5,300,88.372711,88.37 \$\pm\$ 2.03,95.13,84.64,95.36


## 1.1 Boxplots dos conjuntos de treino e teste <a class="anchor" id="boxplot_tr_ts"></a>

In [12]:
from IPython.core.display import display, HTML
import plotly.offline as py
import plotly.graph_objs as go
py.init_notebook_mode(connected=True) # enabling plot within jupyter notebook

set_dict = {'treino': 'cm_tr', 'teste': 'cm_ts'}

for dataset_name in datasets:
    display(HTML('<center><h1>'+dataset_name+'</h1></center>'))
    
    data = {}
    for classifier in df.keys():
        df_dataset = df[classifier].loc[df[classifier]['dataset_name'] == dataset_name]
        
        n_exp = len(df_dataset)
        data[classifier] = {
            'treino': [None]*n_exp, 
            'teste' : [None]*n_exp
        }

        for set_ in set_dict:
            cm_series = df_dataset[set_dict[set_]].values
            
            for i in range(n_exp):
                temp_cm = np.frombuffer(eval(cm_series[i]), dtype='int64')
                cm = temp_cm.reshape( int( len(temp_cm)**(1/2) ) ,-1)
                
                # acuracia:
                data[classifier][set_][i] = cm2acc(cm)*100
                
    boxs = []
    for set_ in set_dict:
        for classifier in df.keys():
            cor = {
                'global':   "rgba(44, 160, 101, 0.5)",
                'local':    "rgba(93, 164, 214, 0.5)",
                'regional': "rgba(155, 89, 182,1.0)"
            }
            boxs.append(
                go.Box(
                    y = data[classifier][set_],
                    x = ["{}{} {}-LSSVM".format(set_[0].upper(), set_[1:],
                                                  classifier[0].upper())
                        ]*len(data[classifier][set_]),
                    name = "{}-LSSVM".format(classifier[0].upper()),
                    boxmean='sd',
                    marker_color=cor[classifier],
#                     showlegend = False if set_=='treino' else True
                    )
            )
    


    layout = go.Layout(
#         title = "Acurácia nos conjuntos de treino e teste [<b>{}</b>]".format(dataset_name),
        yaxis=dict(title="Acurácia (%)"),
        showlegend=False,
        legend=dict(x=.875, y=1)
    )

    fig = go.Figure(data=boxs,layout=layout)
    width = 2
    fig.add_trace(go.Scatter(
        x=['Treino G-LSSVM', 'Treino L-LSSVM', 'Treino R-LSSVM', 'Treino G-LSSVM'],
        y=[np.mean(data['global']['treino']), np.mean(data['local']['treino']),
           np.mean(data['regional']['treino']), np.mean(data['global']['treino'])],
        mode='lines+markers',
        line=dict(
            color="RoyalBlue",
            dash="dashdot",
#             dash="dot",
            width=width
        ),
        showlegend=False
    ))
    
    fig.add_trace(go.Scatter(
        x=['Teste G-LSSVM', 'Teste L-LSSVM', 'Teste R-LSSVM', 'Teste G-LSSVM'],
        y=[np.mean(data['global']['teste']), np.mean(data['local']['teste']),
           np.mean(data['regional']['teste']), np.mean(data['global']['teste'])],
        mode='lines+markers',
        line=dict(
            color="RoyalBlue",
            dash="dashdot",
#             dash="dot",
            width=width
        ),
        showlegend=False
    ))
    
    fig.update_layout(
        margin=dict(l=20, r=5, t=5, b=20),
#         paper_bgcolor="LightSteelBlue",
    )
    
    fig.show()
    
    fig.write_image("images/r-lssvm_{}.pdf".format(dataset_name))
    
    display(HTML('<hr>'))


Importing display from IPython.core.display is deprecated since IPython 7.14, please import from IPython display



KeyError: 'dataset_name'

## 1.2 Eigenvalues analysis <a class="anchor" id="r-lssvm_eigenvalues"></a>

In [13]:
import plotly.graph_objects as go
import itertools
from IPython.core.display import display, HTML
      

for dataset_name in datasets:
    display(HTML('<center><h1>'+dataset_name+'</h1></center>'))
    print(" ")
    
    # get dataframe of the specific dataset
    df_dataset = df.loc[df['dataset_name'] == dataset_name]

    print("{}: {} runs. ".format(dataset_name, len(df_dataset)))
    
    hps_names = ["$k_{opt}$ [CV]","$\gamma_{opt}$ [CV]","$\sigma_{opt}$ [CV]"]
    hps_comb = np.unique(df_dataset[hps_names].values, axis=0) # hyperparam combinations
    for comb in hps_comb:
        df_comb = df_dataset.loc[(df_dataset[hps_names[0]]==comb[0]) & 
                                 (df_dataset[hps_names[1]]==comb[1]) &
                                 (df_dataset[hps_names[2]]==comb[2]) 
                                ]
        
        
        eigen_string = df_comb['eigenvalues'].values
        dtypes       = df_comb['eigenvalues_dtype'].values
        eigenvalues_list = [None]*len(dtypes)
        cond_list = []#np.empty(len(dtypes)) # conditioning
        for i in range( len(dtypes) ):
            eigvals_full = np.frombuffer(eval(eigen_string[i]), 
                                     dtype=dtypes[i])
            
            nan_indices = np.argwhere(np.isnan(eigvals_full))
            eigvals_list = [None]*len(nan_indices)
            last_nan=-1
            for j in range(len(nan_indices)):
            #     print(nan_indices[i][0])
                eigvals_list[j] = eigvals_full[last_nan+1:nan_indices[j][0]]
                last_nan=nan_indices[j][0]
                
                modules = np.absolute(eigvals_list[j])
                cond_list.append(np.amax(modules)/np.amin(modules))
            
            eigenvalues_list[i] = eigvals_list
        
        
        freq = len(eigenvalues_list)
        
        # list of lists to single list
        merged = list(itertools.chain(*eigenvalues_list)) 
        
        eigenvalues = np.concatenate(merged)
        cond_worst = np.amax(cond_list)

#         x = eigenvalues.real.tolist()
#         y = eigenvalues.imag.tolist()

#         fig = go.Figure()
#         fig.add_trace(
#             go.Histogram2dContour(x=x, y=y, colorscale='Hot', reversescale=True, xaxis='x', yaxis='y')
#         )
#         fig.add_trace(
#             go.Scatter(x=x, y=y, xaxis='x', yaxis='y', mode='markers',
#                 marker=dict(color='rgba(0,0,0,0.4)', size=6)
#             ))
#         fig.add_trace(go.Histogram(y=y,
#                 xaxis = 'x2',
#                 marker = dict(
#                     color = 'rgba(0,0,0,1)'
#                 )
#             ))
#         fig.add_trace(go.Histogram(x=x,
#                 yaxis = 'y2',
#                 marker = dict(
#                     color = 'rgba(0,0,0,1)'
#                 )
#             ))


#         title = "Eigenvalues in <b>{}</b> dataset with ".format(dataset_name)
#         title+="<b>k_opt={}; gamma={:.2E}; sigma={:.2E}</b>".format(comb[0], comb[1], comb[2])
#         title+= " [<b>{} instances</b>]".format(freq)
#         fig.update_layout(
#             title = title,
#             xaxis = dict(zeroline=False, showgrid=False,
#                 domain = [0,0.85], 
#                 title='Real part'
#             ),
#             yaxis = dict(zeroline = False, 
#                 domain = [0,0.85],
#                 title='Imaginary part'
#             ),
#             xaxis2 = dict(zeroline = False, showgrid = False,
#                 domain = [0.85,1]
#             ),
#             yaxis2 = dict(zeroline = False, showgrid = False,
#                 domain = [0.85,1]
#             ),
#             bargap = 0,
#             hovermode = 'closest',
#             showlegend = False
#         )

#         fig.show()

        print("case: ", end='')
        print("k_opt={}; gamma={:.2E}; sigma={:.2E}".format(int(comb[0]), comb[1], comb[2]), end=' ')
        print("[{:}]".format(str(freq)+" instances"))
        print("dtypes = {}".format(np.unique(dtypes)))
        print("Worst conditioning: {:.2E}".format(cond_worst))
        print("-"*55)
        
        print("\n")
    
    display(HTML('<hr>'))
#     print("\n"+"#"*100+"\n")


Importing display from IPython.core.display is deprecated since IPython 7.14, please import from IPython display



 
pk: 0 runs. 


KeyError: "None of [Index(['$k_{opt}$ [CV]', '$\\gamma_{opt}$ [CV]', '$\\sigma_{opt}$ [CV]'], dtype='object')] are in the [columns]"

## 1.3 Clustering metrics analysis <a class="anchor" id="r-lssvm_metrics"></a>

In [None]:
import plotly.graph_objects as go
from IPython.core.display import display, HTML

for dataset_name in datasets:
    display(HTML('<center><h1>'+dataset_name+'</h1></center>'))
    
    for model_type in ['regional']:#['local', 'regional']:
        n_metrics = int((len(df[model_type].columns[6:-4])-1)/4)

        metric_names = [' ']*n_metrics
        count=0
        for text in list(df[model_type].columns[-4-n_metrics:-4]):
            metric_names[count] = text[10:-1]
            count+=1
        
        # get dataframe of the specific dataset
        df_dataset = df[model_type].loc[df[model_type]['dataset_name'] == dataset_name]

        # boxplot to k_{opt}
        fig = go.Figure(data=[go.Histogram(
            x=df_dataset["$k_{opt}$ [CV]"].values.tolist(),
            xbins_size=1,
            marker_color='rgb(55, 83, 109)'
        )])
        fig.update_layout(
#             title = "Distribuição do k_opt para as {} rodadas no conjunto <b>{}</b> e modelagem <b>{}</b>".format(
#                 len(df_dataset), dataset_name, model_type),
            xaxis_title = 'Número de agrupamentos [regiões locais]',
            yaxis_title = 'Frequência',
            bargap=0.1, # gap between bars of adjacent location coordinates
        )
    #     fig.update_xaxes(range=[0, 4])
        fig.update_layout(
            margin=dict(l=20, r=5, t=5, b=20),
#             paper_bgcolor="LightSteelBlue",
        )
#         fig.show()
#         fig.write_image("images/r-lssvm_k_opt_dist_{}.pdf".format(dataset_name))

    for model_type in ['regional']:#['local', 'regional']:
        n_metrics = int((len(df[model_type].columns[6:-4])-1)/4)

        metric_names = [' ']*n_metrics
        count=0
        for text in list(df[model_type].columns[-4-n_metrics:-4]):
            metric_names[count] = text[10:-1]
            count+=1
            
        # get dataframe of the specific dataset and k_opt
        temp = 6+2*n_metrics
        df_dataset = df[model_type].loc[df[model_type]['dataset_name'] == dataset_name
                                       ].iloc[:,temp:(temp+n_metrics+1)]

        x = metric_names
        y = [0]*len(metric_names)
        for i in range(len(df_dataset)):
            for j in range(len(metric_names)):
                column_name = "$k_{opt}$"+" [{}]".format(metric_names[j])
                if df_dataset[column_name].values[i] == df_dataset["$k_{opt}$ [CV]"].values[i]:
                    y[j]+=1
        fig = go.Figure()
        for i in range(len(x)):
            fig.add_trace(go.Bar(
                x=[x[i]], 
                y=[y[i]],
                text=y[i],textposition='auto'))


    #     fig = go.Figure([go.Bar(
    #         x=x,
    #         y=y,
    #         text=y,
    #         textposition='auto',
    #         marker_color='lightsalmon'
    #     )]
    #     )
        fig.update_layout(
#             title_text='Frequência de acerto da proposta ótima para cada métrica no '+ \
#                         'conjunto <b>{}</b> e modelagem <b>{}</b>'.format(dataset_name, model_type),
            yaxis_title = 'Frequência de acerto do k_opt',
            showlegend=False
        )
        fig.update_layout(
            margin=dict(l=20, r=5, t=5, b=20),
#             paper_bgcolor="LightSteelBlue",
        )
        fig.show()
        fig.write_image("images/r-lssvm_metrics-k_opt-hit-frequency_{}.pdf".format(dataset_name))

    display(HTML('<hr>'))


# Comparing L-LSSVM and R-LSSVM:

## $k_{opt}$ comparision

In [None]:
import plotly.graph_objects as go
from IPython.core.display import display, HTML

for dataset_name in datasets:
    display(HTML('<center><h1>'+dataset_name+'</h1></center>'))
    

    # get dataframe of the specific dataset
    df_local    = df['local'].loc[df['local']['dataset_name'] == dataset_name]
    df_regional = df['regional'].loc[df['regional']['dataset_name'] == dataset_name]


    animals=['giraffes', 'orangutans', 'monkeys']
    fig = go.Figure(data=[
        go.Bar(
            name='L-LSSVM', 
            x=df_local["$k_{opt}$ [CV]"].value_counts().index.tolist(), 
            y=df_local["$k_{opt}$ [CV]"].value_counts().values
        ),
        go.Bar(
            name='R-LSSVM', 
            x=df_regional["$k_{opt}$ [CV]"].value_counts().index.tolist(), 
            y=df_regional["$k_{opt}$ [CV]"].value_counts().values
        )
    ])
    # Change the bar mode
    fig.update_layout(barmode='group')
    fig.update_layout(
#         title = "Distribuição do k_opt para as {} rodadas no conjunto <b>{}</b> e modelagem <b>{}</b>".format(
#             len(df_dataset), dataset_name, model_type),
        xaxis_title = 'Número de agrupamentos',
        yaxis_title = 'Frequência',
        bargap=0.4, # gap between bars of adjacent location coordinates
    )
    fig.update_layout(legend=dict(x=.86, y=1))


#         # boxplot to k_{opt}
#         fig = go.Figure(data=[go.Histogram(
#             x=df_dataset["$k_{opt}$ [CV]"].values.tolist(),
#             xbins_size=1,
#             marker_color='rgb(55, 83, 109)'
#         )])

    fig.update_layout(
        margin=dict(l=20, r=5, t=5, b=20),
#         paper_bgcolor="LightSteelBlue"
    )
    
    fig.show()
    fig.write_image("images/r_l-lssvm_k_opt_dist_{}.pdf".format(dataset_name))
    
    display(HTML('<hr>'))

## # of empty regions

In [None]:
import plotly.graph_objects as go
from IPython.core.display import display, HTML

for dataset_name in datasets:
    display(HTML('<center><h1>'+dataset_name+'</h1></center>'))

    # get dataframe of the specific dataset
    df_local    = df['local'].loc[df['local']['dataset_name'] == dataset_name]
    df_regional = df['regional'].loc[df['regional']['dataset_name'] == dataset_name]

    fig = go.Figure(data=[
        go.Bar(
            name='L-LSSVM', 
            x=df_local["# empty regions"].value_counts().index.tolist(), 
            y=df_local["# empty regions"].value_counts().values
        ),
        go.Bar(
            name='R-LSSVM', 
            x=df_regional["# empty regions"].value_counts().index.tolist(), 
            y=df_regional["# empty regions"].value_counts().values
        )
    ])
    # Change the bar mode
    fig.update_layout(barmode='group')
    fig.update_layout(
        xaxis_title = '# empty regions',
        yaxis_title = 'Frequence',
        bargap=0.4, # gap between bars of adjacent location coordinates
    )
    fig.update_layout(legend=dict(x=.86, y=1))


    fig.update_layout(
        margin=dict(l=20, r=5, t=5, b=20),
#         paper_bgcolor="LightSteelBlue"
    )
    
    fig.show()
    fig.write_image("images/r_l-lssvm_empty_region_dist_{}.pdf".format(dataset_name))



    display(HTML('<hr>'))


## # of homogeneous regions:

In [None]:
import plotly.graph_objects as go
from IPython.core.display import display, HTML

for dataset_name in datasets:
    display(HTML('<center><h1>'+dataset_name+'</h1></center>'))

    # get dataframe of the specific dataset
    df_local    = df['local'].loc[df['local']['dataset_name'] == dataset_name]
    df_regional = df['regional'].loc[df['regional']['dataset_name'] == dataset_name]

    fig = go.Figure(data=[
        go.Bar(
            name='L-LSSVM', 
            x=df_local["# homogeneous regions"].value_counts().index.tolist(), 
            y=df_local["# homogeneous regions"].value_counts().values
        ),
        go.Bar(
            name='R-LSSVM', 
            x=df_regional["# homogeneous regions"].value_counts().index.tolist(), 
            y=df_regional["# homogeneous regions"].value_counts().values
        )
    ])
    # Change the bar mode
    fig.update_layout(barmode='group')
    fig.update_layout(
        xaxis_title = '# homogeneous regions',
        yaxis_title = 'Frequence',
        bargap=0.4, # gap between bars of adjacent location coordinates
    )
    fig.update_layout(legend=dict(x=.86, y=1))


    fig.update_layout(
        margin=dict(l=20, r=5, t=5, b=20),
#         paper_bgcolor="LightSteelBlue"
    )
    
    fig.show()
    fig.write_image("images/r_l-lssvm_homogeneous_regions_dist_{}.pdf".format(dataset_name))

    display(HTML('<hr>'))


In [None]:
for dataset_name in datasets:
    display(HTML('<center><h1>'+dataset_name+'</h1></center>'))
    
    for model_type in ['local', 'regional']:
        print(model_type)
        # get dataframe of the specific dataset
        df_dataset = df[model_type].loc[df[model_type]['dataset_name'] == dataset_name]
#         print('# empty regions')
#         print(df_dataset['# empty regions'].value_counts())
        
        print('# homogeneous regions')
        print(df_dataset['# homogeneous regions'].value_counts())
        
        print(' ')
    
    display(HTML('<hr>'))