### <center><h1>Evaluate empty and homogeneous regions</h1></center> 

- Empty regions analysis
- Homogeneous regions analysis

In [14]:
import warnings
# Suppress every warning category for the rest of the kernel session.
# NOTE(review): this also hides deprecation notices raised by the libraries
# imported in later cells — consider narrowing the filter to a specific category.
warnings.filterwarnings('ignore')

In [15]:
import pandas as pd
import numpy as np
import plotly.offline as py

from devcode.simulation import ResultProcessor

from devcode.utils import load_csv_as_pandas
from devcode import GLOBAL_MODEL_RESULT_PATH, LOCAL_MODEL_RESULT_PATH, REGIONAL_MODEL_RESULT_PATH

# Result tables keyed by model family; each one is read from its own CSV path,
# in the order global -> local -> regional.
_dataframes_dict = {
    model_type: load_csv_as_pandas(path=csv_path)
    for model_type, csv_path in (
        ('global', GLOBAL_MODEL_RESULT_PATH),
        ('local', LOCAL_MODEL_RESULT_PATH),
        ('regional', REGIONAL_MODEL_RESULT_PATH),
    )
}

# Dataset identifiers used to filter the `dataset_name` column in later cells.
ds_names = 'pk vc2c vc3c wf2f wf4f wf24f'.split()

# Enable plotly rendering inside the notebook.
py.init_notebook_mode(connected=True)

# Echo the dataset list as the cell output.
ds_names


['pk', 'vc2c', 'vc3c', 'wf2f', 'wf4f', 'wf24f']

### 1. Analysis of empty regions  <a class="anchor" id="empty-regions"></a>

In [16]:
import plotly.graph_objects as go
# Public import path; importing `display` from `IPython.core.display` is deprecated.
from IPython.display import display, HTML

# Column whose distribution is compared between the local and regional models.
region_column = "# empty regions"

# One grouped bar chart per dataset: how often each value of `region_column`
# occurs in the local (L-LSSVM) and regional (R-LSSVM) result tables.
for dataset_name in ds_names:
    display(HTML('<center><h1>'+dataset_name+'</h1></center>'))

    # get dataframe of the specific dataset
    df_local    = _dataframes_dict['local'].loc[_dataframes_dict['local']['dataset_name'] == dataset_name]
    df_regional = _dataframes_dict['regional'].loc[_dataframes_dict['regional']['dataset_name'] == dataset_name]

    # Count each series once and reuse the result for both bar axes.
    counts_local    = df_local[region_column].value_counts()
    counts_regional = df_regional[region_column].value_counts()

    fig = go.Figure(data=[
        go.Bar(
            name='L-LSSVM',
            x=counts_local.index.tolist(),
            y=counts_local.values
        ),
        go.Bar(
            name='R-LSSVM',
            x=counts_regional.index.tolist(),
            y=counts_regional.values
        )
    ])

    # All layout settings applied in a single call.
    fig.update_layout(
        barmode='group',                    # bars of both models side by side
        xaxis_title=region_column,
        yaxis_title='Frequency',
        bargap=0.4,                         # gap between bars of adjacent location coordinates
        legend=dict(x=.86, y=1),
        margin=dict(l=20, r=5, t=5, b=20),
    )

    fig.show()

    display(HTML('<hr>'))


### 2. Analysis of homogeneous regions  <a class="anchor" id="homogeneous-regions"></a>

In [17]:
import plotly.graph_objects as go
# Public import path; importing `display` from `IPython.core.display` is deprecated.
from IPython.display import display, HTML

# Column whose distribution is compared between the local and regional models.
region_column = "# homogeneous regions"

# One grouped bar chart per dataset: how often each value of `region_column`
# occurs in the local (L-LSSVM) and regional (R-LSSVM) result tables.
for dataset_name in ds_names:
    display(HTML('<center><h1>'+dataset_name+'</h1></center>'))

    # get dataframe of the specific dataset
    df_local    = _dataframes_dict['local'].loc[_dataframes_dict['local']['dataset_name'] == dataset_name]
    df_regional = _dataframes_dict['regional'].loc[_dataframes_dict['regional']['dataset_name'] == dataset_name]

    # Count each series once and reuse the result for both bar axes.
    counts_local    = df_local[region_column].value_counts()
    counts_regional = df_regional[region_column].value_counts()

    fig = go.Figure(data=[
        go.Bar(
            name='L-LSSVM',
            x=counts_local.index.tolist(),
            y=counts_local.values
        ),
        go.Bar(
            name='R-LSSVM',
            x=counts_regional.index.tolist(),
            y=counts_regional.values
        )
    ])

    # All layout settings applied in a single call.
    fig.update_layout(
        barmode='group',                    # bars of both models side by side
        xaxis_title=region_column,
        yaxis_title='Frequency',
        bargap=0.4,                         # gap between bars of adjacent location coordinates
        legend=dict(x=.86, y=1),
        margin=dict(l=20, r=5, t=5, b=20),
    )

    fig.show()

    display(HTML('<hr>'))


In [18]:
# Text summary: for every dataset, print the value counts of the
# '# homogeneous regions' column for the local and the regional result tables.
for dataset_name in ds_names:
    display(HTML(f'<center><h1>{dataset_name}</h1></center>'))

    for model_type in ('local', 'regional'):
        df_model = _dataframes_dict[model_type]
        # rows belonging to the current dataset only
        df_dataset = df_model.loc[df_model['dataset_name'] == dataset_name]
        region_counts = df_dataset['# homogeneous regions'].value_counts()

        # Same four lines as separate print() calls: model name, column label,
        # the counts table, and a single-space separator line.
        print(model_type, '# homogeneous regions', region_counts, ' ', sep='\n')

    display(HTML('<hr>'))

local
# homogeneous regions
1    21
2    12
4     7
3     5
0     2
5     2
6     1
Name: # homogeneous regions, dtype: int64
 
regional
# homogeneous regions
1    26
2    11
4     8
3     3
0     1
5     1
Name: # homogeneous regions, dtype: int64
 


local
# homogeneous regions
0    25
1    13
2     6
3     4
4     1
5     1
Name: # homogeneous regions, dtype: int64
 
regional
# homogeneous regions
0    30
1    10
2     8
3     1
4     1
Name: # homogeneous regions, dtype: int64
 


local
# homogeneous regions
0    25
2    12
1     8
3     5
Name: # homogeneous regions, dtype: int64
 
regional
# homogeneous regions
0    30
1    15
2     5
Name: # homogeneous regions, dtype: int64
 


local
# homogeneous regions
27    6
28    6
26    5
29    4
0     3
6     3
31    2
30    2
9     2
5     2
24    2
12    2
19    2
32    2
8     1
23    1
16    1
1     1
2     1
20    1
25    1
Name: # homogeneous regions, dtype: int64
 
regional
# homogeneous regions
3    16
2    13
1    10
4     7
5     2
0     2
Name: # homogeneous regions, dtype: int64
 


local
# homogeneous regions
0     25
1      3
8      3
6      3
4      3
5      2
9      2
2      2
15     1
12     1
14     1
3      1
7      1
11     1
10     1
Name: # homogeneous regions, dtype: int64
 
regional
# homogeneous regions
0    45
1     4
2     1
Name: # homogeneous regions, dtype: int64
 


local
# homogeneous regions
0    50
Name: # homogeneous regions, dtype: int64
 
regional
# homogeneous regions
0    50
Name: # homogeneous regions, dtype: int64
 
