# Two-dimensional toy data (2)

Example adapted from

* Kurihara K. (2004). Classification of geospatial lattice data and their graphical representation. Classification, Clustering, and Data Mining Applications, (Edited by D.Banks et al.) Springer, 251–258.

In [1]:
# %load_ext autoreload
# %autoreload 2
# %matplotlib inline
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Show the installed echelon release for provenance; the outputs stored in
# this notebook were produced with version 1.1.0.
import echelon
echelon.__version__

'1.1.0'

In [3]:
import numpy as np
from echelon.api import TwoDimEchelonAnalysis
from echelon.test import _visualize_echelons

# 5x5 toy lattice, one value per grid cell. The outputs further down refer to
# cells by their row-major flat index (0..24): e.g. cell 14 is row 2, column 4
# and holds the value 25.
_lattice_rows = [
    [10, 24, 10, 15, 10],
    [10, 10, 14, 22, 10],
    [10, 13, 19, 23, 25],
    [20, 21, 12, 11, 17],
    [16, 10, 10, 18, 10],
]
data = np.array(_lattice_rows)
data

array([[10, 24, 10, 15, 10],
       [10, 10, 14, 22, 10],
       [10, 13, 19, 23, 25],
       [20, 21, 12, 11, 17],
       [16, 10, 10, 18, 10]])

## Echelon construction

In [4]:
# Build the analyzer and run it on the lattice.
# NOTE(review): adjacency_type='8' presumably selects the 8-neighbourhood
# (diagonal cells count as neighbours) -- confirm against the echelon docs.
analyzer = TwoDimEchelonAnalysis(adjacency_type='8')
result = analyzer(data)

# Render the echelon membership as a grid with the same shape as `data`; in the
# output each entry is the echelon number (the E1..E7 labels of the dendrogram)
# that the corresponding cell belongs to.
_echelon_map = _visualize_echelons(
    data.shape,
    result.peak_echelons,
    result.foundation_echelons,
)
print(_echelon_map)

[[7 2 7 6 7]
 [7 7 7 1 7]
 [7 7 5 1 1]
 [3 3 7 7 6]
 [6 7 7 4 7]]


## Echelon dendrogram

In [5]:
# Text rendering of the echelon hierarchy. Each node is printed as
# "E<k>(<i>): [flat indices of the member cells]" followed by "(max: <value>)",
# the largest data value in that echelon. In the output below <i> coincides
# with the flat index of the cell that holds this maximum.
_dendrogram_text = analyzer.dendrogram(result)
print(_dendrogram_text)

E7(7): [24, 22, 21, 10, 9, 6, 5, 4, 2, 0, 18, 17, 11, 7]
 (max: 14)

├── E6(19): [3, 20, 19]
│    (max: 17)
│   
│   ├── E5(12): [12]
│   │    (max: 19)
│   │   
│   │   ├── E3(16): [15, 16]
│   │   │    (max: 21)
│   │   │   
│   │   └── E1(14): [8, 13, 14]
│   │        (max: 25)
│   │       
│   └── E4(23): [23]
│        (max: 18)
│       
└── E2(1): [1]
     (max: 24)
    


## Echelon clusters

In [6]:
import pandas as pd
pd.options.display.max_columns = None # Do not omit columns

# One row per cluster; this cell relies on two columns of the returned frame,
# 'representatives' and 'indices', each holding a list per row.
_df = analyzer.cluster(result)
# Label every cluster after its first representative cell, e.g. "14 Zone".
_df['representatives'] = _df['representatives'].map(lambda x: str(x[0]) + ' Zone')
(
    pd.DataFrame(_df['indices'].to_list())
    # The index lists have different lengths, so pandas pads the short rows
    # with NaN and upcasts the padded columns to float; the cell indices would
    # then be displayed as "11.0", "17.0", ... Going through the nullable
    # integer dtype keeps them integers while still marking the padding.
    .astype('Int64')
    # Back to plain objects so the padding can be shown as empty strings.
    .astype(object)
    .fillna('')
    .set_index(_df['representatives'].rename('Zone'))
)

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18
Zone,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
14 Zone,14,13,8,12,19,3,7,11.0,17.0,18.0,2.0,4.0,5.0,6.0,9.0,10.0,21.0,22.0,24.0
1 Zone,1,7,11,17,18,0,2,5.0,6.0,10.0,21.0,22.0,24.0,,,,,,
16 Zone,16,15,12,20,7,11,17,18.0,2.0,5.0,6.0,10.0,21.0,22.0,24.0,,,,
23 Zone,23,19,17,18,21,22,24,,,,,,,,,,,,


## Hotspot detection

In [7]:
from IPython.display import Markdown, display

# Hotspot ranking with the default arguments, shown under the heading
# "Poisson score".
display(Markdown('### Poisson score'))
_default_hotspots = analyzer.hotspots(result)
display(_default_hotspots)

# Same ranking with score='binomial'. The extra argument pairs a constant
# array of 1000.0 (same shape as `data`) with the lattice values themselves.
# NOTE(review): presumably (per-cell population / number of trials, observed
# counts) -- confirm against the echelon hotspots documentation.
display(Markdown('### Binomial score'))
_constant_thousand = np.full(data.shape, 1000.0)
_binomial_hotspots = analyzer.hotspots(result, (_constant_thousand, data), score='binomial')
display(_binomial_hotspots)

### Poisson score

Unnamed: 0,spot,score,c(Z),log_lambda
9,"[12, 16, 15, 14, 13, 8, 23, 19, 20]",1.343756,181,1.343756
10,"[12, 16, 15, 14, 13, 8, 23, 19, 20, 3]",1.342216,196,1.342216
8,"[12, 16, 15, 14, 13, 8, 23, 19]",1.338101,165,1.338101
11,"[19, 20, 3, 12, 16, 15, 14, 13, 8, 23, 1, 7]",1.307554,234,1.307554
6,"[16, 15, 14, 13, 8, 12]",1.297634,130,1.297634
2,"[14, 13, 8]",1.099206,70,1.099206
1,"[14, 13]",0.945417,48,0.945417
5,"[16, 15]",0.877064,41,0.877064
0,[14],0.652171,25,0.652171
3,[1],0.633069,24,0.633069


### Binomial score

Unnamed: 0,spot,score,c(Z),log_lambda
11,"[19, 20, 3, 12, 16, 15, 14, 13, 8, 23, 1, 7]",17.35288,234,17.35288
9,"[12, 16, 15, 14, 13, 8, 23, 19, 20]",12.889175,181,12.889175
8,"[12, 16, 15, 14, 13, 8, 23, 19]",12.769402,165,12.769402
10,"[12, 16, 15, 14, 13, 8, 23, 19, 20, 3]",12.666625,196,12.666625
6,"[16, 15, 14, 13, 8, 12]",11.526677,130,11.526677
2,"[14, 13, 8]",7.301756,70,7.301756
1,"[14, 13]",5.310987,48,5.310987
0,[14],3.054091,25,3.054091
3,[1],2.522426,24,2.522426
5,"[16, 15]",2.150766,41,2.150766
