# One dimensional toy data

Example adapted from

* Kurihara K. (2004). Classification of geospatial lattice data and their graphical representation. Classification, Clustering, and Data Mining Applications, (Edited by D.Banks et al.) Springer, 251–258.

In [1]:
# %load_ext autoreload
# %autoreload 2
# %matplotlib inline
import warnings
# Suppress every warning for the rest of the session (presumably to keep the
# rendered cell outputs tidy). NOTE(review): this also hides deprecation
# warnings from the libraries used below.
warnings.filterwarnings('ignore')

In [2]:
import echelon
# Display the installed package version so readers can tell which release
# produced the outputs recorded in this notebook.
echelon.__version__

'1.0.3'

In [3]:
import numpy as np

### Data
# One-dimensional toy series of 25 values. The integer indices reported by the
# analysis below (0..24) are positions in this array.
h = np.array([1, 2, 3, 4, 3, 4, 5, 4, 3, 2, 3, 4, 5, 6, 5, 6, 7, 6, 5, 4, 3, 2, 1, 2, 1])

## Echelon construction

In [4]:
from echelon.api import OneDimEchelonAnalysis
# Create the one-dimensional analyzer and run it on the toy series `h`.
analyzer = OneDimEchelonAnalysis()
result = analyzer(h)
# Keep `result` as the last expression so the notebook displays its repr
# (peak echelons, foundation echelons, hierarchy tree and oracle).
result

Result_EchelonAnalysis(peak_echelons=[[16, 17, 15], [13], [6, 5, 7], [3], [23]], foundation_echelons=[[12, 14, 18, 19, 11, 10, 20], [2, 4, 8], [1, 9, 21], [0, 22, 24]], hierarchy_tree=Node('/8'), oracle=<echelon.oracle.NdarrayEchelonOracle object at 0x7f823011a588>)

In [5]:
def _echelon_indicator(echelon):
    A = np.zeros(h.shape, dtype='int8')
    for e in echelon:
        A[e] = 1
    return A

# Print every echelon as a 0/1 mask over the positions of `h`.
# The loop variables are deliberately not named `echelon`: at notebook top
# level that name would rebind the imported `echelon` module for every cell
# executed afterwards.
for i, peak in enumerate(result.peak_echelons):
    print(f'Peak Echelon {i+1}')
    print(_echelon_indicator(peak))

for i, foundation in enumerate(result.foundation_echelons):
    print(f'Foundation Echelon {i+1}')
    print(_echelon_indicator(foundation))


Peak Echelon 1
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0]
Peak Echelon 2
[0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0]
Peak Echelon 3
[0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Peak Echelon 4
[0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Peak Echelon 5
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0]
Foundation Echelon 1
[0 0 0 0 0 0 0 0 0 0 1 1 1 0 1 0 0 0 1 1 1 0 0 0 0]
Foundation Echelon 2
[0 0 1 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Foundation Echelon 3
[0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0]
Foundation Echelon 4
[1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1]


## Echelon dendrogram

### Draw with numerical indices

In [6]:
# Print the dendrogram text so its multi-line tree layout is rendered line by
# line rather than shown as a single repr string.
print(analyzer.dendrogram(result))

E9(0,22,24): [24, 22, 0]
 (max: 1)

├── E8(1,9,21): [21, 9, 1]
│    (max: 2)
│   
│   ├── E7(2,4,8): [8, 4, 2]
│   │    (max: 3)
│   │   
│   │   ├── E4(3): [3]
│   │   │    (max: 4)
│   │   │   
│   │   └── E3(6): [7, 5, 6]
│   │        (max: 5)
│   │       
│   └── E6(12,14,18): [20, 10, 11, 19, 18, 14, 12]
│        (max: 5)
│       
│       ├── E2(13): [13]
│       │    (max: 6)
│       │   
│       └── E1(16): [15, 17, 16]
│            (max: 7)
│           
└── E5(23): [23]
     (max: 2)
    


### Draw with alphabetical labels (using custom string builder)

In [7]:
import string

def _echelon_to_str(echelon_id, _echelon, _max_idx, value, plot_config_dict):
    text = ''
    _max_items = ",".join([string.ascii_uppercase[i] for i in _max_idx])
    _echelon_items = ', '.join(map(lambda i: string.ascii_uppercase[i], reversed(_echelon)))
    return f'E{echelon_id+1}({_max_items}): [{_echelon_items}]\n (max: {value})\n'

# Render the dendrogram again, this time routing every node label through the
# letter-based builder defined in this cell.
print(
    analyzer.dendrogram(
        result,
        plot_config_dict={'_echelon_to_str': _echelon_to_str},
    )
)

E9(A,W,Y): [Y, W, A]
 (max: 1)

├── E8(B,J,V): [V, J, B]
│    (max: 2)
│   
│   ├── E7(C,E,I): [I, E, C]
│   │    (max: 3)
│   │   
│   │   ├── E4(D): [D]
│   │   │    (max: 4)
│   │   │   
│   │   └── E3(G): [H, F, G]
│   │        (max: 5)
│   │       
│   └── E6(M,O,S): [U, K, L, T, S, O, M]
│        (max: 5)
│       
│       ├── E2(N): [N]
│       │    (max: 6)
│       │   
│       └── E1(Q): [P, R, Q]
│            (max: 7)
│           
└── E5(X): [X]
     (max: 2)
    


## Echelon clusters

In [8]:
import pandas as pd
# Lift the column display limit so the wide membership table is shown in full.
pd.options.display.max_columns = None

_df = analyzer.cluster(result)
# Label each cluster by the first entry of its representatives, e.g. "16 Zone".
_df['representatives'] = [str(reps[0]) + ' Zone' for reps in _df['representatives']]

# One row per cluster, one column per member position; unused cells are left blank.
_zone_labels = _df['representatives'].rename('Zone')
_members = pd.DataFrame(_df['indices'].to_list()).fillna('')
_members.set_index(_zone_labels)

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8
Zone,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
16 Zone,16,17,15,14.0,18.0,19.0,20.0,21.0,22.0
13 Zone,13,12,14,11.0,10.0,9.0,,,
6 Zone,6,5,7,4.0,8.0,9.0,,,
3 Zone,3,2,4,1.0,0.0,,,,
23 Zone,23,22,24,,,,,,


## Hotspot detection

In [9]:
# Display the hotspot candidates computed from the analysis result; the
# rendered table lists each candidate spot with its score, c(Z) and log_lambda.
analyzer.hotspots(result)

Unnamed: 0,spot,score,c(Z),log_lambda
7,"[13, 16, 17, 15, 12, 14, 18, 19]",0.640049,44,0.640049
8,"[13, 16, 17, 15, 12, 14, 18, 19, 11]",0.639074,48,0.639074
9,"[13, 16, 17, 15, 12, 14, 18, 19, 11, 10]",0.633183,51,0.633183
6,"[13, 16, 17, 15, 12, 14, 18]",0.633183,40,0.633183
10,"[13, 16, 17, 15, 12, 14, 18, 19, 11, 10, 20]",0.622703,54,0.622703
5,"[13, 16, 17, 15, 12, 14]",0.613028,35,0.613028
4,"[13, 16, 17, 15, 12]",0.578439,30,0.578439
17,"[3, 6, 5, 7, 2, 4, 8]",0.538302,26,0.538302
16,"[3, 6, 5, 7, 2, 4]",0.4992,23,0.4992
15,"[3, 6, 5, 7, 2]",0.450414,20,0.450414
