# Two dimensional toy data (3)

The example is taken from:

* 石岡文生, & 栗原考次. (2012). Echelon 解析に基づくスキャン法によるホットスポット検出について. 統計数理, 60(1), 93–108.

In [1]:
# %load_ext autoreload
# %autoreload 2
# %matplotlib inline
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Show which version of the echelon package this kernel imported.
import echelon
echelon.__version__

'1.0.3'

In [3]:
from IPython.display import Markdown, display
import numpy as np

### Data
# Toy observations on a 6 x 4 grid. Later cells address grid cells by their
# flattened row-major position, so entry [r, c] has flat index 4 * r + c.
data = np.array(
    [
        [2, 5, 15, 9],      # flat indices 0-3
        [7, 21, 18, 4],     # flat indices 4-7
        [4, 3, 6, 5],       # flat indices 8-11
        [6, 5, 4, 2],       # flat indices 12-15
        [3, 1, 9, 4],       # flat indices 16-19
        [18, 27, 21, 24],   # flat indices 20-23
    ]
)

## Echelon construction

In [4]:
from echelon.api import TwoDimEchelonAnalysis
# Build the analyzer and apply it to the 2-D array. The returned result object
# is passed to the dendrogram, cluster and hotspot calls in the cells below.
analyzer = TwoDimEchelonAnalysis()
result = analyzer(data)
result

Result_EchelonAnalysis(peak_echelons=[[21], [23], [5, 6, 2, 3, 4, 10, 1, 11], [12, 13]], foundation_echelons=[[22, 20, 18], [7, 8, 14, 19, 16, 9, 0, 15, 17]], hierarchy_tree=Node('/5'), oracle=<echelon.oracle.NdarrayEchelonOracle object at 0x7f97d01026a0>)

## Echelon dendrogram

In [5]:
import itertools

# Human-readable cell labels in flat-index order: the column letter (A-D)
# varies fastest and the row number (1-6) slowest, e.g. 0 -> 'A1', 5 -> 'B2'.
_idx_names = [
    letter + digit
    for digit, letter in itertools.product('123456', 'ABCD')
]


def idx_map(idx):
    """Return the grid label (such as ``'B2'``) for flat cell index ``idx``.

    Defined with ``def`` rather than a lambda bound to a name, so the
    function carries its own name in reprs and tracebacks.
    """
    return _idx_names[idx]
# Print the echelon hierarchy as a text tree, labelling cells through idx_map.
print(analyzer.dendrogram(result, {'idx_map': idx_map}))

E6(D2,A3,C4,D5): [B5, D4, A1, B3, A5, D5, C4, A3, D2]
 (max: 4)
├── E5(C6): [C5, A6, C6]
│    (max: 21)
│   ├── E2(D6): [D6]
│   │    (max: 24)
│   └── E1(B6): [B6]
│        (max: 27)
├── E4(A4): [B4, A4]
│    (max: 6)
└── E3(B2): [D3, B1, C3, A2, D1, C1, C2, B2]
     (max: 21)


## Echelon clusters

In [6]:
import pandas as pd
pd.options.display.max_columns = None  # show every column instead of eliding

# Relabel each cluster row as "<first representative> Zone".
_df = analyzer.cluster(result)
_df['representatives'] = _df['representatives'].map(lambda reps: f'{reps[0]!s} Zone')

# Spread every cluster's member indices across columns (short rows are padded
# with '') and use the zone labels as the row index of the displayed table.
_zone_index = _df['representatives'].rename('Zone')
pd.DataFrame(_df['indices'].to_list()).fillna('').set_index(_zone_index)

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
Zone,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
21 Zone,21,22,20,18,14,19,16,15.0,17.0,,,,,
23 Zone,23,22,18,14,19,15,17,,,,,,,
5 Zone,5,6,2,3,4,10,1,11.0,7.0,8.0,14.0,9.0,0.0,15.0
12 Zone,12,13,8,14,16,9,15,17.0,,,,,,


## Hotspot detection

### Binomial score

In [7]:
from IPython.display import display

# Same total (1000) for every grid cell, matching the shape of `data`.
total_count_data = np.full(data.shape, 1000)

# Scan with the 'binomial' score, supplying the (totals, observed data) pair,
# then attach human-readable cell labels to every candidate spot.
scan_result = analyzer.hotspots(result, (total_count_data, data), 'binomial')
scan_result['spot_names'] = scan_result['spot'].map(
    lambda spot: [idx_map(cell) for cell in spot]
)
display(scan_result)

# First five rows by position: the frame's integer index labels are not
# sequential, so the positional intent is spelled out with .iloc.
hotspots = scan_result['spot'].iloc[:5]

# Draw each candidate as a 0/1 mask over the grid.
for rank, cells in enumerate(hotspots):
    mask = np.zeros(data.shape, dtype='int8')
    mask[np.unravel_index(cells, data.shape)] = 1  # flat indices -> (row, col)
    print('Hotspot candidate', rank)
    print(mask, end='\n\n')

Unnamed: 0,spot,score,c(Z),log_lambda,spot_names
11,"[23, 21, 22, 20]",35.107119,90,35.107119,"[D6, B6, C6, A6]"
12,"[23, 21, 22, 20, 18]",31.093692,99,31.093692,"[D6, B6, C6, A6, C5]"
10,"[23, 21, 22]",29.612942,72,29.612942,"[D6, B6, C6]"
0,[21],11.847901,27,11.847901,[B6]
4,"[5, 6, 2]",11.415365,54,11.415365,"[B2, C2, C1]"
3,"[5, 6]",9.549231,39,9.549231,"[B2, C2]"
5,"[5, 6, 2, 3]",9.298289,63,9.298289,"[B2, C2, C1, D1]"
1,[23],8.584302,24,8.584302,[D6]
6,"[5, 6, 2, 3, 4]",6.79855,70,6.79855,"[B2, C2, C1, D1, A2]"
2,[5],5.741914,21,5.741914,[B2]


Hotspot candidate 0
[[0 0 0 0]
 [0 0 0 0]
 [0 0 0 0]
 [0 0 0 0]
 [0 0 0 0]
 [1 1 1 1]]

Hotspot candidate 1
[[0 0 0 0]
 [0 0 0 0]
 [0 0 0 0]
 [0 0 0 0]
 [0 0 1 0]
 [1 1 1 1]]

Hotspot candidate 2
[[0 0 0 0]
 [0 0 0 0]
 [0 0 0 0]
 [0 0 0 0]
 [0 0 0 0]
 [0 1 1 1]]

Hotspot candidate 3
[[0 0 0 0]
 [0 0 0 0]
 [0 0 0 0]
 [0 0 0 0]
 [0 0 0 0]
 [0 1 0 0]]

Hotspot candidate 4
[[0 0 1 0]
 [0 1 1 0]
 [0 0 0 0]
 [0 0 0 0]
 [0 0 0 0]
 [0 0 0 0]]



In [8]:
from IPython.display import display

## Poisson score
# hotspots() receives no score arguments here (presumably Poisson is its
# default -- confirm against the echelon documentation).
scan_result = analyzer.hotspots(result)
scan_result['spot_names'] = scan_result['spot'].map(
    lambda spot: [idx_map(cell) for cell in spot]
)
display(scan_result)

# First five rows by position: the frame's integer index labels are not
# sequential, so the positional intent is spelled out with .iloc.
hotspots = scan_result['spot'].iloc[:5]

# Draw each candidate as a 0/1 mask over the grid.
for rank, cells in enumerate(hotspots):
    mask = np.zeros(data.shape, dtype='int8')
    mask[np.unravel_index(cells, data.shape)] = 1  # flat indices -> (row, col)
    print('Hotspot candidate', rank)
    print(mask, end='\n\n')

Unnamed: 0,spot,score,c(Z),log_lambda,spot_names
12,"[23, 21, 22, 20, 18]",1.084036,99,1.084036,"[D6, B6, C6, A6, C5]"
11,"[23, 21, 22, 20]",1.071376,90,1.071376,"[D6, B6, C6, A6]"
9,"[5, 6, 2, 3, 4, 10, 1, 11]",1.063436,86,1.063436,"[B2, C2, C1, D1, A2, C3, B1, D3]"
8,"[5, 6, 2, 3, 4, 10, 1]",1.051372,81,1.051372,"[B2, C2, C1, D1, A2, C3, B1]"
7,"[5, 6, 2, 3, 4, 10]",1.036769,76,1.036769,"[B2, C2, C1, D1, A2, C3]"
10,"[23, 21, 22]",1.023113,72,1.023113,"[D6, B6, C6]"
6,"[5, 6, 2, 3, 4]",1.015581,70,1.015581,"[B2, C2, C1, D1, A2]"
5,"[5, 6, 2, 3]",0.98516,63,0.98516,"[B2, C2, C1, D1]"
4,"[5, 6, 2]",0.935254,54,0.935254,"[B2, C2, C1]"
3,"[5, 6]",0.814508,39,0.814508,"[B2, C2]"


Hotspot candidate 0
[[0 0 0 0]
 [0 0 0 0]
 [0 0 0 0]
 [0 0 0 0]
 [0 0 1 0]
 [1 1 1 1]]

Hotspot candidate 1
[[0 0 0 0]
 [0 0 0 0]
 [0 0 0 0]
 [0 0 0 0]
 [0 0 0 0]
 [1 1 1 1]]

Hotspot candidate 2
[[0 1 1 1]
 [1 1 1 0]
 [0 0 1 1]
 [0 0 0 0]
 [0 0 0 0]
 [0 0 0 0]]

Hotspot candidate 3
[[0 1 1 1]
 [1 1 1 0]
 [0 0 1 0]
 [0 0 0 0]
 [0 0 0 0]
 [0 0 0 0]]

Hotspot candidate 4
[[0 0 1 1]
 [1 1 1 0]
 [0 0 1 0]
 [0 0 0 0]
 [0 0 0 0]
 [0 0 0 0]]

