# US Unemployment Example

Example adapted from:
* Kurihara K. (2004). Classification of geospatial lattice data and their graphical representation. *Classification, Clustering, and Data Mining Applications*, (Edited by D. Banks et al.) Springer, 251–258.

In [1]:
# %load_ext autoreload
# %autoreload 2
from IPython.display import Markdown, display

import numpy as np
import pandas as pd

# One row per state: postal code, full name, unemployment rate, and the postal
# codes of the states treated as adjacent to it. Alaska and Hawaii are listed
# as neighbours of WA and CA respectively.
# NOTE(review): the rate values look like tenths of a percent (51 -> 5.1 %) —
# confirm against the cited source before quoting units.
df = pd.DataFrame([
    ("AL", 'Alabama', 51, ['FL', 'GA', 'MS', 'TN']),
    ("AK", 'Alaska', 79, ['WA']),
    ('AZ', 'Arizona', 46, ['CA', 'CO', 'NM', 'NV', 'UT']),
    ('AR', 'Arkansas', 53, ['LA', 'MS', 'MO', 'OK', 'TN', 'TX']),
    ('CA', 'California', 63, ['AZ', 'NV', 'OR', 'HI']),
    ('CO', 'Colorado', 33, ['AZ', 'KS', 'NE', 'NM', 'OK', 'UT', 'WY']),
    ('CT', 'Connecticut', 51, ['MA', 'NY', 'RI']),
    ('DE', 'Delaware', 40, ['MD', 'NJ', 'PA']),
    ('FL', 'Florida', 48, ['AL', 'GA']),
    ('GA', 'Georgia', 45, ['AL', 'FL', 'NC', 'SC', 'TN']),
    ('HI', 'Hawaii', 64, ['CA']),
    ('ID', 'Idaho', 53, ['MT', 'NV', 'OR', 'UT', 'WA', 'WY']),
    ('IL', 'Illinois', 47, ['IA', 'IN', 'KY', 'MO', 'WI']),
    ('IN', 'Indiana', 35, ['IL', 'KY', 'MI', 'OH']),
    ('IA', 'Iowa', 33, ['IL', 'MN', 'MO', 'NE', 'SD', 'WI']),
    ('KS', 'Kansas', 38, ['CO', 'MO', 'NE', 'OK']),
    ('KY', 'Kentucky', 54, ['IL', 'IN', 'MO', 'OH', 'TN', 'VA', 'WV']),
    ('LA', 'Louisiana', 61, ['AR', 'MS', 'TX']),
    ('ME', 'Maine', 54, ['NH']),
    ('MD', 'Maryland', 51, ['DE', 'PA', 'VA', 'WV']),
    ('MA', 'Massachusetts', 40, ['CT', 'NH', 'NY', 'RI', 'VT']),
    ('MI', 'Michigan', 42, ['IN', 'OH', 'WI']),
    ('MN', 'Minnesota', 33, ['IA', 'ND', 'SD', 'WI']),
    ('MS', 'Mississippi', 57, ['AL', 'AR', 'LA', 'TN']),
    ('MO', 'Missouri', 42, ['AR', 'IA', 'IL', 'KS', 'KY', 'NE', 'OK', 'TN']),
    ('MT', 'Montana', 54, ['ID', 'ND', 'SD', 'WY']),
    ('NE', 'Nebraska', 26, ['CO', 'IA', 'KS', 'MO', 'SD', 'WY']),
    ('NV', 'Nevada', 41, ['AZ', 'CA', 'ID', 'OR', 'UT']),
    ('NH', 'New Hampshire', 31, ['MA', 'ME', 'VT']),
    ('NJ', 'New Jersey', 51, ['DE', 'NY', 'PA']),
    ('NM', 'New Mexico', 62, ['AZ', 'CO', 'OK', 'TX', 'UT']),
    ('NY', 'New York', 64, ['CT', 'MA', 'NJ', 'PA', 'VT']),
    ('NC', 'North Carolina', 36, ['GA', 'SC', 'TN', 'VA']),
    ('ND', 'North Dakota', 25, ['MN', 'MT', 'SD']),
    ('OH', 'Ohio', 46, ['IN', 'KY', 'MI', 'PA', 'WV']),
    ('OK', 'Oklahoma', 41, ['AR', 'CO', 'KS', 'MO', 'NM', 'TX']),
    ('OR', 'Oregon', 58, ['CA', 'ID', 'NV', 'WA']),
    ('PA', 'Pennsylvania', 52, ['DE', 'MD', 'NJ', 'NY', 'OH', 'WV']),
    ('RI', 'Rhode Island', 53, ['CT', 'MA']),
    ('SC', 'South Carolina', 45, ['GA', 'NC']),
    ('SD', 'South Dakota', 31, ['IA', 'MN', 'MT', 'ND', 'NE', 'WY']),
    ('TN', 'Tennessee', 54, ['AL', 'AR', 'GA', 'KY', 'MO', 'MS', 'NC', 'VA']),
    ('TX', 'Texas', 54, ['AR', 'LA', 'NM', 'OK']),
    ('UT', 'Utah', 31, ['AZ', 'CO', 'ID', 'NM', 'NV', 'WY']),
    ('VT', 'Vermont', 40, ['MA', 'NH', 'NY']),
    ('VA', 'Virginia', 40, ['KY', 'MD', 'NC', 'TN', 'WV']),
    ('WA', 'Washington', 48, ['ID', 'OR', 'AK']),
    ('WV', 'West Virginia', 69, ['KY', 'MD', 'OH', 'PA', 'VA']),
    ('WI', 'Wisconsin', 37, ['IA', 'IL', 'MI', 'MN']),
    ('WY', 'Wyoming', 51, ['CO', 'ID', 'MT', 'NE', 'SD', 'UT'])
], columns=['code', 'name', 'unemployment rate', 'adjacent_codes'])

# Promote the default 0..n-1 index to an explicit 'id' column; these ids are
# the positions used when the adjacency matrix is filled in a later cell.
df = df.reset_index().rename(columns={'index': 'id'})

# Translate each neighbour code to its id through a dict built once, instead
# of filtering the whole frame for every neighbour. A code that is not present
# in the 'code' column raises KeyError.
code_to_id = dict(zip(df['code'], df['id']))
df['adjacent_id'] = df['adjacent_codes'].map(
    lambda codes: [code_to_id[code] for code in codes]
)
df

Unnamed: 0,id,code,name,unemployment rate,adjacent_codes,adjacent_id
0,0,AL,Alabama,51,"[FL, GA, MS, TN]","[8, 9, 23, 41]"
1,1,AK,Alaska,79,[WA],[46]
2,2,AZ,Arizona,46,"[CA, CO, NM, NV, UT]","[4, 5, 30, 27, 43]"
3,3,AR,Arkansas,53,"[LA, MS, MO, OK, TN, TX]","[17, 23, 24, 35, 41, 42]"
4,4,CA,California,63,"[AZ, NV, OR, HI]","[2, 27, 36, 10]"
5,5,CO,Colorado,33,"[AZ, KS, NE, NM, OK, UT, WY]","[2, 15, 26, 30, 35, 43, 49]"
6,6,CT,Connecticut,51,"[MA, NY, RI]","[20, 31, 38]"
7,7,DE,Delaware,40,"[MD, NJ, PA]","[19, 29, 37]"
8,8,FL,Florida,48,"[AL, GA]","[0, 9]"
9,9,GA,Georgia,45,"[AL, FL, NC, SC, TN]","[0, 8, 32, 39, 41]"


In [2]:
# Lookup from numeric row id to two-letter state code. Despite the "name" in
# the identifier, the values are taken from the 'code' column.
id_to_name_dict = dict(zip(df['id'], df['code']))

In [3]:
# Observed value for each state, in DataFrame row order.
data = df['unemployment rate'].to_numpy()

# Square 0/1 matrix indexed by state id: entry [i, j] is set to 1 when state j
# appears in state i's neighbour list. The ids are zero-based row ids.
n_states = len(df)
adjacency = np.zeros((n_states, n_states), dtype='int8')
for state_id, neighbour_ids in zip(df['id'], df['adjacent_id']):
    for neighbour_id in neighbour_ids:
        adjacency[state_id, neighbour_id] = 1

In [4]:
from echelon.api import EchelonAnalysis
# Build the analyzer and apply it to the per-state values and the adjacency
# matrix. Both `analyzer` and `result` are reused by later cells.
analyzer = EchelonAnalysis()
result = analyzer(data, adjacency)
# Bare expression so the notebook renders the result's repr.
result

  arr = asarray(arr)


Result_EchelonAnalysis(peak_echelons=[[1], [47], [10, 4, 36], [31], [30], [17, 23], [18], [25], [38]], foundation_echelons=[[16, 41, 42, 3], [11, 49], [37], [0, 19, 29, 6, 8, 12], [46], [2, 34], [9, 39, 24, 21, 35, 27, 20, 45, 44, 7, 15, 48, 32, 13, 5, 14, 22], [40, 43, 28, 26, 33]], oracle=<echelon.oracle.NdarrayEchelonOracle object at 0x7f95d9255ba8>)

In [5]:
# Render the peak and foundation echelons as tables of state codes, one row
# per echelon. Rows are padded with empty strings because echelons differ in
# length.
for heading, echelons in (
    ('### Peak Echelons', result.peak_echelons),
    ('### Foundation Echelons', result.foundation_echelons),
):
    display(Markdown(heading))
    code_rows = [[id_to_name_dict[idx] for idx in echelon] for echelon in echelons]
    display(pd.DataFrame(code_rows).fillna(''))

### Peak Echelons

Unnamed: 0,0,1,2
0,AK,,
1,WV,,
2,HI,CA,OR
3,NY,,
4,NM,,
5,LA,MS,
6,ME,,
7,MT,,
8,RI,,


### Foundation Echelons

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
0,KY,TN,TX,AR,,,,,,,,,,,,,
1,ID,WY,,,,,,,,,,,,,,,
2,PA,,,,,,,,,,,,,,,,
3,AL,MD,NJ,CT,FL,IL,,,,,,,,,,,
4,WA,,,,,,,,,,,,,,,,
5,AZ,OH,,,,,,,,,,,,,,,
6,GA,SC,MO,MI,OK,NV,MA,VA,VT,DE,KS,WI,NC,IN,CO,IA,MN
7,SD,UT,NH,NE,ND,,,,,,,,,,,,


In [6]:
pd.options.display.max_columns = None # Do not omit columns

display(Markdown('## Echelon Cluster Table'))
_df = analyzer.cluster(result).table
# Every cell holds a list of state ids; convert each list to state codes.
# Column-wise Series.map is used instead of DataFrame.applymap, which is
# deprecated in recent pandas releases. Like applymap, this builds a new frame
# and leaves the cluster's own table untouched.
_df = _df.apply(
    lambda column: column.map(lambda ids: [id_to_name_dict[index] for index in ids])
)
# Label each cluster by the first code in its 'representatives' list.
_df['representatives'] = _df['representatives'].map(lambda x: x[0] + ' Zone')
# One row per zone, one column per member position; short rows are padded
# with empty strings.
pd.DataFrame(_df['indices'].to_list()).fillna('').set_index(_df['representatives'].rename('Zone'))

## Echelon Cluster Table

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28
Zone,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1
AK Zone,AK,WA,,,,,,,,,,,,,,,,,,,,,,,,,,,
WV Zone,WV,KY,TN,AR,PA,AL,MD,NJ,FL,IL,OH,GA,SC,MO,MI,OK,VA,DE,KS,WI,NC,IN,CO,IA,MN,SD,UT,NE,ND
HI Zone,HI,CA,OR,ID,WY,WA,AZ,NV,CO,SD,UT,NE,ND,,,,,,,,,,,,,,,,
NY Zone,NY,PA,MD,NJ,CT,OH,MI,MA,VA,VT,DE,WI,NC,IN,IA,MN,SD,NH,NE,ND,,,,,,,,,
NM Zone,NM,TX,AR,AZ,MO,OK,NV,KS,CO,IA,MN,SD,UT,NE,ND,,,,,,,,,,,,,,
LA Zone,LA,MS,TN,TX,AR,AL,FL,GA,SC,MO,OK,VA,KS,NC,CO,IA,MN,SD,UT,NE,ND,,,,,,,,
ME Zone,ME,NH,,,,,,,,,,,,,,,,,,,,,,,,,,,
MT Zone,MT,ID,WY,WA,NV,CO,SD,UT,NE,ND,,,,,,,,,,,,,,,,,,,
RI Zone,RI,CT,MA,VT,NH,,,,,,,,,,,,,,,,,,,,,,,,
