# Hierarchy in Indexes

In [None]:
import pandas as pd

In [None]:
# Load WSH_HYGIENE_BASIC.csv: CSV rows 0 and 2 form a two-level column
# index, and the first CSV column becomes the row index.
df = pd.read_csv('WSH_HYGIENE_BASIC.csv', header=[0, 2], index_col=0)
df.head()

In [None]:
# How could we have made something like the chart above with the basics of Pandas?
# Label-slice the rows and pick the '2015' label from the outer column level.
df.loc['Afghanistan':'Bangladesh', '2015']

In [None]:
# One country -> value mapping per population group, all sharing the same
# country keys in the same order.
countries = ['Afghanistan', 'Algeria', 'Angola', 'Armenia', 'Bangladesh']

rural = dict(zip(countries, [29.0, 73.0, 15.0, 77.0, 31.0]))
urban = dict(zip(countries, [64.0, 88.0, 37.0, 93.0, 58.0]))
total = dict(zip(countries, [38.0, 84.0, 25.0, 87.0, 40.0]))

# Wrap each mapping in a Series so the country names become the row labels.
data = {
    label: pd.Series(by_country)
    for label, by_country in [('Rural', rural), ('Urban', urban), ('Total', total)]
}

# A flat, single-level column index: close, but not hierarchical yet.
df = pd.DataFrame(data)  # Not quite it..
df

In [None]:
# Two-level column index: every outer label is paired with every inner label.
outer_level = ['2015']
inner_level = ['Rural', 'Total', 'Urban']
columns = pd.MultiIndex.from_product([outer_level, inner_level])
columns

In [None]:
# Row labels for the hand-built frame, in the order the rows should appear.
country_names = ['Afghanistan', 'Algeria', 'Angola', 'Armenia', 'Bangladesh']
index = pd.Index(country_names)
index

In [None]:
# Pull out the raw numbers in the same order as the labels of the `columns`
# MultiIndex (Rural, Total, Urban). On current pandas a frame built from a
# dict keeps the dict's insertion order (Rural, Urban, Total), so a bare
# `df.values` would put the Urban and Total figures under each other's labels
# once the MultiIndex is attached.
values = df[['Rural', 'Total', 'Urban']].values
values

In [None]:
# Rebuild the frame from the raw values, attaching the country row index and
# the two-level column index defined earlier.
df = pd.DataFrame(values, index=index, columns=columns)
df

In [None]:
df.stack()  # Move the innermost level of columns to be part of the row index

In [None]:
df.stack().stack()  # Move the remaining level of columns to be part of the row index as well

In [None]:
df.stack().stack().unstack()  # Move the innermost level of the row index back to the columns

In [None]:
df.stack().stack().unstack().unstack()  # Move another level of the row index to the columns

### Indexing

In [None]:
# Start again from the file: CSV rows 0 and 2 supply the two column levels,
# and the first CSV column supplies the row labels.
df = pd.read_csv('WSH_HYGIENE_BASIC.csv', header=[0, 2], index_col=0)
df.head()

In [None]:
df.loc[:, '2015'].head()  # A single label selects on the outer level of the hierarchical column index.

In [None]:
df.loc[:, ('2015', 'Rural')].head()  # A tuple walks down the levels: outer label first, then the inner one

In [None]:
# Combine a row label with a full (outer, inner) column tuple to pick out one cell
# (assuming both labels are unique).
df.loc['Algeria', ('2015', 'Rural')]

### Slicing

In [None]:
df.loc['Chad':'Costa Rica']  # Label slicing on the rows still works

In [None]:
df.loc['Chad':'Costa Rica', '2010':'2015']  # Raises UnsortedIndexError: slicing the column MultiIndex needs it to be sorted first

In [None]:
# `MultiIndex.lexsort_depth` was deprecated in pandas 1.2 and is not a public
# attribute in pandas 2.x; `is_monotonic_increasing` is the supported check.
df.columns.is_monotonic_increasing  # Expected False: the column index is not sorted in Pandas terms

In [None]:
df = df.sort_index(axis=1)  # axis=1 sorts the column index rather than the row index
df.head()  # Notice the years ascend now

In [None]:
# `MultiIndex.lexsort_depth` was deprecated in pandas 1.2 and is not a public
# attribute in pandas 2.x; `is_monotonic_increasing` is the supported check.
df.columns.is_monotonic_increasing  # Expected True once the columns have been sorted with sort_index(axis=1)

In [None]:
# Same slice as before; it is expected to work now that the column index is sorted.
df.loc['Chad':'Costa Rica', '2010':'2015']

In [None]:
df.loc['Chad':'Costa Rica', ('2010':'2015', 'Rural')]  # SyntaxError: the a:b slice syntax is not allowed inside a tuple

In [None]:
# pd.IndexSlice accepts slice syntax per level and builds the matching
# selector: a slice on the outer level plus the 'Rural' label on the inner one.
years = pd.IndexSlice['2010':'2015', 'Rural']
df.loc['Chad':'Costa Rica', years]

In [None]:
# Setting, Resetting indexes
# Reload the file so we start from the original frame: CSV rows 0 and 2 give
# the two column levels, the first CSV column gives the row labels.
df = pd.read_csv('WSH_HYGIENE_BASIC.csv', header=[0, 2], index_col=0)
df.head()

In [None]:
# col_fill supplies the label used on the other column level for the newly
# inserted column, since the columns here have more than one level.
df.reset_index(col_fill='Ct').head()  # Turn the country index into a regular column.

In [None]:
# Source: https://catalog.data.gov/dataset/demographic-statistics-by-zip-code-acfc9
# Read with the defaults, so this frame has ordinary single-level columns
# and a default integer row index.
df = pd.read_csv('Demographic_Statistics_By_Zip_Code.csv')
df.head()

In [None]:
# set_index returns a new frame here; `df` itself is left unchanged.
df.set_index('JURISDICTION NAME').head()  # Take a current column and make it the index

In [None]:
# Aggregations
# Reload the file once more: CSV rows 0 and 2 give the two column levels,
# the first CSV column gives the row labels.
df = pd.read_csv('WSH_HYGIENE_BASIC.csv', header=[0, 2], index_col=0)
df.head()

In [None]:
# Minimum for each label of the outer column level (the years), taken across
# that label's inner-level columns.
# The `level` keyword of DataFrame.min was removed in pandas 2.0, so group the
# transposed frame by the column level instead and transpose the result back.
df.T.groupby(level=0).min().T.head()  # Across all locations...

In [None]:
# Mean for each label of the inner column level, taken across every label of
# the outer level (the years).
# The `level` keyword of DataFrame.mean was removed in pandas 2.0, so group the
# transposed frame by the column level instead and transpose the result back.
df.T.groupby(level=1).mean().T.head()  # For all years...

We have enough tools to do some basic analysis!