# Human Development

In [None]:
from typing import List
from datachef import acquire, preview
from datachef.selection import XlsxSelectable

# Load the workbook over http, then preview a bounded region of its first table.
source_url = "https://raw.githubusercontent.com/mikeAdamss/datachef/main/tests/fixtures/xlsx/human-development-2008.xlsx"
tables: List[XlsxSelectable] = acquire.xlsx.http(source_url)
first_table = tables[0]
preview(first_table, bounded="A1:M32")

From an xlsx source which can be [downloaded here](https://raw.githubusercontent.com/mikeAdamss/datachef/main/tests/fixtures/xlsx/human-development-2008.xlsx).

## Requirements

- We're going to take the values of HDI Rank, as well as the Human Poverty Index Rank and Value (%), as observations under the column name "Value" and differentiate them with a column of "Category".
- We're going to take columns of "Year", "Area" and "HDI Category" in all cases.

**Notes**

This is a good example of using multiple passes over a single table to construct your tidy data output. In this instance we're going to use a Python dictionary to minimise the code repetition required to do so.

In [None]:
from typing import List
from datachef import acquire, filters, preview
from datachef.direction import right, down, left, up
from datachef.output import TidyData, Column
from datachef.selection import XlsxSelectable

# Fetch the workbook over http and work with its first table only.
source_url = "https://raw.githubusercontent.com/mikeAdamss/datachef/main/tests/fixtures/xlsx/human-development-2008.xlsx"
tables: List[XlsxSelectable] = acquire.xlsx.http(source_url)
table = tables[0]

# The footers are extensive and unwanted: starting from the single "NOTES"
# cell, expand right and then down to capture them for later subtraction.
notes_cell = table.re("NOTES").assert_one()
unwanted = notes_cell.expand(right).expand(down)

# One selection of observations per category, each with the footers removed.
hdi_rank = table.excel_ref("B").is_not_blank().shift(left) - unwanted
poverty_rank = table.re("Rank").assert_one().fill(down).is_not_blank() - unwanted
poverty_value = table.re("Value").assert_one().fill(down).is_not_blank() - unwanted

# Input dict of <category>: <observations>
input_dict = {
    "HDI Rank": hdi_rank,
    "Human Poverty Index: Rank": poverty_rank,
    "Human Poverty Index: Value (%)": poverty_value,
}

# Selections that are the same for each iteration
area = table.excel_ref("B").is_not_blank().label_as("Area")
hdi_category = table.excel_ref("B").is_blank().shift(left).label_as("HDI Category")

# The year is the last "-"-separated token of the third "."-separated piece
# of the table's source string.
source_pieces = table.source.split(".")
year_value = source_pieces[2].split("-")[-1]
    
# One TidyData per category; they are joined into a single output afterwards.
tidy_data_list = []

# The direction in which "Area" finds its observations changes based on the
# observations being extracted: the first entry of input_dict ("HDI Rank") is
# selected by shifting left from column B, so area looks left for it and
# right for every other entry. enumerate() supplies the position so no
# hand-maintained counter is needed.
for i, (category, observations) in enumerate(input_dict.items()):
    preview(observations, hdi_category, area, bounded="A12:M25")
    tidy_data_list.append(
        TidyData(
            observations.label_as("Value"),
            Column(hdi_category.finds_observations_closest(down)),
            Column(area.finds_observations_directly(left if i == 0 else right)),
            Column.constant("Year", year_value),
            Column.constant("Category", category),
        )
    )

# Combine the per-category results and write them out as a single csv.
all_tidy_data = TidyData.from_tidy_list(tidy_data_list)
all_tidy_data.to_csv("human-development.csv")


# Outputs

The tidy data can be [downloaded here](./human-development.csv), and a full inline preview of the generated tidy data is shown below for those who'd prefer to scroll.

In [None]:
# Print the tidy data so that it is displayed inline in the cell output.
print(all_tidy_data)