# House Prices

In [10]:
from typing import List
from datachef import acquire, preview
from datachef.selection import XlsSelectable

tables: List[XlsSelectable] = acquire.xls.http("https://raw.githubusercontent.com/mikeAdamss/datachef/main/tests/fixtures/xls/house-prices.xls")
preview(tables[3], bounded="A1:M20")

0,1,2,3,4,5,6,7,8,9,10,11,12,13
,A,B,C,D,E,F,G,H,I,J,K,L,M
1.0,"Table 11 Housing market: simple average house prices, by new/other dwellings, type of buyer and region, United Kingdom, from 1992 (quarterly) 1, 2, 3 (previously DCLG table 504)",,,,,,,,,,,,
2.0,,,,,,,,,,,,,
3.0,,,,,New dwellings,,Other dwellings4,,All dwellings,,First time buyers,,Former owner occupiers
4.0,,,,,Price,,Price,,Price,,Price,,Price
5.0,,,,,£,,£,,£,,£,,£
6.0,United Kingdom,,,,,,,,,,,,
7.0,K02000001,,,,,,,,,,,,
8.0,,1992.0,Q1,,,,,,,,,,
9.0,,,Q2,,77360.0,,60210.0,,62255.0,,46919.0,,76988.0


From an xlx source which can be [downloaded here](https://raw.githubusercontent.com/mikeAdamss/datachef/main/tests/fixtures/xls/house-prices.xls).

## Requirements

- We'll take "Year" and "Quarter" from the appropriate values in columns B and C.
- We'll take populated cells on row 4 as "Housing" and we'll strip the "4" notation away.
- We'll take "Area" and "Area Code" from column A (see United Kingdom and K02000001 as the examples).
- We'll call the observations column "Value" and we'll strip any trailing ".0"s.

The key lesson here is the use of `closest` to get the quarter. Remember the "closest" you can be to something on a directional axis is _level with it_ (so in this example: observations on 9 will resolve "closest above" to Q2 **also** on row 9).  

In [11]:
from typing import List
from datachef import acquire, preview, filters
from datachef.direction import up, down, right
from datachef.output import TidyData, Column
from datachef.selection import XlsSelectable

tables: List[XlsSelectable] = acquire.xls.http("https://raw.githubusercontent.com/mikeAdamss/datachef/main/tests/fixtures/xls/house-prices.xls")
table = tables[3]

# Do sensible things
assert table.name == "Table 11"

housing = table.re('New dwellings').assert_one().expand(right).is_not_blank().label_as("Housing")
area_code = table.excel_ref("A").is_not_blank().re("[A-Z][0-9].*").label_as("Area Code")
area = area_code.shift(up).label_as("Area")
year = area.shift(right).expand(down).is_not_blank().label_as("Year")
quarter = year.shift(right).expand(down).is_not_blank().label_as("Quarter")
observations = quarter.fill(right).is_not_blank().filter(filters.is_not_numeric).label_as("Value")

# Create a bounded preview inline but also write the full preview to path
preview(observations, housing, area_code, area, year, quarter, bounded="A1:M20")
preview(observations, housing, area_code, area, year, quarter, path="house-prices.html")

tidy_data = TidyData(
    observations,
    Column(housing.finds_observations_directly(down), apply=lambda x: x.rstrip("4")),
    Column(area.finds_observations_closest(down)),
    Column(area_code.finds_observations_closest(down)),
    Column(year.finds_observations_closest(down)),
    Column(quarter.finds_observations_directly(right)),
    obs_apply = lambda x: x.replace(".0", "")
)

tidy_data.to_csv("house-prices.csv")

0
Value
Housing
Area Code
Area
Year
Quarter

0,1,2,3,4,5,6,7,8,9,10,11,12,13
,A,B,C,D,E,F,G,H,I,J,K,L,M
1.0,"Table 11 Housing market: simple average house prices, by new/other dwellings, type of buyer and region, United Kingdom, from 1992 (quarterly) 1, 2, 3 (previously DCLG table 504)",,,,,,,,,,,,
2.0,,,,,,,,,,,,,
3.0,,,,,New dwellings,,Other dwellings4,,All dwellings,,First time buyers,,Former owner occupiers
4.0,,,,,Price,,Price,,Price,,Price,,Price
5.0,,,,,£,,£,,£,,£,,£
6.0,United Kingdom,,,,,,,,,,,,
7.0,K02000001,,,,,,,,,,,,
8.0,,1992.0,Q1,,,,,,,,,,
9.0,,,Q2,,77360.0,,60210.0,,62255.0,,46919.0,,76988.0


# Outputs

The full preview can be [downloaded here](./house-prices.html).

The tidy data can be [downloaded here](./house-prices.csv) and a full inline preview of the tidydata generated is shown below for those people who'd prefer to scroll.

In [12]:
print(tidy_data)

Value,Housing,Area,Area Code,Year,Quarter
77360.0,New dwellings,United Kingdom,K02000001,1992,Q2
60210.0,Other dwellings,United Kingdom,K02000001,1992,Q2
62255.0,All dwellings,United Kingdom,K02000001,1992,Q2
46919.0,First time buyers,United Kingdom,K02000001,1992,Q2
76988.0,Former owner occupiers,United Kingdom,K02000001,1992,Q2
69893.0,New dwellings,United Kingdom,K02000001,1992,Q3
62513.0,Other dwellings,United Kingdom,K02000001,1992,Q3
63712.0,All dwellings,United Kingdom,K02000001,1992,Q3
48578.0,First time buyers,United Kingdom,K02000001,1992,Q3
79493.0,Former owner occupiers,United Kingdom,K02000001,1992,Q3



