In [10]:
import os
import pandas as pd
from geo import get_place_list, get_place_data
from params import DATA_DIR

Load some data

* `all_geos` is a list of all geographies mentioned in the geography tree csv file
* `population_data` is population estimates for (some of) the geographies
* `council_tax_data` is estimates of people in receipt of council tax support
* `clif_data` is data about children living in poverty

In [11]:
# Population estimates for (some of) the geographies. This frame is stacked
# with the other source tables further down, so it is expected to carry the
# geography_code / variable_name / value columns that the pivot relies on.
population_data = pd.read_csv(
    f'{DATA_DIR}/population-estimates/population-estimates.csv'
)

In [12]:
# GVA table (presumably gross value added -- confirm against the data source).
# Stacked with the other source tables before the pivot further down.
gva = pd.read_csv(
    f'{DATA_DIR}/gva/gva.csv'
)

In [13]:
# Area-of-places table from the geo folder. Stacked with the other source
# tables before the pivot further down.
area_of_place = pd.read_csv(
    f'{DATA_DIR}/geo/area_of_places.csv'
)

In [14]:
# Households table. Stacked with the other source tables before the pivot
# further down.
households = pd.read_csv(
    f'{DATA_DIR}/households/households.csv'
)

In [15]:
# Estimates of people in receipt of council tax support, restricted to the
# rows carrying the largest value in the `date` column.
#
# Series.max() is used instead of the builtin max(): it is vectorised, skips
# missing values on numeric columns (the builtin is sensitive to where a NaN
# sits in the column), and does not raise on an empty frame.
# NOTE(review): if `date` is read as text, "largest" means lexicographically
# largest -- correct for ISO-style dates; confirm the file's date format.
council_tax_data = (
    pd.read_csv(f'{DATA_DIR}/council-tax-support/council-tax-support.csv')
    .loc[lambda df: df['date'] == df['date'].max()]
)

In [16]:
# Data about children living in poverty. Keep only the rows where every
# breakdown column is at its 'Total' value, and only for the largest `Year`
# present in the file.
#
# Series.max() is used instead of the builtin max(): it is vectorised, skips
# missing values on numeric columns (the builtin is sensitive to where a NaN
# sits in the column), and does not raise on an empty frame.
clif_data = (
    pd.read_csv(f'{DATA_DIR}/clif/clif_REL.csv')
    .loc[lambda df: (
        (df['Age of Child (years and bands)'] == 'Total') &
        (df['Gender of Child'] == 'Total') &
        (df['Family Type'] == 'Total') &
        (df['Work Status'] == 'Total') &
        (df['Year'] == df['Year'].max())
    )]
)

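Concatenate the loaded data, then pivot into a table with one row per geography code. Filter this to only include geographies that are in the canonical list of areas, then merge (outer join on the geography code) with the place details returned by `get_place_data()`.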

In [18]:
# Build the per-place table in a single chain:
#   1. stack every source table on top of each other,
#   2. pivot wide -- one row per geography_code, one column per variable_name,
#   3. keep only the geography codes returned by get_place_list(),
#   4. outer-join (index to index) with the frame from get_place_data(),
#   5. label the resulting index 'geography_code'.
place_data = (
    pd.concat([
        population_data,
        area_of_place,
        gva,
        households,
        council_tax_data,
        clif_data,
    ])
    .pivot(index='geography_code', columns='variable_name', values='value')
    .loc[lambda wide: wide.index.isin(get_place_list())]
    .merge(get_place_data(), left_index=True, right_index=True, how='outer')
    .rename_axis(index='geography_code')
)
place_data

Unnamed: 0_level_0,Area in sq km,GVA,Number of households,Number of persons,number_of_children,pensioners,working_age,ancestors,parents,children,name,type
geography_code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
E05000650,,,,14133.0,,,,"[E08000001, E47000001, E12000002, E12999901]",[E08000001],[],Astley Bridge,WD22
E05000651,,,,11331.0,,,,"[E08000001, E47000001, E12000002, E12999901]",[E08000001],[],Bradshaw,WD22
E05000652,,,,14078.0,,,,"[E08000001, E47000001, E12000002, E12999901]",[E08000001],[],Breightmet,WD22
E05000653,,,,13503.0,,,,"[E08000001, E47000001, E12000002, E12999901]",[E08000001],[],Bromley Cross,WD22
E05000654,,,,16828.0,,,,"[E08000001, E47000001, E12000002, E12999901]",[E08000001],[],Crompton,WD22
...,...,...,...,...,...,...,...,...,...,...,...,...
E47000003,2022.032423,57909.0,1035190.0,2349987.0,,,,"[E12000003, E12999901]",[E12000003],"[E08000032, E08000033, E08000034, E08000035, E...",West Yorkshire,CAUTH22
E47000004,722.669297,33598.0,729960.0,1551722.0,,,,"[E12000002, E12999901]","[E12000002, E12000002]","[E08000011, E08000012, E08000013, E08000014, E...",Liverpool City Region,CAUTH22
E47000006,801.149717,13951.0,317080.0,678173.0,,,,"[E12000001, E12999901]",[E12000001],"[E06000001, E06000002, E06000003, E06000004, E...",Tees Valley,CAUTH22
E47000010,2567.223988,22244.0,550480.0,1139626.0,,,,"[E12000001, E12999901]",[E12000001],"[E08000023, E08000024, E08000037, E06000047]",North East,CAUTH22


Finally, write the data to an interim parquet and json file for later usage.

In [24]:
# Make sure the interim output folder exists; exist_ok avoids an error when
# the cell is re-run.
os.makedirs(
    f'{DATA_DIR}/interim/',
    exist_ok=True,
)

# Parquet copy: written with geography_code as the index.
place_data.to_parquet(
    f'{DATA_DIR}/interim/place_data.parquet'
)

# JSON copy: the index is moved into an ordinary column first, so that each
# record in the output includes its geography_code.
(
    place_data
    .reset_index()
    .to_json(f"{DATA_DIR}/interim/place_data.json", orient="records")
)