# Summarise building density information

Access NISMOD-DB to download building data and summarise regional floor area and footprint density.

In [None]:
import configparser
import glob
import json
import os

import pandas
import geopandas
import requests
import shapely.wkt

In [None]:
def get_auth():
    """Return the ``(username, password)`` pair used for the NISMOD API.

    The environment variables ``NISMOD_API_USER`` and ``NISMOD_API_PASSWORD``
    are used when both are set. Otherwise the ``user`` and ``password`` keys
    of the ``nismod-api`` section in ``dbconfig.ini`` are read.
    """
    env = os.environ
    try:
        # Both variables must be present for the environment to win.
        return (env['NISMOD_API_USER'], env['NISMOD_API_PASSWORD'])
    except KeyError:
        pass

    # Fall back to the connection details stored in the local config file.
    config = configparser.ConfigParser()
    config.read('dbconfig.ini')
    section = config['nismod-api']
    return (section['user'], section['password'])

In [None]:
# Single LAD code (NOTE(review): not referenced by any other cell visible here).
LAD_CODE = 'E06000042'
# Year passed to the API when requesting buildings, households and assignments.
BUILDINGS_YEAR = 2017
# Directory where raw API responses are cached as JSON files.
CACHE_PATH = os.path.join('.', 'db-data')
# (username, password) tuple handed to `requests` for every API call.
AUTH = get_auth()

In [None]:
# LAD area codes for which data is downloaded; each one is sent to the API
# as `area_codes` with scale 'lad'.
arc_lad_codes = [
    # E06… codes
    "E06000031", "E06000032", "E06000042", "E06000055", "E06000056",
    # E07… codes
    "E07000004", "E07000005", "E07000006", "E07000007", "E07000008",
    "E07000009", "E07000010", "E07000011", "E07000012",
    "E07000150", "E07000151", "E07000152", "E07000153", "E07000154",
    "E07000155", "E07000156",
    "E07000177", "E07000178", "E07000179", "E07000180", "E07000181",
]

In [None]:
# Distinct values of the `lad_uk_2016` column in the baseline dwellings CSV.
lads = pandas.read_csv('data_as_provided/arc_dwellings__baseline.csv').lad_uk_2016.unique()
# Preview the first five codes.
lads[:5]

## Get Buildings

In [None]:
def get_buildings(auth, lad_code, year, force=False):
    """Download the MasterMap buildings for one LAD into the local cache.

    The response of the ``mastermap/buildings`` endpoint is streamed to
    ``CACHE_PATH/buildings_<lad_code>.json``. Nothing is requested when that
    file already exists, unless ``force`` is true.

    The cache file name does not include ``year``; pass ``force=True`` to
    refresh the cache after changing the year.

    Args:
        auth: ``(username, password)`` tuple passed to ``requests`` as ``auth``.
        lad_code: area code sent as ``area_codes`` with ``scale='lad'``.
        year: value sent as ``building_year``.
        force: download even if a cached file is already present.

    Raises:
        requests.HTTPError: if the API answers with an error status. The
            existing cache file (if any) is left untouched in that case.
    """
    os.makedirs(CACHE_PATH, exist_ok=True)
    buildings_file = os.path.join(CACHE_PATH, "buildings_{}.json".format(lad_code))

    if os.path.exists(buildings_file) and not force:
        return

    # Stream into a temporary file and rename at the end, so that a failed or
    # interrupted download is never mistaken for a complete cache entry.
    partial_file = buildings_file + ".part"
    with requests.get(
        'https://www.nismod.ac.uk/api/data/mastermap/buildings',
        auth=auth,
        params={
            'scale': 'lad',
            'area_codes': lad_code,
            'building_year': year
        },
        stream=True
    ) as r:
        # Fail loudly instead of caching an HTTP error body as if it were data.
        r.raise_for_status()
        with open(partial_file, 'wb') as f:
            for chunk in r.iter_content(chunk_size=8192):
                if chunk:  # filter out keep-alive new chunks
                    f.write(chunk)
    os.replace(partial_file, buildings_file)

In [None]:
# Make sure a cached buildings download exists for every LAD in the list
# (LADs that are already cached are not requested again).
for arc_lad_code in arc_lad_codes:
    print("Getting", arc_lad_code)
    get_buildings(AUTH, arc_lad_code, BUILDINGS_YEAR)

In [None]:
# Read every cached buildings download and stack them into one GeoDataFrame.
dfs = []

for arc_lad_code in arc_lad_codes:
    cached_name = "buildings_{}.json".format(arc_lad_code)
    with open(os.path.join(CACHE_PATH, cached_name)) as fh:
        print("Loading", arc_lad_code)
        lad_buildings = json.load(fh)
    df = geopandas.GeoDataFrame(lad_buildings)
    # `geom` holds WKT text; parse it into shapely geometries.
    df.geometry = df.geom.apply(shapely.wkt.loads)
    dfs.append(df)

buildings = pandas.concat(dfs)

In [None]:
# Cast floor_area to float (presumably it is not numeric as loaded from JSON — confirm).
buildings.floor_area = buildings.floor_area.astype(float)

In [None]:
# The WKT text column is redundant once the geometry column has been built.
buildings.drop(columns="geom", inplace=True)

In [None]:
# Serialise each class_code value to a JSON string
# (presumably so the column can be written to GeoPackage — confirm).
buildings.class_code = buildings.class_code.apply(json.dumps)

In [None]:
# Serialise each group_ids value to a JSON string
# (presumably so the column can be written to GeoPackage — confirm).
buildings.group_ids = buildings.group_ids.apply(json.dumps)

In [None]:
# Preview the first rows of the combined buildings table.
buildings.head()

In [None]:
# List the columns available on the buildings table.
buildings.columns

In [None]:
# Save the combined buildings layer as a GeoPackage file.
buildings.to_file("arc_buildings.gpkg", driver="GPKG")

## Get generic surfaces

- Open question: could these surfaces be associated with buildings by adjacency?

In [None]:
def get_plots(auth, lad_code, force=False):
    """Download the MasterMap areas for one LAD into the local cache.

    The response of the ``mastermap/areas`` endpoint (requested with
    ``classification_codes='all'``) is streamed to
    ``CACHE_PATH/plots_<lad_code>.json``. Nothing is requested when that file
    already exists, unless ``force`` is true.

    Args:
        auth: ``(username, password)`` tuple passed to ``requests`` as ``auth``.
        lad_code: area code sent as ``area_codes`` with ``scale='lad'``.
        force: download even if a cached file is already present.

    Raises:
        requests.HTTPError: if the API answers with an error status. The
            existing cache file (if any) is left untouched in that case.
    """
    os.makedirs(CACHE_PATH, exist_ok=True)
    plots_file = os.path.join(CACHE_PATH, "plots_{}.json".format(lad_code))

    if os.path.exists(plots_file) and not force:
        return

    # Stream into a temporary file and rename at the end, so that a failed or
    # interrupted download is never mistaken for a complete cache entry.
    partial_file = plots_file + ".part"
    with requests.get(
        'https://www.nismod.ac.uk/api/data/mastermap/areas',
        auth=auth,
        params={
            'scale': 'lad',
            'area_codes': lad_code,
            'classification_codes': 'all'
        },
        stream=True
    ) as r:
        # Fail loudly instead of caching an HTTP error body as if it were data.
        r.raise_for_status()
        with open(partial_file, 'wb') as f:
            for chunk in r.iter_content(chunk_size=8192):
                if chunk:  # filter out keep-alive new chunks
                    f.write(chunk)
    os.replace(partial_file, plots_file)

In [None]:
# Make sure a cached areas download exists for every LAD in the list
# (LADs that are already cached are not requested again).
for arc_lad_code in arc_lad_codes:
    print("Getting", arc_lad_code)
    get_plots(AUTH, arc_lad_code)

In [None]:
# Read every cached areas download into its own GeoDataFrame.
dfs = []

for arc_lad_code in arc_lad_codes:
    cached_name = "plots_{}.json".format(arc_lad_code)
    with open(os.path.join(CACHE_PATH, cached_name)) as fh:
        records = json.load(fh)
    df = geopandas.GeoDataFrame(records)
    # `geom` holds WKT text; parse it, then discard the text column.
    df.geometry = df.geom.apply(shapely.wkt.loads)
    df.drop(columns="geom", inplace=True)
    dfs.append(df)

In [None]:
# Stack the per-LAD area tables into one.
lad_plots = pandas.concat(dfs, axis=0)

In [None]:
# Serialise each descriptive_group value to a JSON string, which makes the
# column comparable and hashable as plain text.
lad_plots.descriptive_group = lad_plots.descriptive_group.apply(json.dumps)

In [None]:
# Serialise each theme value to a JSON string, which makes the column
# comparable and hashable as plain text.
lad_plots.theme = lad_plots.theme.apply(json.dumps)

In [None]:
# Inspect the distinct theme values present.
lad_plots.theme.unique()

In [None]:
# Inspect the distinct descriptive_group values present.
lad_plots.descriptive_group.unique()

In [None]:
# Keep only rows whose descriptive group is exactly ["General Surface"]. The
# comparison is against JSON text because the column was serialised with
# json.dumps above.
lad_plots = lad_plots[lad_plots.descriptive_group == '["General Surface"]']

In [None]:
# Preview the first rows of the filtered surfaces table.
lad_plots.head()

In [None]:
# Save the filtered surfaces layer as a GeoPackage file.
lad_plots.to_file("arc_surfaces.gpkg", driver="GPKG")

## Get land parcels

In [None]:
def get_parcels(auth, lad_code, force=False):
    """Download the MasterMap land parcels for one LAD into the local cache.

    The response of the ``mastermap/landparcels`` endpoint is streamed to
    ``CACHE_PATH/parcels_<lad_code>.json``. Nothing is requested when that
    file already exists, unless ``force`` is true.

    Args:
        auth: ``(username, password)`` tuple passed to ``requests`` as ``auth``.
        lad_code: area code sent as ``area_codes`` with ``scale='lad'``.
        force: download even if a cached file is already present.

    Raises:
        requests.HTTPError: if the API answers with an error status. The
            existing cache file (if any) is left untouched in that case.
    """
    os.makedirs(CACHE_PATH, exist_ok=True)
    parcels_file = os.path.join(CACHE_PATH, "parcels_{}.json".format(lad_code))

    if os.path.exists(parcels_file) and not force:
        return

    # Stream into a temporary file and rename at the end, so that a failed or
    # interrupted download is never mistaken for a complete cache entry.
    partial_file = parcels_file + ".part"
    with requests.get(
        'https://www.nismod.ac.uk/api/data/mastermap/landparcels',
        auth=auth,
        params={
            'scale': 'lad',
            'area_codes': lad_code
        },
        stream=True
    ) as r:
        # Fail loudly instead of caching an HTTP error body as if it were data.
        r.raise_for_status()
        with open(partial_file, 'wb') as f:
            for chunk in r.iter_content(chunk_size=8192):
                if chunk:  # filter out keep-alive new chunks
                    f.write(chunk)
    os.replace(partial_file, parcels_file)

In [None]:
# Fetch (if not yet cached) and load the land parcels of every LAD, then
# stack them into one GeoDataFrame.
dfs = []
for arc_lad_code in arc_lad_codes:
    print("Getting", arc_lad_code)
    get_parcels(AUTH, arc_lad_code)

    cached_name = "parcels_{}.json".format(arc_lad_code)
    with open(os.path.join(CACHE_PATH, cached_name)) as fh:
        records = json.load(fh)
    df = geopandas.GeoDataFrame(records)
    # `geom` holds WKT text; parse it, then discard the text column.
    df.geometry = df.geom.apply(shapely.wkt.loads)
    df.drop(columns="geom", inplace=True)
    dfs.append(df)

parcels = pandas.concat(dfs)

In [None]:
# Preview the first rows of the combined parcels table.
parcels.head()

In [None]:
# Save the combined parcels layer as a GeoPackage file.
parcels.to_file("arc_parcels.gpkg", driver="GPKG")

## Get households

In [None]:
def get_households(auth, lad_code, year, force=False):
    """Download the households for one LAD into the local cache.

    The response of the ``households/households`` endpoint is streamed to
    ``CACHE_PATH/households_<lad_code>.json``. Nothing is requested when that
    file already exists, unless ``force`` is true.

    The cache file name does not include ``year``; pass ``force=True`` to
    refresh the cache after changing the year.

    Args:
        auth: ``(username, password)`` tuple passed to ``requests`` as ``auth``.
        lad_code: area code sent as ``area_codes`` with ``scale='lad'``.
        year: value sent as ``year``.
        force: download even if a cached file is already present.

    Raises:
        requests.HTTPError: if the API answers with an error status. The
            existing cache file (if any) is left untouched in that case.
    """
    os.makedirs(CACHE_PATH, exist_ok=True)
    hfile = os.path.join(CACHE_PATH, "households_{}.json".format(lad_code))

    if os.path.exists(hfile) and not force:
        return

    # Stream into a temporary file and rename at the end, so that a failed or
    # interrupted download is never mistaken for a complete cache entry.
    partial_file = hfile + ".part"
    with requests.get(
        'https://www.nismod.ac.uk/api/data/households/households',
        auth=auth,
        params={
            'scale': 'lad',
            'area_codes': lad_code,
            'year': year
        },
        stream=True
    ) as r:
        # Fail loudly instead of caching an HTTP error body as if it were data.
        r.raise_for_status()
        with open(partial_file, 'wb') as f:
            for chunk in r.iter_content(chunk_size=8192):
                if chunk:  # filter out keep-alive new chunks
                    f.write(chunk)
    os.replace(partial_file, hfile)

In [None]:
# Fetch (if not yet cached) and load the households of every LAD in
# `arc_lad_codes`, then stack them into one table.
dfs = []
for arc_lad_code in arc_lad_codes:
    print("Getting", arc_lad_code)
    get_households(AUTH, arc_lad_code, BUILDINGS_YEAR)

    cached_name = "households_{}.json".format(arc_lad_code)
    with open(os.path.join(CACHE_PATH, cached_name)) as fh:
        records = json.load(fh)
    df = geopandas.GeoDataFrame(records)
    dfs.append(df)

households = pandas.concat(dfs)

In [None]:
# Write the combined households table to CSV (with pandas' default
# compression='infer', the .gz suffix selects gzip).
households.to_csv("arc_households.csv.gz")

In [None]:
# Extra LAD codes to request in addition to those found in the baseline CSV
# (NOTE(review): the name suggests codes from the 2011 list that are absent
# from the 2016 one — confirm).
lads_11_not_16 = [
    'E06000048',
    'E07000100',
    'E07000104',
    'E07000097',
    'E07000101',
    'E08000020'
]
lads = list(lads)
# Only append codes that are not already present, so that re-running this cell
# does not add the same LADs twice (which would duplicate their households in
# the combined download that follows).
lads.extend([code for code in lads_11_not_16 if code not in lads])
lads

In [None]:
# Fetch (if not yet cached) and load the households of every LAD in `lads`,
# then stack them into one table.
dfs = []
for lad_code in lads:
    print("Getting", lad_code)
    get_households(AUTH, lad_code, BUILDINGS_YEAR)

    cached_name = "households_{}.json".format(lad_code)
    with open(os.path.join(CACHE_PATH, cached_name)) as fh:
        records = json.load(fh)
    df = geopandas.GeoDataFrame(records)
    dfs.append(df)

all_households = pandas.concat(dfs)

In [None]:
# Write the households of all LADs to CSV (with pandas' default
# compression='infer', the .gz suffix selects gzip).
all_households.to_csv("all_households.csv.gz")

## Join buildings and households (by id)

- current data has `None` for `household_id` (buildings) and `hh_id` (households)

In [None]:
# Columns available on the buildings side of the join.
buildings.columns

In [None]:
# Distinct household ids recorded against buildings.
buildings.household_id.unique()

In [None]:
# Columns available on the households side of the join.
households.columns

In [None]:
# Distinct household ids recorded in the households table.
households.hh_id.unique()

In [None]:
# Number of building rows.
len(buildings)

In [None]:
# Number of household rows.
len(households)

## Get household assignment

In [None]:
def get_assignments(auth, lad_code, year, force=False):
    """Download the household assignments for one LAD into the local cache.

    The response of the ``assignment/household_assignment`` endpoint is
    streamed to ``CACHE_PATH/household_assignments_<lad_code>.json``. Nothing
    is requested when that file already exists, unless ``force`` is true.

    The cache file name does not include ``year``; pass ``force=True`` to
    refresh the cache after changing the year.

    Args:
        auth: ``(username, password)`` tuple passed to ``requests`` as ``auth``.
        lad_code: area code sent as ``area_codes`` with ``scale='lad'``.
        year: value sent as ``year``.
        force: download even if a cached file is already present.

    Raises:
        requests.HTTPError: if the API answers with an error status. The
            existing cache file (if any) is left untouched in that case.
    """
    os.makedirs(CACHE_PATH, exist_ok=True)
    hfile = os.path.join(CACHE_PATH, "household_assignments_{}.json".format(lad_code))

    if os.path.exists(hfile) and not force:
        return

    # Stream into a temporary file and rename at the end, so that a failed or
    # interrupted download is never mistaken for a complete cache entry.
    partial_file = hfile + ".part"
    with requests.get(
        'https://www.nismod.ac.uk/api/data/assignment/household_assignment',
        auth=auth,
        params={
            'scale': 'lad',
            'area_codes': lad_code,
            'year': year
        },
        stream=True
    ) as r:
        # Fail loudly instead of caching an HTTP error body as if it were data.
        r.raise_for_status()
        with open(partial_file, 'wb') as f:
            for chunk in r.iter_content(chunk_size=8192):
                if chunk:  # filter out keep-alive new chunks
                    f.write(chunk)
    os.replace(partial_file, hfile)
                    
# Download and load the household assignments of every LAD, then stack them
# into one table.
dfs = []
for arc_lad_code in arc_lad_codes:
    print("Getting", arc_lad_code)
    # force=True: the assignments are downloaded again on every run, even
    # when a cached file exists.
    get_assignments(AUTH, arc_lad_code, BUILDINGS_YEAR, force=True)

    cached_name = "household_assignments_{}.json".format(arc_lad_code)
    with open(os.path.join(CACHE_PATH, cached_name)) as fh:
        records = json.load(fh)
    df = geopandas.GeoDataFrame(records)
    dfs.append(df)

household_assignments = pandas.concat(dfs)

## Summarise buildings

In [None]:
# Total floor area, footprint area and res_count per `oa` and building
# classification.
# Summarise the combined `buildings` table rather than `df`: at this point
# `df` is only the leftover loop variable of the last table loaded above,
# not the buildings data this section (and the output file name) refers to.
# NOTE(review): only floor_area is explicitly cast to float earlier; confirm
# footprint_area and res_count are numeric before relying on these sums.
summary = buildings[
    ['oa', 'mistral_function_class', 'mistral_building_class', 'floor_area', 'footprint_area', 'res_count']
].groupby(
    ['oa', 'mistral_function_class', 'mistral_building_class']
).sum()
summary.head()

In [None]:
# Save the per-`oa` summary (the group keys are written as index columns).
summary.to_csv('summary_arc_buildings_by_oa.csv')

In [None]:
# Total floor area, footprint area and res_count per `lad` and building
# classification, saved to CSV.
# Summarise the combined `buildings` table rather than `df`: at this point
# `df` is only the leftover loop variable of the last table loaded above,
# not the buildings data this section (and the output file name) refers to.
# NOTE(review): only floor_area is explicitly cast to float earlier; confirm
# footprint_area and res_count are numeric before relying on these sums.
summary_lad = buildings[
    ['lad', 'mistral_function_class', 'mistral_building_class', 'floor_area', 'footprint_area', 'res_count']
].groupby(
    ['lad', 'mistral_function_class', 'mistral_building_class']
).sum()
summary_lad.to_csv('summary_arc_buildings_by_lad.csv')
summary_lad.head()