> Exploration of the sample OSM data in `test_data/inputs/tl/osm_tl`

In [1]:
# Reload edited modules automatically before each cell is executed
%reload_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import geopandas as gpd
from types import SimpleNamespace

In [3]:
import povertymapping.osm_data_proc as posm

In [4]:
import sys
import os

### Configuration 
> Set up a configuration object (which can be a dictionary or any dict-like object)

The OSM configuration assumes a file structure like the following:

* The following output file will be created:

```
+ 
+save_path +
           + <dhs_geo_zip_folder>_cluster_coords_osm_agg.csv  # the preprocessed OSM dataset 
                                                              # filename format: 
                                                              # e.g. TLGE71FL_cluster_coords_osm_agg.csv
```

* The following input files are required:

```
+
+repo_path +
           + data_dir +
                      + osm_folder +
                                     + "shape" +
                                               + <osm_shp_filename> # name of folder 
                                                                    # holding the osm shape file 
                                                                    # e.g. east-timor-latest-free.shp
                                     + "pbf" +
                                             + <osm_pbf_filename> # name of osm pbf file 
                                                                  # (set 'use_pbf=True' to use)
                                                                  # e.g. east-timor-latest.osm.pbf
+save_path +
           + <dhs_geo_zip_folder>_cluster_coords.csv # the geotagged cluster data 
                                                     # created by process_dhs_data 
                                                     # for the given country
                                                     # the dhs_geo_zip_folder should 
                                                     # be the same as the one set
                                                     # for the config for dhs_process_data
        
```

In [5]:
# Extra arguments carried inside the config under the "args" key.
args = SimpleNamespace(slice_interval="[0,100]")

# Settings passed to `posm.process_osm_data`, written as a plain dict literal.
# The same settings could instead be kept in a YAML or JSON file and loaded in.
osm_config = {
    "save_path": "../test_data/test_outputs/osm",
    "repo_path": "../test_data/inputs",
    "data_dir": "tl",  # input dir folder (usually country)
    "country": "tl",  # country
    "dhs_folder": "dhs_tl",  # dhs folder
    "dhs_geo_zip_folder": "TLGE71FL",  # folder holding DHS shape files
    "use_pbf": False,  # per the layout notes above, True switches to the pbf file
    "multiprocess": False,
    "args": args,
    "osm_country": "tl",
    "osm_folder": "osm_tl",
    "sample": False,
    "no_samples": 60,
    "random_sample": False,
    "random_seed": 42,
    "buffer_side_length": 4.0,
    # NOTE(review): presumably an EPSG code; confirm it is the intended CRS
    # for this country's data.
    "crs": "4683",
    "osm_shp_filename": "east-timor-latest-free.shp",
    "osm_pbf_filename": "east-timor-latest.osm.pbf",
    "clust_rad": 2000,
}

Run `process_osm_data`, passing your config object

In [6]:
# Clear out any previously preprocessed files and recreate the output folder
# (comment these lines out to keep existing outputs)
!rm -rf {osm_config['save_path']}
!mkdir -p {osm_config['save_path']}

In [7]:
# Copy the geotagged cluster coordinates file into the save_path folder
!cp ../data/outputs/dhs_{osm_config['country']}/{osm_config['dhs_geo_zip_folder']}_cluster_coords.csv {osm_config['save_path']}/.

In [8]:
%%time
# Run the OSM preprocessing using the settings in osm_config
posm.process_osm_data(osm_config)


Loading osm shape files by layer...
This might take a while...



100%|█████████████████████████████████████████████████████████████████████████████| 455/455 [01:26<00:00,  5.24it/s]


Saving aggregate dataframe...
CPU times: user 1min 29s, sys: 2.66 s, total: 1min 32s
Wall time: 1min 32s


Check that the preprocessed files have been created

In [9]:
from pathlib import Path

In [10]:
# Path of the aggregated OSM csv expected inside the save_path folder
posm_df_path = Path(osm_config['save_path']).joinpath(
    f'{osm_config["dhs_geo_zip_folder"]}_cluster_coords_osm_agg.csv'
)

In [11]:
# Load the aggregated OSM features from the csv located above.
# NOTE(review): the file appears to carry saved index columns (they show up as
# `Unnamed: 0*` in the preview below); confirm whether they should be dropped.
posm_df = pd.read_csv(filepath_or_buffer=posm_df_path)

In [12]:
# Number of rows in the aggregated dataset
posm_df.shape[0]

455

In [13]:
# Preview the first five rows of the aggregated dataset
posm_df.head(n=5)

Unnamed: 0.1,Unnamed: 0,longitude,latitude,building_ct,no_roads,no_primary_roads,no_trunk_roads,marketplace_count,charging_station_count,post_box_count,...,camp_site_count,city_count,convenience_count,supermarket_count,car_repair_count,department_store_count,computer_count,playground_count,monument_count,DHSID
0,0,125.567381,-8.712016,161,84,3,0,0,0,0,...,0,0,0,0,0,0,0,0,0,TL201600000001
1,1,125.590219,-8.730226,29,33,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,TL201600000002
2,2,125.556399,-8.74134,71,73,5,0,0,0,0,...,0,0,0,0,0,0,0,0,0,TL201600000003
3,3,125.535161,-8.811291,0,13,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,TL201600000004
4,4,125.473219,-8.79159,0,10,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,TL201600000005
