# Variables

In [None]:
# Name of the person running the notebook; it is embedded in the Coiled
# cluster name and in the output file names.
YOUR_NAME = 'chris'

# AWS profile used for the S3 session.
AWS_PROFILE = 'CitiesUserPermissionSet'

# Grid size; it appears in the grid file names as "<grid_size>m_grid".
grid_size = 200

# Cities to process, written with their human-readable names.
cities = [
    "Belo Horizonte",
    "Campinas",
    "Bogota",
    "Nairobi",
    "Bamako",
    "Lagos",
    "Accra",
    "Abidjan",
    "Mogadishu",
    "Cape Town",
    "Maputo",
    "Luanda",
]

# Single-city subset for quick trial runs; uncomment the override to use it.
test_cities = ["Belo Horizonte"]
#cities = test_cities

# Paths are built from city names with underscores instead of spaces.
cities = list(map(lambda name: name.replace(' ', '_'), cities))
number_of_cities = len(cities)

print(f'City count: {number_of_cities}')

# Setup

In [None]:
%load_ext autoreload

In [None]:
# Root of the project data on S3; every other location is derived from it.
MAIN_PATH = "s3://wri-cities-sandbox/identifyingLandSubdivisions/data"

# Input locations.
INPUT_PATH = f'{MAIN_PATH}/input'
CITY_INFO_PATH = f'{INPUT_PATH}/city_info'
EXTENTS_PATH = f'{CITY_INFO_PATH}/extents'
# Built from CITY_INFO_PATH (like EXTENTS_PATH) so the city_info folder is
# spelled in exactly one place.
GRIDS_PATH = f'{CITY_INFO_PATH}/grids'
BUILDINGS_PATH = f'{INPUT_PATH}/buildings'
ROADS_PATH = f'{INPUT_PATH}/roads'
INTERSECTIONS_PATH = f'{INPUT_PATH}/intersections'

# Output locations, one folder per output format.
OUTPUT_PATH = f'{MAIN_PATH}/output'
OUTPUT_PATH_CSV = f'{OUTPUT_PATH}/csv'
OUTPUT_PATH_RASTER = f'{OUTPUT_PATH}/raster'
OUTPUT_PATH_PNG = f'{OUTPUT_PATH}/png'
OUTPUT_PATH_RAW = f'{OUTPUT_PATH}/raw_results'

In [None]:
import os

import boto3

# Open a session with the configured AWS profile and build an S3 client from it.
session = boto3.Session(profile_name=AWS_PROFILE)
s3 = session.client('s3')

# Export the same profile through the environment for the cells that follow.
os.environ['AWS_PROFILE'] = AWS_PROFILE

# Connection check: the bucket listing is shown as this cell's output.
s3.list_buckets()

In [None]:
import coiled

# Start a Coiled cluster named after the person running the notebook,
# then attach a Dask client to it.
cluster = coiled.Cluster(
    name=f'ils-{YOUR_NAME}',
    workspace="wri-cities-data",
    region="us-west-2",
    n_workers=4,
    worker_vm_types="r8g.xlarge",
    arm=True,
    spot_policy="spot",
)
client = cluster.get_client()

# Report where the Dask dashboard for this client can be reached.
print(f"Started a new Dask client on Coiled. Dashboard is available at {client.dashboard_link}")


# Run

## Functions

In [None]:
import dask_geopandas as dgpd
from dask import delayed, compute, visualize
from dask.diagnostics import ProgressBar
%autoreload
from citywide_calculation import get_utm_crs

@delayed
def get_epsg(city_name):
    """Derive the CRS code for a city from its urban-extent file.

    Reads ``{EXTENTS_PATH}/{city_name}/{city_name}_urban_extent.geoparquet``,
    computes the geometry selected by ``geometry[0]`` and hands it to
    ``get_utm_crs``. The result is printed (labelled "EPSG") and returned.

    Decorated with ``dask.delayed``, so calling it builds a lazy task
    rather than running immediately.
    """
    extent_path = f'{EXTENTS_PATH}/{city_name}/{city_name}_urban_extent.geoparquet'
    extent_geometry = dgpd.read_parquet(extent_path).geometry[0].compute()
    epsg = get_utm_crs(extent_geometry)
    print(f'{city_name} EPSG: {epsg}')
    return epsg

@delayed
def load_dataset(path, epsg=None):
    """Read one parquet dataset with dask-geopandas, optionally reprojecting it.

    Args:
        path: location of the parquet dataset.
        epsg: when truthy, the dataset is reprojected with
            ``to_crs(epsg=epsg)``; otherwise it is returned as read.

    Returns:
        The dataset produced by ``dgpd.read_parquet`` (reprojected when
        ``epsg`` is given).
    """
    frame = dgpd.read_parquet(path, npartitions=2)
    if not epsg:
        return frame
    return frame.to_crs(epsg=epsg)

@delayed
def row_count(dgdf):
    """Return the total number of rows of a partitioned dataframe.

    The length of every partition is taken with ``map_partitions(len)``;
    the per-partition lengths are then computed and summed.
    """
    partition_lengths = dgdf.map_partitions(len).compute()
    return partition_lengths.sum()


def test_math(input):
    return input + input

%autoreload
from metrics_calculation import metric_4_share_4way_intersections

def _count_points_per_cell(points, grid):
    """Count, per grid cell, the points that lie within that cell.

    Spatially joins ``points`` to ``grid`` with ``predicate='within'``,
    counts the joined rows per ``index_right`` value, and maps those counts
    onto the grid index. Cells without a count are filled with 0.
    """
    joined = dgpd.sjoin(points, grid, predicate='within')
    counts = joined.groupby('index_right').size()
    return grid.index.map(counts).fillna(0).astype(int)


@delayed
def metrics(city_name):
    """Compute the intersection counts and 4-way share for one city's grid.

    Loads the city's grid and OSM intersections (both reprojected to the CRS
    from ``get_epsg``), adds three columns to the grid and writes it out:

    * ``intersections_3plus`` - intersections with ``street_count >= 3``
    * ``intersections_4way`` - intersections with ``street_count == 4``
    * ``m4_new`` - ``intersections_4way / intersections_3plus``

    Args:
        city_name: city name as used in the S3 folder and file names.

    Returns:
        A ``(grid_cell_count, path)`` tuple: the size of the grid index and
        the location the augmented grid was written to.
    """
    paths = {
        'grid': f'{GRIDS_PATH}/{city_name}/{city_name}_{grid_size}m_grid.geoparquet',
        'intersections': f'{INTERSECTIONS_PATH}/{city_name}/{city_name}_OSM_intersections.geoparquet',
    }

    # Still a delayed value here; it is resolved when passed to load_dataset.
    epsg = get_epsg(city_name)

    # load_dataset is delayed, so .compute() runs the task and hands back
    # whatever load_dataset returned.
    grid = load_dataset(paths['grid'], epsg=epsg).compute()
    grid_cell_count = grid.index.size.compute()

    intersections = load_dataset(paths['intersections'], epsg=epsg).compute()
    intersections_3plus = intersections[intersections.street_count >= 3]
    intersections_4way = intersections[intersections.street_count == 4]

    grid['intersections_3plus'] = _count_points_per_cell(intersections_3plus, grid)
    grid['intersections_4way'] = _count_points_per_cell(intersections_4way, grid)

    # NOTE(review): cells without any 3+ way intersection divide by zero
    # here - confirm the resulting missing values are what downstream expects.
    grid['m4_new'] = grid['intersections_4way'] / grid['intersections_3plus']

    path = f'{OUTPUT_PATH}/city_info/grids/{city_name}/{city_name}_{grid_size}m_grid_{YOUR_NAME}.geoparquet'
    grid.to_parquet(path)
    return grid_cell_count, path






In [None]:
print(cities)

# Build one delayed metrics task per city; nothing runs until compute() below.
grid_calculations = [metrics(city_name) for city_name in cities]

#visualize(*grid_calculations)
calculated_grids = compute(*grid_calculations)
calculated_grids




In [None]:
# Add up the grid-cell counts returned for each city
total_grid_cells = sum(cell_count for cell_count, _ in calculated_grids)
print(f'Total grid cells: {total_grid_cells}')