# Generate Hexagon Labels for Districts and Cities

In [None]:
import geopandas  as gpd
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import libpysal
from tobler.util import h3fy
from h3 import h3
#import descartes
#from tobler.area_weighted import area_interpolate

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import os
from os.path import isfile, join
from pathlib import Path
from os import listdir
import os
# Directory the notebook is running from, and the directory one level above it.
path = Path.cwd()
root = path.parent.absolute()

# H3 resolution passed to h3fy for both hexagon grids built in this notebook.
h3_level = 8

# import libraries needed for upload / download to AWS
import boto3
import awswrangler
from fiona.session import AWSSession
import fiona

## 1. Create Boto3 session
Start by creating a boto3 session so that we can connect to the S3 bucket.

In [None]:
from aws_secrets import aws_access_key_id, aws_secret_access_key, aws_session_token

# Collect the credentials imported from aws_secrets and open the boto3 session
# that the S3 reads and writes in this notebook are given.
session_credentials = {
    "aws_access_key_id": aws_access_key_id,
    "aws_secret_access_key": aws_secret_access_key,
    "aws_session_token": aws_session_token,
}
my_session = boto3.Session(**session_credentials)

## 2. Generate hexagon labels for cities
### 2.1 Read shapefile of Los Angeles County city boundaries from S3 bucket

In [None]:
# S3 location of the Los Angeles County city boundaries shapefile.
city_boundaries_uri = "s3://traffic-data-bucket/raw_data/la_county_website_data/LA_County_City_Boundaries/LA_County_City_Boundaries.shp"

# Pass the boto3 session to fiona so the s3:// path is opened with our credentials.
aws_env = fiona.Env(session=AWSSession(my_session))
with aws_env:
    gdf = gpd.read_file(city_boundaries_uri)

# Preview two random rows of the boundaries layer.
gdf.sample(2)

In [None]:
# Show the column names of the city boundaries layer.
gdf.columns

In [None]:
# Build the H3 hexagon grid for the city boundaries at resolution h3_level.
# The index of the h3fy result is moved into a regular column, and the two
# resulting columns are named hex_id and geometry.
gdf_h39 = h3fy(gdf, h3_level).reset_index()
gdf_h39.columns = ['hex_id', 'geometry']

# Look up the centroid of every hexagon from its id; the resulting tuples are
# split into latitude / longitude columns in a later cell.
gdf_h39['model_layer_centroid'] = gdf_h39['hex_id'].apply(h3.h3_to_geo)

In [None]:
# Report the (rows, columns) size of the city hexagon table, then preview
# four random rows.
print(gdf_h39.shape)
gdf_h39.sample(4)

In [None]:
# Expand the centroid tuples into a two-column frame that shares the hexagon
# table's index, then store those columns as latitude and longitude.
centroid_frame = pd.DataFrame(
    gdf_h39['model_layer_centroid'].tolist(),
    index=gdf_h39.index,
)
gdf_h39[['latitude', 'longitude']] = centroid_frame
gdf_h39.sample(4)

In [None]:
# Keep only the hexagon id and its centroid coordinates; the hexagon polygons
# and the raw centroid tuples are dropped here.
city_hex_columns = ['hex_id', 'latitude', 'longitude']
gdf_h39 = gdf_h39[city_hex_columns]
gdf_h39.sample(3)

In [None]:
# Turn each hexagon centroid into a point geometry (x = longitude,
# y = latitude) tagged as EPSG:4326, and attach it as the table's geometry.
city_centroid_points = gpd.points_from_xy(
    gdf_h39.longitude,
    gdf_h39.latitude,
    crs="EPSG:4326",
)
gdf_h39 = gpd.GeoDataFrame(gdf_h39, geometry=city_centroid_points)

display(gdf_h39.sample(5))
gdf_h39.shape

In [None]:
# Label every hexagon centroid with the city polygon it falls in.
#
# The centroids were created as EPSG:4326 points, so they have to be reprojected
# into the CRS of the boundary layer before the join. Reprojecting to gdf.crs
# (instead of a hard-coded EPSG:3857) keeps both sides in the same CRS whatever
# projection the shapefile was published in; a CRS mismatch would otherwise
# produce an empty or wrong join.
city_attributes = gdf[['CITY_NAME', 'CITY_TYPE', 'CITY_LABEL', 'FEAT_TYPE', 'geometry']]
joined = gpd.sjoin(
    left_df=gdf_h39.to_crs(gdf.crs),
    right_df=city_attributes,
    how='left',  # keep hexagons whose centroid matches no city polygon
)
joined.sample(2)

In [None]:
# Reduce the join result to exactly one row of city attributes per hexagon.
#
# A centroid can match more than one polygon. groupby('hex_id').first() takes
# the first NON-NULL value of each column independently, so it can stitch
# together attributes from different cities into a single row. Keeping the
# first matching row as a whole avoids that. The stable sort keeps the row
# order within each hex_id and yields the same hex_id ordering the groupby
# produced.
city_label_columns = ['hex_id', 'CITY_NAME', 'CITY_TYPE', 'CITY_LABEL', 'FEAT_TYPE']
joined = (
    joined[city_label_columns]
    .sort_values('hex_id', kind='mergesort')
    .drop_duplicates(subset='hex_id', keep='first')
    .reset_index(drop=True)
)
display(joined.sample(2))
joined.shape

### 2.2 Write city labels to CSV and upload to S3

In [None]:
# Upload the per-hexagon city labels as a CSV (index column omitted) using the
# boto3 session created earlier.
city_labels_path = 's3://traffic-data-bucket/h3_processed_data/city_labels_hex.csv'
awswrangler.s3.to_csv(
    df=joined,
    path=city_labels_path,
    index=False,
    boto3_session=my_session,
    use_threads=True,
)

## 3. Generate hexagon labels for districts
### 3.1 Read shapefile of Los Angeles County supervisorial district boundaries from S3 bucket

In [None]:
# S3 location of the Los Angeles County supervisorial districts shapefile.
district_boundaries_uri = "s3://traffic-data-bucket/raw_data/la_county_website_data/Supervisorial_Districts_(2011)/Supervisorial_Districts_(2011).shp"

# Pass the boto3 session to fiona so the s3:// path is opened with our credentials.
district_aws_env = fiona.Env(session=AWSSession(my_session))
with district_aws_env:
    gdf_dist = gpd.read_file(district_boundaries_uri)

# Preview two random rows of the districts layer.
gdf_dist.sample(2)

In [None]:
# gdf_dist = gpd.read_file(root / 'X.data' / 'raw_data' /  'la_county_website_data' / 'Supervisorial_Districts_(2011)'/ 'Supervisorial_Districts_(2011).shp')
# gdf_dist.sample(2)

In [None]:
# Build the H3 hexagon grid for the district boundaries at resolution h3_level.
# The index of the h3fy result is moved into a regular column, and the two
# resulting columns are named hex_id and geometry.
gdf_dist_h39 = h3fy(gdf_dist, h3_level).reset_index()
gdf_dist_h39.columns = ['hex_id', 'geometry']

# Look up the centroid of every hexagon from its id; the resulting tuples are
# split into latitude / longitude columns in the next cell.
gdf_dist_h39['model_layer_centroid'] = gdf_dist_h39['hex_id'].apply(h3.h3_to_geo)

In [None]:
# Expand the centroid tuples into a two-column frame that shares the hexagon
# table's index, then store those columns as latitude and longitude.
district_centroid_frame = pd.DataFrame(
    gdf_dist_h39['model_layer_centroid'].tolist(),
    index=gdf_dist_h39.index,
)
gdf_dist_h39[['latitude', 'longitude']] = district_centroid_frame
gdf_dist_h39.sample(4)

In [None]:
# Keep only the hexagon id and its centroid coordinates; the hexagon polygons
# and the raw centroid tuples are dropped here.
district_hex_columns = ['hex_id', 'latitude', 'longitude']
gdf_dist_h39 = gdf_dist_h39[district_hex_columns]
gdf_dist_h39.sample(3)

In [None]:
# Turn each hexagon centroid into a point geometry (x = longitude,
# y = latitude) tagged as EPSG:4326, and attach it as the table's geometry.
district_centroid_points = gpd.points_from_xy(
    gdf_dist_h39.longitude,
    gdf_dist_h39.latitude,
    crs="EPSG:4326",
)
gdf_dist_h39 = gpd.GeoDataFrame(gdf_dist_h39, geometry=district_centroid_points)

display(gdf_dist_h39.sample(5))
gdf_dist_h39.shape

In [None]:
# Label every hexagon centroid with the supervisorial district it falls in.
#
# The centroids were created as EPSG:4326 points, so they have to be reprojected
# into the CRS of the district layer before the join. Reprojecting to
# gdf_dist.crs (instead of a hard-coded EPSG:3857) keeps both sides in the same
# CRS whatever projection the shapefile was published in; a CRS mismatch would
# otherwise produce an empty or wrong join.
district_attributes = gdf_dist[['DISTRICT', 'geometry']]
joined_dist = gpd.sjoin(
    left_df=gdf_dist_h39.to_crs(gdf_dist.crs),
    right_df=district_attributes,
    how='left',  # keep hexagons whose centroid matches no district polygon
)
joined_dist.sample(2)

In [None]:
# Collapse the join result to a single DISTRICT per hexagon: drop repeated
# (hex_id, DISTRICT) pairs, take the first value within each hex_id group, and
# turn hex_id back into a regular column.
district_pairs = joined_dist[['hex_id', 'DISTRICT']].drop_duplicates()
joined_dist = district_pairs.groupby('hex_id').first().reset_index()

display(joined_dist.sample(2))
len(joined_dist.hex_id.unique())  # value is neither stored nor printed
joined_dist.shape

### 3.2 Write district labels to CSV and upload to S3

In [None]:
# Upload the per-hexagon district labels as a CSV (index column omitted) using
# the boto3 session created earlier.
district_labels_path = 's3://traffic-data-bucket/h3_processed_data/district_labels_hex.csv'
awswrangler.s3.to_csv(
    df=joined_dist,
    path=district_labels_path,
    index=False,
    boto3_session=my_session,
    use_threads=True,
)