# Generate a Hex Grid GeoDataFrame for Los Angeles County

In [None]:
import geopandas  as gpd
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import libpysal
from tobler.util import h3fy
from h3 import h3

import os
from os.path import isfile, join
from pathlib import Path
from os import listdir
import os
# Directory the notebook is running from, and its parent directory, which the
# later cells use as the base for the local 'X.data' output paths.
path = Path.cwd()
root = path.parent.absolute()

# import libraries needed for upload / download to AWS
import boto3
import awswrangler
from fiona.session import AWSSession
import fiona
# Name of the S3 bucket this notebook reads the county shapefile from and
# uploads the generated hex grid files to.
s3_bucket = 'traffic-data-bucket'

#### Troubleshooting AWS Wrangler
If importing AWS Wrangler fails with the error `AttributeError: module 'multiprocessing' has no attribute 'connection'`, try downgrading some of its dependencies: `pip install fsspec==0.6.3 PyAthena==1.10.2 s3fs==0.4.0`

## 1. Create Boto3 session
Start by creating a boto3 session so that we can connect to the S3 bucket.

In [None]:
# Credentials live in a local aws_secrets module so they stay out of the notebook.
from aws_secrets import aws_access_key_id, aws_secret_access_key, aws_session_token

# Gather the credential values once and hand them to boto3 as keyword arguments.
_aws_credentials = {
    "aws_access_key_id": aws_access_key_id,
    "aws_secret_access_key": aws_secret_access_key,
    "aws_session_token": aws_session_token,
}
my_session = boto3.Session(**_aws_credentials)

## 2. Read shapefile of Los Angeles County from S3 bucket

Next, use Fiona to read the shapefile into GeoPandas. If you try to read the shapefile from the S3 bucket directly into GeoPandas without wrapping the call in a Fiona AWS session, the read fails with a timeout error. The link below explains this in more detail.

https://github.com/Toblerity/Fiona/issues/1055

In [None]:
# Location of the county boundaries shapefile inside the bucket.
county_shp_uri = f"s3://{s3_bucket}/raw_data/la_county_website_data/County_Boundaries/County_Boundaries.shp"

# Read inside a Fiona environment that carries the boto3 session, so the S3
# request is made with that session's credentials.
with fiona.Env(session=AWSSession(my_session)):
    gdf = gpd.read_file(county_shp_uri)

## 3. Create a hexgrid over Los Angeles County

In [None]:
# Keep only the boundary record whose OBJECTID is 22.
# NOTE(review): presumably this is the Los Angeles County polygon - confirm
# against the shapefile's attribute table.
is_target_record = gdf['OBJECTID'] == 22
gdf = gdf[is_target_record]

In [None]:
# Sanity check: show what kind of object the filtered selection is.
type(gdf)

In [None]:
# H3 resolution passed to h3fy when the grid is built.
h3_level = 8

# Echo the chosen level so it is recorded in the notebook output.
level_label = 'Hexagon level:'
print(level_label, h3_level)

In [None]:
# Build the H3 hexagon grid from the selected boundary at the chosen resolution.
gdf_hex = h3fy(gdf, resolution=h3_level)

In [None]:
# Show the columns as returned by h3fy, before the index is touched.
print(gdf_hex.columns)

# Turn the index into a regular column. The export cell later selects a
# 'hex_id' column, so presumably the index holds the hexagon ids - confirm.
gdf_hex = gdf_hex.reset_index()

# Report (rows, columns) of the resulting grid.
gdf_hex.shape

In [None]:
# Peek at three randomly chosen rows of the hexagon grid.
gdf_hex.sample(3)

## 4. Visualize hexgrids over Los Angeles County

In [None]:
import contextily as ctx

# Draw the hexagons semi-transparently with black outlines so the basemap
# underneath stays visible.
ax = gdf_hex.plot(markersize=0.01, alpha=.3, edgecolors="black", figsize=(20, 25))

# add_basemap serves tiles in Web Mercator (EPSG:3857) unless a CRS is given.
# Pass the grid's own CRS so the tiles are warped to line up with the hexagons
# whatever projection the source shapefile used.
ctx.add_basemap(ax, crs=gdf_hex.crs)
plt.show()

## 5. Create shapefile of hexgrids

In [None]:
# Folder that receives the base-map shapefile and its companion files.
base_map_dir = root / 'X.data' / 'h3_processed_data' / 'base_map_hex_all'

# The shapefile write fails if the target folder is missing, so create it
# (and any missing parents) first; a no-op when it already exists.
base_map_dir.mkdir(parents=True, exist_ok=True)

# Export only the hexagon id and geometry columns.
gdf_hex[['hex_id', 'geometry']].to_file(base_map_dir / 'base_map_hex_all.shp', driver='ESRI Shapefile')

### 5.1 Create Boto3 client session and upload shapefile data to S3 bucket

In [None]:
# Create an S3 client from the boto3 session; it is used below to upload the
# generated shapefile files to the bucket.
s3 = my_session.client('s3')

In [None]:
# Local folder holding the base-map shapefile and its companion files.
basemap_data_path = root / 'X.data' / 'h3_processed_data' / 'base_map_hex_all'

# Key prefix under which the files are stored in the bucket.
s3_key_prefix = 'h3_processed_data/base_map_hex_all/'

for local_file_path in basemap_data_path.iterdir():
    # upload_file handles single files only, so skip anything that is not a
    # regular file (e.g. a stray sub-directory).
    if not local_file_path.is_file():
        continue
    upload_file_key = s3_key_prefix + local_file_path.name
    # Build the local path from the directory that was actually listed rather
    # than from a cwd-relative string, so the upload does not depend on the
    # current working directory.
    s3.upload_file(Filename=str(local_file_path), Bucket=s3_bucket, Key=upload_file_key)

## 6. Create a Shapefile of hexagons for Power BI
To create the shapefile for Power BI, we first compute the area and the length (perimeter) of each hexagon geometry.

In [None]:
# Add per-hexagon area and boundary length as plain attribute columns.
# NOTE(review): both values are expressed in the units of gdf_hex's CRS; if
# that CRS is geographic (degrees) the numbers are not metric - confirm.
hex_geometry = gdf_hex['geometry']
gdf_hex['area'] = hex_geometry.area
gdf_hex['length'] = hex_geometry.length

Now that we have the area and length we can create the shapefile.

In [None]:
# Folder that receives the Power BI shapefile.
power_bi_dir = root / 'X.data' / 'Power_BI'

# The shapefile write fails if the target folder is missing, so create it
# (and any missing parents) first; a no-op when it already exists.
power_bi_dir.mkdir(parents=True, exist_ok=True)

# Export every column of the grid, including the area and length attributes.
gdf_hex.to_file(power_bi_dir / 'full_base_map_hex_all.shp', driver='ESRI Shapefile')