# Create Base Location Data For Each Year

In [None]:
import geopandas  as gpd
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import h3 as h3

# import libraries needed for upload / download to AWS
import boto3
import awswrangler
from fiona.session import AWSSession
import fiona
# Name of the S3 bucket that the notebook reads its inputs from and
# uploads its per-year outputs to.
s3_bucket = 'traffic-data-bucket'


import re

import os
from os.path import isfile, join
from pathlib import Path
from os import listdir
import os
# `path` is the directory the notebook is running from; `root` is its
# parent directory as an absolute path.
path = Path.cwd()
root = path.parent.absolute()

# Display the resolved parent directory as the cell output.
root

# 1. Import Data

#### 1.1 Base Table with node/edge info and city/district labels
##### LA County shape file transposed to Uber H3 hexagons at level 10 (~15,000 square meters per hexagon on average; see the resolution table linked below)
##### This process takes a shape file and maps it to hexagons at a given level. The output of the mapping is a unique hex_id for each hexagon and the shape geometry
##### https://h3geo.org/docs/core-library/restable/

In [None]:
from aws_secrets import aws_access_key_id, aws_secret_access_key, aws_session_token

# Build the boto3 session used for every S3 read/write in this notebook.
# The three credential values are imported from the local `aws_secrets` module.
session_credentials = {
    "aws_access_key_id": aws_access_key_id,
    "aws_secret_access_key": aws_secret_access_key,
    "aws_session_token": aws_session_token,
}
my_session = boto3.Session(**session_credentials)

In [None]:
# Read the base hex map shapefile straight from S3. The fiona environment
# wraps the boto3 session so the read is authenticated.
base_map_uri = f's3://{s3_bucket}/h3_processed_data/base_map_hex_all/base_map_hex_all.shp'
aws_env = fiona.Env(session=AWSSession(my_session))
with aws_env:
    gdf_all = gpd.read_file(base_map_uri)

# Quick sanity check: dimensions plus two random rows.
print(gdf_all.shape)
gdf_all.sample(2)

In [None]:
def rking_neighbors(row, skins):
    """Return the H3 cell ids within ``skins`` rings of ``row.hex_id`` as a list.

    Parameters
    ----------
    row : object with a ``hex_id`` attribute (e.g. a DataFrame row)
        The row whose ``hex_id`` is the centre of the ring.
    skins : int
        Ring distance ``k`` passed to ``h3.k_ring``.

    Returns
    -------
    list
        The ids returned by ``h3.k_ring``, converted to a list.
        NOTE(review): per the h3-py v3 API, ``k_ring`` returns a set that
        includes the origin cell, so the list order is arbitrary - confirm
        against the installed h3 version.
    """
    return list(h3.k_ring(row.hex_id, skins))

# Attach the ring-1 and ring-2 neighbour id lists to every hex of the base map.
for column_name, ring_size in (('hex_neighbors_1_ids', 1), ('hex_neighbors_2_ids', 2)):
    gdf_all[column_name] = gdf_all.apply(
        lambda hex_row, k=ring_size: rking_neighbors(hex_row, skins=k),
        axis=1,
    )

#### 1.2 Collision Data

In [None]:
# Local alternative to the S3 read below:
# collisions = pd.read_csv(root / 'X.data' / 'h3_processed_data' / 'collisions_hex.csv')
collisions_uri = f's3://{s3_bucket}/h3_processed_data/collisions_hex.csv'
collisions = awswrangler.s3.read_csv(path=collisions_uri, boto3_session=my_session)

# Keep only rows whose hex_id is not the string '0'.
# NOTE(review): '0' is presumably a placeholder for collisions that could
# not be mapped to a hex - confirm with the upstream processing step.
has_hex = collisions.hex_id != '0'
collisions = collisions[has_hex]

# Quick sanity check: dimensions plus two random rows.
print(collisions.shape)
collisions.sample(2)

# 2. Collisions 

### 2.1 Prep collision file by making a wide table.  One unique row per hex id

In [None]:
# Total accident_count per (hex_id, collision_year) pair, as a flat table.
collisions_year_grp = (
    collisions
    .groupby(["hex_id", "collision_year"])["accident_count"]
    .sum()
    .reset_index()
)
print(collisions_year_grp.columns)

# Distinct collision years in ascending order; displayed as the cell output.
years = sorted(collisions_year_grp.collision_year.unique())
years

In [None]:
# Attach the ring-1 and ring-2 neighbour id lists to every (hex, year) row.
for column_name, ring_size in (('hex_neighbors_1_ids', 1), ('hex_neighbors_2_ids', 2)):
    collisions_year_grp[column_name] = collisions_year_grp.apply(
        lambda hex_row, k=ring_size: rking_neighbors(hex_row, skins=k),
        axis=1,
    )

# Show two random rows to eyeball the new columns.
collisions_year_grp.sample(2)


In [None]:
# Display (rows, columns) of the per-hex, per-year collision table.
collisions_year_grp.shape

In [None]:
# Preview only (result is not assigned): one row per (hex_id, ring-1 member).
collisions_year_grp[['hex_id', 'hex_neighbors_1_ids']].explode('hex_neighbors_1_ids')

In [None]:
def _add_neighbor_collisions(hex_features, prev_counts, neighbor_col, sum_col, ring_cell_count):
    """Append neighbour-ring collision totals and averages to ``hex_features``.

    Parameters
    ----------
    hex_features : pandas.DataFrame
        One row per hex, with a ``hex_id`` column and a list-valued
        ``neighbor_col`` column.
    prev_counts : pandas.DataFrame
        Collision counts for a single year, with ``hex_id`` and
        ``accident_count`` columns.
    neighbor_col : str
        Name of the column holding each hex's list of ring member ids.
    sum_col : str
        Name of the output column for the summed ring collisions; the
        average is written to ``sum_col + '_ave'``.
    ring_cell_count : int
        Fixed divisor used for the average.

    Returns
    -------
    pandas.DataFrame
        ``hex_features`` plus the two new columns. Hexes whose ring members
        recorded no collisions get 0 in both columns.
    """
    ave_col = sum_col + '_ave'

    # One row per (centre hex, ring member) pair.
    pairs = hex_features[['hex_id', neighbor_col]].explode(neighbor_col)

    # The inner join keeps only ring members that recorded collisions. Both
    # frames carry a `hex_id` column, so pandas suffixes them: `hex_id_x` is
    # the centre hex, `hex_id_y` the ring member.
    pairs = pairs.merge(prev_counts[['hex_id', 'accident_count']],
                        left_on=neighbor_col,
                        right_on='hex_id',
                        how='inner')

    ring_totals = pairs.groupby('hex_id_x')['accident_count'].agg('sum').reset_index()
    ring_totals.columns = ['hex_id_x', sum_col]
    ring_totals[ave_col] = ring_totals[sum_col] / ring_cell_count

    result = hex_features.merge(ring_totals, left_on='hex_id', right_on='hex_id_x', how='left')
    result = result.drop(columns='hex_id_x')

    # Hexes that were dropped by the inner join come back as NaN from the
    # left join; "no collisions in the ring" is 0, consistent with how the
    # hex's own prev-year counts are zero-filled.
    result[[sum_col, ave_col]] = result[[sum_col, ave_col]].fillna(0)
    return result


# Year -> feature table (one row per base-map hex).
coll_year_dict = {}

# (neighbour-list column, output label, divisor for the average).
# 7 and 19 are the cell counts of a full 1-ring and 2-ring disk when the
# centre cell is included (1 + 6 and 1 + 6 + 12).
# NOTE(review): assumes h3.k_ring includes the origin cell - confirm.
ring_specs = [
    ("hex_neighbors_1_ids", "neighbor1", 7),
    ("hex_neighbors_2_ids", "neighbor2", 19),
]

# 2022 is appended so a table is also built from the 2021 and 2020 counts.
# NOTE(review): presumably 2022 is the prediction year - confirm.
# dict.fromkeys de-duplicates (order preserved) in case 2022 is already in `years`.
for year in dict.fromkeys(years + [2022]):
    print(year)

    # Per-hex collision counts for the two preceding years.
    df_prev1 = collisions_year_grp.loc[collisions_year_grp.collision_year == (year - 1),
                                       ["hex_id", "accident_count"]]
    df_prev2 = collisions_year_grp.loc[collisions_year_grp.collision_year == (year - 2),
                                       ["hex_id", "accident_count"]]

    # Start from every hex in the base map; hexes without collisions get 0.
    gdf_all_join = (
        gdf_all[["hex_id", "hex_neighbors_1_ids", "hex_neighbors_2_ids"]]
        .merge(df_prev1.rename(columns={"accident_count": "prev1_yr_coll_cnt"}),
               on='hex_id', how='left')
        .merge(df_prev2.rename(columns={"accident_count": "prev2_yr_coll_cnt"}),
               on='hex_id', how='left')
        .fillna(0)
    )

    # Column order: prev1/ring1, prev1/ring2, prev2/ring1, prev2/ring2.
    for prev_label, prev_counts in (("prev1", df_prev1), ("prev2", df_prev2)):
        for neighbor_col, ring_label, ring_cell_count in ring_specs:
            gdf_all_join = _add_neighbor_collisions(
                gdf_all_join,
                prev_counts,
                neighbor_col,
                f"{prev_label}_yr_coll_{ring_label}",
                ring_cell_count,
            )

    coll_year_dict[year] = gdf_all_join

In [None]:
# Number of yearly feature tables that were built.
len(coll_year_dict)

In [None]:
# Inspect the table built for 2022.
coll_year_dict[2022]

## 3. Upload to S3 bucket as CSV

In [None]:
# Folder (key prefix) inside the bucket that receives the uploaded data.
raw_path_dir = 'joined_data'

# Write one CSV per year: base_location_<year>_collision_data.csv
for key, df in coll_year_dict.items():
    print(key)
    file_name = 'base_location_' + str(key) + '_collision_data.csv'
    raw_path = f"s3://{s3_bucket}/{raw_path_dir}/{file_name}"
    awswrangler.s3.to_csv(
        df=df,
        path=raw_path,
        index=False,
        boto3_session=my_session,
        use_threads=True,
    )