# Import & Setup

In [1]:
import sys
sys.path.append(r"C:/Users/mikha/Dropbox/mikhael_misc/Projects/My-Package")

import pandas as pd
import numpy as np
from myfunctions import clean_path
import h3

# Load the stops CSV into `df`, using the 'Stop ID' column as the index.
# The path is first passed through clean_path (imported from the local
# `myfunctions` module; its behavior is not visible in this notebook).
# NOTE(review): the path is an absolute, machine-specific location, so this
# cell only runs where that file exists.
df = pd.read_csv(filepath_or_buffer=clean_path(r"C:/Users/mikha/Dropbox/mikhael_misc/Projects/Policing Thesis/Modified Dataset - 2021 - One Row per Stop.csv"),
                 index_col='Stop ID')

## Remove "Sparse" from col names

In [16]:
# Labels of every column whose name contains the substring 'Sparse'.
sparse_cols = df.filter(like='Sparse').columns

# replace non sparse-filled cols with sparse-filled cols:
# each selected column is copied to the name obtained by removing
# ' - Sparse Filled' from its label, overwriting that column if it already
# exists and creating it otherwise.
# NOTE(review): the filter above matches any label containing 'Sparse', while
# only the exact text ' - Sparse Filled' is removed here. A column matching the
# former but not the latter would be assigned to itself and then dropped
# below — confirm no such column exists in the dataset.
for sparse_col in sparse_cols:
    df[sparse_col.replace(' - Sparse Filled', '')] = df[sparse_col]
    
# Drop the original sparse-labelled columns (in place) now that they have been copied.
df.drop(columns=sparse_cols, inplace=True)
    

# H3 Encoding

* Resolution options:
  * Official list w/ area https://h3geo.org/docs/core-library/restable/
  * Visualize at https://observablehq.com/@four43/h3-index-visualizer
* Looks like resolution=(7 or 8) is the way to go

In [2]:
def create_h3_series(dataframe:pd.Series, resolution:int) -> str:
    """Return the H3 cell for the coordinates held in one row.

    Although the parameter is named ``dataframe``, this function is meant to
    be applied row-wise, e.g.
    ``df.apply(create_h3_series, resolution=7, axis=1)``, so it receives a
    single row and makes one ``h3.geo_to_h3`` call for it.

    Parameters
    ----------
    dataframe
        A single row (any mapping-like object) exposing 'Latitude' and
        'Longitude' entries.
    resolution
        H3 resolution, passed through unchanged to ``h3.geo_to_h3``.

    Returns
    -------
    The value returned by ``h3.geo_to_h3`` for that point (an H3 index
    string according to the h3-py v3 API).
    """
    # h3.geo_to_h3 takes latitude as `lat` and longitude as `lng`; feeding the
    # 'Longitude' value into `lat` (and vice versa) indexes the wrong location.
    return h3.geo_to_h3(lat=dataframe['Latitude'],
                        lng=dataframe['Longitude'],
                        resolution=resolution)

## Create "per" (ratio) columns

In [9]:
def create_per_cols(dataframe:pd.DataFrame, numerator_cols:list, denominator_cols:list) -> None:
    """Add one ratio column to ``dataframe`` for every numerator/denominator pair.

    For each name in ``numerator_cols`` combined with each name in
    ``denominator_cols``, a column labelled ``'<numerator> per <denominator>'``
    is written to ``dataframe`` holding the element-wise quotient of the two
    source columns. The frame is modified in place and nothing is returned.
    """
    # Lazily enumerate the pairs, numerator-major, so columns are added in the
    # same order as a nested loop would add them.
    pairings = (
        (top, bottom)
        for top in numerator_cols
        for bottom in denominator_cols
    )
    for top, bottom in pairings:
        label = f'{top} per {bottom}'
        quotient = dataframe[top] / dataframe[bottom]
        dataframe[label] = quotient

## Create grouped dataframes

In [5]:
def create_grouped_dataframes(dataframe:pd.DataFrame, resolutions:list, numerator_cols:list, denominator_cols:list) -> None:
    """Add ratio and H3 columns to ``dataframe`` and build the global ``grouped_dataframes`` dict.

    Steps:

    1. ``create_per_cols`` adds the ``'<numerator> per <denominator>'`` columns
       to ``dataframe`` (once; they do not depend on the resolution).
    2. For every entry of ``resolutions`` a column
       ``'H3 Encoding - Res=<resolution>'`` is added by applying
       ``create_h3_series`` to each row.
    3. ``dataframe`` is grouped by that column and the resulting groupby
       object is stored under ``grouped_dataframes[resolution]``.

    Parameters
    ----------
    dataframe
        Frame to extend in place; rows are handed to ``create_h3_series``.
    resolutions
        H3 resolutions to encode and group by.
    numerator_cols, denominator_cols
        Column names forwarded to ``create_per_cols``.

    Returns
    -------
    None. Results are exposed through the module-level ``grouped_dataframes``
    dict (re-created on every call) and through the columns added to
    ``dataframe``.
    """

    global grouped_dataframes
    grouped_dataframes = dict()

    # Create "... Per Stop", "... Per Citation".
    # The ratio columns are independent of the resolution, so compute them a
    # single time up front instead of recomputing them on every loop pass;
    # this also means every groupby below is built from a frame that already
    # contains them.
    create_per_cols(dataframe=dataframe,
                    numerator_cols=numerator_cols,
                    denominator_cols=denominator_cols)

    for resolution in resolutions:
        h3_col = f'H3 Encoding - Res={resolution}'

        # Make geo-encoding column (hexagon encoding), one value per row
        dataframe[h3_col] = dataframe.apply(create_h3_series, resolution=resolution, axis=1)

        # Create dataframes grouped by geo-encoding
        # NEXT TO IMPLEMENT - grouped.agg({'numberA':'sum', 'numberB':'min'}) (as template)
        grouped_dataframes[resolution] = dataframe.groupby(by=h3_col)
        
        

# run
# Numerators: these should be actions the police can decide on - e.g., stopping somebody
num_cols = [
    'Alcohol',
    'Search Conducted',
]
# Denominators: these should be negative traffic events that simply occur - e.g., accidents, fatalities
denom_cols = [
    'Fatal',
    'Accident',
    'Personal Injury',
    'Property Damage',
]
# Build the H3 columns and the grouped frames for resolutions 7 and 8.
create_grouped_dataframes(
    dataframe=df,
    resolutions=[7, 8],
    numerator_cols=num_cols,
    denominator_cols=denom_cols,
)

In [18]:
# Display the current column labels of df.
# NOTE(review): the cell execution counts in this notebook are not sequential
# ([1], [16], [2], [9], [5], [18]) and the output shown below contains no
# "... per ..." columns, so it may reflect an earlier kernel state — re-run
# from a fresh kernel to confirm.
df.columns

Index(['Agency', 'SubAgency', 'Description', 'Location', 'Latitude',
       'Longitude', 'Accident', 'Belts', 'Personal Injury', 'Property Damage',
       'Fatal', 'Commercial License', 'HAZMAT', 'Commercial Vehicle',
       'Alcohol', 'Work Zone', 'Search Conducted', 'Search Disposition',
       'Search Outcome', 'Search Reason', 'Search Reason For Stop',
       'Search Type', 'Search Arrest Reason', 'State', 'VehicleType', 'Year',
       'Make', 'Model', 'Color', 'Violation Type', 'Charge', 'Article',
       'Contributed To Accident', 'Race', 'Gender', 'Arrest Type', 'Citation',
       'Speed Limit', 'Recorded Speed', 'H3 Encoding - Res=7',
       'H3 Encoding - Res=8'],
      dtype='object')