In [None]:
import geopandas as gp
from shapely import wkt
from shapely.geometry import Point, Polygon
from shapely.ops import unary_union
import pandas as pd
import numpy as np
from pprint import pprint
import os
import glob
import openpyxl
import matplotlib.pyplot as plt
import plotly.express as px #if using plotly
import folium

## Read in files

In [None]:
# Notebook-wide pandas display settings.
pd.options.display.max_columns = None  # never truncate the column list when showing a frame
pd.set_option('display.float_format', '{:20,.2f}'.format)  # fixed 2-decimal floats with thousands separators (no scientific notation)

In [None]:
# Boundary layers, read in the order: state outline, counties, block groups.
boundary_paths = (
    "data/CA_State_TIGER2016.shp",     # CA state
    "data/CA_Counties_TIGER2016.shp",  # CA counties
    "data/tl_2022_06_bg.shp",          # CA block groups
)
ca_state, ca_counties, ca_bg = (gp.read_file(path) for path in boundary_paths)

In [None]:
# Census block-group-level table from EJScreen.
# Source: https://www.epa.gov/ejscreen/download-ejscreen-data
# (2017-2021 5-year ACS average data)
ejscreen_path = "data/CA_EJSCREEN_2022_Full_with_AS_CNMI_GU_VI.xlsx"
ejscreen = pd.read_excel(ejscreen_path, header=0, index_col=None)  # first row is the header; no index column

In [None]:
# Flares layer (already cleaned and combined into a single shapefile).
flares_path = "data/all_flares.shp"
all_flares = gp.read_file(flares_path)

In [None]:
# Common CRS for the project (EPSG:4326); reproject all three boundary layers to it.
projcrs = 4326
ca_state, ca_counties, ca_bg = (
    layer.to_crs(projcrs) for layer in (ca_state, ca_counties, ca_bg)
)

# The EJScreen table is merged on 'ID', so expose the block-group GEOID under that
# name and cast it to int64 so the key dtypes can match.
ca_bg = ca_bg.rename(columns={'GEOID': 'ID'})
ca_bg['ID'] = ca_bg['ID'].astype(np.int64)

In [None]:
# Row counts on each side before joining.
n_ejscreen_rows = len(ejscreen['ID'])
n_shapefile_rows = len(ca_bg['ID'])
print(f"{n_ejscreen_rows} block groups in the EJScreen data \n")
print(f"{n_shapefile_rows} block groups in the CA block group shapefile\n")

# Inner join on 'ID': only block groups present in both tables survive.
ca_bg_joined = ca_bg.merge(ejscreen, on='ID')

# Distinct shapefile block groups minus joined rows = block groups lost in the merge.
n_missing = len(pd.unique(ca_bg['ID'])) - len(ca_bg_joined)
print(f"{n_missing} block groups are missing after merge")

In [None]:
# Subset flares to only those located within a California county.
# A spatial join compares raw coordinates, so both layers must share a CRS.
# all_flares has not been reprojected before this point (only the boundary layers
# were), so align it with the counties here when its CRS is known and differs.
flares_for_join = all_flares
if all_flares.crs is not None and all_flares.crs != ca_counties.crs:
    flares_for_join = all_flares.to_crs(ca_counties.crs)

ca_flares = gp.sjoin(flares_for_join, ca_counties, how="inner", predicate="within")
print(f'Flares found: {len(ca_flares)}')

# 'index_right' is the bookkeeping column added by sjoin; it is not needed afterwards.
ca_flares = ca_flares.drop(columns='index_right')

# Yearly BCM columns (2012-2021 inclusive) used for the BCM_avg calculation
col_list = [f'BCM_{year}' for year in range(2012, 2022)]

# add new column for average BCM across all years (row-wise mean; pandas skips NaN by default)
ca_flares['BCM_avg'] = ca_flares[col_list].mean(axis=1)

In [None]:
# ca_polys_subset = gp.sjoin(ca_bg_joined, ca_flares, how='inner', predicate='contains')

In [None]:
# Projected CRS used for distance/area work.
# EPSG:3310 -- https://epsg.io/3310-1739 (units: meters)
meters_crs = 3310
# ca_polys_subset = ca_polys_subset.to_crs(meters_crs)
ca_flares, ca_bg_joined = (
    ca_flares.to_crs(meters_crs),
    ca_bg_joined.to_crs(meters_crs),
)

In [None]:
# # Create "flares present" binary dependent variable
# ca_bg_joined = ca_bg_joined.assign(flares_present=ca_bg_joined['geometry'].isin(ca_polys_subset['geometry']).astype(int))

# print(f"Block groups containing flares (0=no, 1=yes):\n")
# print(ca_bg_joined['flares_present'].value_counts())

### 5km Buffer Analysis

1) Set 5 km buffers around flares  
2) Subset census data to only the race and age columns  
3) Count the number of people (or compute the proportion) in those columns that fall anywhere within the combined buffer  
4) Do the same for the area outside the buffer  
5) Create a table that summarizes the proportions inside and outside the buffer

In [None]:
# 1) Draw a 5 km buffer around every flare and store it in its own column.
#    Each flare keeps its own buffer polygon here; nothing is merged in this cell.
buffer_radius_m = 5000  # buffer distance, in the units of the current CRS
ca_flares["buffer_5000m"] = ca_flares['geometry'].buffer(buffer_radius_m)


In [None]:
# Columns kept for the buffer analysis:
#   ID, ACSTOTPOP, UNDER5, OVER64, MINORPOP, LOWINCOME, D_PM25_2
#   = total population, under 5yr, over 64yr, people of color, low income, EJ index for PM2.5
# plus the block-group geometry.
model_columns = [
    'ID',
    'ACSTOTPOP',
    'UNDER5',
    'OVER64',
    'MINORPOP',
    'LOWINCOME',
    'D_PM25_2',
    'geometry',
]
bg_formodel = ca_bg_joined[model_columns]

In [None]:
# match crs (both layers must be in the meter-based CRS before any area math)
ca_flares = ca_flares.to_crs(meters_crs)
bg_formodel = bg_formodel.to_crs(meters_crs)

# Make the buffers (rather than the flare points) the active geometry of ca_flares.
ca_flares = ca_flares.set_geometry('buffer_5000m')

# Merge every flare buffer into one combined (multi)polygon before intersecting.
# Overlaying the individual buffers produces one output row per (buffer, block group)
# pair, so a block group lying under several overlapping buffers would be
# counted once per buffer in any later sum or mean.
combined_buffers = unary_union(list(ca_flares.geometry))
combined_buffers_gdf = gp.GeoDataFrame(geometry=[combined_buffers], crs=ca_flares.crs)

# Spatial overlay to keep only the areas that are in both geometries,
# i.e. only the parts of block groups that fall inside the combined buffer zone.
# NOTE: the result carries the block-group columns only; per-flare attributes are
# intentionally not propagated because a piece may belong to several buffers.
bg_flares = gp.overlay(combined_buffers_gdf, bg_formodel, how='intersection')

In [None]:
# Area of each block-group piece that lies inside the buffer zone, in squared CRS units.
# GeoSeries.area is already vectorised over every row, so a single assignment is
# enough; looping over the block groups only repeated the same computation.
bg_flares['intersection_area'] = bg_flares.geometry.area

### It's not calculating correctly when a block group is completely contained within a buffer. So I've set the ceiling of possible proportions to 1. Band-aid fix. 
### Does this mean the area calculation is not right either?

In [None]:
# Calculate the proportion of each block group within the buffer zone.
#
# The overlay result has its own fresh row index, so dividing by
# bg_formodel.geometry.area directly would pair rows by index label and divide each
# piece by the area of an unrelated block group (which is how proportions > 1 appear).
# Look the full block-group area up by block-group ID instead.
# presumably the ID column is named 'ID' in bg_flares; overlay renames clashing
# column names with _1/_2 suffixes, hence the fallback -- verify against the overlay inputs.
id_col = 'ID' if 'ID' in bg_flares.columns else 'ID_2'
bg_total_area = pd.Series(
    bg_formodel.geometry.area.to_numpy(),
    index=bg_formodel['ID'].to_numpy(),
)
# Series.map needs a unique lookup index; keep the first entry per ID.
bg_total_area = bg_total_area[~bg_total_area.index.duplicated()]

bg_flares['bg_5kbuf_prop'] = bg_flares['intersection_area'] / bg_flares[id_col].map(bg_total_area)

# Guard against floating-point overshoot when a block group lies entirely inside a buffer.
bg_flares['bg_5kbuf_prop'] = bg_flares['bg_5kbuf_prop'].clip(0, 1)

# calculate the outside-buffer proportion
bg_flares['bg_5kbuf_out_prop'] = 1 - bg_flares['bg_5kbuf_prop']

# Apply the proportion to each demographic variable to find counts by variable
# (area-weighted: assumes people are spread evenly across the block group)
demo_vars = ['ACSTOTPOP', 'UNDER5', 'OVER64', 'MINORPOP', 'LOWINCOME']
for var in demo_vars:
    bg_flares[var + '_5kbuf_count'] = bg_flares[var] * bg_flares['bg_5kbuf_prop']

# find overall proportions for each demo var by dividing their count by their total population
# (block groups with zero population yield NaN here)
for var in demo_vars:
    bg_flares[var + '_bg_totprop'] = bg_flares[var] / bg_flares['ACSTOTPOP']
    

In [None]:
prop_vars = ['ACSTOTPOP_bg_totprop', 'UNDER5_bg_totprop', 'OVER64_bg_totprop', 'MINORPOP_bg_totprop', 'LOWINCOME_bg_totprop']
# Raw demographic column behind each *_bg_totprop column, in the same order.
share_source_vars = [name.replace('_bg_totprop', '') for name in prop_vars]

# presumably the block-group key is 'ID' in bg_flares; overlay renames clashing
# column names with _1/_2 suffixes, hence the fallback -- verify against the overlay inputs.
id_col = 'ID' if 'ID' in bg_flares.columns else 'ID_2'

# In-buffer: mean block-group share over block groups that touch the buffer zone.
# Keep one row per block group so a block group intersected by several buffers is
# not averaged in more than once.
in_buffer_bg = bg_flares.drop_duplicates(subset=id_col)
in_buffer_prop = in_buffer_bg[prop_vars].mean()

# Outside-buffer: the same statistic over block groups that do NOT touch any buffer.
# (1 - in_buffer_prop would be the complementary share *inside* the buffer,
# e.g. the non-minority share, not the share outside the buffer.)
outside_bg = bg_formodel.loc[~bg_formodel['ID'].isin(in_buffer_bg[id_col])]
out_buffer_prop = outside_bg[share_source_vars].div(outside_bg['ACSTOTPOP'], axis=0).mean()
out_buffer_prop.index = prop_vars  # relabel so both series line up row by row

# create a table from the in-buffer and outside-buffer proportions
proportions = pd.concat([in_buffer_prop, out_buffer_prop], axis=1)
proportions.columns = ['In_Buffer', 'Outside_Buffer']

# table that compares the in-buffer proportions to the outside-buffer proportions
print(proportions)

In [None]:
count_vars = ['ACSTOTPOP_5kbuf_count', 'UNDER5_5kbuf_count', 'OVER64_5kbuf_count', 'MINORPOP_5kbuf_count', 'LOWINCOME_5kbuf_count']
# Raw demographic column behind each *_5kbuf_count column, in the same order.
count_source_vars = [name.replace('_5kbuf_count', '') for name in count_vars]

# NOTE(review): summing assumes the pieces in bg_flares do not overlap; overlapping
# pieces of the same block group would be double-counted -- confirm upstream.
in_buffer_tot = bg_flares[count_vars].sum()

# Statewide total of EACH variable (not just total population), relabelled to match
# count_vars so the subtraction pairs every variable with its own total.
statewide_tot = bg_formodel[count_source_vars].sum()
statewide_tot.index = count_vars
out_buffer_tot = statewide_tot - in_buffer_tot

# create a table from the in-buffer and outside-buffer counts
counts = pd.concat([in_buffer_tot, out_buffer_tot], axis=1)
counts.columns = ['In_Buffer_Count', 'Outside_Buffer_Count']
# format w thousands separators (column-wise Series.map; DataFrame.applymap is deprecated)
counts = counts.apply(lambda col: col.map('{:,.0f}'.format))

# table that compares the in-buffer counts to the outside-buffer counts
print(counts)

### Folium mapping

In [None]:
# # Convert the GeoDataFrame to the same CRS as the folium map (if necessary)
# ca_flares = ca_flares.to_crs(3857)

# # Create a folium map with a center location
# m = folium.Map(location=[38.377158,-121.645792], zoom_start=6, tiles=None,overlay=False)  #start w lat/long roughly in center of CA
# base_map = folium.FeatureGroup(name='Base map', overlay=True, control=False)
# folium.TileLayer(tiles='OpenStreetMap').add_to(base_map)
# base_map.add_to(m)

# def style_function(feature):
#     return {
#         'fillColor': 'red',
#         'color': 'red',
#         'fillOpacity': 0.2
#     }

# def style_function2(feature):
#     return {
#         'fillColor': 'blue',
#         'color': 'blue',
#         'fillOpacity': 0.2
#     }


# # points
# all_flares_points = folium.FeatureGroup(name='flare points', overlay=True)
# folium.GeoJson(data=ca_flares["geometry"]).add_to(all_flares_points)
# all_flares_points.add_to(m)


# # buffers
# all_flares_buffers = folium.FeatureGroup(name='flare buffers', overlay=True)
# folium.GeoJson(
#     data=ca_flares["buffer_5000m"],
#     style_function=style_function
# ).add_to(all_flares_buffers)
# all_flares_buffers.add_to(m)

# # intersected polygons
# intersect = folium.FeatureGroup(name='intersect', overlay=True)
# folium.GeoJson(
#     data=bg_flares['geometry'],
#     style_function=style_function2
# ).add_to(intersect)
# intersect.add_to(m)


# # multipolygon test
# # leaving commented out for now b/c it's not working
# # all_flares_comb = folium.FeatureGroup(name='flares combined', overlay=True)
# # if combined_buffers.geom_type == 'MultiPolygon':
# #     for polygon in combined_buffers.geoms:
# #         folium.Polygon(
# #             locations=polygon.exterior.coords[:],
# #             color='red',
# #             fill=True,
# #             fill_color='red'
# #         ).add_to(all_flares_comb)
        
# # all_flares_comb.add_to(m)


# folium.LayerControl().add_to(m)
# m