In [None]:
import geopandas as gp
from shapely import wkt
import pandas as pd
import numpy as np
import pprint
import os
import glob
import openpyxl
import matplotlib.pyplot as plt
import plotly.express as px #if using plotly
import folium
import shapely.speedups
shapely.speedups.enable()
import contextily

In [None]:
# Never truncate the column list when a DataFrame is displayed.
pd.options.display.max_columns = None
# Render floats as fixed-point with thousands separators (20 wide, 2 decimals)
# rather than scientific notation.
pd.set_option('display.float_format', '{:20,.2f}'.format)

## Read in files

In [None]:
# Boundary layers: US states, the California outline and California counties.
# Alternative world / national layers kept for reference:
#worldmap = gp.read_file(gp.datasets.get_path("naturalearth_lowres"))
#usa = gp.read_file("data/cb_2018_us_nation_5m.shp")
us_states, ca_state, ca_counties = (
    gp.read_file(shapefile)
    for shapefile in (
        "data/cb_2018_us_state_500k.shp",
        "data/CA_State_TIGER2016.shp",
        "data/CA_Counties_TIGER2016.shp",
    )
)

#### Dataset of social vulnerability measures from CalEnviroScreen, at the spatial level of census tracts

In [None]:
# CalEnviroScreen 4.0 social vulnerability data, one record per census tract.
# source: https://oehha.ca.gov/calenviroscreen/report/calenviroscreen-40
ces4_path = "data/CES4 Final Shapefile.shp"
calenv4_df = gp.read_file(ces4_path)

# Tract ids are read in as floats; store them as 64-bit integers instead.
calenv4_df['Tract'] = calenv4_df['Tract'].astype('int64')

#### US O&G files

In [None]:
# Oil & gas field geometries for the whole US (subset to California further down).
og_fields_path = "data/United_States.shp"
og_fields = gp.read_file(og_fields_path)

#### Refineries

In [None]:
# Oil-refining assets come as a CSV whose 'location' column holds WKT text.
df = pd.read_csv("data/asset-climate-trace_oil-refining_091922.csv")
# Parse each WKT string into a shapely geometry and store it as the geometry column.
df = df.assign(geometry=df['location'].map(wkt.loads))
# Promote to a GeoDataFrame, declaring the coordinates as WGS 84 lon/lat.
refineries = gp.GeoDataFrame(df, geometry='geometry', crs='epsg:4326')

#### Methane flaring data. Geometry-wise, these are recorded as points

In [None]:
# Load the cleaned, combined flares dataset (produced outside this notebook).
all_flares_path = "data/all_flares.shp"
all_flares = gp.read_file(all_flares_path)

In [None]:
# data pulled from Census api
# ca_demos = pd.read_csv("data/ca_blocklevel_demographics.csv")

In [None]:
# Put every layer on one shared CRS so spatial joins and the folium map line up.
# EPSG:4326 - WGS 84, latitude/longitude coordinate system based on the Earth's
# center of mass, used by the Global Positioning System among others.
projcrs = 4326
us_states = us_states.to_crs(projcrs)  # match us_states common crs for project
ca_state = ca_state.to_crs(projcrs)
ca_counties = ca_counties.to_crs(projcrs)
calenv4_df = calenv4_df.to_crs(projcrs)
og_fields = og_fields.to_crs(projcrs)
# refineries was constructed with crs='epsg:4326', so this only re-declares it.
refineries = refineries.set_crs(projcrs)
# The flares are spatially joined against ca_counties later on, so they must be
# on the project CRS as well. If the shapefile carries no CRS, to_crs() would
# raise, so declare it instead.
# NOTE(review): the fallback assumes the flare coordinates are lon/lat — confirm.
all_flares = (
    all_flares.to_crs(projcrs)
    if all_flares.crs is not None
    else all_flares.set_crs(projcrs)
)

In [None]:
#### Merging: tract numbers coming from the US Census api (in the ca_demos df) are different from the tract numbers in the CalEnviroScreen data (calenv4_df)
# The two datasets — which both come from the Census — use different values for tract id. Baffling.
# match last six digits of calenv4_df['Tract'] with full value of ca_demos["tract_id"]
# calenv4_df['tract_id'] = calenv4_df.apply(lambda row: str(row.Tract)[-6:], axis=1)
# ca_demos['tract_id'] = ca_demos['tract_id'].astype(str)  # change from float to str for ease of merging
# calenv4_df.loc[calenv4_df['County']=='Alameda']

#### Subset O&G fields and refineries to just CA

In [None]:
# Keep only the O&G fields and refineries that fall inside a California county.
# The inner spatial join also attaches the matching county's attributes; the
# helper column 'index_right' that sjoin adds is dropped straight away.
# NOTE(review): predicate='within' keeps a feature only when it lies entirely
# inside one county polygon, so features straddling a county line are excluded —
# confirm this is intended.
og_fields = (
    gp.sjoin(og_fields, ca_counties, how="inner", predicate='within')
    .drop(columns='index_right')
)
print(f'Fields found in CA: {len(og_fields)}')

refineries = (
    gp.sjoin(refineries, ca_counties, how="inner", predicate='within')
    .drop(columns='index_right')
)
print(f'Refineries found in CA: {len(refineries)}')

#### There are 103 tracts in California that do not have a CIscore due to a lack of demographic data. CIscore is the comprehensive social vulnerability score assigned at the census tract level. Removing these NAs for now.

In [None]:
# -999 is the sentinel stored in CIscore for tracts without a score.
no_score = calenv4_df['CIscore'] == -999

print(len(calenv4_df))            # tracts before filtering
print(len(calenv4_df[no_score]))  # tracts carrying the sentinel
calenv4_df = calenv4_df[~no_score]
print(len(calenv4_df))            # tracts remaining

In [None]:
# subset US states list to just the lower 48 plus DC
# remove territories (PR, PW, VI, MH, GU) as well as AK and HI
# stateslist = ["AL","AR","AZ","CA","CO","CT","DC","DE","FL","GA","IA","ID", 
#"IL","IN","KS","KY","LA","MA","MD","ME","MI","MN","MO","MS","MT","NC","ND","NE",
#"NH","NJ","NM","NV","NY", "OH","OK","OR","PA","RI","SC","SD","TN","TX","UT","VA","VT","WA","WI","WV","WY"]
# us_states = us_states[us_states['STUSPS'].isin(stateslist)]

## Subset flares to just CA (TODO: combine recordings within X km into single points — not done in this cell yet)

In [None]:
# Keep only flare points lying within a California county; the inner join also
# attaches that county's attributes. sjoin's helper column is dropped right away.
ca_flares = (
    gp.sjoin(all_flares, ca_counties, how="inner", predicate='within')
    .drop(columns='index_right')
)
print(f'Flares found in CA: {len(ca_flares)}')

#ca_flares.reset_index(drop=True, inplace=True)  # reset index after subsetting

# One BCM column per year: BCM_2012 ... BCM_2021
col_list = [f'BCM_{year}' for year in range(2012, 2022)]

# for curiosity: add new column for average BCM across all years
ca_flares['BCM_avg'] = ca_flares.loc[:, col_list].mean(axis='columns')

## Create subset of tracts showing only those with flares present

In [None]:
# kern = calenv4_df[calenv4_df['County'] == 'Kern'].geometry.squeeze()
# kern = kern.unary_union  # create single polygon of county

In [None]:

# # create mask of T/F booleans for all the countries
# mask = ca_flares.within(kern)

# # filter countries by the mask so only the True values are selected
# calenv4_df[mask]

In [None]:
# ca_subset = gp.sjoin(calenv4_df, ca_flares, how='inner', predicate='contains')

In [None]:
# fig, ax = plt.subplots(figsize = (8, 8))
# calenv4_df.plot(ax=ax, color = 'darkgray', edgecolor="white", linewidth=.5)
# ca_subset.plot(ax=ax, color='gold')
# ca_flares.plot(ax=ax, color='red', markersize=5)
# plt.tight_layout()

## Mapping with Folium

### Choropleth + flare markers

In [None]:
# Start from a tile-less map centred roughly on the middle of California.
ca_center = [38.377158, -121.645792]
m = folium.Map(location=ca_center, zoom_start=6, tiles=None, overlay=False)

# Attach the OpenStreetMap tiles through a feature group that is excluded from
# the layer control (control=False).
base_map = folium.FeatureGroup(name='Base map', overlay=True, control=False)
base_map.add_child(folium.TileLayer(tiles='OpenStreetMap'))
m.add_child(base_map)

# Choropleth of CIscore per census tract; tract shapes and values both come from
# calenv4_df and are matched on the 'Tract' property.
folium.Choropleth(
    geo_data=calenv4_df,
    data=calenv4_df,
    columns=['Tract', 'CIscore'],
    key_on='feature.properties.Tract',
    fill_color="Reds",
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name="CI Score",
    name="Census tracts",
).add_to(m)

In [None]:
# Feature group holding one pin per California flare.
fg_flares = folium.FeatureGroup(name='Methane flaring', overlay=True)

# Walk the flare columns in lock-step: position, category and average BCM.
flare_rows = zip(ca_flares['Latitude'], ca_flares['Longitude'],
                 ca_flares['flare_cate'], ca_flares['BCM_avg'])

for lat, lng, label, bcm in flare_rows:
    # Upstream flares get the gray default-prefix icon; every other category
    # gets a light gray Font Awesome ('fa') icon.
    if label == 'flares_upstream':
        icon = folium.Icon(color='gray', icon='fire')
    else:
        icon = folium.Icon(color='lightgray', prefix='fa', icon='fire')
    folium.Marker(location=[lat, lng], popup=[label, bcm], icon=icon).add_to(fg_flares)

fg_flares.add_to(m)

In [None]:
# add third layer: flaring by average volume
fg_flares_vol = folium.FeatureGroup(name='Flaring by avg volume (BCM)', overlay=True)

# Multiplier applied to BCM_avg to get the circle radius; it artificially
# increases the size for ease of visual comparison.
radius_scale = 5000

# Add one circle per flare to fg_flares_vol. Every flare category is drawn the
# same way here (the original if/else on category had two identical branches),
# so the category only appears in the popup.
for lat, lng, label, bcm in zip(ca_flares.Latitude, ca_flares.Longitude,
                                ca_flares.flare_cate, ca_flares.BCM_avg):
    fg_flares_vol.add_child(folium.CircleMarker(
        location=[lat, lng],
        radius=bcm * radius_scale,
        weight=1,
        color='black',
        popup=[label, bcm]))

fg_flares_vol.add_to(m)

In [None]:
# add fourth layer: O&G fields
og_fields_ca = folium.FeatureGroup(name='O&G fields', overlay=True)


def _og_field_style(feature):
    """Return the same grey fill for every O&G field feature."""
    return {'fillColor': 'grey'}


# Only the geometries are sent to the map; attribute columns are left out.
og_fields_ca.add_child(
    folium.GeoJson(data=og_fields["geometry"], style_function=_og_field_style)
)
og_fields_ca.add_to(m)

In [None]:
# add fifth layer: refineries
refineries_ca = folium.FeatureGroup(name='Refineries', overlay=True)

# Serialise the refineries to a GeoJSON string and wrap it in a folium layer.
refinery_points = folium.GeoJson(refineries.to_json())
refinery_points.add_to(refineries_ca)

refineries_ca.add_to(m)

In [None]:
# Add hover functionality: a nearly transparent tract layer that highlights the
# tract under the cursor and shows its county, tract id and CIscore.
def style_function(feature):
    """Resting style for a tract: faint white fill with a hairline black outline."""
    return {'fillColor': '#ffffff',
            'color': '#000000',
            'fillOpacity': 0.1,
            'weight': 0.1}


def highlight_function(feature):
    """Hover style for a tract: half-opaque grey fill and outline."""
    return {'fillColor': '#999999',
            'color': '#999999',
            'fillOpacity': 0.50,
            'weight': 0.1}


tract_tooltip = folium.features.GeoJsonTooltip(
    fields=['County', 'Tract', 'CIscore'],
    style=("background-color: white; color: #333333; font-family: arial; font-size: 12px; padding: 10px;"),
)

# control=False keeps this helper layer out of the layer control.
NIL = folium.features.GeoJson(
    data=calenv4_df,
    style_function=style_function,
    control=False,
    highlight_function=highlight_function,
    tooltip=tract_tooltip,
)

In [None]:
# Attach the hover layer, keep it above the other layers, add the layer
# control, then display the map as the cell output.
NIL.add_to(m)
m.keep_in_front(NIL)
m.add_child(folium.LayerControl())
m