### Setup / packages / crs

In [1]:
import geopandas as gp
from shapely import wkt
import pandas as pd
import pprint
import os
import glob
import openpyxl
import matplotlib.pyplot as plt
import plotly.express as px #if using plotly

In [2]:
# Show every column when a frame is displayed (None removes the column cap).
pd.options.display.max_columns = None

In [3]:
# Base map layers: world countries, US outline, US states, CA outline, CA counties.
# All shapefiles are read from the relative data/ folder so the notebook does not
# depend on one user's absolute home-directory path.
# NOTE(review): assumes the CA TIGER shapefiles sit in the same data/ folder as
# the other layers — confirm against the repo layout.
# NOTE(review): gp.datasets is deprecated in GeoPandas 0.14 and removed in 1.0;
# on newer versions point this at a local Natural Earth file instead.
worldmap = gp.read_file(gp.datasets.get_path("naturalearth_lowres"))
usa = gp.read_file("data/cb_2018_us_nation_5m.shp")
us_states = gp.read_file("data/cb_2018_us_state_500k.shp")
ca_state = gp.read_file("data/CA_State_TIGER2016.shp")
ca_counties = gp.read_file("data/CA_Counties_TIGER2016.shp")

In [4]:
# Project-wide CRS, given as an EPSG code; the layers below are reprojected to it.
# Note that `usa` is not reprojected here.
projcrs = 4326
us_states, worldmap, ca_state, ca_counties = (
    layer.to_crs(projcrs)
    for layer in (us_states, worldmap, ca_state, ca_counties)
)

In [5]:
# Load the oil-refining assets CSV, skipping the 'other1'..'other7' columns.

# Column names, taken from a one-row read of the same file.
cols = list(pd.read_csv("data/asset-climate-trace_oil-refining_091922.csv", nrows=1))

refs = pd.read_csv(
    "data/asset-climate-trace_oil-refining_091922.csv",
    usecols=lambda name: name not in (
        'other1', 'other2', 'other3', 'other4', 'other5', 'other6', 'other7',
    ),
)

# 'location' holds WKT text: parse it into geometries, then promote the frame
# to a GeoDataFrame that uses that column as its geometry in the project CRS.
refs['location'] = gp.GeoSeries.from_wkt(refs['location'])
refs = gp.GeoDataFrame(refs, geometry="location", crs=projcrs)

In [19]:
# read in cleaned and combined flares data
# NOTE(review): shapefile (.dbf) field names are capped at 10 characters, so longer
# column names may come back truncated — confirm names such as 'flare_category'
# actually exist on this frame.
all_flares = gp.read_file("data/all_flares.shp")

In [None]:
# Sanity check: preview the first three flare records.
all_flares.head(3)

In [10]:
# Number of flare records loaded.
len(all_flares)

74289

In [None]:
# Overview map: every flare location drawn in red over the world country outlines.
fig, ax = plt.subplots(figsize=(10, 8))

worldmap.plot(ax=ax, color='white', edgecolor='black')
# markersize is kept small; raise it if the points are hard to see.
all_flares.plot(ax=ax, color='red', markersize=1)

plt.show()

### US / CA analysis

In [None]:
# Keep only the contiguous US: the lower 48 states plus DC.
# AK, HI and the territories (PR, PW, VI, MH, GU) are not in the list, so they are dropped.
stateslist = [
    "AL", "AR", "AZ", "CA", "CO", "CT", "DC", "DE", "FL", "GA",
    "IA", "ID", "IL", "IN", "KS", "KY", "LA", "MA", "MD", "ME",
    "MI", "MN", "MO", "MS", "MT", "NC", "ND", "NE", "NH", "NJ",
    "NM", "NV", "NY", "OH", "OK", "OR", "PA", "RI", "SC", "SD",
    "TN", "TX", "UT", "VA", "VT", "WA", "WI", "WV", "WY",
]
us_states = us_states.loc[us_states['STUSPS'].isin(stateslist)]

In [None]:
# Spatially join flares and refineries to the CA counties: an inner join keeps
# only the points that fall within a county polygon and attaches that county's
# attributes to each point.
# `predicate=` replaces the `op=` keyword, which was deprecated in GeoPandas 0.10
# and removed in 1.0.
inner_join_result = gp.sjoin(all_flares, ca_counties, how="inner", predicate="within")
refs_join = gp.sjoin(refs, ca_counties, how="inner", predicate="within")
print(f'Flares found: {len(inner_join_result)}')

In [None]:
# Number of flare records that matched a CA county.
len(inner_join_result)

In [None]:
# Yearly flare-volume columns, 2012 through 2021.
col_list = [
    'BCM_2012', 'BCM_2013', 'BCM_2014', 'BCM_2015', 'BCM_2016',
    'BCM_2017', 'BCM_2018', 'BCM_2019', 'BCM_2020', 'BCM_2021',
]

# Per-flare mean across the yearly columns (row-wise).
inner_join_result['BCM_avg'] = inner_join_result.loc[:, col_list].mean(axis='columns')

In [None]:
# Display the joined CA flares frame, including the new BCM_avg column.
inner_join_result

### Visualization

In [None]:
# Layered map: CA county outlines, then flares (red) and refineries (blue) on top.
fig, ax = plt.subplots(figsize=(10, 8))
ca_counties.plot(ax=ax, color='whitesmoke', edgecolor='grey')

# Flare markers are sized by each flare's mean yearly volume; the 100000
# multiplier only makes the markers large enough to read.
inner_join_result.plot(
    ax=ax,
    facecolors='none',
    color='red',
    alpha=.2,
    markersize=inner_join_result[col_list].mean(axis=1) * 100000,
)
refs_join.plot(ax=ax, facecolors='none', color='blue', alpha=.08, markersize=50)

#plt.ylim([33, 39])  # can zoom for readability
ax.set_title("CA methane flaring, 2012-2021")
ax.text(-119.5, 41, 'flares are plotted in red,\n with transparency to show overlap\n and sized by their avg flare volume')
ax.text(-119.5, 40, 'refineries are plotted in blue,\n sized arbitrarily')
plt.show()

## California: BCM by county

In [None]:
# CA-only flares: points that fall within a CA county polygon, with the county
# attributes attached. `predicate=` replaces the `op=` keyword (deprecated in
# GeoPandas 0.10, removed in 1.0).
# The county 'NAME' column is renamed so it is clearly the county name; the
# rename is chained so re-running the cell always rebuilds the frame from scratch.
ca_flares = (
    gp.sjoin(all_flares, ca_counties, how="inner", predicate="within")
    .rename(columns={'NAME': 'county_name'})
)

In [None]:
# Total BCM per year for each CA county, plus a per-county total over all years.

col_list = [
    'BCM_2012', 'BCM_2013', 'BCM_2014', 'BCM_2015', 'BCM_2016',
    'BCM_2017', 'BCM_2018', 'BCM_2019', 'BCM_2020', 'BCM_2021',
]

stats = ca_flares.groupby(['county_name'])[col_list].sum()
stats['Total_bcm'] = stats[col_list].sum(axis='columns')

In [None]:
# Counties ranked by their total over all years, largest first.
stats.sort_values('Total_bcm', ascending=False)

In [None]:
# Reshape for plotting: drop the per-county total, append a 'total' row summing
# all counties, then transpose so years run along the x-axis and each county
# (plus 'total') becomes one line.
forplot = stats.drop(columns=['Total_bcm'])
forplot.loc['total'] = forplot.sum()
forplot = forplot.T

plot = forplot.plot.line(
    title='Methane flaring in CA, 2012-2021',
    colormap='tab20c',
    figsize=[15, 8],
)
# Legend sits outside the axes, to the right of the plot.
plot.legend(title='counties', loc='upper left', bbox_to_anchor=(1.05, 1), borderaxespad=0)

## California: BCM by flare category (upstream, downstream oil, downstream gas)

In [None]:
# Total BCM per year for each flare category, restricted to California.
# Uses ca_flares (flares joined to CA counties) rather than all_flares, which
# holds flares from across the world, so the numbers match this section's
# heading and the "in CA" plot title.
col_list = ['BCM_2012','BCM_2013','BCM_2014','BCM_2015','BCM_2016','BCM_2017',
            'BCM_2018','BCM_2019','BCM_2020','BCM_2021']

stats = ca_flares.groupby(['flare_category'])[col_list].sum()
# Per-category total over all years.
stats['Total_cat'] = stats.loc[:,col_list].sum(axis=1)
# Per-column total over all categories (this also sums the Total_cat column).
stats.loc['Total'] = stats.sum()

In [None]:
# Table of BCM totals by flare category and year. The cell where the 'Total'
# row meets the 'Total_cat' column is the sum of the per-category totals,
# i.e. the grand total over every category and year.
stats

In [None]:
# Reshape for plotting: drop the per-category total column, then transpose so
# years run along the x-axis and each flare category (plus the 'Total' row)
# becomes one line.
forplot = stats.drop(['Total_cat'], axis=1)
forplot = forplot.transpose()

plot = forplot.plot(kind = 'line', title = 'Methane flaring in CA, 2012-2021', colormap='tab20c', figsize = [15, 8])
# The lines here are flare categories, not counties, so the legend is titled accordingly.
plot.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0, title='flare category')