In [None]:
%matplotlib notebook
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
import contextily as ctx
import plotly.express as px
import seaborn as sns
import numpy as np
import folium

# For spatial statistics
import esda
from esda.moran import Moran, Moran_Local
import splot
from splot.esda import moran_scatterplot, plot_moran, lisa_cluster,plot_moran_simulation
import libpysal as lps

# California Power Plant Map

In [None]:
#import data
cpp = gpd.read_file('California_Power_Plants_MP Cleaned 3.1.21.csv')

Explore data

In [None]:
type(cpp)

In [None]:
cpp.shape

In [None]:
cpp.head(10)

In [None]:
cpp.dtypes

In [None]:
# Give every column a short, code-friendly name.
# Assigning a list to `.columns` is purely positional: the list must contain
# exactly one name per existing column, in the file's column order
# (25 names here, the last one being `geometry`).
# NOTE(review): if the CSV layout changes, this silently mislabels columns — confirm against cpp.head().
cpp.columns = ['Plant_ID',
 'Name',
 'MW',
 'Gross_MWh',
 'Net_MWh',
 'Fuel_Type',
 'Status',
 'Online_Year',
 'REAT_ID',
 'County',
 'State',
 'Renewable_Energy',
 'Jobs',
 'Senate_District',
 'Assembly_District',
 'Congressional_District',
 'CES30_PercentileRange',
 'CES30_Percentile',
 'Lon',
 'Lat',
 'Operation_Job',
 'Capacity_Factor',
 'Income_Percent',
 'Project_Location',
 'geometry']

In [None]:
# Columns carried forward into the rest of the analysis
desired_columns = [
    'Plant_ID', 'Name', 'MW', 'Fuel_Type', 'Status',
    'County', 'State', 'Renewable_Energy', 'Jobs',
    'CES30_PercentileRange', 'CES30_Percentile',
    'Lon', 'Lat',
    'Income_Percent', 'Project_Location',
    'geometry',
]

# Take an independent copy so later edits to cpp_trim never touch `cpp`
cpp_trim = cpp[desired_columns].copy()

# Show the trimmed frame
cpp_trim

In [None]:
# Cast both coordinate columns to float so they can be used to build point geometries.
for coord_col in ('Lon', 'Lat'):
    cpp_trim[coord_col] = cpp_trim[coord_col].astype('float')

In [None]:
# Build a point GeoDataFrame from the Lon/Lat columns.
# The points are declared to be in EPSG:4326; no reprojection happens in this
# cell (the to_crs call comes later in the notebook).
cpp_trim = gpd.GeoDataFrame(cpp_trim, 
                         crs='EPSG:4326',
                         geometry=gpd.points_from_xy(cpp_trim.Lon, cpp_trim.Lat))

# preview the first rows
cpp_trim.head(5)

In [None]:
# check crs type
cpp_trim.crs

# Demographics (CES)

In [None]:
# import demographics dataset
gdf_ces = gpd.read_file('CES3June2018Update.shp')

Explore data

In [None]:
type(gdf_ces)

In [None]:
gdf_ces.crs

In [None]:
gdf_ces.shape

In [None]:
# Columns of the CES layer that the analysis needs
columns_to_keep = [
    'tract', 'pop2010', 'California', 'ZIP', 'City',
    'Longitude', 'Latitude',
    'CIscore', 'CIscoreP',
    'edu', 'eduP', 'pov', 'povP', 'unemp', 'unempP',
    'Pop_11_64_', 'Elderly_ov',
    'Hispanic_p', 'White_pct', 'African_Am', 'Native_Ame', 'Asian_Amer', 'Other_pct',
    'geometry',
]

# Redefine the dataframe with only those columns
gdf_ces = gdf_ces[columns_to_keep]

# Preview the result
gdf_ces.head()

In [None]:
list(gdf_ces)

## Tracts by race

In [None]:
# create list of races
ces_races = ['Hispanic_p', 'White_pct', 'African_Am', 'Asian_Amer']

In [None]:
# reproject dataframes to web mercator
gdf_ces = gdf_ces.to_crs(epsg=3857)

cpp_trim = cpp_trim.to_crs(epsg=3857)

In [None]:
# Convert CES30_Percentile to a numeric dtype.
# pd.to_numeric chooses the dtype from the values, so the result is not
# necessarily integer (any fractional or missing value yields float).
cpp_trim['CES30_Percentile'] = pd.to_numeric(cpp_trim['CES30_Percentile'])

In [None]:
# Convert MW to a numeric dtype (integer or float, depending on the values).
cpp_trim['MW'] = pd.to_numeric(cpp_trim['MW'])

In [None]:
# create dataframe for renewable energy
# Renewable_Energy is compared against the string '0' here; this cell runs
# before the column is converted with pd.to_numeric further down.
# NOTE(review): `!= '0'` keeps every row that is not exactly '0', so blank or
# unexpected values would also land in this subset — confirm the column only holds '0'/'1'.
cpp_ce = cpp_trim[cpp_trim.Renewable_Energy != '0']

In [None]:
# check work
cpp_ce.shape

In [None]:
# create dataframe for fossil fuel energy
# Renewable_Energy is compared against the string '1' here; this cell runs
# before the column is converted with pd.to_numeric further down.
# NOTE(review): `!= '1'` keeps every row that is not exactly '1', so blank or
# unexpected values would also land in this subset — confirm the column only holds '0'/'1'.
cpp_ff = cpp_trim[cpp_trim.Renewable_Energy != '1']

In [None]:
cpp_ff.shape

In [None]:
# Flag each plant: 0 when Project_Location is exactly
# 'Neither Low Income Nor Disadvantaged Community', 1 for anything else.
cpp_trim['Community type'] = (
    cpp_trim['Project_Location']
    .ne('Neither Low Income Nor Disadvantaged Community')
    .astype('int64')
)

In [None]:
list(cpp_trim)

In [None]:
# Convert Renewable_Energy to a numeric dtype.
# From this cell onward the column in cpp_trim (and in any frame derived from
# it afterwards) no longer holds the strings '0'/'1', so later filters on it
# must compare against numbers rather than strings.
cpp_trim['Renewable_Energy'] = pd.to_numeric(cpp_trim['Renewable_Energy'])

In [None]:
# convert to integer
cpp_trim['Community type'] = pd.to_numeric(cpp_trim['Community type'])

In [None]:
# check work
cpp_trim.dtypes

In [None]:
# create CPP dataframe for LA
cpp_LA = cpp_trim[cpp_trim.County == 'Los Angeles']

In [None]:
cpp_LA.head()

In [None]:
# create CES dataframe for LA
ces_LA = gdf_ces[gdf_ces.California == 'Los Angeles']
ces_LA.shape

In [None]:
gdf_ces.shape

In [None]:
# create dataframe for fossil fuel energy in LA
# cpp_LA is sliced from cpp_trim after Renewable_Energy was converted with
# pd.to_numeric, so comparing against the string '1' matched nothing and this
# subset ended up holding every LA plant. Compare numerically instead;
# pd.to_numeric keeps the filter correct whether the column is text or numeric.
cpp_ff_LA = cpp_LA[pd.to_numeric(cpp_LA.Renewable_Energy) != 1]

In [None]:
# create dataframe for renewable energy in LA
# cpp_LA is sliced from cpp_trim after Renewable_Energy was converted with
# pd.to_numeric, so `== '1'` (a string) was never true and this subset was
# always empty. Compare numerically instead; pd.to_numeric keeps the filter
# correct whether the column is text or numeric.
cpp_ce_LA = cpp_LA[pd.to_numeric(cpp_LA.Renewable_Energy) == 1]

# Data analysis

## Stacked bar chart

Compare renewable power plants to fossil fuel plants across environmental burden (CES) score

First, create a new variable that groups the CES percentile scores into four bands (below 25, 25–49, 50–74, and 75 and above).

In [None]:
# first percentile
cpp_trim.loc[cpp_trim['CES30_Percentile'] < 25, 'CES30_Percentile_Group'] = '1st Percentile'

In [None]:
# check work
cpp_trim.head(10)

In [None]:
# Top band: CES percentile of 75 and above.
# Using `>= 75` instead of `> 74` keeps fractional scores such as 74.5 out of
# this band; for whole-number scores the result is unchanged.
cpp_trim.loc[cpp_trim['CES30_Percentile'] >= 75, 'CES30_Percentile_Group'] = '4th Percentile'

In [None]:
print(cpp_trim.CES30_Percentile_Group)

In [None]:
# Second band: 25 <= CES percentile < 50.
# The lower bound is `>= 25` rather than `> 24` so that fractional scores
# between 24 and 25 stay in the first band (which is defined as `< 25`)
# instead of being relabelled here; whole-number scores are unaffected.
cpp_trim.loc[(cpp_trim['CES30_Percentile'] >= 25) &
             (cpp_trim['CES30_Percentile'] < 50), 'CES30_Percentile_Group'] = '2nd Percentile'

In [None]:
# Third band: 50 <= CES percentile < 75.
# The lower bound is `>= 50` rather than `> 49` so that fractional scores
# between 49 and 50 are not pulled out of the second band by this later cell;
# whole-number scores are unaffected.
cpp_trim.loc[(cpp_trim['CES30_Percentile'] >= 50) &
             (cpp_trim['CES30_Percentile'] < 75), 'CES30_Percentile_Group'] = '3rd Percentile'

In [None]:
print(cpp_trim.CES30_Percentile_Group)

In [None]:
# add count of percentiles
cpp_trim.groupby(['CES30_Percentile_Group']).count()

In [None]:
# Count plants (non-null Plant_ID values) per percentile band and energy type
group_keys = ['CES30_Percentile_Group', 'Renewable_Energy']
cpp_grouped = cpp_trim.groupby(group_keys)[['Plant_ID']].count()
cpp_grouped.head(50)

In [None]:
cpp_flat = cpp_grouped.reset_index()
cpp_flat

In [None]:
# rename columns
cpp_flat = cpp_flat.rename(columns={'Plant_ID':'count'})

In [None]:
# convert Renewable_Energy to string
cpp_flat = cpp_flat.astype({'Renewable_Energy':str})

In [None]:
cpp_flat.dtypes

In [None]:
cpp_flat.columns = ['CES_Percentile', 'Energy Type','count']

In [None]:
# check work
cpp_flat.head(10)

In [None]:
# rename renewable energy values
# Assign the result back to the column instead of calling
# replace(..., inplace=True) on `cpp_flat['Energy Type']`: the in-place call
# acts on a selected Series, which pandas warns about and which leaves
# cpp_flat unchanged when Copy-on-Write is enabled.
# NOTE(review): the mapping only matches the exact strings '1' and '0'; if the
# column had been float before the str conversion the values would read
# '1.0'/'0.0' — confirm with cpp_flat['Energy Type'].unique().
cpp_flat['Energy Type'] = cpp_flat['Energy Type'].replace({'1':'Renewable','0':'Fossil fuel'})
cpp_flat

In [None]:
# make basic bar chart
px.bar(cpp_flat,
       x='CES_Percentile',
       y='count'
      )

In [None]:
# make a stacked bar chart - Percentile
px.bar(cpp_flat,
       x='CES_Percentile',
       y='count',
       color='Energy Type'
      )

In [None]:
# make a stacked bar chart - Renewable Energy
fig = px.bar(cpp_flat,
             x='Energy Type',
             y='count',
             color='CES_Percentile',
             title='Energy type by Environmental Burden'
            )

fig.show()

In [None]:
# write to html for storymap display
fig.write_html("Energy_CES_Stacked.html")

# Spatial analysis

In [None]:
# create CES new dataframe for spatial analysis
ces_spatial = gdf_ces.copy()

In [None]:
# check work
ces_spatial.head()

In [None]:
# Keep only the tract id, county name and geometry, and rename the first two
# to FIPS and County in the same step.
ces_spatial = (
    ces_spatial[['tract', 'California', 'geometry']]
    .rename(columns={'tract': 'FIPS', 'California': 'County'})
)

In [None]:
ces_spatial.head()

In [None]:
# create CPP new dataframe for spatial analysis
cpp_spatial = cpp.copy()

In [None]:
# check work
cpp_spatial.head()

In [None]:
# project to web mercator
ces_spatial = ces_spatial.to_crs(epsg=3857)

In [None]:
# Cast both coordinate columns of cpp_spatial to float
for axis_col in ('Lon', 'Lat'):
    cpp_spatial[axis_col] = cpp_spatial[axis_col].astype('float')

In [None]:
# Convert CPP data to geodataframe
cpp_spatial = gpd.GeoDataFrame(cpp_spatial, 
                         crs='EPSG:4326',
                         geometry=gpd.points_from_xy(cpp_spatial.Lon, cpp_spatial.Lat))

cpp_spatial.head()

In [None]:
# check crs type
cpp_spatial.crs

In [None]:
# Reproject CES to web mercator (EPSG:3857).
# NOTE(review): ces_spatial already went through to_crs(epsg=3857) in an
# earlier cell of this section, so this repeat looks redundant.
ces_spatial = ces_spatial.to_crs(epsg=3857)

In [None]:
#reproject CPP to web mercator
cpp_spatial = cpp_spatial.to_crs(epsg=3857)

# check work
print(cpp_spatial.crs)

We're going to start our spatial statistics. We need to begin by creating new dataframes with counts for all power plants, for just clean energy plants, and for just fossil fuel plants, and then join each of them back to the main census-tract dataframe.

In [None]:
# Spatial join of both datasets: attach census-tract attributes (FIPS, County)
# to each power plant point.
# how='left' keeps every row of cpp_spatial, including plants that fall in no tract.
# NOTE(review): both inputs carry a `County` column, so the result presumably
# holds suffixed County_left / County_right columns — confirm before using
# `County` on the joined frame.
join = gpd.sjoin(cpp_spatial, ces_spatial, how='left')
join.head(20)

In [None]:
join.dtypes

In [None]:
# create new dataframe for grouping CPP data into tracts
powerplants_by_tract_spatial = join.FIPS.value_counts().rename_axis('FIPS').reset_index(name='powerplant_count')
powerplants_by_tract_spatial.head(20)

In [None]:
# join the summary table to main gdf
# merge() defaults to an inner join, so tracts whose FIPS does not appear in
# the count table (tracts with no joined power plant) are dropped from
# ces_spatial here.
# NOTE(review): every later weights / lag / Moran step therefore only sees
# tracts with at least one plant — confirm that is intended.
ces_spatial=ces_spatial.merge(powerplants_by_tract_spatial,on='FIPS')

In [None]:
ces_spatial.head()

In [None]:
# map every census tract left in ces_spatial, ordered by power plant count
# (no [:20] slice is applied here, unlike the fossil-fuel and clean-energy
# versions of this map further down)
fig,ax = plt.subplots(figsize=(10,10))
ces_spatial.sort_values(by='powerplant_count',ascending=False).plot(ax=ax,
                                                                 color='red',
                                                                 edgecolor='white',
                                                                 alpha=0.5,
                                                                 legend=True)

# title
ax.set_title('All power plants')

# no axis
ax.axis('off')

# add a basemap
ctx.add_basemap(ax,source=ctx.providers.CartoDB.Positron)

## Spatial autocorrelation for all power plants

### Spatial lag for all power plants

In [None]:
# calculate spatial weights: each tract is linked to its 8 nearest neighbours (k=8)
wq =  lps.weights.KNN.from_dataframe(ces_spatial,k=8)

# Row-standardization ('r' transform), so each tract's neighbour weights are
# rescaled relative to that tract's own row
wq.transform = 'r'

In [None]:
ces_spatial.sample(5)

In [None]:
# create a new column for the spatial lag
ces_spatial['powerplant_count_lag'] = lps.weights.lag_spatial(wq, ces_spatial['powerplant_count'])

In [None]:
# create a column holding the difference between each tract's power plant
# count and its spatial lag (positive = more plants than its neighbourhood lag)
ces_spatial['powerplant_count_lag_diff'] = ces_spatial['powerplant_count'] - ces_spatial['powerplant_count_lag']

In [None]:
# check work
ces_spatial.sample(10)[['FIPS','County','geometry','powerplant_count','powerplant_count_lag']]

In [None]:
# sort by lag differences
ces_spatial.sort_values(by='powerplant_count_lag_diff')

In [None]:
# choropleth of the raw power plant count per tract
fig,ax = plt.subplots(figsize=(15,15))

ces_spatial.plot(ax=ax,
        column='powerplant_count',
        legend=True,
        alpha=0.8,
         # the `_r` suffix reverses the colormap, so that green marks low numbers and red high
        cmap='RdYlGn_r',
         # classes come from the 'naturalbreaks' scheme (not quantiles)
        scheme='naturalbreaks')
                   
ax.axis('off')

ax.set_title('All Power Plants',fontsize=22)

ctx.add_basemap(ax,source=ctx.providers.CartoDB.Positron)

In [None]:
# plot the spatial lag with a choropleth map
# The figure size is set once, on plt.subplots. The extra figsize=(15,15)
# that used to be passed to .plot() was ignored because an explicit `ax` is
# supplied, so it has been removed; the rendered figure is still 8x8.
fig, ax = plt.subplots(figsize=(8, 8))

ces_spatial.plot(ax=ax,
         column='powerplant_count_lag',
         legend=True,
         alpha=0.8,
         cmap='RdYlGn_r',
         scheme='naturalbreaks')

ax.axis('off')
ax.set_title('Powerplants spatial lag',fontsize=22)

ctx.add_basemap(ax,source=ctx.providers.CartoDB.Positron)

### Moran's I Plot for all powerplants

In [None]:
# Global Moran's I for the per-tract power plant counts.
# moran.p_sim is estimated from random permutations; seeding NumPy's global
# RNG first makes the reported p-value repeatable under Restart & Run All.
# The seed value itself is arbitrary.
np.random.seed(42)
y = ces_spatial.powerplant_count
moran = Moran(y, wq)
moran.I

In [None]:
# Create a scatterplot of the Moran's plot
fig, ax = moran_scatterplot(moran, aspect_equal=True)
plt.show()

We don't yet know whether our Moran's I value of 0.145 is statistically significant. To find out, we need to calculate the p-value.

In [None]:
moran.p_sim

This is a very low p-value: if power plant counts were arranged randomly across tracts, there would be only about a 0.1% chance of seeing a pattern at least this clustered. It means our Moran's I value is statistically significant.

We want to look at where the clusters are. We'll use the Local Indicators of Spatial Association (LISA) model. LISA classifies areas into four groups: high values near high values (HH), low values near low values (LL), low values with high values in their neighborhood (LH), and high values with low values in their neighborhood (HL).

In [None]:
# calculate local moran values
# Seed NumPy's global RNG so the permutation-based p-values (lisa.p_sim), and
# therefore the set of tracts later treated as significant, are repeatable
# between runs. The seed value itself is arbitrary.
# NOTE(review): depending on the installed esda version, passing `seed=` to
# Moran_Local may be the more direct control — confirm.
np.random.seed(42)
lisa = esda.moran.Moran_Local(y, wq)

In [None]:
# Plot
fig,ax = plt.subplots(figsize=(10,10))

moran_scatterplot(lisa, ax=ax, p=0.05)
ax.set_xlabel("Powerplants")
ax.set_ylabel('Spatial Lag of Powerplants')

# add labels
plt.text(1.95, 0.5, "HH", fontsize=25)
plt.text(1.95, -1, "HL", fontsize=25)
plt.text(-2, 1, "LH", fontsize=25)
plt.text(-1, -1, "LL", fontsize=25)
plt.show()

In the scatterplot above, the yellow dots represent the census tracts that have a p-value of less than 0.05. We have several geographies with statistically significant spatial autocorrelation. Below, we'll visualize them on a map.

In [None]:
# use LISA cluster function
fig, ax = plt.subplots(figsize=(10,10))
lisa_cluster(lisa, ces_spatial, p=0.05, ax=ax)


ctx.add_basemap(ax,source=ctx.providers.CartoDB.Positron)

ax.set_title('All Power Plants\n Statistically Significant Clusters',fontsize=22)

plt.show()

In [None]:
# Store the local Moran pseudo p-value per tract, plus a 0/1 flag for p < 0.05
sig = (lisa.p_sim < 0.05).astype(int)
ces_spatial['p-sim'] = lisa.p_sim
ces_spatial['sig'] = sig

# Keep only the tracts whose pseudo p-value is below 0.05
significant_mask = ces_spatial['p-sim'] < 0.05
all_pp_sig = ces_spatial[significant_mask]
all_pp_sig

In [None]:
latitude = join.Lat.mean()
latitude

In [None]:
longitude = join.Lon.mean()
longitude

In [None]:
m = folium.Map(location=[latitude,longitude], zoom_start=11)
m

In [None]:
# Interactive choropleth of the spatial lag for the statistically significant tracts.
# NOTE(review): the map centre below is hard-coded; the `latitude`/`longitude`
# means computed in the previous cells are not used here — confirm which is intended.
#bins_interval = [0, 20, 30, 40, 50, 60, 70, 80, 100]
m10 = folium.Map(location=[33.99169246896552, -117.272919], 
               zoom_start = 10,
               tiles='CartoDB positron')
# geo_data supplies the tract outlines (all rows of ces_spatial) while `data`
# supplies values only for the significant subset; tracts without a value take
# nan_fill_color with nan_fill_opacity=0, i.e. their fill is fully transparent.
choropleth = folium.Choropleth(
                  geo_data=ces_spatial, 
                  data=all_pp_sig,         
                  key_on='feature.properties.FIPS',
                  columns=['FIPS', 'powerplant_count_lag'],
                  fill_color='YlOrRd',
                  line_weight=.5, 
                  line_color='#252525',
                  fill_opacity=0.5,
                  line_opacity=1,
                  nan_fill_color='grey',
                  nan_fill_opacity=0,
                  #bins=[float(x) for x in bins_interval],
                  ).add_to(m10)   
# hover tooltip showing the raw power plant count of each tract
choropleth.geojson.add_child(
    folium.features.GeoJsonTooltip(['powerplant_count'],labels=True)
)
m10

In [None]:
m10.save('All_PP_Sig.html')

## Fossil fuel spatial analysis

In [None]:
# create dataframe for fossil fuel energy
# `join` derives from `cpp` (via cpp_spatial), not from cpp_trim, and its
# Renewable_Energy column was never passed through pd.to_numeric — hence the
# comparison against the string '1' here.
# NOTE(review): assumes the column holds the strings '0'/'1' as loaded — confirm.
join_ff = join[join.Renewable_Energy != '1']

In [None]:
join_ff.shape

In [None]:
# create new dataframe for grouping CPP data into tracts
ff_bytract_spatial = join_ff.FIPS.value_counts().rename_axis('FIPS').reset_index(name='ff_count')
ff_bytract_spatial.head(20)

In [None]:
# create new dataframe to run separate Moran's
# NOTE(review): at this point ces_spatial has already been reduced by the
# merge with the all-plant counts and carries the `p-sim` / `sig` columns from
# the all-plant LISA; the cells below delete only the three powerplant_*
# columns, so those two columns remain in this copy.
ces_spatial_ff=ces_spatial.copy()

In [None]:
ces_spatial_ff.head()

In [None]:
# delete powerplant columns
del ces_spatial_ff['powerplant_count']

In [None]:
del ces_spatial_ff['powerplant_count_lag']

In [None]:
del ces_spatial_ff['powerplant_count_lag_diff']

In [None]:
ces_spatial_ff.head()

In [None]:
# join the summary table to main gdf
ces_spatial_ff=ces_spatial_ff.merge(ff_bytract_spatial,on='FIPS')

In [None]:
ces_spatial_ff.head()

In [None]:
# map the top 20 census tracts
fig,ax = plt.subplots(figsize=(10,10))
ces_spatial_ff.sort_values(by='ff_count',ascending=False)[:20].plot(ax=ax,
                                                                 color='red',
                                                                 edgecolor='white',
                                                                 alpha=0.5,
                                                                 legend=True)

# title
ax.set_title('Top 20 tracts of fossil fuel power plants')

# no axis
ax.axis('off')

# add a basemap
ctx.add_basemap(ax,source=ctx.providers.CartoDB.Positron)

### Spatial lag for fossil fuel power plants

In [None]:
# calculate spatial weight
wq =  lps.weights.KNN.from_dataframe(ces_spatial_ff,k=8)

# Row-standardization
wq.transform = 'r'

In [None]:
# create a new column for the spatial lag
ces_spatial_ff['ff_count_lag'] = lps.weights.lag_spatial(wq, ces_spatial_ff['ff_count'])

In [None]:
# create a column that calculates the difference betwen powerplant counts and lag
ces_spatial_ff['ff_count_lag_diff'] = ces_spatial_ff['ff_count'] - ces_spatial_ff['ff_count_lag']

In [None]:
# check work
ces_spatial_ff.sample(10)[['FIPS','County','geometry','ff_count','ff_count_lag']]

In [None]:
# sort by lag differences
ces_spatial_ff.sort_values(by='ff_count_lag_diff')

In [None]:
# plot the spatial lag with a choropleth map
# The figure size is set once, on plt.subplots. The extra figsize=(15,15)
# that used to be passed to .plot() was ignored because an explicit `ax` is
# supplied, so it has been removed; the rendered figure is still 8x8.
fig, ax = plt.subplots(figsize=(8, 8))

ces_spatial_ff.plot(ax=ax,
         column='ff_count_lag',
         legend=True,
         alpha=0.8,
         cmap='RdYlGn_r',
         scheme='quantiles')

ax.axis('off')
ax.set_title('Fossil fuel plants spatial lag',fontsize=22)

ctx.add_basemap(ax,source=ctx.providers.CartoDB.Positron)

### Moran's I for fossil fuel plants

In [None]:
# Global Moran's I for the per-tract fossil fuel plant counts.
# moran.p_sim is estimated from random permutations; seeding NumPy's global
# RNG first makes the reported p-value repeatable under Restart & Run All.
# The seed value itself is arbitrary.
np.random.seed(42)
y = ces_spatial_ff.ff_count
moran = Moran(y, wq)
moran.I

In [None]:
# Create a scatterplot of the Moran's plot
fig, ax = moran_scatterplot(moran, aspect_equal=True)
plt.show()

In [None]:
# calculate p-value
moran.p_sim

In [None]:
# calculate local moran values
# Seed NumPy's global RNG so the permutation-based p-values (lisa.p_sim) are
# repeatable between runs. The seed value itself is arbitrary.
# NOTE(review): depending on the installed esda version, passing `seed=` to
# Moran_Local may be the more direct control — confirm.
np.random.seed(42)
lisa = esda.moran.Moran_Local(y, wq)

In [None]:
# Plot
fig,ax = plt.subplots(figsize=(10,10))

moran_scatterplot(lisa, ax=ax, p=0.05)
ax.set_xlabel("Powerplants")
ax.set_ylabel('Spatial Lag of Powerplants')

# add labels
plt.text(1.95, 0.5, "HH", fontsize=25)
plt.text(1.95, -1, "HL", fontsize=25)
plt.text(-2, 1, "LH", fontsize=25)
plt.text(-1, -1, "LL", fontsize=25)
plt.show()

In [None]:
# use LISA cluster function
fig, ax = plt.subplots(figsize=(8,8))
lisa_cluster(lisa, ces_spatial_ff, p=0.05, ax=ax)
ctx.add_basemap(ax,source=ctx.providers.CartoDB.Positron)
plt.show()

## Spatial analysis for clean energy plants

In [None]:
# create dataframe for clean energy
join_ce = join[join.Renewable_Energy == '1']

In [None]:
join_ce.shape

In [None]:
# create new dataframe for grouping CPP data into tracts
ce_bytract_spatial = join_ce.FIPS.value_counts().rename_axis('FIPS').reset_index(name='ce_count')
ce_bytract_spatial.head(20)

In [None]:
# create new dataframe to run separate Moran's
ces_spatial_ce=ces_spatial.copy()

In [None]:
ces_spatial_ce.head()

In [None]:
del ces_spatial_ce['powerplant_count']

In [None]:
del ces_spatial_ce['powerplant_count_lag']

In [None]:
del ces_spatial_ce['powerplant_count_lag_diff']

In [None]:
# join the summary table to main gdf
ces_spatial_ce=ces_spatial_ce.merge(ce_bytract_spatial,on='FIPS')

In [None]:
ces_spatial_ce.head()

In [None]:
# map the top 20 census tracts
fig,ax = plt.subplots(figsize=(10,10))
ces_spatial_ce.sort_values(by='ce_count',ascending=False)[:20].plot(ax=ax,
                                                                 color='red',
                                                                 edgecolor='white',
                                                                 alpha=0.5,
                                                                 legend=True)

# title
ax.set_title('Top 20 tracts of clean energy power plants')

# no axis
ax.axis('off')

# add a basemap
ctx.add_basemap(ax,source=ctx.providers.CartoDB.Positron)

In [None]:
# choropleth of the raw clean energy plant count per tract
fig,ax = plt.subplots(figsize=(12,12))

ces_spatial_ce.plot(ax=ax,
        column='ce_count',
        legend=True,
        alpha=0.8,
         # the `_r` suffix reverses the colormap, so that green marks low numbers and red high
        cmap='RdYlGn_r',
         # classes come from the 'naturalbreaks' scheme (not quantiles)
        scheme='naturalbreaks')
                   
ax.axis('off')

ax.set_title('Clean Energy Plants',fontsize=22)

ctx.add_basemap(ax,source=ctx.providers.CartoDB.Positron)

### Spatial weight for clean energy

In [None]:
# calculate spatial weight
wq =  lps.weights.KNN.from_dataframe(ces_spatial_ce,k=8)

# Row-standardization
wq.transform = 'r'

In [None]:
# create a new column for the spatial lag
ces_spatial_ce['ce_count_lag'] = lps.weights.lag_spatial(wq, ces_spatial_ce['ce_count'])

In [None]:
# create a column that calculates the difference betwen powerplant counts and lag
ces_spatial_ce['ce_count_lag_diff'] = ces_spatial_ce['ce_count'] - ces_spatial_ce['ce_count_lag']

In [None]:
# check work
ces_spatial_ce.sample(10)[['FIPS','County','geometry','ce_count','ce_count_lag']]

In [None]:
# sort by lag differences
ces_spatial_ce.sort_values(by='ce_count_lag_diff')

In [None]:
# plot the spatial lag with a choropleth map
# The figure size is set once, on plt.subplots. The extra figsize=(15,15)
# that used to be passed to .plot() was ignored because an explicit `ax` is
# supplied, so it has been removed; the rendered figure is still 8x8.
fig, ax = plt.subplots(figsize=(8, 8))

ces_spatial_ce.plot(ax=ax,
         column='ce_count_lag',
         legend=True,
         alpha=0.8,
         cmap='RdYlGn_r',
         scheme='quantiles')

ax.axis('off')
ax.set_title('Clean energy spatial lag',fontsize=22)

ctx.add_basemap(ax,source=ctx.providers.CartoDB.Positron)

### Moran's I for clean energy

In [None]:
# Global Moran's I for the per-tract clean energy plant counts.
# moran.p_sim is estimated from random permutations; seeding NumPy's global
# RNG first makes the reported p-value repeatable under Restart & Run All.
# The seed value itself is arbitrary.
np.random.seed(42)
y = ces_spatial_ce.ce_count
moran = Moran(y, wq)
moran.I

In [None]:
# Create a scatterplot of the Moran's plot
fig, ax = moran_scatterplot(moran, aspect_equal=True)
plt.show()

In [None]:
moran.p_sim

In [None]:
# calculate local moran values
# Seed NumPy's global RNG so the permutation-based p-values (lisa.p_sim), and
# therefore the set of tracts later treated as significant, are repeatable
# between runs. The seed value itself is arbitrary.
# NOTE(review): depending on the installed esda version, passing `seed=` to
# Moran_Local may be the more direct control — confirm.
np.random.seed(42)
lisa = esda.moran.Moran_Local(y, wq)

In [None]:
# Plot
fig,ax = plt.subplots(figsize=(10,10))

moran_scatterplot(lisa, ax=ax, p=0.05)
ax.set_xlabel("Powerplants")
ax.set_ylabel('Spatial Lag of Powerplants')

# add labels
plt.text(1.95, 0.5, "HH", fontsize=25)
plt.text(1.95, -1, "HL", fontsize=25)
plt.text(-2, 1, "LH", fontsize=25)
plt.text(-1, -1, "LL", fontsize=25)
plt.show()

In [None]:
# use LISA cluster function
fig, ax = plt.subplots(figsize=(8,8))
lisa_cluster(lisa, ces_spatial_ce, p=0.05, ax=ax)
ax.set_title('Clean Energy Plants\n Statistically Significant Clusters')
ctx.add_basemap(ax,source=ctx.providers.CartoDB.Positron)
plt.show()

In [None]:
# Store the local Moran pseudo p-value per tract, plus a 0/1 flag for p < 0.05
sig = (lisa.p_sim < 0.05).astype(int)
ces_spatial_ce['p-sim'] = lisa.p_sim
ces_spatial_ce['sig'] = sig

# Keep only the tracts whose pseudo p-value is below 0.05
significant_mask = ces_spatial_ce['p-sim'] < 0.05
ce_sig = ces_spatial_ce[significant_mask]
ce_sig

In [None]:
latitude = join_ce.Lat.mean()
latitude

In [None]:
longitude = join_ce.Lon.mean()
longitude

In [None]:
m = folium.Map(location=[latitude,longitude], zoom_start=11)
m

In [None]:
# Interactive choropleth of the clean energy spatial lag for the statistically significant tracts.
# NOTE(review): the map centre below is hard-coded; the `latitude`/`longitude`
# means computed in the previous cells are not used here — confirm which is intended.
#bins_interval = [0, 20, 30, 40, 50, 60, 70, 80, 100]
m10_2 = folium.Map(location=[33.99169246896552, -117.272919], 
               zoom_start = 10,
               tiles='CartoDB positron')
# geo_data supplies the tract outlines (all rows of ces_spatial_ce) while
# `data` supplies values only for the significant subset; tracts without a
# value take nan_fill_color with nan_fill_opacity=0, i.e. their fill is fully transparent.
choropleth = folium.Choropleth(
                  geo_data=ces_spatial_ce, 
                  data=ce_sig,         
                  key_on='feature.properties.FIPS',
                  columns=['FIPS', 'ce_count_lag'],
                  fill_color='YlOrRd',
                  line_weight=.5, 
                  line_color='#252525',
                  fill_opacity=0.5,
                  line_opacity=1,
                  nan_fill_color='grey',
                  nan_fill_opacity=0,
                  #bins=[float(x) for x in bins_interval],
                  ).add_to(m10_2)   
# hover tooltip showing the raw clean energy plant count of each tract
choropleth.geojson.add_child(
    folium.features.GeoJsonTooltip(['ce_count'],labels=True)
)
m10_2

In [None]:
m10_2.save('CESig.html')