In [None]:
# Oil Wells and Race
## Census Tract Data and Racial Composition of Los Angeles

In [None]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import folium

In [None]:
# Read the ACS 2022 5-year race/ethnicity estimates for each census tract

In [None]:
# Load the ACS race table. FIPS is read as text because the tract key built
# later in this notebook is a string that starts with '06'.
cens_race = pd.read_csv(
    'Data/ACS 2022_Race_5YEst.csv',
    dtype={'FIPS': str},
)

In [None]:
# Load the LA census-tract polygons, build a FIPS key for each tract, and join
# the ACS race table onto the geometries so the result can be mapped and
# spatially joined with other layers.
cens_geo = gpd.read_file('Data/Census_Tracts_2020_DM.geojson')
# .copy() makes the two-column subset an independent frame, so adding the FIPS
# column below does not raise pandas' SettingWithCopyWarning.
cens_geo = cens_geo[['CT20','geometry']].copy()
# FIPS = state code '06' (California) + county code '037' (Los Angeles) + tract code.
cens_geo['FIPS'] = '06'+ '037' + cens_geo['CT20']
# merge() defaults to an inner join: tracts without a matching ACS row are dropped.
cens_race_geo= cens_geo.merge(cens_race,on="FIPS")
cens_race_geo.columns

In [None]:
# Compute the percentage share of four racial/ethnic groups (Hispanic vs. non-Hispanic) as the basis of the race/ethnicity analysis

In [None]:
# Share of each group in the tract's total population, expressed in percent.
# Maps the new percentage column to the ACS count column it is derived from.
group_count_columns = {
    'PCT_Black': 'Total Population: Not Hispanic or Latino: Black or African American Alone',
    'PCT_White': 'Total Population: Not Hispanic or Latino: White Alone',
    'PCT_Hispanic': 'Total Population: Hispanic or Latino',
    'PCT_Asian': 'Total Population: Not Hispanic or Latino: Asian Alone',
}
for pct_column, count_column in group_count_columns.items():
    cens_race_geo[pct_column] = (
        cens_race_geo[count_column] / cens_race_geo['Total Population:'] * 100
    )

In [None]:
## Introducing the oil wells data

In [None]:
# Raw oil-well records; the Longitude, Latitude and WellStatus columns are used below.
df = pd.read_csv(filepath_or_buffer='Data/Oil_Wells.csv')

In [None]:
# Turn each well's longitude/latitude pair into a point geometry and wrap the
# table in a GeoDataFrame tagged with the 'epsg:4326' CRS.
well_points = gpd.points_from_xy(df['Longitude'], df['Latitude'])
gdf = gpd.GeoDataFrame(df, geometry=well_points, crs='epsg:4326')

In [None]:
# Keep only wells whose WellStatus is 'A' (active); the smaller frame keeps
# memory use down in the maps and spatial join that follow.
is_active = gdf['WellStatus'] == 'A'
gdf_active = gdf.loc[is_active]

In [None]:
### Question 1: Is there any initial observed clustering of oil wells around ethnic majority census tracts?

In [None]:
# A tract counts as "majority" for a group when that group's share exceeds this percentage.
majority_pct = 50

df_Hmaj = cens_race_geo.loc[cens_race_geo['PCT_Hispanic'].gt(majority_pct)]
df_NHWhite_maj = cens_race_geo.loc[cens_race_geo['PCT_White'].gt(majority_pct)]
df_NHBlack_maj = cens_race_geo.loc[cens_race_geo['PCT_Black'].gt(majority_pct)]
df_NHAsian_maj = cens_race_geo.loc[cens_race_geo['PCT_Asian'].gt(majority_pct)]

In [None]:
# Visualize the active oil wells over each of the four majority-tract subsets,
# starting with the Hispanic-majority tracts.

# Base map centered on the Los Angeles area.
# NOTE(review): folium.Map's attribution parameter is named `attr`; `attribution`
# is forwarded as an extra option, so it may have no visible effect. Confirm intent.
m = folium.Map(location=[34.2,-118.2],
               zoom_start = 9,
               tiles='CartoDB positron',
               attribution='CartoDB')

# Choropleth of the Hispanic share, drawn only for the Hispanic-majority tracts.
folium.Choropleth(
                  geo_data=df_Hmaj, # tract geometries
                  data=df_Hmaj, # values to shade by
                  key_on='feature.properties.FIPS', # GeoJSON property used as the join key
                  columns=['FIPS', 'PCT_Hispanic'], # [key, value]
                  fill_color='RdPu',
                  nan_fill_color='white',  # tracts with no value are drawn white
                  line_weight=0.1,
                  fill_opacity=0.8,
                  line_opacity=0.2, # opacity of the tract borders
                  legend_name='Majority Hispanic Census Tracts (2022)').add_to(m)

# Feature group so the wells appear as one toggleable layer in the layer control.
f2=folium.FeatureGroup(name='Oil Wells').add_to(m)

# One small black circle per active well. Iterating the two coordinate columns
# directly avoids building a full Series for every row, as iterrows() does.
# `overlay` is a layer option rather than a Circle option, so it is not passed here.
for lat, lon in zip(gdf_active['Latitude'], gdf_active['Longitude']):
    folium.Circle(
        radius=1,
        color="black",
        location=[lat, lon]).add_to(f2)

folium.LayerControl(position='topright', collapsed=True, autoZIndex=True).add_to(m)

m

In [None]:
# Map.save() writes an HTML document, so the file needs an .html extension;
# under a .png name the output is still HTML and image viewers cannot open it.
m.save('OilWells_Hispanic_Maj.html')

In [None]:
# Non-Hispanic White majority tracts: choropleth of the White share with the
# active oil wells drawn on top.
m2 = folium.Map(location=[34.2,-118.2],
               zoom_start = 9,
               tiles='CartoDB positron',
               attribution='CartoDB')
folium.Choropleth(
                  geo_data=df_NHWhite_maj, # tract geometries
                  data=df_NHWhite_maj, # values to shade by
                  key_on='feature.properties.FIPS', # GeoJSON property used as the join key
                  columns=['FIPS', 'PCT_White'], # [key, value]
                  fill_color='RdPu',
                  nan_fill_color='white',
                  line_weight=0.1,
                  fill_opacity=0.8,
                  line_opacity=0.2, # opacity of the tract borders
                  legend_name='Majority White Census Tracts (2022)').add_to(m2)

# Feature group so the wells appear as one toggleable layer in the layer control.
f3=folium.FeatureGroup(name='Oil Wells').add_to(m2)

# One small black circle per active well. Iterating the two coordinate columns
# directly avoids building a full Series for every row, as iterrows() does.
# `overlay` is a layer option rather than a Circle option, so it is not passed here.
for lat, lon in zip(gdf_active['Latitude'], gdf_active['Longitude']):
    folium.Circle(
        radius=1,
        color="black",
        location=[lat, lon]).add_to(f3)
folium.LayerControl(position='topright', collapsed=True, autoZIndex=True).add_to(m2)
m2

In [None]:
# Map.save() writes an HTML document, so the file needs an .html extension;
# under a .png name the output is still HTML and image viewers cannot open it.
m2.save('OilWells_NHWhite_Maj.html')

In [None]:
# Non-Hispanic Black majority tracts: choropleth of the Black share with the
# active oil wells drawn on top.
m3 = folium.Map(location=[34.2,-118.2],
               zoom_start = 9,
               tiles='CartoDB positron',
               attribution='CartoDB')
folium.Choropleth(
                  geo_data=df_NHBlack_maj, # tract geometries
                  data=df_NHBlack_maj, # values to shade by
                  key_on='feature.properties.FIPS', # GeoJSON property used as the join key
                  columns=['FIPS', 'PCT_Black'], # [key, value]
                  fill_color='RdPu',
                  nan_fill_color='white',
                  line_weight=0.1,
                  fill_opacity=0.8,
                  line_opacity=0.2, # opacity of the tract borders
                  legend_name='Majority Black Census Tracts (2022)').add_to(m3)

# Feature group so the wells appear as one toggleable layer in the layer control.
f2=folium.FeatureGroup(name='Oil Wells').add_to(m3)

# One small black circle per active well. Iterating the two coordinate columns
# directly avoids building a full Series for every row, as iterrows() does.
# `overlay` is a layer option rather than a Circle option, so it is not passed here.
for lat, lon in zip(gdf_active['Latitude'], gdf_active['Longitude']):
    folium.Circle(
        radius=1,
        color="black",
        location=[lat, lon]).add_to(f2)

folium.LayerControl(position='topright', collapsed=True, autoZIndex=True).add_to(m3)
m3

In [None]:
# Map.save() writes an HTML document, so the file needs an .html extension;
# under a .png name the output is still HTML and image viewers cannot open it.
m3.save('OilWells_NHBlack_Maj.html')

In [None]:
# Non-Hispanic Asian majority tracts: choropleth of the Asian share with the
# active oil wells drawn on top.
m4 = folium.Map(location=[34.2,-118.2],
               zoom_start = 9,
               tiles='CartoDB positron',
               attribution='CartoDB')

folium.Choropleth(
                  geo_data=df_NHAsian_maj, # tract geometries
                  data=df_NHAsian_maj, # values to shade by
                  key_on='feature.properties.FIPS', # GeoJSON property used as the join key
                  columns=['FIPS', 'PCT_Asian'], # [key, value]
                  fill_color='RdPu',
                  nan_fill_color='white',
                  line_weight=0.1,
                  fill_opacity=0.8,
                  line_opacity=0.2, # opacity of the tract borders
                  legend_name='Majority Asian Census Tracts (2022)').add_to(m4)

# Feature group so the wells appear as one toggleable layer in the layer control.
f2=folium.FeatureGroup(name='Oil Wells').add_to(m4)

# One small black circle per active well. Iterating the two coordinate columns
# directly avoids building a full Series for every row, as iterrows() does.
# `overlay` is a layer option rather than a Circle option, so it is not passed here.
for lat, lon in zip(gdf_active['Latitude'], gdf_active['Longitude']):
    folium.Circle(
        radius=1,
        color="black",
        location=[lat, lon]).add_to(f2)

folium.LayerControl(position='topright', collapsed=True, autoZIndex=True).add_to(m4)

m4

In [None]:
# Export the Asian-majority map as a standalone HTML page.
m4.save(outfile='OilWells_NHAsian_Maj.html')

In [None]:
### Question 2: What is the racial composition of the top 10 census tracts with the most oil wells?

In [None]:
# Spatially join each active well to the census tract it falls within, bringing
# along the tract's racial-group percentages. how='left' keeps every well, so
# wells outside all tracts get missing tract columns.
# `predicate` replaces the `op` keyword, which geopandas deprecated and later removed.
merged_df = gpd.sjoin(gdf_active, cens_race_geo, how='left', predicate='within')
merged_df.columns

In [None]:
# Count active wells per tract and pull the ten tracts with the most wells.
# 'index_right' holds the row label of the matched tract in cens_race_geo;
# groupby skips wells with no matched tract (missing label).
number_wells = merged_df.groupby('index_right').size()
top_10 = number_wells.nlargest(10)
# The left join can leave 'index_right' as a float column (missing values for
# unmatched wells), so cast the labels back to integers before the .loc lookup
# to match cens_race_geo's integer row labels exactly.
top_10_tract_labels = top_10.index.astype(int)
tract_PCT_oil = cens_race_geo.loc[top_10_tract_labels]
tract_PCT_oil.columns

In [None]:
# Bar chart of the mean share of each racial/ethnic group across the ten tracts
# with the most active oil wells.
df2 = pd.DataFrame(tract_PCT_oil, columns=['PCT_Hispanic', 'PCT_White', 'PCT_Black', 'PCT_Asian'])
# Mean of each percentage column over the ten tracts
column_means = df2.mean()

# One bar per group
fig, ax = plt.subplots()
ax.bar(column_means.index, column_means, color='Green')
ax.set_title('Racial demographics in Top 10 Oil Wells Tracts')
ax.set_xlabel('Race/Ethnicity(by H-NH)')
ax.set_ylabel('% of Tract Residents')

# Save from the figure object before plt.show(): once the figure has been shown,
# a later plt.savefig() call writes a new, empty figure instead of this chart.
fig.savefig('Top10Oil_byRace.png')
plt.show()

In [None]:
# County-wide baseline for comparison: mean share of each racial/ethnic group
# across every census tract in cens_race_geo, shown as a bar chart.
pct_columns = ['PCT_Hispanic', 'PCT_White', 'PCT_Black', 'PCT_Asian']
df3 = pd.DataFrame(cens_race_geo, columns=pct_columns)
column_means_gen = df3.mean()

# One bar per group, drawn with the explicit figure/axes interface.
fig, ax = plt.subplots()
ax.bar(column_means_gen.index, column_means_gen, color='Brown')
ax.set(
    title='Racial demographics in Los Angeles County',
    xlabel='Race/Ethnicity(by H-NH)',
    ylabel='% of Tract Residents',
)
plt.show()