Combine the Wind, Oil, and Gas CSV (energy) with the Wildfire and Drought Risk CSV (risk) using a shapefile of US counties.

In [6]:
import pandas as pd
import numpy as np
import geopandas as gpd
from shapely.geometry import Point

In [16]:
risk = pd.read_csv('Drought_WHP.csv')
risk.head(5)

# clean up columns
risk = risk[['GEOID', 'County', 'State', 'WHP_Score', 'DroughtRisk_Score']]
risk.head(5)
risk.columns

Index(['GEOID', 'County', 'State', 'WHP_Score', 'DroughtRisk_Score'], dtype='object')

In [17]:
energy = pd.read_csv("OilGasWind.csv")
energy.head(5)
energy.columns
# lots of federal offshore values in state cols - these are areas that are a few miles off the shore, not technically part of a state

Index(['State_Gas', 'State_Oil', 'County', 'Latitude', 'Longitude',
       'Gas Production Quantity', 'Oil Production Quantity',
       'Wind Plant Capacity'],
      dtype='object')

In [18]:
# read in shapefile
counties = gpd.read_file('shapefile')
counties = counties.to_crs('EPSG:4326')
counties.head(5)

Unnamed: 0,STATEFP,COUNTYFP,COUNTYNS,AFFGEOID,GEOID,NAME,LSAD,ALAND,AWATER,geometry
0,39,71,1074048,0500000US39071,39071,Highland,6,1432479992,12194983,"POLYGON ((-83.86976 39.05553, -83.86567 39.247..."
1,6,3,1675840,0500000US06003,6003,Alpine,6,1912292630,12557304,"POLYGON ((-120.07249 38.50988, -120.0724 38.70..."
2,12,33,295737,0500000US12033,12033,Escambia,6,1701544502,563927612,"POLYGON ((-87.62999 30.87766, -87.62946 30.880..."
3,17,101,424252,0500000US17101,17101,Lawrence,6,963936864,5077783,"POLYGON ((-87.91028 38.57493, -87.90811 38.850..."
4,28,153,695797,0500000US28153,28153,Wayne,6,2099745573,7255476,"POLYGON ((-88.94317 31.78421, -88.94336 31.824..."


In [23]:
# combine energy dataset with shapefile on GEOID

# make both cols string type
risk['GEOID'] = risk['GEOID'].astype(str)
counties['GEOID'] = counties['GEOID'].astype(str)

counties_with_risk = counties.merge(
    risk[['GEOID', 'WHP_Score', 'DroughtRisk_Score']], 
    on='GEOID', 
    how='left'
)
counties_with_risk.head(5)
# len(counties_with_risk) # 3233

Unnamed: 0,STATEFP,COUNTYFP,COUNTYNS,AFFGEOID,GEOID,NAME,LSAD,ALAND,AWATER,geometry,WHP_Score,DroughtRisk_Score
0,39,71,1074048,0500000US39071,39071,Highland,6,1432479992,12194983,"POLYGON ((-83.86976 39.05553, -83.86567 39.247...",5.3,0.0
1,6,3,1675840,0500000US06003,6003,Alpine,6,1912292630,12557304,"POLYGON ((-120.07249 38.50988, -120.0724 38.70...",,
2,12,33,295737,0500000US12033,12033,Escambia,6,1701544502,563927612,"POLYGON ((-87.62999 30.87766, -87.62946 30.880...",32.2,0.09264
3,17,101,424252,0500000US17101,17101,Lawrence,6,963936864,5077783,"POLYGON ((-87.91028 38.57493, -87.90811 38.850...",2.6,0.3
4,28,153,695797,0500000US28153,28153,Wayne,6,2099745573,7255476,"POLYGON ((-88.94317 31.78421, -88.94336 31.824...",49.7,0.274699


In [25]:
# clean up dataset
counties_with_risk = counties_with_risk[['GEOID', 'NAME', 'geometry', 'WHP_Score', 'DroughtRisk_Score']]
counties_with_risk = counties_with_risk.rename(columns={'NAME':'COUNTY'})
counties_with_risk.head(5)

Unnamed: 0,GEOID,COUNTY,geometry,WHP_Score,DroughtRisk_Score
0,39071,Highland,"POLYGON ((-83.86976 39.05553, -83.86567 39.247...",5.3,0.0
1,6003,Alpine,"POLYGON ((-120.07249 38.50988, -120.0724 38.70...",,
2,12033,Escambia,"POLYGON ((-87.62999 30.87766, -87.62946 30.880...",32.2,0.09264
3,17101,Lawrence,"POLYGON ((-87.91028 38.57493, -87.90811 38.850...",2.6,0.3
4,28153,Wayne,"POLYGON ((-88.94317 31.78421, -88.94336 31.824...",49.7,0.274699


In [44]:
# combine with energy data - one latitude and longitude
energy_points = gpd.GeoDataFrame(
    energy,
    geometry=gpd.points_from_xy(energy.Longitude, energy.Latitude),
    crs="EPSG:4326" # matches counties one
)

final = gpd.sjoin(
    energy_points,
    counties_with_risk,
    how="left",
    predicate="intersects" # includes if point is on the border
)

final.head(5)

Unnamed: 0,State_Gas,State_Oil,County,Latitude,Longitude,Gas Production Quantity,Oil Production Quantity,Wind Plant Capacity,geometry,index_right,GEOID,COUNTY,WHP_Score,DroughtRisk_Score
0,,,Hawaii County,18.978,-155.688,,,0.334645,POINT (-155.688 18.978),2014.0,15001.0,Hawaii,11.7,0.106041
1,,,Maui County,20.8001,-156.539,,,0.334645,POINT (-156.539 20.8001),829.0,15009.0,Maui,10.0,0.181957
2,,,Honolulu County,21.6692,-157.9501,,,0.37126,POINT (-157.9501 21.6692),1696.0,15003.0,Honolulu,19.7,0.044702
3,,,Honolulu County,21.6804,-157.982,,,0.382505,POINT (-157.982 21.6804),1696.0,15003.0,Honolulu,19.7,0.044702
4,,Federal offshore,,26.10225,-92.0615,,0.578501,,POINT (-92.0615 26.10225),,,,,


In [45]:
# clean up
final = final.drop(columns = ["COUNTY", "index_right", "State_Gas", "State_Oil"])
final = final.rename(columns = {'WHP_Score':'Wildfire Hazard Potential Score', 'DroughtRisk_Score':'Drought Risk Score'})

# reorder
final = final.iloc[:, [0, 7, 1, 2, 6, 3, 4, 5, 8, 9]]

final.head(5)

Unnamed: 0,County,GEOID,Latitude,Longitude,geometry,Gas Production Quantity,Oil Production Quantity,Wind Plant Capacity,Wildfire Hazard Potential Score,Drought Risk Score
0,Hawaii County,15001.0,18.978,-155.688,POINT (-155.688 18.978),,,0.334645,11.7,0.106041
1,Maui County,15009.0,20.8001,-156.539,POINT (-156.539 20.8001),,,0.334645,10.0,0.181957
2,Honolulu County,15003.0,21.6692,-157.9501,POINT (-157.9501 21.6692),,,0.37126,19.7,0.044702
3,Honolulu County,15003.0,21.6804,-157.982,POINT (-157.982 21.6804),,,0.382505,19.7,0.044702
4,,,26.10225,-92.0615,POINT (-92.0615 26.10225),,0.578501,,,


In [46]:
# create csv
final.to_csv("final.csv", index=False)