In [1]:
#     US_Biogas_Analysis.ipynb
#     McKay Rytting
#     This notebook is an initial exploration of US Biogas data compiled by USDA

In [2]:
#   Using geopy and Nominatim for geolocating
#   pip install geopy
#   pip install Nominatim

Collecting Nominatim
  Downloading nominatim-0.1.tar.gz (1.7 kB)
Building wheels for collected packages: Nominatim
  Building wheel for Nominatim (setup.py) ... [?25ldone
[?25h  Created wheel for Nominatim: filename=nominatim-0.1-py3-none-any.whl size=2363 sha256=001a1f10cd79b3bfdfbbf54daa0f19dd5c4ad913bd488574fef3288fe942a3ee
  Stored in directory: /Users/mckayrytting/Library/Caches/pip/wheels/61/a8/8d/06a142598c8ff2d5ac76ee5c2935f22dcf67d05e208722c4c0
Successfully built Nominatim
Installing collected packages: Nominatim
Successfully installed Nominatim-0.1
Note: you may need to restart the kernel to use updated packages.


In [1]:
import pandas as pd
import numpy as np
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter

In [2]:
# Read the data files in:
allBiogasData = pd.read_excel('BioGas_Download.xlsx', sheet_name=None)
newFarmData=pd.read_excel('agstar-livestock-ad-database.xlsx', sheet_name=None)

In [3]:
# First data file keys
allBiogasData.keys()

dict_keys(['Readme', 'Agriculture BioGas System', 'Landfill BioGas System', 'Wastewater BioGas System', 'Food Scrap BioGas System', 'Compressed NG Fueling Stations', 'Electric Fueling Stations', 'NG Pipelines Gathering', 'NG Pipelines Interstate', 'NG Pipelines Intrastate', '2013 Electricity Prices', '2014 Gas Prices', 'State Incentives Total Count', 'Net Metering Rules', 'Net Metering Caps', 'Livestock Dairy Inventory', 'Livestock Poultry Inventory', 'Livestock Swine Inventory', 'Crop Residue Bagasse ', 'Crop Residue Barley Straw', 'Crop Residue Cornstover', 'Crop Residue Sorghum', 'Crop Residue Rice', 'Crop Residue Wheatstraw', 'Energy Crops Cane', 'Energy Crops Miscanthus', 'Energy Crops Switchgrass', 'NRCS EQIP Practice Standards'])

In [4]:
newFarmData.keys()

dict_keys(['Operational and Construction', 'Shutdown'])

In [5]:
# Create individual dataframes for tabs of interest
dfAgriculture=newFarmData['Operational and Construction']
dfLandfill=allBiogasData['Landfill BioGas System']
dfIncentives=allBiogasData['State Incentives Total Count']
dfDairyInventory=allBiogasData['Livestock Dairy Inventory']
dfSwineInventory=allBiogasData['Livestock Swine Inventory']
dfPoultryInventory=allBiogasData['Livestock Poultry Inventory']

In [6]:
# Clean up livestock inventory dataframes, combine into single dataframe of livestock counts in US counties
dfLivestockInventory=pd.merge(dfDairyInventory,dfSwineInventory, how='outer',
                              left_on=['County','State'], right_on=['NAME','STATE NAME'])
dfLivestockInventory=pd.merge(dfLivestockInventory,dfPoultryInventory, how='outer',
                              left_on=['NAME','STATE NAME'], right_on=['NAME','STATE NAME'])
dfLivestockInventory.drop(columns=['NAME','STATE NAME'],inplace=True)
dfLivestockInventory.rename(columns={'Milk Cow Inventory':'Cow Inventory',
                                     'Inventory Total Higs and Pigs':'Swine Inventory'},
                           inplace=True)
dfLivestockInventory=dfLivestockInventory.fillna(0)

In [7]:
# Want to clean up the CoDigestion column in the agriculture dataframe. First need a list of unique values to turn into categories.
dfAgriculture['Co-Digestion']=dfAgriculture['Co-Digestion'].fillna('None')
dfAgriculture['Co-Digestion'].unique()

array(['None', 'Process Water',
       'Agricultural Residues; Dairy Processing Wastes',
       'Other Feedstocks', 'Dairy Processing Wastes', 'Food Wastes',
       'Agricultural Residues; Food Processing Wastes; Slaughterhouses; Other Feedstocks',
       'Agricultural Residues', 'Fats, Oils, Greases; Food Wastes',
       'Food Processing Wastes', 'Fats, Oils, Greases',
       'Agricultural Residues; Fats, Oils, Greases; Food Processing Wastes; Food Wastes',
       'Agricultural Residues; Beverage and Distillery Wastes; Dairy Processing Wastes; Fats, Oils, Greases; Food Processing Wastes; Food Wastes; Process Water; Slaughterhouses; Other Feedstocks',
       'Dairy Processing Wastes; Fats, Oils, Greases; Food Processing Wastes; Food Wastes; Other Feedstocks',
       'Fats, Oils, Greases; Process Water',
       'Food Wastes; Slaughterhouses',
       'Agricultural Residues; Dairy Processing Wastes; Food Wastes; Process Water',
       'Dairy Processing Wastes; Food Wastes',
       'Dairy 

In [8]:
# Make masks for various types of CoDigestion
dfAgriculture['Codigestion']=dfAgriculture['Co-Digestion']
foodMask=dfAgriculture['Co-Digestion'].str.contains('food|Food')
waterMask=dfAgriculture['Co-Digestion'].str.contains('water|Water')
agMask=dfAgriculture['Co-Digestion'].str.contains('agri|Agri')
mixMask=dfAgriculture['Co-Digestion'].str.contains('mix|Mix')
allMask=dfAgriculture['Co-Digestion']=='None'
multMask=(foodMask^waterMask==agMask)

In [9]:
# Apply masks to dataframe
dfAgriculture['Codigestion'][allMask]='None'
dfAgriculture['Codigestion'][~allMask]='Other'
dfAgriculture['Codigestion'][multMask^~foodMask]='Mult'
dfAgriculture['Codigestion'][agMask & ~multMask]='Agricultural Substrates'
dfAgriculture['Codigestion'][waterMask & ~multMask]='Wastewater'
dfAgriculture['Codigestion'][foodMask & ~multMask]='Food'
dfAgriculture['Codigestion'][mixMask]='Mix'

In [10]:
# Check to make sure things look categorized correctly!
dfAgriculture[dfAgriculture['Codigestion']=='Mult'][['Co-Digestion','Codigestion']]

Unnamed: 0,Co-Digestion,Codigestion
51,Agricultural Residues; Food Processing Wastes;...,Mult
73,"Agricultural Residues; Fats, Oils, Greases; Fo...",Mult
150,Agricultural Residues; Food Wastes,Mult
157,Dairy Processing Wastes; Food Wastes; Process ...,Mult
160,Food Wastes; Process Water,Mult
173,Food Wastes; Process Water,Mult
175,Food Wastes; Process Water,Mult
177,Food Wastes; Process Water,Mult
179,Food Wastes; Process Water,Mult
194,Food Wastes; Process Water,Mult


In [11]:
# Rename Codigestion columns to make a bit more sense
dfAgriculture.rename(columns={'Co-Digestion':'CoDigestion Details'})
dfAgriculture.head()

Unnamed: 0,Project Name,Project Type,City,County,State,Digester Type,Status,Year Operational,Animal/Farm Type(s),Cattle,...,Swine,Co-Digestion,Biogas Generation Estimate (cu_ft/day),Electricity Generated (kWh/yr),Biogas End Use(s),System Designer(s)_Developer(s) and Affiliates,Receiving Utility,Total Emission Reductions (MTCO2e/yr),Awarded USDA Funding?,Codigestion
0,Cargill - Sandy River Farm Digester,Farm Scale,Morrilton,Conway,AR,Covered Lagoon,Operational,2008.0,Swine,,...,8400.0,,1814400.0,,Flared Full-time,Martin Construction Resource LLC (formerly RCM...,,4002.460092,,
1,Paloma Dairy Digester,Farm Scale,Gila Bend,Maricopa,AZ,Covered Lagoon,Construction,,Dairy,,...,,,,,CNG,"Fortistar, LLC [Project Developer]; Industrial...",Southwest Gas Company,90025.627853,,
2,Stotz Southern Dairy Digester,Farm Scale,Buckeye,Maricopa,AZ,Covered Lagoon,Operational,2011.0,Dairy,,...,,,,5256000.0,Electricity,Chapel Street Environmental [System Design Eng...,Arizona Public Service,138786.936883,,
3,Triple G Dairy Digester,Farm Scale,Buckeye,Maricopa,AZ,Covered Lagoon,Operational,2011.0,Dairy,,...,,,,4467600.0,Cogeneration,Chapel Street Environmental [System Design Eng...,Arizona Public Service Co.,39196.47198,,
4,4K Dairy Digester,Farm Scale,Pixley,Tulare,CA,Covered Lagoon,Operational,2020.0,Dairy,,...,,,,,CNG,Maas Energy Works [Project Developer],SoCalGas,39694.379011,,


In [12]:
# What are the potential end uses?
dfAgriculture['Biogas End Use(s)'].unique()

array(['Flared Full-time', 'CNG', 'Electricity', 'Cogeneration',
       'Cogeneration; Refrigeration', 'Electricity; CNG',
       'Boiler/Furnace fuel; CNG', 'Cogeneration; Boiler/Furnace fuel',
       'Electricity; Cogeneration', 'Boiler/Furnace fuel',
       'Electricity; Boiler/Furnace fuel', 'Pipeline Gas',
       'Cogeneration; CNG', 'Pipeline to Electricity',
       'Cogeneration; Pipeline Gas', nan,
       'Electricity; Boiler/Furnace fuel; CNG',
       'Electricity; Cogeneration; Boiler/Furnace fuel',
       'Electricity; Pipeline Gas'], dtype=object)

In [13]:
# For unknown end gas uses, fill as unknown
dfAgriculture['Biogas End Use(s)']=dfAgriculture['Biogas End Use(s)'].fillna('Unknown')

In [14]:
# Want only one entry in end use column (Flared, Heat, Electricity, Cogeneration, Sales), consider making second end use column?
flaredMask=dfAgriculture['Biogas End Use(s)'].str.contains('Flare')
electricityMask=dfAgriculture['Biogas End Use(s)'].str.contains('elec|Elec')
cogenerationMask=dfAgriculture['Biogas End Use(s)'].str.contains('Cogen')
cngMask=dfAgriculture['Biogas End Use(s)'].str.contains('CNG')
furnaceMask=dfAgriculture['Biogas End Use(s)'].str.contains('Furn')
pipelineMask=dfAgriculture['Biogas End Use(s)'].str.contains('Gas')


In [15]:
# Apply End Use masks to dataframe
dfAgriculture['Biogas End Use(s)'][flaredMask]='Flared'
dfAgriculture['Biogas End Use(s)'][electricityMask]='Electricity'
dfAgriculture['Biogas End Use(s)'][cogenerationMask]='Cogeneration'
dfAgriculture['Biogas End Use(s)'][cngMask]='Sales'
dfAgriculture['Biogas End Use(s)'][furnaceMask]='Heat'
dfAgriculture['Biogas End Use(s)'][pipelineMask]='Sales'
dfAgriculture['Biogas End Use(s)'][electricityMask & furnaceMask]='Cogeneration'
dfAgriculture['Biogas End Use(s)'][electricityMask]='Electricity'
dfAgriculture['Biogas End Use(s)'].unique()


array(['Flared', 'Sales', 'Electricity', 'Cogeneration', 'Heat',
       'Unknown'], dtype=object)

In [16]:
#   Concatenate City and State to get single city/state column for geocoding
dfAgriculture['City/State']=dfAgriculture['City']+', '+dfAgriculture['State']

In [38]:
geolocator=Nominatim(user_agent='McKay')
latLon=RateLimiter(geolocator.geocode, min_delay_seconds=1)
dfAgriculture['Location']=dfAgriculture['City/State'].apply(latLon)
dfAgriculture['Point']=dfAgriculture['Location'].apply(lambda loc: tuple(loc.point) if loc else None)
dfAgriculture[['Latitude','Longitude','Altitude']]=pd.DataFrame(dfAgriculture['Point'].tolist(), index=dfAgriculture.index)
dfAgriculture=dfAgriculture.drop(['Point','Altitude','Location'])

KeyError: "['Point' 'Altitude' 'Location'] not found in axis"

In [39]:
dfAgriculture[dfAgriculture['Biogas End Use(s)']=='Sales']

Unnamed: 0,Project Name,Project Type,City,County,State,Digester Type,Status,Year Operational,Animal/Farm Type(s),Cattle,...,Receiving Utility,Total Emission Reductions (MTCO2e/yr),Awarded USDA Funding?,Codigestion,City/State,Location,Point,Latitude,Longitude,Altitude
1,Paloma Dairy Digester,Farm Scale,Gila Bend,Maricopa,AZ,Covered Lagoon,Construction,,Dairy,,...,Southwest Gas Company,90025.627853,,,"Gila Bend, AZ","(Gila Bend, Maricopa County, Arizona, 85337, U...","(32.9478267, -112.7168239, 0.0)",32.947827,-112.716824,0.0
4,4K Dairy Digester,Farm Scale,Pixley,Tulare,CA,Covered Lagoon,Operational,2020.0,Dairy,,...,SoCalGas,39694.379011,,,"Pixley, CA","(Pixley, West Orrland Avenue, Pixley, Tulare C...","(35.978775, -119.29479637596647, 0.0)",35.978775,-119.294796,0.0
13,Circle A Dairy Digester,Farm Scale,Pixley,Tulare,CA,Covered Lagoon,Operational,2018.0,Dairy,,...,SoCalGas,34692.887256,,,"Pixley, CA","(Pixley, West Orrland Avenue, Pixley, Tulare C...","(35.978775, -119.29479637596647, 0.0)",35.978775,-119.294796,0.0
14,Cornerstone Dairy Digester,Farm Scale,Tipton,Tulare,CA,Covered Lagoon,Operational,2019.0,Dairy,,...,SoCalGas,58946.152832,,,"Tipton, CA","(Tipton, Avenue 152, Tipton, Tulare County, Ca...","(36.058815949999996, -119.31294805849555, 0.0)",36.058816,-119.312948,0.0
16,Decade Dairy Digester,Farm Scale,Tulare,Tulare,CA,Covered Lagoon,Construction,2020.0,Dairy,,...,SoCalGas,82167.364553,,,"Tulare, CA","(Tulare County, California, United States of A...","(36.2516475, -118.852583, 0.0)",36.251647,-118.852583,0.0
19,FM Jerseys Dairy Digester,Farm Scale,Tipton,Tulare,CA,Covered Lagoon,Construction,2019.0,Dairy,,...,SoCalGas,41282.154172,,,"Tipton, CA","(Tipton, Avenue 152, Tipton, Tulare County, Ca...","(36.058815949999996, -119.31294805849555, 0.0)",36.058816,-119.312948,0.0
22,K & M Visser Dairy Digester,Farm Scale,Pixley,Tulare,CA,Covered Lagoon,Operational,2019.0,Dairy,,...,SoCalGas,72918.574244,,,"Pixley, CA","(Pixley, West Orrland Avenue, Pixley, Tulare C...","(35.978775, -119.29479637596647, 0.0)",35.978775,-119.294796,0.0
23,Legacy Dairy Digester,Farm Scale,Pixley,Tulare,CA,Covered Lagoon,Operational,2018.0,Dairy,,...,SoCalGas,20307.644302,,,"Pixley, CA","(Pixley, West Orrland Avenue, Pixley, Tulare C...","(35.978775, -119.29479637596647, 0.0)",35.978775,-119.294796,0.0
24,Little Rock Dairy Digester,Farm Scale,Tipton,Tulare,CA,Covered Lagoon,Operational,2019.0,Dairy,,...,SoCalGas,55167.24795,,,"Tipton, CA","(Tipton, Avenue 152, Tipton, Tulare County, Ca...","(36.058815949999996, -119.31294805849555, 0.0)",36.058816,-119.312948,0.0
25,Lone Oak #1 Dairy Digester,Farm Scale,Hanford,Kings,CA,Covered Lagoon,Construction,2020.0,Dairy,,...,SoCalGas,106380.93575,,,"Hanford, CA","(Hanford, Kings County, California, 93230, Uni...","(36.3274502, -119.6456844, 0.0)",36.32745,-119.645684,0.0
