In [1]:
#     US_Biogas_Analysis.ipynb
#     McKay Rytting
#     This notebook is an initial exploration of US Biogas data compiled by USDA

In [2]:
#   Using geopy and Nominatim for geolocating
#   pip install geopy
#   pip install Nominatim

Collecting Nominatim
  Downloading nominatim-0.1.tar.gz (1.7 kB)
Building wheels for collected packages: Nominatim
  Building wheel for Nominatim (setup.py) ... [?25ldone
[?25h  Created wheel for Nominatim: filename=nominatim-0.1-py3-none-any.whl size=2363 sha256=001a1f10cd79b3bfdfbbf54daa0f19dd5c4ad913bd488574fef3288fe942a3ee
  Stored in directory: /Users/mckayrytting/Library/Caches/pip/wheels/61/a8/8d/06a142598c8ff2d5ac76ee5c2935f22dcf67d05e208722c4c0
Successfully built Nominatim
Installing collected packages: Nominatim
Successfully installed Nominatim-0.1
Note: you may need to restart the kernel to use updated packages.


In [1]:
import pandas as pd
import numpy as np
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter

In [2]:
# Read the data file in and list all of its tabs:
allBiogasData = pd.read_excel('BioGas_Download.xlsx', sheet_name=None)
allBiogasData.keys()

dict_keys(['Readme', 'Agriculture BioGas System', 'Landfill BioGas System', 'Wastewater BioGas System', 'Food Scrap BioGas System', 'Compressed NG Fueling Stations', 'Electric Fueling Stations', 'NG Pipelines Gathering', 'NG Pipelines Interstate', 'NG Pipelines Intrastate', '2013 Electricity Prices', '2014 Gas Prices', 'State Incentives Total Count', 'Net Metering Rules', 'Net Metering Caps', 'Livestock Dairy Inventory', 'Livestock Poultry Inventory', 'Livestock Swine Inventory', 'Crop Residue Bagasse ', 'Crop Residue Barley Straw', 'Crop Residue Cornstover', 'Crop Residue Sorghum', 'Crop Residue Rice', 'Crop Residue Wheatstraw', 'Energy Crops Cane', 'Energy Crops Miscanthus', 'Energy Crops Switchgrass', 'NRCS EQIP Practice Standards'])

In [3]:
# Create individual dataframes for tabs of interest
dfAgriculture=allBiogasData['Agriculture BioGas System']
dfLandfill=allBiogasData['Landfill BioGas System']
dfIncentives=allBiogasData['State Incentives Total Count']
dfDairyInventory=allBiogasData['Livestock Dairy Inventory']
dfSwineInventory=allBiogasData['Livestock Swine Inventory']
dfPoultryInventory=allBiogasData['Livestock Poultry Inventory']

In [4]:
# Clean up livestock inventory dataframes, combine into single dataframe of livestock counts in US counties
dfLivestockInventory=pd.merge(dfDairyInventory,dfSwineInventory, how='outer',
                              left_on=['County','State'], right_on=['NAME','STATE NAME'])
dfLivestockInventory=pd.merge(dfLivestockInventory,dfPoultryInventory, how='outer',
                              left_on=['NAME','STATE NAME'], right_on=['NAME','STATE NAME'])
dfLivestockInventory.drop(columns=['NAME','STATE NAME'],inplace=True)
dfLivestockInventory.rename(columns={'Milk Cow Inventory':'Cow Inventory',
                                     'Inventory Total Higs and Pigs':'Swine Inventory'},
                           inplace=True)
dfLivestockInventory=dfLivestockInventory.fillna(0)

In [5]:
# Want to clean up the CoDigestion column in the agriculture dataframe. First need a list of unique values to turn into categories.
dfAgriculture['CoDigestion']=dfAgriculture['CoDigestion'].fillna('None')
dfAgriculture['CoDigestion'].unique()

array(['None', 'Waste water (from cheese plant)',
       'Wastewater (warm clean up water from cheese plant)',
       'Cheese whey, sudan grass, and residuals (30 tons/day sudan silage, 20 tons/day whey)',
       'Sludge (Paper sludge substrate)', 'Additional substrates',
       'Organic Wastes',
       'Crop wastes, food wastes, haylage, and cooking grease',
       'Wastes from surrounding community', 'Food waste',
       'Crude glycerine (from biodiesel plant)',
       'Food Processing Waste (Syrup stillage from ethanol plant; Crude glycerine from biodiesel plant)',
       'Cheese whey', 'Organic wastes (silage leachate and food waste)',
       'Slaughterhouse wastewater',
       'Food processing waste (cheese whey, waste onions, and potato starch water)',
       'Food processing waste (food waste from grapes; milk/ice cream and salad dressing production)',
       'Food waste/organic waste',
       'Organic food waste and agricultural residue',
       'Food wastes (Milk processing wa

In [6]:
# Make masks for various types of CoDigestion
dfAgriculture['Codigestion']=dfAgriculture['CoDigestion']
foodMask=dfAgriculture['CoDigestion'].str.contains('food|Food')
waterMask=dfAgriculture['CoDigestion'].str.contains('water|Water')
agMask=dfAgriculture['CoDigestion'].str.contains('agri|Agri')
mixMask=dfAgriculture['CoDigestion'].str.contains('mix|Mix')
allMask=dfAgriculture['CoDigestion']=='None'
multMask=(foodMask^waterMask==agMask)

In [7]:
# Apply masks to dataframe
dfAgriculture['Codigestion'][allMask]='None'
dfAgriculture['Codigestion'][~allMask]='Other'
dfAgriculture['Codigestion'][multMask^~foodMask]='Mult'
dfAgriculture['Codigestion'][agMask & ~multMask]='Agricultural Substrates'
dfAgriculture['Codigestion'][waterMask & ~multMask]='Wastewater'
dfAgriculture['Codigestion'][foodMask & ~multMask]='Food'
dfAgriculture['Codigestion'][mixMask]='Mix'

In [8]:
# Check to make sure things look categorized correctly!
dfAgriculture[dfAgriculture['Codigestion']=='Mult'][['CoDigestion','Codigestion']]

Unnamed: 0,CoDigestion,Codigestion
78,"Food processing waste (cheese whey, waste onio...",Mult
87,Organic food waste and agricultural residue,Mult
123,"Food wastes (molasses processor wash water, Je...",Mult
141,Agricultural substrates (fish processing waste...,Mult
151,"Agricultural substrates (waste grain, food was...",Mult


In [9]:
# Rename Codigestion columns to make a bit more sense
dfAgriculture.rename(columns={'CoDigestion':'CoDigestion Details'})
dfAgriculture.head()

Unnamed: 0,Name,Full Address,City,State,Year Opened,Year Closed,Start Date,Owner,Developer,Emission Reductions MMTCO2E/Year,Digester Typer,Farm Type,Population Feeding Digester,CoDigestion,Biogas Generation Estimate (Cubic Feet a Day),Biogas End Use,Boiler Capacity (BTU an hour),Baseline System,Codigestion
0,Cargill - Sandy River Farm,"Morrilton, AR United States",Morrilton,AR,2008,,,,,2847.284662,Covered Lagoon,Swine,4200,,,Flared Full Time,,Storage Lagoon,
1,Bob Giacomini Dairy,"Point Reyes Station, CA United States",Point Reyes Station,CA,2009,,,,Williams Engineering Associates,1592.63626,Covered Lagoon,Dairy,300,Waste water (from cheese plant),25000.0,Cogeneration,,Storage Lagoon,Wastewater
2,Bullfrog Dairy,"Imperial, CA United States",Imperial,CA,2008,,,,"RCM International, LLC",17518.99886,Covered Lagoon,Dairy,3300,,,Electricity,,Storage Lagoon,
3,CAL-Denier Dairy,"Galt, CA United States",Galt,CA,2008,,,,"RCM International, LLC",3983.482409,Covered Lagoon,Dairy,900,,33000.0,Electricity,,Storage Lagoon,
4,Castelanelli Bros. Dairy,"Lodi, CA United States",Lodi,CA,2004,,,,"RCM International, LLC; Cover installed by Env...",12582.92503,Covered Lagoon,Dairy,3213,,89148.0,Electricity,,Storage Lagoon,


In [10]:
# What are the potential end uses?
dfAgriculture['Biogas End Use'].unique()

array(['Flared Full Time', 'Cogeneration', 'Electricity',
       'Cogeneration; Boiler/Furnace Fuel', 'Electricity; CNG', nan,
       'Boiler/Furnace Fuel', 'Electricity; Boiler/Furnace Fuel',
       'Cogeneration; CNG', 'Cogeneration; Electricity',
       'Boiler/Furnace Fuel; Electricity', 'Pipeline Gas', 'electricity'],
      dtype=object)

In [14]:
# Want only one entry in end use column (Flared, Heat, Electricity, Cogeneration, Sales), consider making second end use column?
flaredMask=dfAgriculture['Biogas End Use'].str.contains('Flare')
electricityMask=dfAgriculture['Biogas End Use'].str.contains('elec|Elec')
cogenerationMask=dfAgriculture['Biogas End Use'].str.contains('Cogen')
cngMask=dfAgriculture['Biogas End Use'].str.contains('CNG')
furnaceMask=dfAgriculture['Biogas End Use'].str.contains('Furn')
pipelineMask=dfAgriculture['Biogas End Use'].str.contains('Gas')
dfAgriculture['Biogas End Use']=dfAgriculture['Biogas End Use'].fillna('Unknown')

In [15]:
# Apply End Use masks to dataframe
dfAgriculture['Biogas End Use'][flaredMask]='Flared'
dfAgriculture['Biogas End Use'][electricityMask]='Electricity'
dfAgriculture['Biogas End Use'][cogenerationMask]='Cogeneration'
dfAgriculture['Biogas End Use'][cngMask]='Sales'
dfAgriculture['Biogas End Use'][furnaceMask]='Heat'
dfAgriculture['Biogas End Use'][pipelineMask]='Sales'
dfAgriculture['Biogas End Use'][electricityMask & furnaceMask]='Cogeneration'
dfAgriculture['Biogas End Use'][electricityMask]='Electricity'
dfAgriculture['Biogas End Use'].unique()


array(['Flared', 'Cogeneration', 'Electricity', 'Heat', 'Unknown',
       'Sales'], dtype=object)

In [17]:
#   Concatenate City and State to get single city/state column for geocoding
dfAgriculture['City/State']=dfAgriculture['City']+dfAgriculture['State']

In [22]:
geolocator=Nominatim(user_agent='McKay')
latLon=RateLimiter(geolocator.geocode, min_delay_seconds=1)
dfAgriculture['Location']=dfAgriculture['City/State'].apply(latLon)
dfAgriculture['Point']=dfAgriculture['Location'].apply(lambda loc: tuple(loc.point) if loc else None)
dfAgriculture[['Latitude','Longitude','Altitude']]=pd.DataFrame(dfAgriculture['Point'].tolist(), index=dfAgriculture.index)

In [23]:
dfAgriculture.head()

Unnamed: 0,Name,Full Address,City,State,Year Opened,Year Closed,Start Date,Owner,Developer,Emission Reductions MMTCO2E/Year,...,Biogas End Use,Boiler Capacity (BTU an hour),Baseline System,Codigestion,City/State,Location,Point,Latitude,Longitude,Altitude
0,Cargill - Sandy River Farm,"Morrilton, AR United States",Morrilton,AR,2008,,,,,2847.284662,...,Flared,,Storage Lagoon,,Morrilton AR,"(Morrilton, Conway County, Arkansas, 72110, Un...","(35.1509173, -92.7440538, 0.0)",35.150917,-92.744054,0.0
1,Bob Giacomini Dairy,"Point Reyes Station, CA United States",Point Reyes Station,CA,2009,,,,Williams Engineering Associates,1592.63626,...,Cogeneration,,Storage Lagoon,Wastewater,Point Reyes Station CA,"(Point Reyes Station, Marin County, California...","(38.0690895, -122.8069356, 0.0)",38.069089,-122.806936,0.0
2,Bullfrog Dairy,"Imperial, CA United States",Imperial,CA,2008,,,,"RCM International, LLC",17518.99886,...,Electricity,,Storage Lagoon,,Imperial CA,"(Imperial, Saskatchewan, S0G 2J0, Canada, (51....","(51.346117, -105.4355653, 0.0)",51.346117,-105.435565,0.0
3,CAL-Denier Dairy,"Galt, CA United States",Galt,CA,2008,,,,"RCM International, LLC",3983.482409,...,Electricity,,Storage Lagoon,,Galt CA,"(Galt, City of Cambridge, Region of Waterloo, ...","(43.3589869, -80.3152471, 0.0)",43.358987,-80.315247,0.0
4,Castelanelli Bros. Dairy,"Lodi, CA United States",Lodi,CA,2004,,,,"RCM International, LLC; Cover installed by Env...",12582.92503,...,Electricity,,Storage Lagoon,,Lodi CA,"(Lodi, North Stormont, Stormont, Dundas and Gl...","(45.2232055, -75.0073385, 0.0)",45.223205,-75.007339,0.0
