In [1]:
import io, time, json
import requests
from bs4 import BeautifulSoup
import urllib
from urllib import parse
import time
import pandas as pd

In [2]:
def retrieve_html(url, params = None, timeout = 30):
    """Fetch a page, appending URL-encoded ``params`` directly onto ``url``.

    Args:
        url: Base URL. The encoded query string is concatenated onto it as-is,
            so it should already end with ``?`` (or ``&``) when ``params`` is
            supplied.
        params: Optional mapping (or sequence of pairs) understood by
            ``urllib.parse.urlencode``. ``None`` or an empty value appends
            nothing.
        timeout: Seconds to wait on the server before ``requests`` gives up.
            Without one a single stalled connection blocks the whole scrape
            indefinitely. Pass ``None`` to wait forever.

    Returns:
        A ``(status_code, text)`` tuple taken from the HTTP response.
    """
    query = urllib.parse.urlencode(params) if params else ''
    response = requests.get(url + query, timeout=timeout)
    return response.status_code, response.text

In [3]:
def retrieve_County_html(url, urlevents = None, params = None, timeout = 30):
    """Fetch a page whose query combines pre-encoded fragments with ``params``.

    Args:
        url: Base URL; the query string is concatenated directly onto it, so
            it should already end with ``?``.
        urlevents: Already URL-encoded ``key=value`` fragments, given either
            as an iterable of strings or as one single string. ``None`` and
            empty fragments contribute nothing.
        params: Optional mapping (or sequence of pairs) encoded with
            ``urllib.parse.urlencode`` and appended after the fragments.
        timeout: Seconds to wait on the server before ``requests`` gives up;
            ``None`` waits forever.

    Returns:
        A ``(status_code, text)`` tuple taken from the HTTP response.
    """
    if urlevents is None:
        query_parts = []
    elif isinstance(urlevents, str):
        # A bare string is one fragment; joining it would split it per character.
        query_parts = [urlevents] if urlevents else []
    else:
        query_parts = [fragment for fragment in urlevents if fragment]

    # Only add the encoded params when there are any, so no dangling '&' is left.
    if params:
        query_parts.append(urllib.parse.urlencode(params))

    nexturl = url + '&'.join(query_parts)
    response = requests.get(nexturl, timeout=timeout)
    return response.status_code, response.text

In [4]:
def parse_page(html):
    """Extract one cell's text from a results page.

    Args:
        html: Page markup to parse.

    Returns:
        ``None`` (after printing ``'No results found'``) when the page has no
        ``<table id="results">``. Otherwise the text of the second-to-last
        ``<td>`` inside the ninth ``<tr>`` found anywhere in the document.
    """
    document = BeautifulSoup(html, 'html.parser')
    if document.find("table", id="results") is None:
        print('No results found')
        return None

    # Rows are collected from the whole document, not just the results table.
    rows = document.find_all("tr")
    # NOTE(review): row 8 / cell -2 is presumably the totals row's damage
    # column on the NOAA listing page - confirm against the live markup.
    cells = rows[8].find_all("td")
    return cells[-2].text

In [5]:
def get_all_states(html):
    """Pair every option of the ``statefips`` drop-down with its label.

    Args:
        html: Markup of a page containing ``<select id="statefips">``.

    Returns:
        A ``zip`` of ``(option value, option text)`` pairs, in page order.
        It is a one-shot iterator, so it can only be looped over once.
    """
    document = BeautifulSoup(html, 'html.parser')
    options = document.find("select", id="statefips").find_all("option")

    labels = []
    for option in options:
        labels.append(option.text)
    values = []
    for option in options:
        values.append(option['value'])
    return zip(values, labels)

In [6]:
def getFundingForQuery(url, params, search_html):
    """Run the same query once per state and collect the parsed values.

    Args:
        url: Listing URL handed to ``retrieve_html`` (query is appended to it).
        params: Base query parameters. ``statefips`` is replaced for every
            state on a private copy, so the caller's mapping is left untouched.
        search_html: Markup of the search page, from which the list of states
            is read via ``get_all_states``.

    Returns:
        A list with one ``parse_page`` result per state, in page order.
        Entries are ``None`` where ``parse_page`` found no results table.
    """
    state_damage_vals = []
    for state_value, _state_text in get_all_states(search_html):
        # Per-state copy: avoids leaking the last state's code back to the caller.
        state_params = dict(params, statefips=state_value)
        _status, state_damage_page = retrieve_html(url, state_params)
        state_damage_vals.append(parse_page(state_damage_page))
    return state_damage_vals

In [7]:
def getFundingDataFromNOAA(eventType='(Z) Storm Surge/Tide', beginDate_mm='01', beginDate_dd='01', 
                           beginDate_yyyy='2017', endDate_mm='01', endDate_dd='31', endDate_yyyy='2018', 
                           county='ALL', statefips='48,TEXAS'):
    """Query the NOAA storm-events listing for one event type across all states.

    The arguments are passed through as query parameters of the same names.
    ``getFundingForQuery`` substitutes ``statefips`` for every state it finds
    on the search page, so the value given here is only the initial one.

    Returns:
        The list produced by ``getFundingForQuery``: one parsed value per state.
    """
    # Key order is kept as-is because it determines the encoded query string.
    query = dict(
        eventType=eventType,
        beginDate_mm=beginDate_mm,
        beginDate_dd=beginDate_dd,
        beginDate_yyyy=beginDate_yyyy,
        endDate_mm=endDate_mm,
        endDate_dd=endDate_dd,
        endDate_yyyy=endDate_yyyy,
        county=county,
        sort='DT',
        submitbutton='Search',
        statefips=statefips,
    )
    listing_url = "https://www.ncdc.noaa.gov/stormevents/listevents.jsp?"
    landing_url = "https://www.ncdc.noaa.gov/stormevents/choosedates.jsp?statefips=-999%2CALL"

    # The landing page is fetched only for its drop-down of states.
    _status, landing_html = retrieve_html(landing_url)
    return getFundingForQuery(listing_url, query, landing_html)

In [8]:
# Get the list of all counties corresponding to each state
def parse_county_page(html):
    """Return the ``<option>`` tags of the page's county drop-down.

    Args:
        html: Markup of a page that may contain ``<select name="county">``.

    Returns:
        The list of ``<option>`` tags inside that select, or ``None`` (after
        printing ``'No results found'``) when the select is absent.
    """
    county_select = BeautifulSoup(html, 'html.parser').find('select', attrs={'name':'county'})
    if county_select is not None:
        return county_select.find_all("option")
    print('No results found')
    return None

In [9]:
def getCountyList():
    """Build a mapping of state name to its county options on the NOAA site.

    One request is made per entry of the state drop-down. A line with the
    number of counties found is printed for every state.

    Returns:
        ``{state text: [(county option value, county option text), ...]}``.
        States whose page has no county drop-down map to an empty list.
    """
    base_url = "https://www.ncdc.noaa.gov/stormevents/choosedates.jsp?"
    _status, landing_html = retrieve_html(base_url + 'statefips=-999%2CALL')
    counties_by_state = {}

    print('Number of counties per state observed in NOAA.')
    for fips_value, state_name in get_all_states(landing_html):
        _status, state_html = retrieve_html(base_url, params={'statefips': fips_value})
        options = parse_county_page(state_html)
        if options is None:
            counties_by_state[state_name] = []
        else:
            counties_by_state[state_name] = [
                (option['value'], option.text) for option in options if option is not None
            ]
        print('{}: {}'.format(state_name, len(counties_by_state[state_name])))

    return counties_by_state

In [14]:
# Now we have the dictionary of states and their counties. Let's extract all data for a given state and county.
def getNumber(string):
    """Convert an amount with an optional magnitude suffix to a float.

    Recognised suffixes (case-insensitive) are ``K`` (thousand), ``M``
    (million) and ``B`` (billion). A value without a suffix is parsed as-is;
    its last digit is no longer dropped. Surrounding whitespace and thousands
    separators (``,``) are ignored.

    Args:
        string: Text such as ``'170.00K'``, ``'2.5M'`` or ``'123'``.

    Returns:
        The scaled value as a float, rounded to 4 decimal places.

    Raises:
        ValueError: If the numeric part cannot be parsed as a float.
    """
    multipliers = {'K': 1e3, 'M': 1e6, 'B': 1e9}
    text = string.strip().replace(',', '')
    # Slicing (rather than indexing) keeps an empty string from raising IndexError.
    scale = multipliers.get(text[-1:].upper())
    if scale is None:
        # No recognised suffix: every character belongs to the number.
        scale = 1
    else:
        text = text[:-1]
    return round(float(text) * scale, 4)
    
def getFundingForCounty(url, eventUrl, params, search_html, county_lookup = None):
    """Collect the parsed value for every county of every state.

    Args:
        url: Listing URL handed to ``retrieve_County_html``.
        eventUrl: Pre-encoded event-type query fragments, forwarded unchanged
            to ``retrieve_County_html``.
        params: Base query parameters. ``statefips`` and ``county`` are
            replaced per request on a private copy, so the caller's mapping
            is not modified.
        search_html: Markup of the search page, from which the list of states
            is read via ``get_all_states``.
        county_lookup: Optional ``{state text: [(county value, county text),
            ...]}`` mapping. When omitted, the notebook-level ``state_county``
            dictionary is used, so that name must already be defined.

    Returns:
        ``{state text: {county text: value}}``. Counties whose request did not
        return HTTP 200, or whose page had no results table, are left out.
    """
    if county_lookup is None:
        # Fall back to the dictionary the notebook builds with getCountyList().
        county_lookup = state_county

    query = dict(params)
    year = query.get('beginDate_yyyy')
    state_county_damage_val = {}
    for state_value, state_text in get_all_states(search_html):
        query['statefips'] = state_value
        county_damage = state_county_damage_val[state_text] = {}
        # States missing from the lookup are treated as having no counties.
        for county_value, county_text in county_lookup.get(state_text, []):
            query['county'] = county_value
            code, state_damage_page = retrieve_County_html(url, eventUrl, query)
            # Throttle after every request, failed ones included.
            time.sleep(0.5)
            if code != 200:
                print('Could not find %s, %s' %(county_text, state_text))
                continue
            raw_value = parse_page(state_damage_page)
            if raw_value is None:
                # parse_page already reported the missing results table.
                continue
            county_damage[county_text] = getNumber(raw_value)
        if county_damage:
            # .get: the state-wide '-- All --' entry may be absent if its request failed.
            print(state_text, 'for year', year, 'received: ',
                  county_damage.get('-- All --'))
    return state_county_damage_val

def getCountyLevelFundingDataFromNOAA(eventUrl = '', beginDate_mm='01', beginDate_dd='01', 
                           beginDate_yyyy='2017', endDate_mm='12', endDate_dd='31', endDate_yyyy='2018', 
                           county='ALL', statefips='48,TEXAS'):
    """Query the NOAA storm-events listing per county for a date range.

    The date, ``county`` and ``statefips`` arguments become query parameters
    of the same names. ``getFundingForCounty`` substitutes ``statefips`` and
    ``county`` for each request, so the values given here are only the initial
    ones. ``eventUrl`` carries the pre-encoded event-type fragments and is
    forwarded untouched.

    Returns:
        The nested ``{state: {county: value}}`` dictionary produced by
        ``getFundingForCounty``.
    """
    # Key order is kept as-is because it determines the encoded query string.
    query = dict(
        beginDate_mm=beginDate_mm,
        beginDate_dd=beginDate_dd,
        beginDate_yyyy=beginDate_yyyy,
        endDate_mm=endDate_mm,
        endDate_dd=endDate_dd,
        endDate_yyyy=endDate_yyyy,
        county=county,
        statefips=statefips,
    )
    listing_url = "https://www.ncdc.noaa.gov/stormevents/listevents.jsp?"
    landing_url = "https://www.ncdc.noaa.gov/stormevents/choosedates.jsp?statefips=-999%2CALL"

    # The landing page is fetched only for its drop-down of states.
    _status, landing_html = retrieve_html(landing_url)
    return getFundingForCounty(listing_url, eventUrl, query, landing_html)

In [15]:
# Build the {state: [(county value, county name), ...]} lookup once.
# getFundingForCounty reads this notebook-level name, so this cell has to run
# before any county-level query on a fresh kernel.
state_county = getCountyList()

Number of counties per state observed in NOAA.
No results found
-- All States and Areas --: 0
Alabama: 68
Alaska: 66
Arizona: 16
Arkansas: 76
California: 59
Colorado: 65
Connecticut: 9
Delaware: 4
District Of Columbia: 2
Florida: 68
Georgia: 160
Hawaii: 5
Idaho: 45
Illinois: 104
Indiana: 93
Iowa: 100
Kansas: 107
Kentucky: 121
Louisiana: 65
Maine: 17
Maryland: 25
Massachusetts: 15
Michigan: 84
Minnesota: 88
Mississippi: 83
Missouri: 117
Montana: 57
Nebraska: 94
Nevada: 18
New Hampshire: 11
New Jersey: 22
New Mexico: 35
New York: 63
North Carolina: 101
North Dakota: 54
Ohio: 89
Oklahoma: 78
Oregon: 37
Pennsylvania: 68
Rhode Island: 6
South Carolina: 47
South Dakota: 67
Tennessee: 97
Texas: 255
Utah: 30
Vermont: 15
Virginia: 137
Washington: 40
West Virginia: 56
Wisconsin: 73
Wyoming: 24
Lake St Clair: 1
Hawaii Waters: 1
Gulf Of Mexico: 1
E Pacific: 1
Atlantic South: 1
Atlantic North: 1
Gulf Of Alaska: 1
Lake Huron: 1
Lake Michigan: 1
Lake Superior: 1
St Lawrence R: 1
Lake Ontario: 1
Lake 

In [21]:
# Results accumulate here, keyed by year.
year_state_county_damage = dict()

# Event types included in every county-level query.
eventTypes = [
    "(Z) High Surf",
    "(Z) Hurricane (Typhoon)",
    "(Z) High Wind",
    "(Z) Marine Strong Wind",
    "(Z) Marine Thunderstorm Wind",
    "(Z) Coastal Flood",
    "(Z) Flood",
    "(Z) Storm Surge/Tide",
    "(Z) Tropical Storm",
    "(Z) Tropical Depression",
]

# One single-key dict per event type, each then URL-encoded on its own so the
# repeated 'eventType' key survives when the fragments are joined with '&'.
pevents = [{'eventType': event_type} for event_type in eventTypes]
urlevents = list(map(urllib.parse.urlencode, pevents))

beginYearList = [2011] #[2005, 2008, 2010, 2011, 2012, 2016, 2017]

In [22]:
# One full scrape per year: begin and end year are both set to `year`, while
# month/day keep the function's defaults (01/01 through 12/31).
# Results are stored year by year, so years already finished remain in the
# dictionary if a later one fails.
for year in beginYearList:
    year_state_county_damage[year] = getCountyLevelFundingDataFromNOAA(eventUrl=urlevents, beginDate_yyyy=str(year), 
                                                                       endDate_yyyy=str(year))

Alabama for year 2011 received:  170000.0
Alaska for year 2011 received:  28849000.0
Arizona for year 2011 received:  42000.0
Arkansas for year 2011 received:  176073000.0
California for year 2011 received:  34065000.0
Colorado for year 2011 received:  2691000.0
Connecticut for year 2011 received:  68213000.0
Delaware for year 2011 received:  360000.0
District Of Columbia for year 2011 received:  2406000.0
Florida for year 2011 received:  59000.0
Georgia for year 2011 received:  23000.0
Hawaii for year 2011 received:  0.0
Idaho for year 2011 received:  4165000.0
Illinois for year 2011 received:  11650000.0
Indiana for year 2011 received:  5919000.0
Iowa for year 2011 received:  47952000.0
Kansas for year 2011 received:  2697000.0
Kentucky for year 2011 received:  42862000.0
Louisiana for year 2011 received:  27528000.0
Maine for year 2011 received:  107500.0
Maryland for year 2011 received:  17121000.0
Massachusetts for year 2011 received:  59720000.0
Michigan for year 2011 received:  

In [23]:
import numpy as np
# The object being saved is a nested dict, not an array, so np.save has to
# pickle it.
# NOTE(review): reading it back presumably needs
# np.load(path, allow_pickle=True).item() - confirm with the numpy version in use.
# The file name hard-codes 2011, but the dict holds every year in beginYearList.
np.save('State_CountyDamages_2011.npy', year_state_county_damage)