In [1]:
import io, time, json
import requests
from bs4 import BeautifulSoup
import urllib
from urllib import parse
import time
import pandas as pd

In [2]:
def retrieve_html(url, params = None):  
    if params is None:
        params = ''
    response = requests.get(url + urllib.parse.urlencode(params))
    return response.status_code, response.text

In [3]:
def parse_page(html):
    soup = BeautifulSoup(html, 'html.parser')
    prd = soup.find("table", id="results")
    if prd is None:
        print('No results found')
        return
    prd = soup.find_all("tr")
    fund_val = prd[8].find_all("td")[-2].text
    return fund_val

In [4]:
def get_all_states(html):
    soup = BeautifulSoup(html, 'html.parser')
    states = soup.find("select", id="statefips").find_all("option")
    states_text = [t.text for t in states]
    states_values = [t['value'] for t in states]
    return zip(states_values, states_text)

In [5]:
def getFundingForQuery(url, params, search_html):
    state_list = get_all_states(search_html)
    state_damage_vals = []
    for state_value, state_text in state_list:
        params['statefips'] = state_value
        state_damage_page = retrieve_html(url, params)[1]
        state_damage_vals.append(parse_page(state_damage_page))
    return state_damage_vals

In [6]:
def getFundingDataFromNOAA(eventType='(Z) Storm Surge/Tide', beginDate_mm='01', beginDate_dd='01', 
                           beginDate_yyyy='2017', endDate_mm='01', endDate_dd='31', endDate_yyyy='2018', 
                           county='ALL', statefips='48,TEXAS'):
    url = "https://www.ncdc.noaa.gov/stormevents/listevents.jsp?"
    params = {'eventType': eventType,
                  'beginDate_mm': beginDate_mm,
                  'beginDate_dd': beginDate_dd,
                  'beginDate_yyyy': beginDate_yyyy,
                  'endDate_mm': endDate_mm,
                  'endDate_dd': endDate_dd,
                  'endDate_yyyy': endDate_yyyy,
                  'county': county,
                  'sort': 'DT',
                  'submitbutton': 'Search',
                  'statefips': statefips}
    
    searchUrl = "https://www.ncdc.noaa.gov/stormevents/choosedates.jsp?statefips=-999%2CALL"
    search_html = retrieve_html(searchUrl)[1]
    damage_values = getFundingForQuery(url, params, search_html)
    return damage_values

In [7]:
# Get list of all county's corresponding to the states
def parse_county_page(html):
    soup = BeautifulSoup(html, 'html.parser')
    prd = soup.find('select', attrs={'name':'county'})
    if prd is None:
        print('No results found')
        return
    prd = prd.find_all("option")
    return prd

In [8]:
def getCountyList():
    searchUrl = "https://www.ncdc.noaa.gov/stormevents/choosedates.jsp?"
    search_html = retrieve_html(searchUrl + 'statefips=-999%2CALL')[1]
    state_county = {}

    print('Number of counties per state observed in NOAA.')
    for state_value, state_text in get_all_states(search_html):
        state_county[state_text] = []
        params = {'statefips': state_value}
        search_html = retrieve_html(searchUrl, params=params)[1]
        county_list = parse_county_page(search_html)
        if county_list is not None:
            state_county[state_text] = [(i['value'], i.text) for i in county_list if i is not None]        
        print('{}: {}'.format(state_text, len(state_county[state_text])))
    
    return state_county

In [14]:
state_county = getCountyList()
beginYearList = [2005, 2007, 2008, 2010, 2011, 2012, 2013, 2016, 2017]
eventTypes = ["(Z) High Surf", "(Z) Hurricane (Typhoon)", "(Z) High Wind", "(Z) Marine Strong Wind", 
              "(Z) Marine Thunderstorm Wind", "(Z) Coastal Flood", "(Z) Flood", "(Z) Storm Surge/Tide", 
              "(Z) Tropical Storm", "(Z) Tropical Depression"]

Number of counties per state observed in NOAA.
No results found
-- All States and Areas --: 0
Alabama: 68
Alaska: 66
Arizona: 16
Arkansas: 76
California: 59
Colorado: 65
Connecticut: 9
Delaware: 4
District Of Columbia: 2
Florida: 68
Georgia: 160
Hawaii: 5
Idaho: 45
Illinois: 104
Indiana: 93
Iowa: 100
Kansas: 107
Kentucky: 121
Louisiana: 65
Maine: 17
Maryland: 25
Massachusetts: 15
Michigan: 84
Minnesota: 88
Mississippi: 83
Missouri: 117
Montana: 57
Nebraska: 94
Nevada: 18
New Hampshire: 11
New Jersey: 22
New Mexico: 35
New York: 63
North Carolina: 101
North Dakota: 54
Ohio: 89
Oklahoma: 78
Oregon: 37
Pennsylvania: 68
Rhode Island: 6
South Carolina: 47
South Dakota: 67
Tennessee: 97
Texas: 255
Utah: 30
Vermont: 15
Virginia: 137
Washington: 40
West Virginia: 56
Wisconsin: 73
Wyoming: 24
Lake St Clair: 1
Hawaii Waters: 1
Gulf Of Mexico: 1
E Pacific: 1
Atlantic South: 1
Atlantic North: 1
Gulf Of Alaska: 1
Lake Huron: 1
Lake Michigan: 1
Lake Superior: 1
St Lawrence R: 1
Lake Ontario: 1
Lake 

In [13]:
#Now we have the dictionary of states and their countys. Lets extract all data for given state and county
def getNumber(string):
    val = 1
    if string.endswith('K'):
        val = 1e3
    elif string.endswith('M'):
        val = 1e6
    elif string.endswith('B'):
        val = 1e9
    return round(float(string[:-1])*val, 4)
    
def getFundingForCounty(url, params, search_html):
    state_list = get_all_states(search_html)
    state_county_damage_val = {}
    for state_value, state_text in state_list:
        params['statefips'] = state_value
        state_county_damage_val[state_text] = {}
        for county_value, county_text in state_county[state_text]:
            params['county'] = county_value
            val = 0
            for event in eventTypes:
                params['eventType'] = event
                state_damage_page = retrieve_html(url, params)[1]
                val += getNumber(parse_page(state_damage_page))
            state_county_damage_val[state_text][county_text] = val
            time.sleep(0.01)
        if state_county_damage_val[state_text].keys():
            print(state_text, 'for year', params['beginDate_yyyy'], 'received: ',
                  state_county_damage_val[state_text]['-- All --'])
    return state_county_damage_val

def getCountyLevelFundingDataFromNOAA(eventType='(Z) Storm Surge/Tide', beginDate_mm='01', beginDate_dd='01', 
                           beginDate_yyyy='2017', endDate_mm='12', endDate_dd='31', endDate_yyyy='2018', 
                           county='ALL', statefips='Texas'):
    url = "https://www.ncdc.noaa.gov/stormevents/listevents.jsp?"
    params = {'eventType': eventType,
                  'beginDate_mm': beginDate_mm,
                  'beginDate_dd': beginDate_dd,
                  'beginDate_yyyy': beginDate_yyyy,
                  'endDate_mm': endDate_mm,
                  'endDate_dd': endDate_dd,
                  'endDate_yyyy': endDate_yyyy,
                  'county': county,
                  'sort': 'DT',
                  'submitbutton': 'Search',
                  'statefips': statefips}
    
    searchUrl = "https://www.ncdc.noaa.gov/stormevents/choosedates.jsp?statefips=-999%2CALL"
    search_html = retrieve_html(searchUrl)[1]
    
    damage_values = getFundingForCounty(url, params, search_html)
    return damage_values

In [15]:
year_state_county_damage = {}
for year in beginYearList:
    year_state_county_damage[year] = getCountyLevelFundingDataFromNOAA(beginDate_yyyy=str(year), 
                                                                       endDate_yyyy=str(year))

Alabama for year 2005 received:  1163197000.0
Alaska for year 2005 received:  12519000.0
Arizona for year 2005 received:  1500000.0
Arkansas for year 2005 received:  770000.0
California for year 2005 received:  381025000.0
Colorado for year 2005 received:  322000.0
Connecticut for year 2005 received:  6115000.0
Delaware for year 2005 received:  10000.0
District Of Columbia for year 2005 received:  0.0
Florida for year 2005 received:  12068085000.0
Georgia for year 2005 received:  10701000.0
Hawaii for year 2005 received:  0.0
Idaho for year 2005 received:  47500.0
Illinois for year 2005 received:  2262000.0
Indiana for year 2005 received:  11817000.0
Iowa for year 2005 received:  7570000.0
Kansas for year 2005 received:  9742000.0
Kentucky for year 2005 received:  5885000.0
Louisiana for year 2005 received:  52764000000.0
Maine for year 2005 received:  8213000.0
Maryland for year 2005 received:  250000.0
Massachusetts for year 2005 received:  16178000.0
Michigan for year 2005 received:

ConnectionError: HTTPSConnectionPool(host='www.ncdc.noaa.gov', port=443): Max retries exceeded with url: /stormevents/listevents.jsp?eventType=%28Z%29+Storm+Surge%2FTide&beginDate_mm=01&beginDate_dd=01&beginDate_yyyy=2005&endDate_mm=12&endDate_dd=31&endDate_yyyy=2005&county=GRANT%3A51&sort=DT&submitbutton=Search&statefips=27%2CMINNESOTA (Caused by NewConnectionError('<urllib3.connection.VerifiedHTTPSConnection object at 0x10d449860>: Failed to establish a new connection: [Errno 8] nodename nor servname provided, or not known',))

In [None]:
np.save('State_County_Damage.npy', year_state_county_damage)