In [1]:
import io, time, json
import time
import urllib
from urllib import parse

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [32]:
def retrieve_html(url, params=None, timeout=30):
    """Fetch ``url`` with ``params`` appended as a URL-encoded query string.

    Parameters
    ----------
    url : str
        Address to request. The encoded parameters are concatenated directly
        onto it, so ``url`` must already end with ``?`` (or ``&``) whenever
        ``params`` is supplied.
    params : dict or sequence of pairs, optional
        Query parameters to encode. ``None`` (or an empty container) appends
        nothing.
    timeout : float or tuple, optional
        Forwarded to ``requests.get`` so that a stalled connection raises
        instead of blocking the notebook indefinitely.

    Returns
    -------
    tuple
        ``(status_code, text)`` of the HTTP response.
    """
    query = urllib.parse.urlencode(params) if params else ''
    response = requests.get(url + query, timeout=timeout)
    return response.status_code, response.text

In [33]:
def retrieve_County_html(url, urlevents=None, params=None, timeout=30):
    """Fetch a page whose query string combines event filters and parameters.

    The request URL is ``url`` + the ``urlevents`` fragments joined by ``&`` +
    ``&`` + the URL-encoded ``params``.

    Parameters
    ----------
    url : str
        Base address; it must already end with ``?`` because the query string
        is concatenated directly onto it.
    urlevents : iterable of str, str, or None, optional
        Already URL-encoded ``key=value`` fragments. ``None`` contributes
        nothing, and a single string is used as one fragment rather than being
        split into characters.
    params : dict or sequence of pairs, optional
        Further query parameters, encoded here. ``None`` appends nothing.
    timeout : float or tuple, optional
        Forwarded to ``requests.get`` so that a stalled connection raises
        instead of blocking the notebook indefinitely.

    Returns
    -------
    tuple
        ``(status_code, text)`` of the HTTP response.
    """
    if urlevents is None:
        event_parts = []
    elif isinstance(urlevents, str):
        # '&'.join on a bare string would interleave '&' between characters.
        event_parts = [urlevents]
    else:
        event_parts = list(urlevents)

    encoded_params = urllib.parse.urlencode(params) if params else ''
    nexturl = url + '&'.join(event_parts) + '&' + encoded_params
    response = requests.get(nexturl, timeout=timeout)
    return response.status_code, response.text

In [34]:
def parse_page(html):
    """Extract one cell's text from a results page.

    Returns the text of the second-to-last ``<td>`` in the ninth ``<tr>`` of
    the document (index 8), or ``None`` after printing a notice when the page
    has no ``<table id="results">``.

    NOTE(review): the fixed row/column position presumably targets the totals
    row of the NOAA results table -- confirm against a live page.
    """
    document = BeautifulSoup(html, 'html.parser')
    if document.find("table", id="results") is None:
        print('No results found')
        return None

    # Rows are gathered from the whole document, not only the results table.
    all_rows = document.find_all("tr")
    cells = all_rows[8].find_all("td")
    return cells[-2].text

In [35]:
def get_all_states(html):
    """Pair each option value of the ``statefips`` drop-down with its label.

    Returns a ``zip`` iterator of ``(value, text)`` tuples, one per
    ``<option>`` inside ``<select id="statefips">``. The iterator can be
    consumed only once.
    """
    document = BeautifulSoup(html, 'html.parser')
    dropdown = document.find("select", id="statefips")
    options = dropdown.find_all("option")
    labels = [option.text for option in options]
    values = [option['value'] for option in options]
    return zip(values, labels)

In [36]:
def getFundingForQuery(url, params, search_html):
    """Run the same query once per state and collect the parsed values.

    For every state option found in ``search_html`` the ``statefips`` entry of
    ``params`` is overwritten in place, the page at ``url`` is fetched, and the
    result of ``parse_page`` is appended to the returned list.
    """
    collected = []
    for state_value, _state_text in get_all_states(search_html):
        params['statefips'] = state_value
        _status, page = retrieve_html(url, params)
        collected.append(parse_page(page))
    return collected

In [37]:
def getFundingDataFromNOAA(eventType='(Z) Storm Surge/Tide', beginDate_mm='01', beginDate_dd='01',
                           beginDate_yyyy='2017', endDate_mm='01', endDate_dd='31', endDate_yyyy='2018',
                           county='ALL', statefips='48,TEXAS'):
    """Query the NOAA storm-events listing for every state.

    Builds the search parameters from the arguments, downloads the
    ``choosedates.jsp`` page to obtain the list of states, and returns the
    list produced by ``getFundingForQuery`` (one parsed value per state).
    ``statefips`` only seeds the parameter dict; it is overwritten per state.
    """
    params = dict(
        eventType=eventType,
        beginDate_mm=beginDate_mm,
        beginDate_dd=beginDate_dd,
        beginDate_yyyy=beginDate_yyyy,
        endDate_mm=endDate_mm,
        endDate_dd=endDate_dd,
        endDate_yyyy=endDate_yyyy,
        county=county,
        sort='DT',
        submitbutton='Search',
        statefips=statefips,
    )

    _status, search_html = retrieve_html(
        "https://www.ncdc.noaa.gov/stormevents/choosedates.jsp?statefips=-999%2CALL")
    return getFundingForQuery(
        "https://www.ncdc.noaa.gov/stormevents/listevents.jsp?", params, search_html)

In [38]:
# Get the list of all counties corresponding to each state
def parse_county_page(html):
    """Return the ``<option>`` tags of the ``county`` drop-down.

    Looks up ``<select name="county">`` in ``html``. When it is absent a
    notice is printed and ``None`` is returned.
    """
    county_select = BeautifulSoup(html, 'html.parser').find(
        'select', attrs={'name': 'county'})
    if county_select is not None:
        return county_select.find_all("option")
    print('No results found')
    return None

In [39]:
def getCountyList():
    """Build a mapping from state label to its county options.

    Downloads the ``choosedates.jsp`` page once to list the states, then once
    per state to read that state's county drop-down. Each state maps to a list
    of ``(option value, option text)`` tuples, which is empty when the county
    drop-down is missing. A per-state count is printed along the way.
    """
    base_url = "https://www.ncdc.noaa.gov/stormevents/choosedates.jsp?"
    _status, landing_html = retrieve_html(base_url + 'statefips=-999%2CALL')
    counties_by_state = {}

    print('Number of counties per state observed in NOAA.')
    for state_value, state_text in get_all_states(landing_html):
        _status, state_html = retrieve_html(base_url, params={'statefips': state_value})
        options = parse_county_page(state_html)
        if options is None:
            entries = []
        else:
            entries = [(option['value'], option.text)
                       for option in options if option is not None]
        counties_by_state[state_text] = entries
        print('{}: {}'.format(state_text, len(entries)))

    return counties_by_state

In [30]:
def test(beginDate_mm='01', beginDate_dd='01',
       beginDate_yyyy='2017', endDate_mm='12', endDate_dd='31', endDate_yyyy='2017',
       county='ALL', statefips='48,CTEXAS'):
    """Build and return a ``listevents.jsp`` URL covering ten event types.

    The query string repeats ``eventType`` once per entry of the built-in
    event-type list and then appends the date range, county, sort order,
    submit button and ``statefips`` parameters. No request is sent.

    Returns
    -------
    str
        The assembled URL (previously the URL was built and then discarded).

    NOTE(review): the default ``statefips='48,CTEXAS'`` looks like a typo for
    ``'48,TEXAS'`` (the value used elsewhere in this notebook); it is kept so
    that the function's defaults stay unchanged.
    """
    url = "https://www.ncdc.noaa.gov/stormevents/listevents.jsp?"
    eventTypes = ["(Z) High Surf", "(Z) Hurricane (Typhoon)", "(Z) High Wind", "(Z) Marine Strong Wind",
                  "(Z) Marine Thunderstorm Wind", "(Z) Coastal Flood", "(Z) Flood", "(Z) Storm Surge/Tide",
                  "(Z) Tropical Storm", "(Z) Tropical Depression"]
    params = {'beginDate_mm': beginDate_mm,
              'beginDate_dd': beginDate_dd,
              'beginDate_yyyy': beginDate_yyyy,
              'endDate_mm': endDate_mm,
              'endDate_dd': endDate_dd,
              'endDate_yyyy': endDate_yyyy,
              'county': county,
              'sort': 'DT',
              'submitbutton': 'Search',
              'statefips': statefips}
    # One encoded 'eventType=...' fragment per event type, in list order.
    urlevents = [urllib.parse.urlencode({'eventType': event_type})
                 for event_type in eventTypes]
    nexturl = url + '&'.join(urlevents) + '&' + urllib.parse.urlencode(params)
    return nexturl


In [45]:
# Now we have the dictionary of states and their counties. Let's extract all data for each state and county.
def getNumber(string):
    """Convert a magnitude-suffixed figure such as ``'1.5M'`` to a float.

    A trailing ``K``, ``M`` or ``B`` multiplies the number by 1e3, 1e6 or 1e9
    respectively. A string without one of those suffixes is parsed in full
    (previously its last character was dropped, so ``'123'`` became ``12.0``).
    Surrounding whitespace is ignored and the result is rounded to four
    decimal places.

    Raises
    ------
    ValueError
        If the numeric part cannot be parsed by ``float``.
    AttributeError
        If ``string`` is not a string (e.g. ``None``).
    """
    text = string.strip()
    multiplier = {'K': 1e3, 'M': 1e6, 'B': 1e9}.get(text[-1:])
    if multiplier is None:
        # No recognised suffix: keep every character of the number.
        return round(float(text), 4)
    return round(float(text[:-1]) * multiplier, 4)
    
def getFundingForCounty(url, eventUrl, params, search_html, county_lookup=None):
    """Collect the parsed value for every county of every state.

    Parameters
    ----------
    url : str
        Base listing URL handed to ``retrieve_County_html``.
    eventUrl : iterable of str
        Pre-encoded ``eventType=...`` fragments handed to
        ``retrieve_County_html``.
    params : dict
        Base query parameters. A copy is made, so the caller's dict is not
        modified; ``statefips`` and ``county`` are overwritten per request.
    search_html : str
        HTML containing the ``statefips`` drop-down that lists the states.
    county_lookup : dict, optional
        Maps a state label to ``(county value, county text)`` pairs. When
        omitted, the notebook-level ``state_county`` mapping is used, which
        must already exist (it is produced by ``getCountyList()``).

    Returns
    -------
    dict
        ``{state text: {county text: value}}``. Counties whose request did not
        return status 200, or whose page held no results, are left out.
    """
    if county_lookup is None:
        # Backward-compatible fallback to the global built by getCountyList().
        county_lookup = state_county

    query = dict(params)  # work on a copy; do not mutate the caller's dict
    state_county_damage_val = {}
    for state_value, state_text in get_all_states(search_html):
        query['statefips'] = state_value
        county_damage = {}
        state_county_damage_val[state_text] = county_damage
        for county_value, county_text in county_lookup.get(state_text, []):
            query['county'] = county_value
            code, state_damage_page = retrieve_County_html(url, eventUrl, query)
            # Throttle after every request, including failed ones.
            time.sleep(0.5)
            if code != 200:
                print('Could not find %s, %s' %(county_text, state_text))
                continue
            raw_value = parse_page(state_damage_page)
            if raw_value is None:
                # parse_page found no results table; getNumber(None) would crash.
                print('Could not find %s, %s' %(county_text, state_text))
                continue
            county_damage[county_text] = getNumber(raw_value)
        if county_damage:
            # '-- All --' is the aggregate entry; it may be absent if its request failed.
            print(state_text, 'for year', query.get('beginDate_yyyy'), 'received: ',
                  county_damage.get('-- All --'))
            print(county_damage)
    return state_county_damage_val

def getCountyLevelFundingDataFromNOAA(eventUrl = '', beginDate_mm='01', beginDate_dd='01',
                           beginDate_yyyy='2017', endDate_mm='12', endDate_dd='31', endDate_yyyy='2018',
                           county='ALL', statefips='48,TEXAS'):
    """Fetch county-level values for all states over the given date range.

    Assembles the date/county/state parameters, downloads the
    ``choosedates.jsp`` page to obtain the list of states, and returns the
    nested ``{state: {county: value}}`` dict from ``getFundingForCounty``.
    ``eventUrl`` is passed through unchanged as the event-type fragments.
    """
    params = dict(
        beginDate_mm=beginDate_mm,
        beginDate_dd=beginDate_dd,
        beginDate_yyyy=beginDate_yyyy,
        endDate_mm=endDate_mm,
        endDate_dd=endDate_dd,
        endDate_yyyy=endDate_yyyy,
        county=county,
        statefips=statefips,
    )

    _status, search_html = retrieve_html(
        "https://www.ncdc.noaa.gov/stormevents/choosedates.jsp?statefips=-999%2CALL")

    return getFundingForCounty(
        "https://www.ncdc.noaa.gov/stormevents/listevents.jsp?", eventUrl, params, search_html)

In [None]:
# Event types to include in every query. Defined here at notebook level
# because the only other definition is a local variable inside test(), so a
# fresh kernel would otherwise raise NameError on `eventTypes`.
eventTypes = ["(Z) High Surf", "(Z) Hurricane (Typhoon)", "(Z) High Wind", "(Z) Marine Strong Wind",
              "(Z) Marine Thunderstorm Wind", "(Z) Coastal Flood", "(Z) Flood", "(Z) Storm Surge/Tide",
              "(Z) Tropical Storm", "(Z) Tropical Depression"]

# Results keyed by year, filled in by the driver loop in the next cell.
year_state_county_damage = {}

# One {'eventType': ...} dict per event type, then its URL-encoded fragment.
pevents = [{'eventType': event_type} for event_type in eventTypes]
urlevents = [urllib.parse.urlencode(p) for p in pevents]

# State label -> list of (county value, county text); read by getFundingForCounty.
state_county = getCountyList()
beginYearList = [2005, 2007, 2008, 2010, 2011, 2012, 2013, 2016, 2017]

In [46]:
# Query each year separately (begin and end year identical) and store the
# nested state -> county -> value dict under that year.
for year in beginYearList:
    year_state_county_damage[year] = getCountyLevelFundingDataFromNOAA(
        eventUrl=urlevents,
        beginDate_yyyy=str(year),
        endDate_yyyy=str(year),
    )

Alabama for year 2005 received:  1164000000.0
{'-- All --': 1164000000.0, 'Autauga': 260000.0, 'Baldwin': 0.0, 'Barbour': 77000.0, 'Bibb': 260000.0, 'Blount': 97000.0, 'Bullock': 64000.0, 'Butler': 0.0, 'Calhoun': 56000.0, 'Chambers': 34000.0, 'Cherokee': 108000.0, 'Chilton': 180000.0, 'Choctaw': 0.0, 'Clarke': 0.0, 'Clay': 91000.0, 'Cleburne': 49000.0, 'Coffee': 300000.0, 'Colbert': 0.0, 'Conecuh': 0.0, 'Coosa': 140000.0, 'Covington': 0.0, 'Crenshaw': 0.0, 'Cullman': 0.0, 'Dale': 300000.0, 'Dallas': 855000.0, 'Dekalb': 0.0, 'Elmore': 240000.0, 'Escambia': 0.0, 'Etowah': 47000.0, 'Fayette': 555000.0, 'Franklin': 0.0, 'Geneva': 150000.0, 'Greene': 1988000.0, 'Hale': 2178000.0, 'Henry': 100000.0, 'Houston': 650000.0, 'Jackson': 0.0, 'Jefferson': 6400000.0, 'Lamar': 1860000.0, 'Lauderdale': 0.0, 'Lawrence': 5000.0, 'Lee': 55000.0, 'Limestone': 0.0, 'Lowndes': 360000.0, 'Macon': 90000.0, 'Madison': 0.0, 'Marengo': 3305000.0, 'Marion': 3060000.0, 'Marshall': 0.0, 'Mobile': 0.0, 'Monroe': 0.

KeyboardInterrupt: 

In [None]:
# Persist the nested results dict. np.save stores a dict as a pickled 0-d
# object array, so pickling is requested explicitly; reload it with
#   np.load('State_County_Damage.npy', allow_pickle=True).item()
# Requires `import numpy as np` in the notebook's import cell.
np.save('State_County_Damage.npy', year_state_county_damage, allow_pickle=True)