# NOAA extreme weather events
The [National Oceanic and Atmospheric Administration](https://en.wikipedia.org/wiki/National_Oceanic_and_Atmospheric_Administration) (NOAA) maintains a [database of extreme weather events](https://www.climate.gov/maps-data/dataset/severe-storms-and-extreme-events-data-table) that records detailed information about each event, organized by year. This notebook extracts the data for 2018.

In [10]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
pd.set_option('display.max_columns', None)  # Unlimited columns

## Get official list of counties

In [176]:
import geopandas
# Read the shapefile that describes every county in the US.
counties = geopandas.read_file('../data_input/1_USCounties/')

# The FIPS identifier columns arrive as strings; cast each of them to int
# so they can be compared against numerically built FIPS codes.
counties = counties.assign(**{
    fips_col: counties[fips_col].astype(int)
    for fips_col in ('STATE_FIPS', 'CNTY_FIPS', 'FIPS')
})

# Plain Python list of every complete county FIPS code in the shapefile.
official_counties = counties['FIPS'].tolist()

In [189]:
# Preview the first five rows of the county table.
counties.head(n=5)

Unnamed: 0,NAME,STATE_NAME,STATE_FIPS,CNTY_FIPS,FIPS,geometry
0,Lake of the Woods,Minnesota,27,77,27077,"POLYGON ((-95.34283127277658 48.546679319076, ..."
1,Ferry,Washington,53,19,53019,POLYGON ((-118.8516288013387 47.94956368481996...
2,Stevens,Washington,53,65,53065,"POLYGON ((-117.438831576286 48.04411548512263,..."
3,Okanogan,Washington,53,47,53047,"POLYGON ((-118.972093862835 47.93915200536639,..."
4,Pend Oreille,Washington,53,51,53051,POLYGON ((-117.4385804303028 48.99991850672649...


## Get NOAA data

In [11]:
# Load the complete 2018 storm-event details table.  It carries more
# columns than this analysis wants to use.
df1 = pd.read_csv('../data_input/4_NOAA Storms/StormEvents_details-ftp_v1.0_d2018_c20190422.csv')

# Report the table dimensions, then the column index, on separate lines.
print(df1.shape, df1.columns, sep='\n')

# Show the first two rows as the cell's rich output.
df1.head(2)

(62169, 51)
Index(['BEGIN_YEARMONTH', 'BEGIN_DAY', 'BEGIN_TIME', 'END_YEARMONTH',
       'END_DAY', 'END_TIME', 'EPISODE_ID', 'EVENT_ID', 'STATE', 'STATE_FIPS',
       'YEAR', 'MONTH_NAME', 'EVENT_TYPE', 'CZ_TYPE', 'CZ_FIPS', 'CZ_NAME',
       'WFO', 'BEGIN_DATE_TIME', 'CZ_TIMEZONE', 'END_DATE_TIME',
       'INJURIES_DIRECT', 'INJURIES_INDIRECT', 'DEATHS_DIRECT',
       'DEATHS_INDIRECT', 'DAMAGE_PROPERTY', 'DAMAGE_CROPS', 'SOURCE',
       'MAGNITUDE', 'MAGNITUDE_TYPE', 'FLOOD_CAUSE', 'CATEGORY', 'TOR_F_SCALE',
       'TOR_LENGTH', 'TOR_WIDTH', 'TOR_OTHER_WFO', 'TOR_OTHER_CZ_STATE',
       'TOR_OTHER_CZ_FIPS', 'TOR_OTHER_CZ_NAME', 'BEGIN_RANGE',
       'BEGIN_AZIMUTH', 'BEGIN_LOCATION', 'END_RANGE', 'END_AZIMUTH',
       'END_LOCATION', 'BEGIN_LAT', 'BEGIN_LON', 'END_LAT', 'END_LON',
       'EPISODE_NARRATIVE', 'EVENT_NARRATIVE', 'DATA_SOURCE'],
      dtype='object')


Unnamed: 0,BEGIN_YEARMONTH,BEGIN_DAY,BEGIN_TIME,END_YEARMONTH,END_DAY,END_TIME,EPISODE_ID,EVENT_ID,STATE,STATE_FIPS,YEAR,MONTH_NAME,EVENT_TYPE,CZ_TYPE,CZ_FIPS,CZ_NAME,WFO,BEGIN_DATE_TIME,CZ_TIMEZONE,END_DATE_TIME,INJURIES_DIRECT,INJURIES_INDIRECT,DEATHS_DIRECT,DEATHS_INDIRECT,DAMAGE_PROPERTY,DAMAGE_CROPS,SOURCE,MAGNITUDE,MAGNITUDE_TYPE,FLOOD_CAUSE,CATEGORY,TOR_F_SCALE,TOR_LENGTH,TOR_WIDTH,TOR_OTHER_WFO,TOR_OTHER_CZ_STATE,TOR_OTHER_CZ_FIPS,TOR_OTHER_CZ_NAME,BEGIN_RANGE,BEGIN_AZIMUTH,BEGIN_LOCATION,END_RANGE,END_AZIMUTH,END_LOCATION,BEGIN_LAT,BEGIN_LON,END_LAT,END_LON,EPISODE_NARRATIVE,EVENT_NARRATIVE,DATA_SOURCE
0,201806,6,1810,201806,6,1810,125578,753161,NEBRASKA,31,2018,June,Hail,C,69,GARDEN,LBF,06-JUN-18 18:10:00,MST-7,06-JUN-18 18:10:00,0,0,0,0,0.00K,0.00K,Public,1.0,,,,,,,,,,,36.0,N,OSHKOSH,36.0,N,OSHKOSH,41.93,-102.21,41.93,-102.21,Severe storms developed in the Nebraska Panhan...,Hail predominately penny size with some quarte...,CSV
1,201806,6,1741,201806,6,1741,125578,753160,NEBRASKA,31,2018,June,Hail,C,161,SHERIDAN,LBF,06-JUN-18 17:41:00,MST-7,06-JUN-18 17:41:00,0,0,0,0,0.00K,0.00K,Trained Spotter,1.25,,,,,,,,,,,1.0,NW,BINGHAM,1.0,NW,BINGHAM,42.03,-102.1,42.03,-102.1,Severe storms developed in the Nebraska Panhan...,Hail mainly quarter size with some half dollar...,CSV


In [194]:
# Keep only the columns needed to say where each event happened and what
# kind of event it was.
df2 = df1[['STATE', 'STATE_FIPS', 'CZ_FIPS', 'EVENT_TYPE']].copy()

# Build the complete FIPS code for each row: the state code followed by the
# county/zone code left-padded with zeros to three digits
# (e.g. state 31 + county 69 -> 31069).
# str.zfill pads any code shorter than three characters and leaves longer
# ones unchanged, so a code of unexpected length can never silently reuse
# the value computed for the previous row.
state_FIPS = df2['STATE_FIPS'].astype(str)
county_FIPS = df2['CZ_FIPS'].astype(str)
county_FIPS_2 = county_FIPS.str.zfill(3)
complete_FIPS = (state_FIPS + county_FIPS_2).astype(int)
df2['FIPS'] = complete_FIPS

# Drop entries for areas not in the list of counties that we're working
# with, i.e. KEEP only the rows whose FIPS appears in official_counties.
# The dropped rows include counties in non-state territories (eg, Puerto
# Rico), areas over lakes and oceans, and several forecast zones that don't
# correspond to particular counties.
# NOTE(review): when CZ_TYPE is not 'C', CZ_FIPS is presumably a forecast
# zone number rather than a county code, so a zone number could coincide
# with a real county FIPS and pass this filter -- verify against the NOAA
# Storm Events format documentation and consider also filtering on CZ_TYPE.
df3 = df2[df2.FIPS.isin(official_counties)].copy()

In [188]:
# Number of distinct FIPS codes that appear in df2.
len(set(df2['FIPS'].to_numpy()))

2099

In [190]:
# Preview the first five rows of the trimmed event table.
df2.head(n=5)

Unnamed: 0,STATE,STATE_FIPS,CZ_FIPS,EVENT_TYPE,FIPS
41,TEXAS,48,158,Drought,48158
42,OKLAHOMA,40,14,Wildfire,40014
44,WEST VIRGINIA,54,30,Winter Weather,54030
45,WEST VIRGINIA,54,20,Winter Weather,54020
46,WEST VIRGINIA,54,32,Winter Weather,54032


In [61]:
# Event types listed in the dataset's metadata, with the marine event
# types taken out.  The order of the entries is kept as given.
event_types = [
    'Astronomical Low Tide',
    'Avalanche',
    'Blizzard',
    'Coastal Flood',
    'Cold/Wind Chill',
    'Debris Flow',
    'Dense Fog',
    'Dense Smoke',
    'Drought',
    'Dust Devil',
    'Dust Storm',
    'Excessive Heat',
    'Extreme Cold/Wind Chill',
    'Flash Flood',
    'Flood',
    'Frost/Freeze',
    'Funnel Cloud',
    'Freezing Fog',
    'Hail',
    'Heat',
    'Heavy Rain',
    'Heavy Snow',
    'High Surf',
    'High Wind',
    'Hurricane (Typhoon)',
    'Ice Storm',
    'Lake-Effect Snow',
    'Lakeshore Flood',
    'Lightning',
    'Rip Current',
    'Seiche',
    'Sleet',
    'Storm Surge/Tide',
    'Strong Wind',
    'Thunderstorm Wind',
    'Tornado',
    'Tropical Depression',
    'Tropical Storm',
    'Tsunami',
    'Volcanic Ash',
    'Wildfire',
    'Winter Storm',
    'Winter Weather',
]

62169

In [37]:
# Summary statistics for the county/zone codes in df2.
df2['CZ_FIPS'].describe()

count    62169.000000
mean       105.247149
std        136.718774
min          1.000000
25%         25.000000
50%         63.000000
75%        117.000000
max        876.000000
Name: CZ_FIPS, dtype: float64

In [28]:
# Keep only the events that began before 2019 (BEGIN_YEARMONTH is YYYYMM).
# df2 holds only STATE, STATE_FIPS, CZ_FIPS, EVENT_TYPE and FIPS, so the
# begin date is looked up in df1; df2 was copied from df1 and shares its
# row index, which keeps the mask aligned row for row.
# NOTE(review): this reassigns df3 and therefore replaces the
# county-filtered frame built in the FIPS cell -- confirm that is intended.
began_before_2019 = df1.loc[df2.index, 'BEGIN_YEARMONTH'] < 201900
df3 = df2[began_before_2019]
df3.shape

(62169, 5)

In [30]:
# Summary statistics for the begin year-month of the events in df2.
# df2 does not carry BEGIN_YEARMONTH, so read it from df1 for the same rows
# (df2 was copied from df1 and shares its row index).
df1.loc[df2.index, 'BEGIN_YEARMONTH'].describe()

count     62169.000000
mean     201806.037092
std           3.030091
min      201801.000000
25%      201804.000000
50%      201806.000000
75%      201808.000000
max      201812.000000
Name: BEGIN_YEARMONTH, dtype: float64

In [31]:
# df1.EVENT_TYPE.value_counts()

In [32]:
# df1.isnull().sum()