In [1]:
import pandas as pd
import numpy as np
import matplotlib as mpl
from matplotlib import pyplot as plt
import seaborn as sns
import datetime as dt

## Read CSV File into DataFrames

In [2]:
# global land temperature data by city, with latitude/longitude values
filename = 'Global-Land-Temperatures-By-City.csv'
temp_df = pd.read_csv(filename)
temp_df.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,City,Country,Latitude,Longitude
0,1743-11-01,6.068,1.737,Århus,Denmark,57.05N,10.33E
1,1743-12-01,,,Århus,Denmark,57.05N,10.33E
2,1744-01-01,,,Århus,Denmark,57.05N,10.33E
3,1744-02-01,,,Århus,Denmark,57.05N,10.33E
4,1744-03-01,,,Århus,Denmark,57.05N,10.33E


In [3]:
temp_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8599212 entries, 0 to 8599211
Data columns (total 7 columns):
dt                               object
AverageTemperature               float64
AverageTemperatureUncertainty    float64
City                             object
Country                          object
Latitude                         object
Longitude                        object
dtypes: float64(2), object(5)
memory usage: 459.2+ MB


In [4]:
# US federal emergency data, join on county
filename1 = 'federal_emergencies.csv'
disaster_df = pd.read_csv(filename1)
disaster_df.head()

Unnamed: 0,Declaration Number,Declaration Type,Declaration Date,State,County,Disaster Type,Disaster Title,Start Date,End Date,Close Date,Individual Assistance Program,Individuals & Households Program,Public Assistance Program,Hazard Mitigation Program
0,DR-1,Disaster,05/02/1953,GA,,Tornado,Tornado,05/02/1953,05/02/1953,06/01/1954,Yes,No,Yes,Yes
1,DR-2,Disaster,05/15/1953,TX,,Tornado,Tornado and Heavy Rainfall,05/15/1953,05/15/1953,01/01/1958,Yes,No,Yes,Yes
2,DR-3,Disaster,05/29/1953,LA,,Flood,Flood,05/29/1953,05/29/1953,02/01/1960,Yes,No,Yes,Yes
3,DR-4,Disaster,06/02/1953,MI,,Tornado,Tornado,06/02/1953,06/02/1953,02/01/1956,Yes,No,Yes,Yes
4,DR-5,Disaster,06/06/1953,MT,,Flood,Floods,06/06/1953,06/06/1953,12/01/1955,Yes,No,Yes,Yes


In [5]:
disaster_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 46185 entries, 0 to 46184
Data columns (total 14 columns):
Declaration Number                  46185 non-null object
Declaration Type                    46185 non-null object
Declaration Date                    46185 non-null object
State                               46185 non-null object
County                              45988 non-null object
Disaster Type                       46185 non-null object
Disaster Title                      46185 non-null object
Start Date                          46185 non-null object
End Date                            45843 non-null object
Close Date                          35210 non-null object
Individual Assistance Program       46185 non-null object
Individuals & Households Program    46185 non-null object
Public Assistance Program           46185 non-null object
Hazard Mitigation Program           46185 non-null object
dtypes: object(14)
memory usage: 4.9+ MB


In [6]:
# data of latitude/longitude and county to merge two dataframes
filename2 = 'zip_codes_states.csv'
us_join = pd.read_csv(filename2)
us_join.head()

Unnamed: 0,zip_code,latitude,longitude,city,state,county
0,501,40.922326,-72.637078,Holtsville,NY,Suffolk
1,544,40.922326,-72.637078,Holtsville,NY,Suffolk
2,601,18.165273,-66.722583,Adjuntas,PR,Adjuntas
3,602,18.393103,-67.180953,Aguada,PR,Aguada
4,603,18.455913,-67.14578,Aguadilla,PR,Aguadilla


# Cleaning DataFrames

### temp_df 

In [7]:
temp_us = temp_df[temp_df['Country'] == 'United States']
temp_us.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 687289 entries, 47555 to 8439246
Data columns (total 7 columns):
dt                               687289 non-null object
AverageTemperature               661524 non-null float64
AverageTemperatureUncertainty    661524 non-null float64
City                             687289 non-null object
Country                          687289 non-null object
Latitude                         687289 non-null object
Longitude                        687289 non-null object
dtypes: float64(2), object(5)
memory usage: 41.9+ MB


In [8]:
# temp_df: drop all countries except US and drop NaN values
temp_us = temp_df[temp_df['Country'] == 'United States'].dropna()
temp_us.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,City,Country,Latitude,Longitude
47555,1820-01-01,2.101,3.217,Abilene,United States,32.95N,100.53W
47556,1820-02-01,6.926,2.853,Abilene,United States,32.95N,100.53W
47557,1820-03-01,10.767,2.395,Abilene,United States,32.95N,100.53W
47558,1820-04-01,17.989,2.202,Abilene,United States,32.95N,100.53W
47559,1820-05-01,21.809,2.036,Abilene,United States,32.95N,100.53W


In [9]:
temp_us.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 661524 entries, 47555 to 8439246
Data columns (total 7 columns):
dt                               661524 non-null object
AverageTemperature               661524 non-null float64
AverageTemperatureUncertainty    661524 non-null float64
City                             661524 non-null object
Country                          661524 non-null object
Latitude                         661524 non-null object
Longitude                        661524 non-null object
dtypes: float64(2), object(5)
memory usage: 40.4+ MB


In [10]:
# temp_df: create new lat/long columns dropping NESW direction
temp_us['lat_n'] = [float(lat[:-1]) if lat[-1]=='N' else -1*float(lat[:-1]) for lat in temp_us.loc[:,'Latitude']]
temp_us['lon_n'] = [float(lon[:-1]) if lon[-1]=='E' else -1*float(lon[:-1]) for lon in temp_us.loc[:,'Longitude']]
temp_us.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,City,Country,Latitude,Longitude,lat_n,lon_n
47555,1820-01-01,2.101,3.217,Abilene,United States,32.95N,100.53W,32.95,-100.53
47556,1820-02-01,6.926,2.853,Abilene,United States,32.95N,100.53W,32.95,-100.53
47557,1820-03-01,10.767,2.395,Abilene,United States,32.95N,100.53W,32.95,-100.53
47558,1820-04-01,17.989,2.202,Abilene,United States,32.95N,100.53W,32.95,-100.53
47559,1820-05-01,21.809,2.036,Abilene,United States,32.95N,100.53W,32.95,-100.53


In [11]:
# unique coordinates in the temp_us dataframe
temp_us_coords = temp_us[['lat_n','lon_n']].drop_duplicates()
temp_us_coords.head()

Unnamed: 0,lat_n,lon_n
47555,32.95,-100.53
137066,40.99,-80.95
168075,34.56,-107.03
187528,39.38,-76.99
202251,40.99,-74.56


In [12]:
# function adds a column with pythagorean theorem 
def coord2loc(coords):
    us2 = us_join.copy()
    us2['dist'] = ((us2.latitude-coords.lat_n)**2+(us2.longitude-coords.lon_n)**2)**(1/2)
    state = us2.loc[us2.dist==min(us2.dist)]['state'].values[0]
    county = us2.loc[us2.dist==min(us2.dist)]['county'].values[0]
    return([coords.lat_n,coords.lon_n,state,county])

In [13]:
# create dataframe to join on between
join = pd.DataFrame([coord2loc(coords[1]) for coords in temp_us_coords.iterrows()])
join.columns = ['lat_n','lon_n','state','county']
join.head()

Unnamed: 0,lat_n,lon_n,state,county
0,32.95,-100.53,TX,Fisher
1,40.99,-80.95,OH,Mahoning
2,34.56,-107.03,NM,Valencia
3,39.38,-76.99,MD,Howard
4,40.99,-74.56,NJ,Morris


In [14]:
# temp_us_coords.iloc[47,]

In [15]:
# extract coordinates & understanding the function for one coordinate
# coords = temp_us_coords.iloc[47,]
# us2 = us_join.copy()
# us2['dist'] = ((us2.latitude-coords.lat_n)**2+(us2.longitude-coords.lon_n)**2)**(1/2)
# us2.sort_values('dist').head()

In [16]:
# state = us2.loc[us2.dist==min(us2.dist)]['state'].values[0]
# county = us2.loc[us2.dist==min(us2.dist)]['county'].values[0]
# us2.iloc[47,]

In [17]:
# merge temp_df and us_join
temp_county = pd.merge(temp_us, join, how='left', on = ['lat_n', 'lon_n'])
temp_county.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,City,Country,Latitude,Longitude,lat_n,lon_n,state,county
0,1820-01-01,2.101,3.217,Abilene,United States,32.95N,100.53W,32.95,-100.53,TX,Fisher
1,1820-02-01,6.926,2.853,Abilene,United States,32.95N,100.53W,32.95,-100.53,TX,Fisher
2,1820-03-01,10.767,2.395,Abilene,United States,32.95N,100.53W,32.95,-100.53,TX,Fisher
3,1820-04-01,17.989,2.202,Abilene,United States,32.95N,100.53W,32.95,-100.53,TX,Fisher
4,1820-05-01,21.809,2.036,Abilene,United States,32.95N,100.53W,32.95,-100.53,TX,Fisher


In [18]:
temp_county['year'] = [date[:4]
 for date in temp_county.loc[:,'dt']]
temp_county.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,City,Country,Latitude,Longitude,lat_n,lon_n,state,county,year
0,1820-01-01,2.101,3.217,Abilene,United States,32.95N,100.53W,32.95,-100.53,TX,Fisher,1820
1,1820-02-01,6.926,2.853,Abilene,United States,32.95N,100.53W,32.95,-100.53,TX,Fisher,1820
2,1820-03-01,10.767,2.395,Abilene,United States,32.95N,100.53W,32.95,-100.53,TX,Fisher,1820
3,1820-04-01,17.989,2.202,Abilene,United States,32.95N,100.53W,32.95,-100.53,TX,Fisher,1820
4,1820-05-01,21.809,2.036,Abilene,United States,32.95N,100.53W,32.95,-100.53,TX,Fisher,1820


In [19]:
temp_county['countyname'] = temp_county.county +' County'

In [20]:
temp_county.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,City,Country,Latitude,Longitude,lat_n,lon_n,state,county,year,countyname
0,1820-01-01,2.101,3.217,Abilene,United States,32.95N,100.53W,32.95,-100.53,TX,Fisher,1820,Fisher County
1,1820-02-01,6.926,2.853,Abilene,United States,32.95N,100.53W,32.95,-100.53,TX,Fisher,1820,Fisher County
2,1820-03-01,10.767,2.395,Abilene,United States,32.95N,100.53W,32.95,-100.53,TX,Fisher,1820,Fisher County
3,1820-04-01,17.989,2.202,Abilene,United States,32.95N,100.53W,32.95,-100.53,TX,Fisher,1820,Fisher County
4,1820-05-01,21.809,2.036,Abilene,United States,32.95N,100.53W,32.95,-100.53,TX,Fisher,1820,Fisher County


In [21]:
temp_county.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 661524 entries, 0 to 661523
Data columns (total 13 columns):
dt                               661524 non-null object
AverageTemperature               661524 non-null float64
AverageTemperatureUncertainty    661524 non-null float64
City                             661524 non-null object
Country                          661524 non-null object
Latitude                         661524 non-null object
Longitude                        661524 non-null object
lat_n                            661524 non-null float64
lon_n                            661524 non-null float64
state                            661524 non-null object
county                           661524 non-null object
year                             661524 non-null object
countyname                       661524 non-null object
dtypes: float64(4), object(9)
memory usage: 70.7+ MB


### disaster_df

In [22]:
disaster_df.head()

Unnamed: 0,Declaration Number,Declaration Type,Declaration Date,State,County,Disaster Type,Disaster Title,Start Date,End Date,Close Date,Individual Assistance Program,Individuals & Households Program,Public Assistance Program,Hazard Mitigation Program
0,DR-1,Disaster,05/02/1953,GA,,Tornado,Tornado,05/02/1953,05/02/1953,06/01/1954,Yes,No,Yes,Yes
1,DR-2,Disaster,05/15/1953,TX,,Tornado,Tornado and Heavy Rainfall,05/15/1953,05/15/1953,01/01/1958,Yes,No,Yes,Yes
2,DR-3,Disaster,05/29/1953,LA,,Flood,Flood,05/29/1953,05/29/1953,02/01/1960,Yes,No,Yes,Yes
3,DR-4,Disaster,06/02/1953,MI,,Tornado,Tornado,06/02/1953,06/02/1953,02/01/1956,Yes,No,Yes,Yes
4,DR-5,Disaster,06/06/1953,MT,,Flood,Floods,06/06/1953,06/06/1953,12/01/1955,Yes,No,Yes,Yes


In [23]:
#disaster_county = disaster_df[['Declaration Date','Disaster Type','State','County']].dropna()

In [24]:
disaster_county = disaster_df.copy()

In [25]:
disaster_county['Year'] = [date[-4:]
 for date in disaster_county.loc[:,'Declaration Date']]

In [26]:
#countylist = state_county['county'].tolist()

In [27]:
#statelist = state_county['state'].drop_duplicates().tolist()

In [28]:
#disaster_county[disaster_county['State'].isin(statelist)]

In [29]:
disaster_county.head()

Unnamed: 0,Declaration Number,Declaration Type,Declaration Date,State,County,Disaster Type,Disaster Title,Start Date,End Date,Close Date,Individual Assistance Program,Individuals & Households Program,Public Assistance Program,Hazard Mitigation Program,Year
0,DR-1,Disaster,05/02/1953,GA,,Tornado,Tornado,05/02/1953,05/02/1953,06/01/1954,Yes,No,Yes,Yes,1953
1,DR-2,Disaster,05/15/1953,TX,,Tornado,Tornado and Heavy Rainfall,05/15/1953,05/15/1953,01/01/1958,Yes,No,Yes,Yes,1953
2,DR-3,Disaster,05/29/1953,LA,,Flood,Flood,05/29/1953,05/29/1953,02/01/1960,Yes,No,Yes,Yes,1953
3,DR-4,Disaster,06/02/1953,MI,,Tornado,Tornado,06/02/1953,06/02/1953,02/01/1956,Yes,No,Yes,Yes,1953
4,DR-5,Disaster,06/06/1953,MT,,Flood,Floods,06/06/1953,06/06/1953,12/01/1955,Yes,No,Yes,Yes,1953


In [30]:
disaster_county.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 46185 entries, 0 to 46184
Data columns (total 15 columns):
Declaration Number                  46185 non-null object
Declaration Type                    46185 non-null object
Declaration Date                    46185 non-null object
State                               46185 non-null object
County                              45988 non-null object
Disaster Type                       46185 non-null object
Disaster Title                      46185 non-null object
Start Date                          46185 non-null object
End Date                            45843 non-null object
Close Date                          35210 non-null object
Individual Assistance Program       46185 non-null object
Individuals & Households Program    46185 non-null object
Public Assistance Program           46185 non-null object
Hazard Mitigation Program           46185 non-null object
Year                                46185 non-null object
dtypes: object(15)
me

In [31]:
avg_year_temp = temp_county.groupby(['countyname','year']).mean().reset_index()
avg_year_temp.head()

Unnamed: 0,countyname,year,AverageTemperature,AverageTemperatureUncertainty,lat_n,lon_n
0,Allen County,1743,2.973,2.15,40.99,-85.21
1,Allen County,1744,12.04525,2.06675,40.99,-85.21
2,Allen County,1745,0.74525,2.32125,40.99,-85.21
3,Allen County,1750,10.306,2.092455,40.99,-85.21
4,Allen County,1751,11.03625,2.016,40.99,-85.21


In [32]:
join_avg_year_temp = avg_year_temp[avg_year_temp['year'] >= '1953']

In [33]:
join_avg_year_temp.head()

Unnamed: 0,countyname,year,AverageTemperature,AverageTemperatureUncertainty,lat_n,lon_n
206,Allen County,1953,11.195833,0.194083,40.99,-85.21
207,Allen County,1954,10.760333,0.256,40.99,-85.21
208,Allen County,1955,10.619917,0.227333,40.99,-85.21
209,Allen County,1956,10.356667,0.27975,40.99,-85.21
210,Allen County,1957,10.082083,0.3555,40.99,-85.21


In [34]:
join_avg_year_temp.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 5917 entries, 206 to 22256
Data columns (total 6 columns):
countyname                       5917 non-null object
year                             5917 non-null object
AverageTemperature               5917 non-null float64
AverageTemperatureUncertainty    5917 non-null float64
lat_n                            5917 non-null float64
lon_n                            5917 non-null float64
dtypes: float64(4), object(2)
memory usage: 323.6+ KB


In [35]:
join = pd.merge(disaster_county, join_avg_year_temp, how='left', left_on=['County','Year'], right_on=['countyname','year'])
join

Unnamed: 0,Declaration Number,Declaration Type,Declaration Date,State,County,Disaster Type,Disaster Title,Start Date,End Date,Close Date,...,Individuals & Households Program,Public Assistance Program,Hazard Mitigation Program,Year,countyname,year,AverageTemperature,AverageTemperatureUncertainty,lat_n,lon_n
0,DR-1,Disaster,05/02/1953,GA,,Tornado,Tornado,05/02/1953,05/02/1953,06/01/1954,...,No,Yes,Yes,1953,,,,,,
1,DR-2,Disaster,05/15/1953,TX,,Tornado,Tornado and Heavy Rainfall,05/15/1953,05/15/1953,01/01/1958,...,No,Yes,Yes,1953,,,,,,
2,DR-3,Disaster,05/29/1953,LA,,Flood,Flood,05/29/1953,05/29/1953,02/01/1960,...,No,Yes,Yes,1953,,,,,,
3,DR-4,Disaster,06/02/1953,MI,,Tornado,Tornado,06/02/1953,06/02/1953,02/01/1956,...,No,Yes,Yes,1953,,,,,,
4,DR-5,Disaster,06/06/1953,MT,,Flood,Floods,06/06/1953,06/06/1953,12/01/1955,...,No,Yes,Yes,1953,,,,,,
5,DR-6,Disaster,06/09/1953,MI,,Tornado,Tornado,06/09/1953,06/09/1953,03/30/1956,...,No,Yes,Yes,1953,,,,,,
6,DR-7,Disaster,06/11/1953,MA,,Tornado,Tornado,06/11/1953,06/11/1953,06/01/1956,...,No,Yes,Yes,1953,,,,,,
7,DR-8,Disaster,06/11/1953,IA,,Flood,Flood,06/11/1953,06/11/1953,11/01/1955,...,No,Yes,Yes,1953,,,,,,
8,DR-9,Disaster,06/19/1953,TX,,Flood,Flood,06/19/1953,06/19/1953,01/01/1958,...,No,Yes,Yes,1953,,,,,,
9,DR-11,Disaster,07/02/1953,NH,,Fire,Forest Fire,07/02/1953,07/02/1953,02/01/1956,...,No,Yes,Yes,1953,,,,,,


In [36]:
join.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 46185 entries, 0 to 46184
Data columns (total 21 columns):
Declaration Number                  46185 non-null object
Declaration Type                    46185 non-null object
Declaration Date                    46185 non-null object
State                               46185 non-null object
County                              45988 non-null object
Disaster Type                       46185 non-null object
Disaster Title                      46185 non-null object
Start Date                          46185 non-null object
End Date                            45843 non-null object
Close Date                          35210 non-null object
Individual Assistance Program       46185 non-null object
Individuals & Households Program    46185 non-null object
Public Assistance Program           46185 non-null object
Hazard Mitigation Program           46185 non-null object
Year                                46185 non-null object
countyname           

In [37]:
avg_year_temp.shape

(22257, 6)

In [38]:
disaster_county.shape

(46185, 15)

In [39]:
# sample_county = ['Allen','Bibb','Briscoe','Buchanan','Carroll']
# sample_df = temp_county[(temp_county['county'].isin(sample_county)) & (temp_county['year'].str.startswith('191'))]
# sample_df.head()

In [40]:
countylist = avg_year_temp['countyname'].drop_duplicates().tolist()

In [41]:
modified_df = disaster_county[disaster_county['County'].isin(countylist)]
modified_df.sort_values('Year')

Unnamed: 0,Declaration Number,Declaration Type,Declaration Date,State,County,Disaster Type,Disaster Title,Start Date,End Date,Close Date,Individual Assistance Program,Individuals & Households Program,Public Assistance Program,Hazard Mitigation Program,Year
195,DR-183,Disaster,12/24/1964,CA,Nevada County,Flood,Heavy Rains and Flooding,12/24/1964,12/24/1964,04/05/1976,Yes,No,Yes,Yes,1964
288,DR-186,Disaster,12/31/1964,ID,Lincoln County,Flood,Heavy Rains and Flooding,12/31/1964,12/31/1964,02/14/1968,Yes,No,Yes,Yes,1964
264,DR-185,Disaster,12/29/1964,WA,Spokane County,Flood,Heavy Rains and Flooding,12/29/1964,12/29/1964,02/02/1968,Yes,No,Yes,Yes,1964
255,DR-185,Disaster,12/29/1964,WA,King County,Flood,Heavy Rains and Flooding,12/29/1964,12/29/1964,02/02/1968,Yes,No,Yes,Yes,1964
250,DR-185,Disaster,12/29/1964,WA,Clark County,Flood,Heavy Rains and Flooding,12/29/1964,12/29/1964,02/02/1968,Yes,No,Yes,Yes,1964
245,DR-184,Disaster,12/24/1964,OR,Washington County,Flood,Heavy Rains and Flooding,12/24/1964,12/24/1964,08/12/1968,Yes,No,Yes,Yes,1964
295,DR-186,Disaster,12/31/1964,ID,Washington County,Flood,Heavy Rains and Flooding,12/31/1964,12/31/1964,02/14/1968,Yes,No,Yes,Yes,1964
232,DR-184,Disaster,12/24/1964,OR,Lincoln County,Flood,Heavy Rains and Flooding,12/24/1964,12/24/1964,08/12/1968,Yes,No,Yes,Yes,1964
231,DR-184,Disaster,12/24/1964,OR,Lane County,Flood,Heavy Rains and Flooding,12/24/1964,12/24/1964,08/12/1968,Yes,No,Yes,Yes,1964
227,DR-184,Disaster,12/24/1964,OR,Jefferson County,Flood,Heavy Rains and Flooding,12/24/1964,12/24/1964,08/12/1968,Yes,No,Yes,Yes,1964


In [42]:
modified_join = pd.merge(modified_df, avg_year_temp, left_on=['County','Year'], right_on=['countyname','year'], how='left')
modified_join.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 5430 entries, 0 to 5429
Data columns (total 21 columns):
Declaration Number                  5430 non-null object
Declaration Type                    5430 non-null object
Declaration Date                    5430 non-null object
State                               5430 non-null object
County                              5430 non-null object
Disaster Type                       5430 non-null object
Disaster Title                      5430 non-null object
Start Date                          5430 non-null object
End Date                            5381 non-null object
Close Date                          4103 non-null object
Individual Assistance Program       5430 non-null object
Individuals & Households Program    5430 non-null object
Public Assistance Program           5430 non-null object
Hazard Mitigation Program           5430 non-null object
Year                                5430 non-null object
countyname                          50

### test code with Allen County

In [43]:
# allen = temp_county_year[temp_county_year['countyname'] == 'Allen County']
# allen

In [44]:
# allen.shape

In [45]:
# join = pd.merge(allen, disaster_county, how='left', left_on=['countyname','year'], right_on=['County','year'])
# join

In [46]:
# join.shape


## analyze county names to merge on

In [47]:
# disaster_df.info()

In [48]:
# disaster_df2 = disaster_df.copy()
# disaster_df2.head()

In [49]:
# disaster_df2['Year'] = [date[-4:]
#  for date in disaster_df2.loc[:,'Declaration Date']]

In [50]:
# disaster_df2.info()

In [51]:
# avg_year_temp.head()

In [52]:
# join = pd.merge(disaster_df2, avg_year_temp, how='left', left_on=['County','Year'], right_on=['countyname','year'])
# join

In [53]:
# join.info()

In [54]:
# disaster_df2 = disaster_df.copy()

In [55]:
#disaster_df2['Year'] = [date[-4:]
# for date in disaster_df2.loc[:,'Declaration Date']]

In [56]:
#disaster_df2 = disaster_df2.dropna(subset=['County'])

In [57]:
#disaster_df2.info()

In [58]:
#disaster_df2.head()

In [59]:
#dis_county = disaster_df2[['County','Year']]
#dis_county.head()

In [60]:
#temp_county2 = temp_county[['countyname','year']]
#temp_county2.head()

In [61]:
#countyjoin = pd.merge(dis_county, temp_county2, how='left', left_on=['County','Year'], right_on=['countyname','year'])
#countyjoin

In [62]:
#countyjoin.info()

In [63]:
#countyjoin.sort_values(['County','countyname'])

## misc

In [64]:
# plot of the 97 counties average temperature over every year. 
# fig, ax = plt.subplots()
# temp_county.groupby(['year','county']).mean()['AverageTemperature'].unstack().plot(ax=ax, legend=False)

In [65]:
# test code: 5 counties over 10 years 
# sample_county = ['Allen','Bibb','Briscoe','Buchanan','Carroll']
# sample_df = temp_county[(temp_county['county'].isin(sample_county)) & (temp_county['year'].str.startswith('191'))]
# sample_df.head()

In [66]:
# fig, ax = plt.subplots()
# sample_df.groupby(['year','county']).mean()['AverageTemperature'].unstack().plot(ax=ax)

In [67]:
# temp_county.groupby(['state','county']).count().head()
# temp_county[['state','county']].nunique()

In [68]:
# state and county values we need to merge on
# pd.set_option("display.max_rows", None)
# state_county = temp_county[['state','county']].drop_duplicates().sort_values('state')
# state_county['countyname'] = state_county.county +' County'
# state_county