# Import necessary packages

In [1]:
import pandas as pd
import numpy as np
import matplotlib as mpl
from matplotlib import pyplot as plt
import seaborn as sns
import datetime as dt
%matplotlib inline

from sklearn import linear_model
from sklearn import metrics
from sklearn import model_selection
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor

from scipy import stats
from statsmodels.formula.api import ols

# Raw DataFrames

In [2]:
# global land temperature data by city, with latitude/longitude values
filename = 'Global-Land-Temperatures-By-City.csv'
temp_df = pd.read_csv(filename)

In [3]:
# US federal emergency data, join on county
filename1 = 'federal_emergencies.csv'
disaster_df = pd.read_csv(filename1)

In [4]:
# data of latitude/longitude and county to merge two dataframes
filename2 = 'zip_codes_states.csv'
us_join = pd.read_csv(filename2)

# Cleaning steps

In [5]:
# temp_df: drop all countries except US and drop NaN values
temp_us = temp_df[temp_df['Country'] == 'United States'].dropna()

In [6]:
# temp_df: create new lat/long columns dropping NESW direction
temp_us['lat_n'] = [float(lat[:-1]) if lat[-1]=='N' else -1*float(lat[:-1]) for lat in temp_us.loc[:,'Latitude']]
temp_us['lon_n'] = [float(lon[:-1]) if lon[-1]=='E' else -1*float(lon[:-1]) for lon in temp_us.loc[:,'Longitude']]

In [7]:
# unique coordinates in the temp_us dataframe
temp_us_coords = temp_us[['lat_n','lon_n']].drop_duplicates()

In [8]:
# function adds a column with pythagorean theorem 
def coord2loc(coords):
    us2 = us_join.copy()
    us2['dist'] = ((us2.latitude-coords.lat_n)**2+(us2.longitude-coords.lon_n)**2)**(1/2)
    state = us2.loc[us2.dist==min(us2.dist)]['state'].values[0]
    county = us2.loc[us2.dist==min(us2.dist)]['county'].values[0]
    return([coords.lat_n,coords.lon_n,state,county])

In [9]:
# create dataframe to join on between
join = pd.DataFrame([coord2loc(coords[1]) for coords in temp_us_coords.iterrows()])
join.columns = ['lat_n','lon_n','state','county']

In [10]:
# merge temp_df and us_join
temp_county = pd.merge(temp_us, join, how='left', on = ['lat_n', 'lon_n'])

In [11]:
# add "County" to the end of the county names to join on the disaster dataframe
temp_county['countyname'] = temp_county.county +' County'

In [12]:
temp_county['dt'] = pd.to_datetime(temp_county['dt'], format='%Y/%m/%d')
temp_county['year'] = temp_county['dt'].dt.year

In [13]:
temp_county.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,City,Country,Latitude,Longitude,lat_n,lon_n,state,county,countyname,year
0,1820-01-01,2.101,3.217,Abilene,United States,32.95N,100.53W,32.95,-100.53,TX,Fisher,Fisher County,1820
1,1820-02-01,6.926,2.853,Abilene,United States,32.95N,100.53W,32.95,-100.53,TX,Fisher,Fisher County,1820
2,1820-03-01,10.767,2.395,Abilene,United States,32.95N,100.53W,32.95,-100.53,TX,Fisher,Fisher County,1820
3,1820-04-01,17.989,2.202,Abilene,United States,32.95N,100.53W,32.95,-100.53,TX,Fisher,Fisher County,1820
4,1820-05-01,21.809,2.036,Abilene,United States,32.95N,100.53W,32.95,-100.53,TX,Fisher,Fisher County,1820


In [14]:
# Extract month number for each row
temp_county_seasons = temp_county.copy()
temp_county_seasons['month'] = temp_county_seasons['dt'].dt.month

In [15]:
# Assign seasons to each date
temp_county_seasons['month'] = temp_county_seasons['month'].astype(str).astype(int)
season_map = {12: 'Winter', 1: 'Winter', 2: 'Winter', 
              3: 'Spring', 4: 'Spring', 5: 'Spring', 
              6: 'Summer', 7: 'Summer', 8: 'Summer', 
              9: 'Fall', 10: 'Fall', 11: 'Fall'}
def mapper(month):
    return season_map[month]
temp_county_seasons['season'] = temp_county_seasons['month'].apply(mapper)

In [16]:
# Assign United States common regions to the states
## Midwest Region
east_north_central_midwest_region = ['IL','IN','MI','OH','WI']
d1 = dict.fromkeys(east_north_central_midwest_region, 'east north central midwest region')

west_north_central_midwest_region = ['IA','KS','MO','MN','ND','SD','NE']
d2 = dict.fromkeys(west_north_central_midwest_region, 'west north central midwest region')

## Northeast Region
new_england_northeast_region = ['CT','ME','MA','NH','RI','VT']
d3 = dict.fromkeys(new_england_northeast_region, 'new england northeast region')

midatlantic_northeast_region = ['NY','PA','NJ']
d4 = dict.fromkeys(midatlantic_northeast_region, 'midatlantic northeast region')

## West Region
pacific_west_region = ['AK','OR','WA','CA','HI']
d5 = dict.fromkeys(pacific_west_region, 'pacific west region')

mountain_west_region = ['AZ','CO','NM','UT','NV','WY','ID','MT']
d6 = dict.fromkeys(mountain_west_region, 'mountain west region')

## South Region
west_south_central_south_region = ['AR','LA','OK','TX']
d7 = dict.fromkeys(west_south_central_south_region, 'west south central south region')

east_south_central_south_region = ['AL','MS','TN','KY']
d8 = dict.fromkeys(east_south_central_south_region, 'east south central south region')

south_atlantic_south_region = ['WV','MD','DC','DE','VA','NC','SC','GA','FL']
d9 = dict.fromkeys(south_atlantic_south_region, 'south atlantic south region')


In [17]:
temp_county_region = temp_county_seasons.copy()
d = {**d1, **d2, **d3, **d4, **d5, **d6, **d7, **d8, **d9}
temp_county_region['region'] = temp_county_region['state'].map(d)

In [18]:
temp_county_region['date_delta'] = (temp_county_region['dt'] - temp_county_region['dt'].min())  / np.timedelta64(1,'D')


#### Join for RF Model

In [19]:
temp_county_region.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,City,Country,Latitude,Longitude,lat_n,lon_n,state,county,countyname,year,month,season,region,date_delta
0,1820-01-01,2.101,3.217,Abilene,United States,32.95N,100.53W,32.95,-100.53,TX,Fisher,Fisher County,1820,1,Winter,west south central south region,27819.0
1,1820-02-01,6.926,2.853,Abilene,United States,32.95N,100.53W,32.95,-100.53,TX,Fisher,Fisher County,1820,2,Winter,west south central south region,27850.0
2,1820-03-01,10.767,2.395,Abilene,United States,32.95N,100.53W,32.95,-100.53,TX,Fisher,Fisher County,1820,3,Spring,west south central south region,27879.0
3,1820-04-01,17.989,2.202,Abilene,United States,32.95N,100.53W,32.95,-100.53,TX,Fisher,Fisher County,1820,4,Spring,west south central south region,27910.0
4,1820-05-01,21.809,2.036,Abilene,United States,32.95N,100.53W,32.95,-100.53,TX,Fisher,Fisher County,1820,5,Spring,west south central south region,27940.0


In [20]:
# disaster data - create year column
disasterdf = disaster_df.copy()
disasterdf['Declaration Date'] = pd.to_datetime(disasterdf['Declaration Date'], format='%m/%d/%Y')
disasterdf['Year'] = disasterdf['Declaration Date'].dt.year

# create region column
disasterdf['Region'] = disasterdf['State'].map(d)

# select columns for merge 
disasterdf = disasterdf[['Declaration Type','Declaration Date','State','County','Disaster Type','Year','Region']]


In [21]:
# disaster data - delete duplicate listing of disasters on the same day (ie. unique disaster per day per state)
disasterdf = disasterdf.drop_duplicates(subset=['Declaration Date','Disaster Type','State','Year']).sort_values('Declaration Date')

# select top 5 disasters
disasterdf = disasterdf[disasterdf['Disaster Type'].isin(['Tornado','Flood','Fire','Hurricane','Storm'])]

# create new disaster count column 
disasterdf['Disaster Count'] = disasterdf['Disaster Type']


In [22]:
# disaster data groupby, get disaster count by region, disaster type, and year
disasterdf = disasterdf.groupby(['Region','Disaster Type','Year'])[['Disaster Count']].count().unstack(fill_value=0).stack().reset_index()
disasterdf

Unnamed: 0,Region,Disaster Type,Year,Disaster Count
0,east north central midwest region,Fire,1953,0
1,east north central midwest region,Fire,1954,0
2,east north central midwest region,Fire,1955,0
3,east north central midwest region,Fire,1956,0
4,east north central midwest region,Fire,1957,0
5,east north central midwest region,Fire,1958,0
6,east north central midwest region,Fire,1959,0
7,east north central midwest region,Fire,1960,0
8,east north central midwest region,Fire,1961,0
9,east north central midwest region,Fire,1962,0


In [23]:
# copy of temperature data to organize for the join
tempdf = temp_county_region.copy()
tempdf = tempdf.sort_values('dt')

In [24]:
# select columns for join
tempdf = tempdf[['year','AverageTemperature','season','region']]

In [25]:
# temperature data - feature engineer temperature by region and year
tempdf = tempdf.groupby(['region','year']).agg({'AverageTemperature': ['mean','min','max','std']}).unstack(fill_value=0).stack().reset_index()
tempdf.columns = ["_".join(x) for x in tempdf.columns.ravel()]

In [26]:
# rename columns in temperature data frame
tempdf = tempdf.rename(columns={"region_": "region", "year_": "year"});

In [27]:
tempdf.head()

Unnamed: 0,region,year,AverageTemperature_mean,AverageTemperature_min,AverageTemperature_max,AverageTemperature_std
0,east north central midwest region,1743,3.101138,0.992,5.436,1.215334
1,east north central midwest region,1744,11.849349,-5.782,24.699,8.169105
2,east north central midwest region,1745,0.662147,-7.862,11.562,5.186148
3,east north central midwest region,1750,10.150147,-7.664,26.739,10.191365
4,east north central midwest region,1751,10.872681,-7.814,25.557,10.759784


In [28]:
# create dataframe for seasonal temperature by year and region
season = temp_county_region.pivot_table('AverageTemperature', index=['year','region'], columns='season', fill_value='NaN').reset_index()
season.head()

season,year,region,Fall,Spring,Summer,Winter
0,1743,east north central midwest region,3.10114,,,
1,1743,east south central south region,8.143,,,
2,1743,midatlantic northeast region,3.77578,,,
3,1743,new england northeast region,1.188,,,
4,1743,south atlantic south region,10.9985,,,


In [29]:
# join the temperature dataframe with the seasonal temperatures to obtain more features
join_dataframe = pd.merge(tempdf, season, left_on=['region','year'], right_on=['region','year'], how='left')
join_dataframe

Unnamed: 0,region,year,AverageTemperature_mean,AverageTemperature_min,AverageTemperature_max,AverageTemperature_std,Fall,Spring,Summer,Winter
0,east north central midwest region,1743,3.101138,0.992,5.436,1.215334,3.10114,,,
1,east north central midwest region,1744,11.849349,-5.782,24.699,8.169105,9.5243,12.7892,21.6998,-2.75593
2,east north central midwest region,1745,0.662147,-7.862,11.562,5.186148,,4.76259,,-3.43829
3,east north central midwest region,1750,10.150147,-7.664,26.739,10.191365,13.1871,9.14032,22.2995,-3.01402
4,east north central midwest region,1751,10.872681,-7.814,25.557,10.759784,16.3001,5.86155,22.1556,-3.75436
5,east north central midwest region,1752,2.954793,-10.788,11.689,6.044789,7.57381,5.73334,,-4.44278
6,east north central midwest region,1753,9.546578,-8.022,25.487,10.000526,10.2772,9.43994,22.1723,-3.70309
7,east north central midwest region,1754,9.693674,-7.825,25.307,10.953768,14.2623,2.34821,21.9211,-3.13099
8,east north central midwest region,1755,6.754024,-8.565,24.163,9.001517,10.1147,8.98146,21.3252,-3.69114
9,east north central midwest region,1756,9.992031,-6.984,25.053,10.556222,10.0938,,21.8625,-1.98023


In [30]:
disasterdf.head()

Unnamed: 0,Region,Disaster Type,Year,Disaster Count
0,east north central midwest region,Fire,1953,0
1,east north central midwest region,Fire,1954,0
2,east north central midwest region,Fire,1955,0
3,east north central midwest region,Fire,1956,0
4,east north central midwest region,Fire,1957,0


In [31]:
# join temperature and disaster dataframe 
joindf = pd.merge(disasterdf, join_dataframe, left_on=['Region','Year'], right_on=['region','year'], how='left')

In [32]:
# drop duplicate columns
joindf = joindf.drop(columns = ['region','year'])

In [33]:
joindf.head()

Unnamed: 0,Region,Disaster Type,Year,Disaster Count,AverageTemperature_mean,AverageTemperature_min,AverageTemperature_max,AverageTemperature_std,Fall,Spring,Summer,Winter
0,east north central midwest region,Fire,1953,0,11.046523,-4.808,25.87,9.009154,12.6579,9.05599,22.5851,-0.112862
1,east north central midwest region,Fire,1954,0,10.610394,-6.416,27.563,9.187059,12.1459,8.61549,22.1624,-0.48223
2,east north central midwest region,Fire,1955,0,10.50494,-6.884,27.186,10.275308,11.2949,10.6548,22.8636,-2.79356
3,east north central midwest region,Fire,1956,0,10.129736,-5.804,24.367,9.154006,11.8911,8.14903,21.6039,-1.12514
4,east north central midwest region,Fire,1957,0,10.011848,-10.172,25.552,9.295035,10.6796,9.18855,21.7707,-1.59148


In [34]:
joindf.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2860 entries, 0 to 2859
Data columns (total 12 columns):
Region                     2860 non-null object
Disaster Type              2860 non-null object
Year                       2860 non-null int64
Disaster Count             2860 non-null int64
AverageTemperature_mean    2684 non-null float64
AverageTemperature_min     2684 non-null float64
AverageTemperature_max     2684 non-null float64
AverageTemperature_std     2684 non-null float64
Fall                       2684 non-null object
Spring                     2684 non-null object
Summer                     2684 non-null object
Winter                     2684 non-null object
dtypes: float64(4), int64(2), object(6)
memory usage: 290.5+ KB


# Machine Learning

### Random Forest for disaster/region pairs

In [35]:
disasters = joindf['Disaster Type'].unique()
regions = joindf['Region'].unique()

combined = [(s, f) for s in regions for f in disasters]

In [36]:
joindf = joindf.set_index(['Region','Disaster Type']).dropna()

### Random Forest Model

In [37]:
feature_headers = joindf.drop(['Disaster Count'], axis=1).columns
target_header = 'Disaster Count'

In [38]:
def rfmodel(dataframe):
    
    # random forest regression
    X = dataframe.drop(['Disaster Count'], axis=1).values
    y = dataframe['Disaster Count'].values
    names = join_dataframe[['AverageTemperature_mean','AverageTemperature_min','AverageTemperature_max','AverageTemperature_std','Fall','Spring','Summer','Winter']]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state=42)

    # Create the regressor: 
    rf = RandomForestRegressor(n_estimators = 100, random_state = 42)

    # Fit the regressor to the training data
    rf.fit(X_train, y_train)

    # Predict on the test data: y_pred
    y_pred = rf.predict(X_test)
    
    errors = abs(y_pred - y_test)

    print('Mean Absolute Error:', round(np.mean(errors), 2))
    print('R^2 or Score:', rf.score(X_test, y_test))
    print ("Features sorted by their importance:", 
       sorted(zip(map(lambda x: round(x, 4), rf.feature_importances_), names), reverse=True))


### Mountain West Fires

In [39]:
# mountain west region / fire dataframe
mtw_fire_df = joindf.loc[('mountain west region','Fire')]

In [40]:
rfmodel(mtw_fire_df)

Mean Absolute Error: 2.29
R^2 or Score: 0.6563602152742183
Features sorted by their importance: [(0.7257, 'AverageTemperature_mean'), (0.0804, 'Winter'), (0.0631, 'AverageTemperature_std'), (0.034, 'Spring'), (0.0257, 'Summer'), (0.0243, 'AverageTemperature_min'), (0.0162, 'AverageTemperature_max'), (0.0149, 'Fall')]


### New England Northeast Storms

In [41]:
# new england northeast region / storm dataframe
nene_storm_df = joindf.loc[('new england northeast region','Storm')]

In [42]:
rfmodel(nene_storm_df)

Mean Absolute Error: 1.2
R^2 or Score: 0.35965480769230784
Features sorted by their importance: [(0.588, 'AverageTemperature_mean'), (0.1611, 'AverageTemperature_min'), (0.1265, 'Spring'), (0.0222, 'Summer'), (0.0217, 'AverageTemperature_std'), (0.0214, 'AverageTemperature_max'), (0.0169, 'Fall'), (0.0159, 'Winter')]


### Midatlantic Northeast Hurricanes

In [43]:
# midatlantic northeast region / hurricane dataframe
midne_hurricane_df = joindf.loc[('midatlantic northeast region','Hurricane')] 

In [44]:
rfmodel(midne_hurricane_df)

Mean Absolute Error: 0.65
R^2 or Score: 0.3776851764705882
Features sorted by their importance: [(0.2375, 'Winter'), (0.2084, 'Spring'), (0.185, 'AverageTemperature_std'), (0.0977, 'AverageTemperature_mean'), (0.0933, 'Summer'), (0.074, 'Fall'), (0.0457, 'AverageTemperature_min'), (0.0248, 'AverageTemperature_max')]


# Selected region/disaster pairs to examine

I will use those 3 region/disaster pairings to focus on for predicting average temperature and disaster counts.

In [45]:
columns = joindf.columns.drop('Disaster Count')

# linear regression to obtain independent variable predictions to use for random forest
def linearreg(key, column, year):
    X = joindf.loc[key][['Year']].values
    y = joindf.loc[key][column].values

    X = X.reshape(-1,1)
    y = y.reshape(-1,1)

    model = linear_model.LinearRegression()
    model.fit(X, y)
    
    return model.predict(year)
    #plt.scatter(X, y,color='r')

    #plt.plot(X, model.predict(X),color='k')
    #plt.show()

In [46]:
def rfmodel(x_var, key):
    
    # random forest regression
    X = joindf.loc[key].drop(['Disaster Count'], axis=1).values
    y = joindf.loc[key]['Disaster Count'].values
    names = join_dataframe[['AverageTemperature_mean','AverageTemperature_min','AverageTemperature_max','AverageTemperature_std','Fall','Spring','Summer','Winter']]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state=42)

    # Create the regressor: 
    rf = RandomForestRegressor(n_estimators = 100, random_state = 42)

    # Fit the regressor to the training data
    rf.fit(X_train, y_train)

    # Predict on the test data: y_pred
    y_var = rf.predict(X_test)
    
    return rf

### Mountain West Fires

#### predict 2070

In [47]:
# 2070 independent variables
x_2070 = [[linearreg(('mountain west region','Fire'), col, 2070).item() for col in columns]]

In [48]:
# random forest using predicted independent variables to predict disaster counts
rfmodel(x_2070, ('mountain west region','Fire')).predict(x_2070)

array([13.98])

#### predict 2120

In [49]:
# 2120 independent variables
x_2120 = [[linearreg(('mountain west region','Fire'), col, 2120).item() for col in columns]]

In [50]:
# random forest using predicted independent variables to predict disaster counts
rfmodel(x_2120, ('mountain west region','Fire')).predict(x_2120)

array([13.72])

#### predict 2220

In [51]:
# 2220 independent variables
x_2220 = [[linearreg(('mountain west region','Fire'), col, 2220).item() for col in columns]]

In [52]:
# random forest using predicted independent variables to predict disaster counts
rfmodel(x_2220, ('mountain west region','Fire')).predict(x_2220)

array([12.54])

### New England Northeast Storms

In [53]:
# 2070 independent variables
x_2070 = [[linearreg(('new england northeast region','Storm'), col, 2070).item() for col in columns]]

In [54]:
# random forest using predicted independent variables to predict disaster counts
rfmodel(x_2070, ('new england northeast region','Storm')).predict(x_2070)

array([7.74])

#### predict 2120

In [55]:
# 2120 independent variables
x_2120 = [[linearreg(('new england northeast region','Storm'), col, 2120).item() for col in columns]]

In [56]:
# random forest using predicted independent variables to predict disaster counts
rfmodel(x_2120, ('new england northeast region','Storm')).predict(x_2120)

array([7.41])

#### predict 2220

In [57]:
# 2220 independent variables
x_2220 = [[linearreg(('new england northeast region','Storm'), col, 2220).item() for col in columns]]

In [58]:
# random forest using predicted independent variables to predict disaster counts
rfmodel(x_2220, ('new england northeast region','Storm')).predict(x_2220)

array([7.39])

###  Midatlantic Northeast Hurricanes

In [59]:
# 2070 independent variables
x_2070 = [[linearreg(('midatlantic northeast region','Hurricane'), col, 2070).item() for col in columns]]

In [60]:
# random forest using predicted independent variables to predict disaster counts
rfmodel(x_2070, ('midatlantic northeast region','Hurricane')).predict(x_2070)

array([3.9])

#### predict 2120

In [61]:
# 2120 independent variables
x_2120 = [[linearreg(('midatlantic northeast region','Hurricane'), col, 2120).item() for col in columns]]

In [62]:
# random forest using predicted independent variables to predict disaster counts
rfmodel(x_2120, ('midatlantic northeast region','Hurricane')).predict(x_2120)

array([4.02])

#### predict 2220

In [63]:
# 2220 independent variables
x_2220 = [[linearreg(('midatlantic northeast region','Hurricane'), col, 2220).item() for col in columns]]

In [64]:
# random forest using predicted independent variables to predict disaster counts
rfmodel(x_2220, ('midatlantic northeast region','Hurricane')).predict(x_2220)

array([4.04])

### look at mulitple linear regressions for certain disaster / region pair

In [65]:
# train and test data for modeling
def lrvalues(key):
    X = joindf.loc[key].drop(['Disaster Count'], axis=1).values
    y = joindf.loc[key]['Disaster Count'].values
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)


    lm = linear_model.LinearRegression()
    lm.fit(X_train, y_train)
    
    y_pred = lm.predict(X_test)
    
    errors = abs(y_pred - y_test)
    
    return ("R^2 (Score): {}".format(lm.score(X_test, y_test))), "Mean Absolute Error: {}".format(round(np.mean(errors), 2))


In [66]:
# train and test data for modeling
def lrmodel(key):
    X = joindf.loc[key].drop(['Disaster Count'], axis=1).values
    y = joindf.loc[key]['Disaster Count'].values
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)


    lm = linear_model.LinearRegression()
    lm.fit(X_train, y_train)
    
    y_pred = lm.predict(X_test)
    
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    
    return lm



### Mountain West Fires

In [67]:
lrvalues(('mountain west region', 'Fire'))

('R^2 (Score): 0.4422854289567397', 'Mean Absolute Error: 4.53')

#### predict 2070

In [68]:
# 2070 independent variables
x_2070 = [[linearreg(('mountain west region', 'Fire'), col, 2070).item() for col in columns]]

In [69]:
# random forest using predicted independent variables to predict disaster counts
lrmodel(('mountain west region', 'Fire')).predict(x_2070)

array([22.54352105])

#### predict 2120

In [70]:
# 2120 independent variables
x_2120 = [[linearreg(('mountain west region', 'Fire'), col, 2120).item() for col in columns]]

In [71]:
# random forest using predicted independent variables to predict disaster counts
lrmodel(('mountain west region', 'Fire')).predict(x_2120)

array([33.71324113])

#### predict 2220

In [72]:
# 2220 independent variables
x_2220 = [[linearreg(('mountain west region', 'Fire'), col, 2220).item() for col in columns]]

In [73]:
# random forest using predicted independent variables to predict disaster counts
lrmodel(('mountain west region', 'Fire')).predict(x_2220)

array([56.05268131])

### New England Northeast Storms

In [74]:
lrvalues(('new england northeast region','Storm'))

('R^2 (Score): 0.016837948297870886', 'Mean Absolute Error: 1.84')

#### predict 2070

In [75]:
# 2070 independent variables
x_2070 = [[linearreg(('new england northeast region','Storm'), col, 2070).item() for col in columns]]

In [76]:
# random forest using predicted independent variables to predict disaster counts
lrmodel(('new england northeast region','Storm')).predict(x_2070)

array([10.55488549])

#### predict 2120

In [77]:
# 2120 independent variables
x_2120 = [[linearreg(('new england northeast region','Storm'), col, 2120).item() for col in columns]]

In [78]:
# random forest using predicted independent variables to predict disaster counts
lrmodel(('new england northeast region','Storm')).predict(x_2120)

array([15.76451569])

#### predict 2220

In [79]:
# 2220 independent variables
x_2220 = [[linearreg(('new england northeast region','Storm'), col, 2220).item() for col in columns]]

In [80]:
# random forest using predicted independent variables to predict disaster counts
lrmodel(('new england northeast region','Storm')).predict(x_2220)

array([26.18377609])

###  Midatlantic Northeast Hurricanes

In [81]:
lrvalues(('midatlantic northeast region','Hurricane'))

('R^2 (Score): 0.25293892134807916', 'Mean Absolute Error: 0.91')

#### predict 2070

In [82]:
# 2070 independent variables
x_2070 = [[linearreg(('midatlantic northeast region','Hurricane'), col, 2070).item() for col in columns]]

In [83]:
# random forest using predicted independent variables to predict disaster counts
lrmodel(('midatlantic northeast region','Hurricane')).predict(x_2070)

array([1.82226231])

#### predict 2120

In [84]:
# 2120 independent variables
x_2120 = [[linearreg(('midatlantic northeast region','Hurricane'), col, 2120).item() for col in columns]]

In [85]:
# random forest using predicted independent variables to predict disaster counts
lrmodel(('midatlantic northeast region','Hurricane')).predict(x_2120)

array([2.58658222])

#### predict 2220

In [86]:
# 2220 independent variables
x_2220 = [[linearreg(('midatlantic northeast region','Hurricane'), col, 2220).item() for col in columns]]

In [87]:
# random forest using predicted independent variables to predict disaster counts
lrmodel(('midatlantic northeast region','Hurricane')).predict(x_2220)

array([4.11522203])

### look at single linear regressions for certain disaster / region pair

In [88]:
# train and test data for modeling
def singlelrvalues(key):
    X = joindf.loc[key]['AverageTemperature_mean'].values.reshape(-1,1)
    y = joindf.loc[key]['Disaster Count'].values.reshape(-1,1)
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)


    lm = linear_model.LinearRegression()
    lm.fit(X_train, y_train)
    
    y_pred = lm.predict(X_test)
    
    errors = abs(y_pred - y_test)
    
    return ("R^2 (Score): {}".format(lm.score(X_test, y_test))), "Mean Absolute Error: {}".format(round(np.mean(errors), 2))


In [89]:
# train and test data for modeling
def singlelrmodel(key):
    X = joindf.loc[key]['AverageTemperature_mean'].values.reshape(-1,1)
    y = joindf.loc[key]['Disaster Count'].values.reshape(-1,1)
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)


    lm = linear_model.LinearRegression()
    lm.fit(X_train, y_train)
    
    y_pred = lm.predict(X_test)
    
    return lm



### Mountain West Fires

In [90]:
singlelrvalues(('mountain west region', 'Fire'))

('R^2 (Score): 0.14879969432065432', 'Mean Absolute Error: 5.38')

#### predict 2070

In [91]:
# 2070 independent variables
x_2070 = linearreg(('mountain west region', 'Fire'), 'AverageTemperature_mean', 2070)

In [92]:
# random forest using predicted independent variables to predict disaster counts
singlelrmodel(('mountain west region', 'Fire')).predict(x_2070)

array([[9.12095369]])

#### predict 2120

In [93]:
# 2120 independent variables
x_2120 = linearreg(('mountain west region', 'Fire'), 'AverageTemperature_mean', 2120)

In [94]:
# random forest using predicted independent variables to predict disaster counts
singlelrmodel(('mountain west region', 'Fire')).predict(x_2120)

array([[12.57059004]])

#### predict 2220

In [95]:
# 2220 independent variables
x_2220 = linearreg(('mountain west region', 'Fire'), 'AverageTemperature_mean', 2220)

In [96]:
# random forest using predicted independent variables to predict disaster counts
singlelrmodel(('mountain west region', 'Fire')).predict(x_2220)

array([[19.46986275]])

### New England Northeast Storms

In [97]:
singlelrvalues(('new england northeast region', 'Storm'))

('R^2 (Score): -0.12206284165732617', 'Mean Absolute Error: 1.78')

#### predict 2070

In [98]:
# 2070 independent variables
x_2070 = linearreg(('new england northeast region', 'Storm'), 'AverageTemperature_mean', 2070)

In [99]:
# random forest using predicted independent variables to predict disaster counts
singlelrmodel(('new england northeast region', 'Storm')).predict(x_2070)

array([[6.1015616]])

#### predict 2120

In [100]:
# 2120 independent variables
x_2120 = linearreg(('new england northeast region', 'Storm'), 'AverageTemperature_mean', 2120)

In [101]:
# random forest using predicted independent variables to predict disaster counts
singlelrmodel(('new england northeast region', 'Storm')).predict(x_2120)

array([[8.66634097]])

#### predict 2220

In [102]:
# 2220 independent variables
x_2220 = linearreg(('new england northeast region', 'Storm'), 'AverageTemperature_mean', 2220)

In [103]:
# random forest using predicted independent variables to predict disaster counts
singlelrmodel(('new england northeast region', 'Storm')).predict(x_2220)

array([[13.79589971]])

### Midatlantic Hurricane

In [104]:
singlelrvalues(('midatlantic northeast region', 'Hurricane'))

('R^2 (Score): 0.22909470780608177', 'Mean Absolute Error: 0.82')

#### predict 2070

In [105]:
# 2070 independent variables
x_2070 = linearreg(('midatlantic northeast region', 'Hurricane'), 'Winter', 2070)

In [106]:
# random forest using predicted independent variables to predict disaster counts
singlelrmodel(('midatlantic northeast region', 'Hurricane')).predict(x_2070)

array([[-4.4499376]])

#### predict 2120

In [107]:
# 2120 independent variables
x_2120 = linearreg(('midatlantic northeast region', 'Hurricane'), 'Winter', 2120)

In [108]:
# random forest using predicted independent variables to predict disaster counts
singlelrmodel(('midatlantic northeast region', 'Hurricane')).predict(x_2120)

array([[-3.70790503]])

#### predict 2220

In [109]:
# 2220 independent variables
x_2220 = linearreg(('midatlantic northeast region', 'Hurricane'), 'Winter', 2220)

In [110]:
# random forest using predicted independent variables to predict disaster counts
singlelrmodel(('midatlantic northeast region', 'Hurricane')).predict(x_2220)

array([[-2.22383989]])