# Task 3: Juracan (Analyzing Storm Data)

### Hannah Jensen

Instructions:  
 Part 4. RISK-PROFILE ANALYSIS (Part 1)
 
 A. Research and Identify potential natural factors influencing hurricane activity. Rank factors in order of perceived severity.
 1. Sea surface temperatures
 2. El Niño/La Niña patterns
 3. Atlantic Multidecadal Oscillation
 4. Saharan dust levels
 5. Upper-level wind patterns
 
 B. Perform spatial correlation analysis between identified factors and hurricane
 activity.

In [42]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy
import seaborn as sb

In [43]:
city_coords_path = 'data/cities_with_coordinats.pkl'
city_coords_data = pd.read_pickle(city_coords_path)
city_coords_data.head()

Unnamed: 0,City Name,Country,Latitude,Longitude
0,New Orleans,USA,29.975998,-90.078213
1,Houston,USA,29.758938,-95.367697
2,Tampa,USA,27.94776,-82.458444
3,Miami,USA,25.774173,-80.19362
4,Corpus Christi,USA,27.76353,-97.403319


In [44]:
d_storm_w_cities_path = 'data/detailed_storm_data_with_cities.pkl'
d_storm_w_cities_data = pd.read_pickle(d_storm_w_cities_path)
d_storm_w_cities_data.head()

Unnamed: 0,time,extra_obs,special,type,lat,lon,vmax,mslp,wmo_basin,storm_id,storm_name,Storm ID,Datetime,City,Distance (km)
0,1851-06-25 00:00:00,0,,HU,28.0,-94.8,80.0,,north_atlantic,AL011851,UNNAMED,,NaT,,
1,1851-06-25 06:00:00,0,,HU,28.0,-95.4,80.0,,north_atlantic,AL011851,UNNAMED,,NaT,,
2,1851-06-25 12:00:00,0,,HU,28.0,-96.0,80.0,,north_atlantic,AL011851,UNNAMED,,NaT,,
3,1851-06-25 18:00:00,0,,HU,28.1,-96.5,80.0,,north_atlantic,AL011851,UNNAMED,AL011851,1851-06-25 18:00:00,Corpus Christi,96.307258
4,1851-06-25 21:00:00,1,L,HU,28.2,-96.8,80.0,,north_atlantic,AL011851,UNNAMED,AL011851,1851-06-25 21:00:00,Corpus Christi,76.584748


In [45]:
sea_surface_temp_path = 'external data/sea-surface-temp_fig-1.csv'
sea_surf_temp_data = pd.read_csv(sea_surface_temp_path)
sea_surf_temp_data.head()

Unnamed: 0,Year,Annual anomaly,Lower 95% confidence interval,Upper 95% confidence interval
0,1880,-0.418,-0.626,-0.21
1,1881,-0.33,-0.531,-0.128
2,1882,-0.344,-0.541,-0.146
3,1883,-0.459,-0.653,-0.265
4,1884,-0.589,-0.797,-0.381


In [46]:
elNino_laNina_path = 'external data/el-nino-la-nina_fig-1.csv'
elNino_laNina_data = pd.read_csv(elNino_laNina_path)
elNino_laNina_data.head()

Unnamed: 0,Season,ENSO Type,JJA,JAS,ASO,SON,OND,NDJ,DJF,JFM,FMA,MAM,AMJ,MJJ
0,1950-1951,ME,-0.5,-0.4,-0.4,-0.4,-0.6,-0.8,-0.8,-0.5,-0.2,0.2,0.4,0.6
1,1951-1952,ME,0.7,0.9,1.0,1.2,1.0,0.8,0.5,0.4,0.3,0.3,0.2,0.0
2,1952-1953,WE,-0.1,0.0,0.2,0.1,0.0,0.1,0.4,0.6,0.6,0.7,0.8,0.8
3,1953-1954,WE,0.7,0.7,0.8,0.8,0.8,0.8,0.8,0.5,0.0,-0.4,-0.5,-0.5
4,1954-1955,WL,-0.6,-0.8,-0.9,-0.8,-0.7,-0.7,-0.7,-0.6,-0.7,-0.8,-0.8,-0.7


In [47]:
AMO_path = 'external data/amon-sm-long-data_fig-1.csv'
AMO_data = pd.read_csv(AMO_path)
# -99.99 represents missing data, 'Atlantic Multidecadal Oscillation Index (Monthly)'
AMO_data.head()

Unnamed: 0,Year,1,2,3,4,5,6,7,8,9,10,11,12
0,1856,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99
1,1857,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99
2,1858,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99
3,1859,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99
4,1860,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99


In [50]:
df1 = pd.DataFrame(d_storm_w_cities_data)
df2 = pd.DataFrame(sea_surf_temp_data.iloc[:,:2])
df3 = pd.DataFrame(elNino_laNina_data.iloc[:,:2])
df4 = pd.DataFrame(AMO_data)

# Formatting the storm data, preparing to merge with the new variables
df1 = df1.dropna(subset = ['Datetime'])
df1['Year'] = (pd.to_datetime(df1['Datetime']).dt.year).astype(int)
df1['Month'] = (pd.to_datetime(df1['Datetime']).dt.month).astype(int)

# Formatting SST (Sea Surface Temperature)
df2['Year'] = (df2['Year']).astype(int)
df2.rename(columns={'Annual anomaly': 'SST'}, inplace=True)

# Formatting ENSO (El Nino-Southern Oscillation)
df3['Year'] = (df3['Season'].str[:4]).astype(int)
df3 = df3.drop('Season', axis=1)
df3.rename(columns={'ENSO Type': 'ENSO'}, inplace=True)

# Formatting AMO (Atlantic Multidecadal Oscillation)
df4 = pd.melt(df4, id_vars=['Year'], var_name='Month', value_name='AMO')
df4['Year'] = (df4['Year']).astype(int)
df4['Month'] = (df4['Month']).astype(int)


In [51]:
# Including the SST and ENSO data by Year
merged_df_p1 = pd.merge(df1, df2, on='Year', how='left')
merged_df_p2 = pd.merge(merged_df_p1, df3, on='Year', how='left')

In [53]:
# Including the AMO data by Year and Month
merged_df_p3 = pd.merge(merged_df_p2, df4, on=['Year', 'Month'], how='left')
merged_df_p3['AMO'] = merged_df_p3['AMO'].replace(-99.99, np.nan)

# Removing rows with blank entries for all columns
df_cleaned = merged_df_p3.dropna(subset=['SST', 'ENSO', 'AMO'], how='all')
print(df_cleaned.head())
print(df_cleaned.columns)

                  time  extra_obs special type   lat   lon  vmax  mslp  \
71 1861-08-15 12:00:00          0           HU  23.7 -80.8  80.0   NaN   
72 1861-08-15 18:00:00          0           HU  23.9 -81.5  80.0   NaN   
73 1861-08-15 18:00:00          0           HU  23.9 -81.5  80.0   NaN   
74 1861-08-16 00:00:00          0           HU  24.2 -82.0  80.0   NaN   
75 1861-08-16 06:00:00          0           HU  24.6 -82.5  80.0   NaN   

         wmo_basin  storm_id storm_name  Storm ID            Datetime  \
71  north_atlantic  AL021861    UNNAMED  AL021861 1861-08-15 12:00:00   
72  north_atlantic  AL021861    UNNAMED  AL021861 1861-08-15 18:00:00   
73  north_atlantic  AL021861    UNNAMED  AL021861 1861-08-15 18:00:00   
74  north_atlantic  AL021861    UNNAMED  AL021861 1861-08-16 00:00:00   
75  north_atlantic  AL021861    UNNAMED  AL021861 1861-08-16 06:00:00   

        City  Distance (km)  Year  Month  SST ENSO    AMO  
71  Varadero      77.129515  1861      8  NaN  NaN  0.05

In [None]:
df = df_cleaned.loc[:, ['City', 'lat', 'lon', 'vmax', 'mslp', 'Distance (km)', 'Year', 'Month', 'SST', 'ENSO', 'AMO']] # df_cleaned.copy()

#WE=Weak El Niño, ME=Moderate El Niño, SE=Strong El Niño, VSE=Very Strong El Niño WL=Weak La Niña, ML=Moderate La Niña, SL=Strong La Niña
# Transforming the ENSO value to be usable for our model (since the research indicated that La Niña was positively correlated with hurricanes, we factorize accordingly)

df.head()

  df['ENSO'] = df['ENSO'].replace(key, factorize_X2[key])


Unnamed: 0,City,lat,lon,vmax,mslp,Distance (km),Year,Month,SST,ENSO,AMO
71,Varadero,23.7,-80.8,80.0,,77.129515,1861,8,,,0.420123
72,Key West,23.9,-81.5,80.0,,78.993645,1861,8,,,0.420123
73,Varadero,23.9,-81.5,80.0,,86.955948,1861,8,,,0.420123
74,Key West,24.2,-82.0,80.0,,44.255441,1861,8,,,0.420123
75,Key West,24.6,-82.5,80.0,,70.753388,1861,8,,,0.420123


In [76]:
# Filling in empty values either as 0 or using the mean (might be a better way to interpolate these values or something)
# Could also exclude rows where at least one is N/A, rather than all three. Should still be plenty of entries
df['SST'] = df['SST'].fillna(df['SST'].mean())
df['ENSO'] = df['ENSO'].fillna(0)
df['AMO'] = df['AMO'].fillna(df['AMO'].mean())

# Doing a z-score normalization on all of the attributes we are using:
for Xn in ['Distance (km)', 'SST', 'ENSO', 'AMO']:
    mean = df[Xn].mean()
    std_dev = df[Xn].std()
    df[Xn] = (df[Xn] - mean) / std_dev

# TODO: Could change weights for attributes that are more indicative of hurricane formation
df['Risk Score'] = (df['Distance (km)'] + df['SST'] + df['ENSO'] + df['AMO']) / 4

# Get cumulative risk scores for each city
cities_risks = {}

for city in df['City'].unique():
    cities_risks[city] = df[df['City'] == city]['Risk Score'].sum()

for city in cities_risks:
    print(city, ":", cities_risks[city])


Varadero : 4.6774382082892805
Key West : -3.5966243472337895
Miami : -12.973725386181702
Tampico : -4.629014913768138
Houston : 6.3492871273458675
Galveston : 0.30766949462936743
New Orleans : 7.128217509671061
Biloxi : -12.285483511851911
Belize City : -21.002171034226695
Progreso : -7.42312189103718
Cancun : -9.290795488786681
Havana : 5.34763104722829
Corpus Christi : 3.0688991176003086
Campeche : 0.2415621570468141
Veracruz : 9.838307494167235
Nassau : -6.350251248910359
Pensacola : 9.26516747315302
Mobile : 11.94765178964235
Cienfuegos : 2.218413642447909
Tampa : -3.0051606712691052
Tuxpan : 5.91365855100899
Coatzacoalcos : 5.031974471625798
Ciudad del Carmen : 8.333316309136142
Merida : 0.8871541002731362
