# Named Storms in the Atlantic Basin

In [3]:
# loading packages and data
import altair
import pandas
import numpy

NAMED_STORMS_CSV = "data/Named Storm Data - since 1950.csv"

named_storms = pandas.read_csv(NAMED_STORMS_CSV)
named_storms.head()

Unnamed: 0,index,Year,Storm Name,Start Date,End Date,Dates,Max Wind Speed (mph),Min pressure (mb),Storm Type
0,0,1950,ABLE,08-12-50,08-24-50,8/12 - 8/24,110,953.0,Hurricane
1,1,1950,BAKER,08-18-50,09-01-50,8/18 - 9/1,90,979.0,Hurricane
2,2,1950,CHARLIE,08-21-50,09-05-50,8/21 - 9/5,95,974.0,Hurricane
3,3,1950,EASY,09-01-50,09-09-50,9/1 - 9/9,105,960.0,Hurricane
4,4,1950,FOX,09-08-50,09-17-50,9/8 - 9/17,120,946.0,Hurricane


In [4]:
## cleaning data

# removing bad data at the end of the CSV file
named_storms = named_storms[:808]

# converting data types
named_storms["Year"] = named_storms["Year"].astype("Int64")
named_storms["Max Wind Speed (mph)"] = named_storms["Max Wind Speed (mph)"].astype("Int64")
named_storms["Min pressure (mb)"] = named_storms["Min pressure (mb)"].astype("Int64")

# filtering for all years between 1950 and 2020
named_storms = named_storms[(named_storms["Year"] >= 1950) & (named_storms["Year"] <= 2020)]

# converting dates - this is a bit tricky because the dates are in the format MM-DD-YY
start_date_split = named_storms["Start Date"].apply(lambda x: x.split("-"))
named_storms["Start Year"] = start_date_split.apply(lambda x: '19' + x[2] if int(x[2]) > 30 else '20' + x[2])
named_storms["Start Month"] = start_date_split.apply(lambda x: x[0])
named_storms["Start Day"] = start_date_split.apply(lambda x: x[1])
named_storms["My Start Date"] = named_storms[["Start Year", "Start Month", "Start Day"]].apply(lambda x: pandas.to_datetime(x[0] + '-' + x[1] + '-' + x[2]), axis=1)
named_storms.head()

end_date_split = named_storms["End Date"].apply(lambda x: x.split("-"))
named_storms["End Year"] = end_date_split.apply(lambda x: '19' + x[2] if int(x[2]) > 30 else '20' + x[2])
named_storms["End Month"] = end_date_split.apply(lambda x: x[0])
named_storms["End Day"] = end_date_split.apply(lambda x: x[1])
named_storms["My End Date"] = named_storms[["End Year", "End Month", "End Day"]].apply(lambda x: pandas.to_datetime(x[0] + '-' + x[1] + '-' + x[2]), axis=1)

# calculating duration
named_storms["Duration"] = named_storms[["My Start Date", "My End Date"]].apply(lambda x: (x[1] - x[0]).days, axis=1)

# storm categories
median_max_wind_speed = numpy.median(named_storms["Max Wind Speed (mph)"])
mean_max_wind_speed = numpy.median(named_storms["Max Wind Speed (mph)"])
major_hurricane_wind_speed = 110
hurricane_wind_speed = 75
named_storms['Max Wind Speed Above Median'] = named_storms["Max Wind Speed (mph)"].apply(lambda x: 'Yes' if x > median_max_wind_speed else 'No')
named_storms['Major Hurricane'] = named_storms["Max Wind Speed (mph)"].apply(lambda x: 'Yes' if x >= major_hurricane_wind_speed else 'No')
named_storms['Hurricane'] = named_storms["Max Wind Speed (mph)"].apply(lambda x: 'Yes' if x >= hurricane_wind_speed else 'No')
named_storms['Storm Category'] = named_storms["Max Wind Speed (mph)"].apply(
    lambda x: 'Major Hurricane' if x >= major_hurricane_wind_speed else 'Hurricane' if x >= hurricane_wind_speed else 'Tropical Storm'
)

# difference from mean min pressure
mean_min_pressure = numpy.mean(named_storms["Min pressure (mb)"])
named_storms["Mean min pressure diff"] = named_storms["Min pressure (mb)"].apply(lambda x: x - mean_min_pressure)

named_storms.head()


  named_storms["My Start Date"] = named_storms[["Start Year", "Start Month", "Start Day"]].apply(lambda x: pandas.to_datetime(x[0] + '-' + x[1] + '-' + x[2]), axis=1)
  named_storms["My End Date"] = named_storms[["End Year", "End Month", "End Day"]].apply(lambda x: pandas.to_datetime(x[0] + '-' + x[1] + '-' + x[2]), axis=1)
  named_storms["Duration"] = named_storms[["My Start Date", "My End Date"]].apply(lambda x: (x[1] - x[0]).days, axis=1)


Unnamed: 0,index,Year,Storm Name,Start Date,End Date,Dates,Max Wind Speed (mph),Min pressure (mb),Storm Type,Start Year,...,End Year,End Month,End Day,My End Date,Duration,Max Wind Speed Above Median,Major Hurricane,Hurricane,Storm Category,Mean min pressure diff
0,0,1950,ABLE,08-12-50,08-24-50,8/12 - 8/24,110,953,Hurricane,1950,...,1950,8,24,1950-08-24,12,Yes,Yes,Yes,Major Hurricane,-25.450593
1,1,1950,BAKER,08-18-50,09-01-50,8/18 - 9/1,90,979,Hurricane,1950,...,1950,9,1,1950-09-01,14,Yes,No,Yes,Hurricane,0.549407
2,2,1950,CHARLIE,08-21-50,09-05-50,8/21 - 9/5,95,974,Hurricane,1950,...,1950,9,5,1950-09-05,15,Yes,No,Yes,Hurricane,-4.450593
3,3,1950,EASY,09-01-50,09-09-50,9/1 - 9/9,105,960,Hurricane,1950,...,1950,9,9,1950-09-09,8,Yes,No,Yes,Hurricane,-18.450593
4,4,1950,FOX,09-08-50,09-17-50,9/8 - 9/17,120,946,Hurricane,1950,...,1950,9,17,1950-09-17,9,Yes,Yes,Yes,Major Hurricane,-32.450593


In [10]:
# calculate cumulative number of named storms by date, by category
# this is a bit of a hack, but it works. I should revisit this
named_storms_by_date_group_1 = named_storms[named_storms["Storm Category"] == "Tropical Storm"]
named_storms_by_date_group_1 = named_storms_by_date_group_1.groupby(["My Start Date","Storm Category","Storm Name","Max Wind Speed (mph)","Duration","Min pressure (mb)"]).size().cumsum().reset_index(name="Cumulative Count of Storms")

named_storms_by_date_group_2 = named_storms[named_storms["Storm Category"] == "Hurricane"]
named_storms_by_date_group_2 = named_storms_by_date_group_2.groupby(["My Start Date","Storm Category","Storm Name","Max Wind Speed (mph)","Duration","Min pressure (mb)"]).size().cumsum().reset_index(name="Cumulative Count of Storms")

named_storms_by_date_group_3 = named_storms[named_storms["Storm Category"] == "Major Hurricane"]
named_storms_by_date_group_3 = named_storms_by_date_group_3.groupby(["My Start Date","Storm Category","Storm Name","Max Wind Speed (mph)","Duration","Min pressure (mb)"]).size().cumsum().reset_index(name="Cumulative Count of Storms")

# union the dataframes
named_storms_by_date = pandas.concat([named_storms_by_date_group_1, named_storms_by_date_group_2, named_storms_by_date_group_3])


In [13]:
# main chart! 
chart = altair.Chart(named_storms_by_date).mark_circle().encode(
    x='My Start Date',
    y='Cumulative Count of Storms',
    tooltip=['Storm Name', 'My Start Date', 'Max Wind Speed (mph)'],
    # size='Max Wind Speed (mph)',
    color=altair.Color('Storm Category', scale=altair.Scale(scheme='category10'))
).interactive()

chart.save("visualizations/cumulative_count_of_storms_by_category.png")
chart.show()

In [20]:
# chart for max wind speed over time
chart = altair.Chart(named_storms).mark_point().encode(
    x='My Start Date',
    y='Max Wind Speed (mph)',
)
chart.save("visualizations/max_wind_speed_over_time.png")
chart.show()

In [21]:

# chart for min pressure over time
chart = altair.Chart(named_storms).mark_point().encode(
    x='My Start Date',
    y='Min pressure (mb)',
)
chart.save("visualizations/min_pressure_over_time.png")
chart.show()

In [22]:
# chart for mean min pressure diff by max wind speed
chart = altair.Chart(named_storms).mark_circle().encode(
    x='Max Wind Speed (mph)',
    y='Mean min pressure diff',
    # size='Duration',
    tooltip=['Storm Name', 'Start Date'],
    color=altair.Color('Duration', scale=altair.Scale(scheme='tealblues'))
).interactive()
chart.save("visualizations/mean_min_pressure_diff_by_max_wind_speed.png")
chart.show()
