# Hurricane Modeling

*Week 5*

Here we import the HURDAT2 dataset from NOAA. It contains detailed position and wind-speed data for Atlantic hurricanes dating back to 1851.

In [1]:
# Standard imports
import linecache
import os
import urllib.request

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Retrieve the HURDAT2 dataset from NOAA.
# The download is skipped when a copy is already on disk, so "Restart & Run All"
# does not hit the NOAA server every time and the notebook also runs offline.
# Delete the local file to force a fresh download.
url = 'https://www.nhc.noaa.gov/data/hurdat/hurdat2-1851-2018-120319.txt'
rawfile = 'hurdat2.txt'
if not os.path.exists(rawfile):
    # Download under a temporary name and rename only on success, so an
    # interrupted transfer never leaves a truncated file that a later run
    # would mistake for a complete copy.
    partial = rawfile + '.part'
    urllib.request.urlretrieve(url, partial)
    os.replace(partial, rawfile)

('hurdat2.txt', <http.client.HTTPMessage at 0x7fc2e870a5d0>)

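The raw file is not a plain CSV: each storm starts with a header line (storm ID, name, and number of records), followed by that many data rows. We pre-process it so that every data row carries its storm's ID and name, which makes the file easy to load into pandas.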

In [2]:
# Clean up the file:
# Make each row have its hurricane ID and name

cleanfile = 'hurdat2_clean.txt'


def clean_hurdat2(source_path, dest_path):
    """Flatten a raw HURDAT2 file so every data row carries its storm's ID and name.

    The input is read as a sequence of storms: one header line
    ("<id>, <name>, <number of rows>,") followed by that many data rows.
    Each data row is written to ``dest_path`` as

        <running row number>, <id>, <name>, <YYYY-MM-DD HH:MM>, <rest of row>

    where the timestamp is assembled from the fixed-width date and time fields
    at the start of the row and the trailing comma of the raw row is dropped.

    Parameters
    ----------
    source_path : str
        Path of the raw file.
    dest_path : str
        Path of the cleaned file to create (overwritten if it exists).

    Returns
    -------
    int
        Number of data rows written.

    Raises
    ------
    ValueError
        If the file ends before a storm's declared number of rows was read.
    """
    written = 0
    with open(source_path, 'r') as source, open(dest_path, 'w') as dest:
        # Single pass: the file object is its own iterator, so the inner
        # next() calls consume the data rows and the outer loop only ever
        # sees header lines.
        for header in source:
            if not header.strip():
                continue  # tolerate blank lines, e.g. at the end of the file
            heads = [field.strip() for field in header.split(',')]
            storm_id, storm_name, rows = heads[0], heads[1], int(heads[2])
            for _ in range(rows):
                r = next(source, None)
                if r is None:
                    raise ValueError(
                        'Unexpected end of file: storm ' + storm_id
                        + ' declares ' + str(rows) + ' rows'
                    )
                # Drop the line ending and the trailing field separator.
                r = r.rstrip()
                if r.endswith(','):
                    r = r[:-1]
                # Columns 0-7 hold YYYYMMDD, columns 10-13 hold HHMM.
                stamp = r[0:4] + '-' + r[4:6] + '-' + r[6:8] + ' ' + r[10:12] + ':' + r[12:14]
                dest.write(str(written) + ', ' + storm_id + ', ' + storm_name + ', ' + stamp + r[14:] + '\n')
                written += 1
    return written


# Total number of data rows written to the cleaned file
count = clean_hurdat2(rawfile, cleanfile)

In [3]:
# Column labels: storm/record fields first, then the four quadrant radii
# (NE, SE, SW, NW) for each of the 34, 50 and 64 kt wind thresholds.
n = ['ID', 'Name', 'Time', 'Record ID', 'Status', 'Latitude', 'Longitude',
     'Max wind (knots)', 'Min pressure (mbar)']
n += ['{}kt wind radii max {}'.format(speed, quadrant)
      for speed in (34, 50, 64)
      for quadrant in ('NE', 'SE', 'SW', 'NW')]

# No header row in the cleaned file; -999 is read as a missing value.
# The leading row counter written during clean-up is not named here and
# ends up as the DataFrame index.
data = pd.read_csv(
    cleanfile,
    header=None,
    names=n,
    skipinitialspace=True,
    na_values=[-999],
).assign(Time=lambda frame: pd.to_datetime(frame['Time'], format='%Y-%m-%d %H:%M'))
data.head()

Unnamed: 0,ID,Name,Time,Record ID,Status,Latitude,Longitude,Max wind (knots),Min pressure (mbar),34kt wind radii max NE,...,34kt wind radii max SW,34kt wind radii max NW,50kt wind radii max NE,50kt wind radii max SE,50kt wind radii max SW,50kt wind radii max NW,64kt wind radii max NE,64kt wind radii max SE,64kt wind radii max SW,64kt wind radii max NW
0,AL011851,UNNAMED,1851-06-25 00:00:00,,HU,28.0N,94.8W,80,,,...,,,,,,,,,,
1,AL011851,UNNAMED,1851-06-25 06:00:00,,HU,28.0N,95.4W,80,,,...,,,,,,,,,,
2,AL011851,UNNAMED,1851-06-25 12:00:00,,HU,28.0N,96.0W,80,,,...,,,,,,,,,,
3,AL011851,UNNAMED,1851-06-25 18:00:00,,HU,28.1N,96.5W,80,,,...,,,,,,,,,,
4,AL011851,UNNAMED,1851-06-25 21:00:00,L,HU,28.2N,96.8W,80,,,...,,,,,,,,,,


In [4]:
# Number of non-missing entries in each column
data.count()

ID                        51346
Name                      51346
Time                      51346
Record ID                  1072
Status                    51346
Latitude                  51346
Longitude                 51346
Max wind (knots)          51346
Min pressure (mbar)       20386
34kt wind radii max NE     7725
34kt wind radii max SE     7725
34kt wind radii max SW     7725
34kt wind radii max NW     7725
50kt wind radii max NE     7725
50kt wind radii max SE     7725
50kt wind radii max SW     7725
50kt wind radii max NW     7725
64kt wind radii max NE     7725
64kt wind radii max SE     7725
64kt wind radii max SW     7725
64kt wind radii max NW     7725
dtype: int64

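We have now successfully imported the data into the notebook. The dataset is sizable: 51,346 records in total, every one with a position and maximum wind speed, of which 20,386 also include a minimum pressure and 7,725 include detailed wind-radii information.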

In [5]:
# Write the parsed table to wind.csv (the row index is written as the first, unnamed column)
data.to_csv('wind.csv')