#import all packages

In [1]:
import pandas as pd
from tabulate import tabulate

#initialize path to HURDAT2 data

In [2]:
path = 'https://www.aoml.noaa.gov/hrd/hurdat/hurdat2.html'
# https://www.nhc.noaa.gov/data/hurdat/hurdat2-format-atlantic.pdf

#assign column names and read CSV data

In [3]:
col_names = ['Date','Time','RecordIdentifier','SystemStatus','Latitude','Longitude','MaxSustWind','MaxPressure',
             'NE34','SE34','SW34','NW34',
             'NE50','SE50','SW50','NW50',
             'NE64','SE64','SW64','NW64']
df = pd.read_csv(path, skiprows = 2, low_memory=False, names=col_names)
print(len(df))
print(df.dtypes)
#print(tabulate(df[52100:52200], headers = col_names))

53737
Date                 object
Time                 object
RecordIdentifier     object
SystemStatus         object
Latitude             object
Longitude            object
MaxSustWind         float64
MaxPressure         float64
NE34                float64
SE34                float64
SW34                float64
NW34                float64
NE50                float64
SE50                float64
SW50                float64
NW50                float64
NE64                float64
SE64                float64
SW64                float64
NW64                float64
dtype: object


#change data types to strings

In [4]:
df.Date.apply(str)
df.Time.apply(str)

0                        nan
1                    UNNAMED
2                       0000
3                       0600
4                       1200
                ...         
53732                   0600
53733                   1200
53734                    nan
53735                    nan
53736                    nan
Name: Time, Length: 53737, dtype: object

#print first 10 rows of data

In [18]:
#remove ghost row
newdf = df.drop([0])
print(tabulate(newdf[0:10], headers = col_names))

    Date      Time     RecordIdentifier    SystemStatus    Latitude    Longitude      MaxSustWind    MaxPressure    NE34    SE34    SW34    NW34    NE50    SE50    SW50    NW50    NE64    SE64    SW64    NW64
--  --------  -------  ------------------  --------------  ----------  -----------  -------------  -------------  ------  ------  ------  ------  ------  ------  ------  ------  ------  ------  ------  ------
 1  AL011851  UNNAMED  14                  nan             nan         nan                    nan            nan     nan     nan     nan     nan     nan     nan     nan     nan     nan     nan     nan     nan
 2  18510625  0000                         HU              28.0N       94.8W                   80           -999    -999    -999    -999    -999    -999    -999    -999    -999    -999    -999    -999    -999
 3  18510625  0600                         HU              28.0N       95.4W                   80           -999    -999    -999    -999    -999    -999    -999    

#check if row is convoluted header row (contains ALPHA characters)

In [20]:
print(newdf['Date'][0:10])
newdf['IsStormHdr'] = ~newdf['Date'].str.isdigit()
print(newdf['IsStormHdr'][0:10])
#print(tabulate(df[1:10], headers = col_names))

1         AL011851
2         18510625
3         18510625
4         18510625
5         18510625
6         18510625
7         18510626
8         18510626
9         18510626
10        18510626
Name: Date, dtype: object
1      True
2     False
3     False
4     False
5     False
6     False
7     False
8     False
9     False
10    False
Name: IsStormHdr, dtype: bool


#create empty columns to receive header data

In [24]:
newdf['Identifier'] = ''
newdf['Name'] = ''
newdf['Samples'] = ''

print(tabulate(newdf[0:10], headers = col_names))
print(newdf.dtypes)

                                Date    Time      RecordIdentifier    SystemStatus    Latitude    Longitude    MaxSustWind    MaxPressure    NE34    SE34    SW34    NW34    NE50    SE50    SW50    NW50  NE64    SE64    SW64    NW64
--  --------  -------  --  ---  ------  ------  ------------------  --------------  ----------  -----------  -------------  -------------  ------  ------  ------  ------  ------  ------  ------  ------  ------  ------  ------  ------
 1  AL011851  UNNAMED  14  nan  nan     nan                    nan             nan         nan          nan            nan            nan     nan     nan     nan     nan     nan     nan     nan     nan  True
 2  18510625  0000         HU   28.0N   94.8W                   80            -999        -999         -999           -999           -999    -999    -999    -999    -999    -999    -999    -999    -999  False
 3  18510625  0600         HU   28.0N   95.4W                   80            -999        -999         -999          

#Iterate over rows to get header data and write to list

In [40]:
Lidentifier = []
Lname = []
Lsamples = []

identifier = ""
name = ""
samples = ""

for row in newdf.itertuples(index=True):
    if (getattr(row, "IsStormHdr") == True):
        identifier = getattr(row, "Date")
        name = getattr(row, "Time")
        samples = getattr(row, "RecordIdentifier")
    Lidentifier.append(identifier)
    Lname.append(name)
    Lsamples.append(samples

#write list data into dataframe

In [43]:
newdf.Identifier = Lidentifier
newdf.Name = Lname
newdf.Samples = Lsamples

#separate storm header data from track data

In [46]:
storms = newdf[newdf['IsStormHdr'] == True]
storms = storms[['Identifier','Name','Samples']]
print(storms[1600:1800])
tracks = newdf[newdf['IsStormHdr'] == False]
print(tracks)

      Identifier                 Name  Samples
44383   AL112002            JOSEPHINE        9
44393   AL122002                 KYLE       90
44484   AL132002                 LILI       57
44542   AL142002              UNNAMED       11
44554   AL012003                  ANA       39
...          ...                  ...      ...
50522   AL092013             HUMBERTO       46
50569   AL102013               INGRID       22
50592   AL112013                JERRY       33
50626   AL122013                KAREN       14
50641   AL132013              LORENZO       21

[200 rows x 3 columns]
           Date   Time RecordIdentifier SystemStatus Latitude Longitude  \
2      18510625   0000                            HU    28.0N     94.8W   
3      18510625   0600                            HU    28.0N     95.4W   
4      18510625   1200                            HU    28.0N     96.0W   
5      18510625   1800                            HU    28.1N     96.5W   
6      18510625   2100               