#import all packages

In [1]:
import pandas as pd
from tabulate import tabulate
from sphere import RegionCoverer, Cell, LatLng, LatLngRect, CellId

#initialize path to HURDAT2 data

In [2]:
path = 'https://www.aoml.noaa.gov/hrd/hurdat/hurdat2.html'
# https://www.nhc.noaa.gov/data/hurdat/hurdat2-format-atlantic.pdf

#assign column names and read CSV data

In [3]:
col_names = ['Date','Time','RecordIdentifier','SystemStatus','Latitude','Longitude','MaxSustWind','MaxPressure',
             'NE34','SE34','SW34','NW34',
             'NE50','SE50','SW50','NW50',
             'NE64','SE64','SW64','NW64']
df = pd.read_csv(path, skiprows = 2, low_memory=False, names=col_names)
print(len(df))
#print(df.dtypes)
#print(tabulate(df[52100:52200], headers = col_names))

53737


#change data types to strings

In [4]:
df.Date.apply(str)
df.Time.apply(str)

0                        nan
1                    UNNAMED
2                       0000
3                       0600
4                       1200
                ...         
53732                   0600
53733                   1200
53734                    nan
53735                    nan
53736                    nan
Name: Time, Length: 53737, dtype: object

#print first 10 rows of data

In [5]:
#remove ghost row
newdf = df.drop([0])
print(tabulate(newdf[0:10], headers = col_names))

    Date      Time     RecordIdentifier    SystemStatus    Latitude    Longitude      MaxSustWind    MaxPressure    NE34    SE34    SW34    NW34    NE50    SE50    SW50    NW50    NE64    SE64    SW64    NW64
--  --------  -------  ------------------  --------------  ----------  -----------  -------------  -------------  ------  ------  ------  ------  ------  ------  ------  ------  ------  ------  ------  ------
 1  AL011851  UNNAMED  14                  nan             nan         nan                    nan            nan     nan     nan     nan     nan     nan     nan     nan     nan     nan     nan     nan     nan
 2  18510625  0000                         HU              28.0N       94.8W                   80           -999    -999    -999    -999    -999    -999    -999    -999    -999    -999    -999    -999    -999
 3  18510625  0600                         HU              28.0N       95.4W                   80           -999    -999    -999    -999    -999    -999    -999    

#check if row is convoluted header row (contains ALPHA characters)

In [6]:
newdf['IsStormHdr'] = ~newdf['Date'].str.isdigit()
#print(tabulate(df[1:10], headers = col_names))

#create empty columns to receive header data

In [7]:
newdf['Identifier'] = ''
newdf['Name'] = ''
newdf['Samples'] = ''

#print(tabulate(newdf[0:10], headers = col_names))
#print(newdf.dtypes)

#Iterate over rows to get header data and write to list

In [8]:
Lidentifier = []
Lname = []
Lsamples = []

identifier = ""
name = ""
samples = ""

for row in newdf.itertuples(index=True):
    if (getattr(row, "IsStormHdr") == True):
        identifier = getattr(row, "Date")
        name = getattr(row, "Time")
        samples = getattr(row, "RecordIdentifier")
    Lidentifier.append(identifier)
    Lname.append(name)
    Lsamples.append(samples)

#write list data into dataframe

In [9]:
newdf.Identifier = Lidentifier
newdf.Name = Lname
newdf.Samples = Lsamples

#separate storm header data from track data

In [10]:
#fix chaining issue
storms = newdf[newdf['IsStormHdr'] == True].copy()
storms = storms[['Identifier','Name','Samples']]
#print(storms[1600:1800])

tracks = newdf[newdf['IsStormHdr'] == False].copy()
#print(tracks)

In [11]:
import re
def parsell(string):
    string = string.strip().lower()

    if string.endswith('w') or string.endswith('s'):
        sign = -1
    else:
        sign = 1

    string = re.sub(r"[^0-9.]", " ", string).strip()

    numeric_ll = float(string)
    return numeric_ll * sign

In [12]:
Tlatitude = [parsell(lat) for lat in tracks['Latitude']]
tracks['Latitude'] = Tlatitude

Tlongitude = [parsell(lon) for lon in tracks['Longitude']]
tracks['Longitude'] = Tlongitude

In [13]:
print(tracks)

           Date   Time RecordIdentifier SystemStatus  Latitude  Longitude  \
2      18510625   0000                            HU      28.0      -94.8   
3      18510625   0600                            HU      28.0      -95.4   
4      18510625   1200                            HU      28.0      -96.0   
5      18510625   1800                            HU      28.1      -96.5   
6      18510625   2100                L           HU      28.2      -96.8   
...         ...    ...              ...          ...       ...        ...   
53729  20191126   1200                            EX      52.2       -9.3   
53730  20191126   1800                            EX      52.2       -8.9   
53731  20191127   0000                            EX      51.8       -8.2   
53732  20191127   0600                            EX      51.4       -6.0   
53733  20191127   1200                            EX      51.3       -2.1   

       MaxSustWind  MaxPressure   NE34   SE34  ...   SW50   NW50   NE64  \


In [17]:
s2level = 10
tracks['S2LL'] = [LatLng.from_degrees(x, y) for x, y in zip(tracks['Latitude'], tracks['Longitude'])]
tracks['S2CellID'] = [CellId().from_lat_lng(xy) for xy in tracks['S2LL']]
tracks['S2Region'] = [z.parent(s2level) for z in tracks['S2CellID']]

In [18]:
print(tracks)

           Date   Time RecordIdentifier SystemStatus  Latitude  Longitude  \
2      18510625   0000                            HU      28.0      -94.8   
3      18510625   0600                            HU      28.0      -95.4   
4      18510625   1200                            HU      28.0      -96.0   
5      18510625   1800                            HU      28.1      -96.5   
6      18510625   2100                L           HU      28.2      -96.8   
...         ...    ...              ...          ...       ...        ...   
53729  20191126   1200                            EX      52.2       -9.3   
53730  20191126   1800                            EX      52.2       -8.9   
53731  20191127   0000                            EX      51.8       -8.2   
53732  20191127   0600                            EX      51.4       -6.0   
53733  20191127   1200                            EX      51.3       -2.1   

       MaxSustWind  MaxPressure   NE34   SE34  ...   SE64   SW64   NW64  \
