#import all packages

In [None]:
import pandas as pd
from tabulate import tabulate
from sphere import RegionCoverer, Cell, LatLng, LatLngRect, CellId

#initialize path to HURDAT2 data

In [None]:
# Location of the HURDAT2 data that is handed to pd.read_csv in the next cell.
# NOTE(review): the URL ends in .html although it is read as CSV -- confirm it
# still serves the comma-separated records directly.
path = 'https://www.aoml.noaa.gov/hrd/hurdat/hurdat2.html'
# Description of the record format:
# https://www.nhc.noaa.gov/data/hurdat/hurdat2-format-atlantic.pdf

#assign column names and read CSV data

In [None]:
# The first eight columns are named individually; the remaining twelve follow
# a regular pattern: four quadrants for each of the 34 / 50 / 64 radii groups.
base_columns = ['Date', 'Time', 'RecordIdentifier', 'SystemStatus',
                'Latitude', 'Longitude', 'MaxSustWind', 'MaxPressure']
quadrants = ['NE', 'SE', 'SW', 'NW']
radius_columns = [quadrant + radius
                  for radius in ('34', '50', '64')
                  for quadrant in quadrants]
col_names = base_columns + radius_columns

# Skip the first two lines of the source and label the columns ourselves.
df = pd.read_csv(path, names=col_names, skiprows=2, low_memory=False)
print(df.shape[0])
#print(df.dtypes)
#print(tabulate(df[52100:52200], headers = col_names))

#change data types to strings

In [None]:
# Series.apply returns a new Series and leaves df untouched, so the converted
# values must be assigned back for the string conversion to take effect.
df['Date'] = df['Date'].apply(str)
df['Time'] = df['Time'].apply(str)

#print first 10 rows of data

In [None]:
# Discard the "ghost" row with index label 0, then preview the first ten
# remaining rows as a formatted table.
newdf = df.drop(index=[0])
print(tabulate(newdf.iloc[:10], headers=col_names))

#flag storm header rows: rows whose Date field is not made up purely of digits

In [None]:
# Rows whose Date field consists only of digits are ordinary records; every
# other row is flagged as a storm header.
date_is_numeric = newdf['Date'].str.isdigit()
newdf['IsStormHdr'] = ~date_is_numeric
#print(tabulate(df[1:10], headers = col_names))

#create empty columns to receive header data

In [None]:
# Add the three header-derived columns, initialised to empty strings.
for header_col in ('Identifier', 'Name', 'Samples'):
    newdf[header_col] = ''

#print(tabulate(newdf[0:10], headers = col_names))
#print(newdf.dtypes)

#Iterate over rows to get header data and write to list

In [None]:
# One entry per row of newdf is appended to each of these lists.
Lidentifier, Lname, Lsamples = [], [], []

# Values taken from the most recently seen storm header row; they stay empty
# until the first header row is encountered.
identifier, name, samples = "", "", ""

for row in newdf.itertuples(index=True):
    if row.IsStormHdr:
        # On a header row the Date / Time / RecordIdentifier fields hold the
        # values destined for the Identifier / Name / Samples columns.
        identifier, name, samples = row.Date, row.Time, row.RecordIdentifier
    # Every row (header or not) receives the current header values.
    Lidentifier.append(identifier)
    Lname.append(name)
    Lsamples.append(samples)

#write list data into dataframe

In [None]:
# Copy the per-row lists into the corresponding dataframe columns.
newdf['Identifier'] = Lidentifier
newdf['Name'] = Lname
newdf['Samples'] = Lsamples

#separate storm header data from track data

In [None]:
# Take explicit copies so that later column assignments act on independent
# frames (this is the "chaining issue" fix).
header_mask = newdf['IsStormHdr'] == True
storms = newdf.loc[header_mask, ['Identifier', 'Name', 'Samples']].copy()
#print(storms[1600:1800])

# Everything that is not a header row is a track record.
tracks = newdf.loc[~header_mask].copy()
#print(tracks)

In [None]:
import re


def parsell(string):
    """Convert a latitude/longitude token to signed decimal degrees.

    A trailing ``W`` or ``S`` (case-insensitive) or an explicit leading minus
    sign makes the result negative; anything else is positive.

    Args:
        string: Text such as ``"28.0N"`` or ``" 94.8W"``. Values that are
            already numeric are accepted and returned as ``float`` so the
            conversion can safely be applied to a column more than once.

    Returns:
        The coordinate as a ``float``.

    Raises:
        ValueError: If no single number can be read from the text.
    """
    # Already numeric (e.g. the conversion cell was re-run): nothing to parse.
    if not isinstance(string, str):
        return float(string)

    text = string.strip().lower()

    # West / south are negative. So is a leading minus sign, which the cleanup
    # below would otherwise silently discard.
    is_negative = text.endswith(('w', 's')) or text.startswith('-')
    sign = -1 if is_negative else 1

    # Blank out everything except digits and the decimal point; stray
    # characters in the middle of the number still make float() fail loudly.
    digits = re.sub(r"[^0-9.]", " ", text).strip()

    return float(digits) * sign

In [None]:
# Run every coordinate through parsell and overwrite the original columns
# with the resulting numeric values.
Tlatitude = list(map(parsell, tracks['Latitude']))
tracks['Latitude'] = Tlatitude

Tlongitude = list(map(parsell, tracks['Longitude']))
tracks['Longitude'] = Tlongitude

In [None]:
# Show the track records after the Latitude/Longitude conversion above.
print(tracks)

In [None]:


# `time` is not imported anywhere else in the notebook, so import it here to
# keep this cell runnable on its own.
import time

# NOTE(review): s2level is assigned but never used below -- the cell ids are
# created without any level argument. Confirm whether they were meant to be
# reduced to this level.
s2level = 10

# Build one LatLng object per track record from its numeric coordinates.
start0 = time.perf_counter()
tracks['S2LL'] = [LatLng.from_degrees(lat, lng)
                  for lat, lng in zip(tracks['Latitude'], tracks['Longitude'])]
end0 = time.perf_counter()
print(end0 - start0)
#57s (timing noted by the original author)

# Derive a CellId from every LatLng. from_lat_lng is called on the class
# itself rather than on a throwaway CellId() instance created for each row.
start1 = time.perf_counter()
tracks['S2CellID'] = [CellId.from_lat_lng(lat_lng) for lat_lng in tracks['S2LL']]
end1 = time.perf_counter()
print(end1 - start1)
#426s (timing noted by the original author)
