In [33]:
import pandas as pd 
import tropycal.tracks as tracks 
from geopy.geocoders import Nominatim
from math import radians, sin, cos, sqrt, atan2

In [34]:
# Get basin data with info about all storms in all years
basin = tracks.TrackDataset(basin='both', source='hurdat', include_btk=True)

--> Starting to read in HURDAT2 data
--> Completed reading in HURDAT2 data (1.91 seconds)
--> Starting to read in best track data
--> Completed reading in best track data (9.96 seconds)


## Get All Named Storms
Return all the storms that occurred in each year

In [35]:
# Return storm data for all seasons 
storm_data = basin.to_dataframe()

# Loop through each season and get all storms
storm_details = []
for year in storm_data.index:
    season = basin.get_season(year).to_dataframe()
    season['year'] = year 
    storm_details.append(season)

# Concatenate all storms into one dataframe
all_storms = pd.concat(storm_details).set_index('year')

In [36]:
all_storms.head()

Unnamed: 0_level_0,id,name,vmax,mslp,category,ace,start_time,end_time,start_lat,start_lon
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1851,AL011851,UNNAMED,80,,1,4.9,1851-06-25 00:00:00,1851-06-28 00:00:00,28.0,-94.8
1851,AL021851,UNNAMED,80,,1,0.6,1851-07-05 12:00:00,1851-07-05 12:00:00,22.2,-97.6
1851,AL031851,UNNAMED,50,,0,0.2,1851-07-10 12:00:00,1851-07-10 12:00:00,12.0,-60.0
1851,AL041851,UNNAMED,100,,3,21.8,1851-08-16 00:00:00,1851-08-27 18:00:00,13.4,-48.0
1851,AL051851,UNNAMED,50,,0,4.0,1851-09-13 00:00:00,1851-09-16 18:00:00,32.5,-73.5


In [37]:
all_storms.shape 

(3223, 10)

# Detailed Info About Each Storm
Get detailed tracking info about each storm and its path

In [38]:
# Get detailed data about individual storm
all_storms_detailed = []

year_storm_id_name = zip(all_storms.index, all_storms['id'], all_storms['name'])
for year, id, name in year_storm_id_name:
    storm = basin.get_storm(id).to_dataframe()
    storm['storm_id'] = id 
    storm['year'] = year
    storm['storm_name'] = name
    all_storms_detailed.append(storm)

# Concatenate all the data 
all_storms_detailed = pd.concat(all_storms_detailed)

In [41]:
all_storms_detailed.head()

Unnamed: 0,time,extra_obs,special,type,lat,lon,vmax,mslp,wmo_basin,storm_id,year,storm_name
0,1851-06-25 00:00:00,0,,HU,28.0,-94.8,80.0,,north_atlantic,AL011851,1851,UNNAMED
1,1851-06-25 06:00:00,0,,HU,28.0,-95.4,80.0,,north_atlantic,AL011851,1851,UNNAMED
2,1851-06-25 12:00:00,0,,HU,28.0,-96.0,80.0,,north_atlantic,AL011851,1851,UNNAMED
3,1851-06-25 18:00:00,0,,HU,28.1,-96.5,80.0,,north_atlantic,AL011851,1851,UNNAMED
4,1851-06-25 21:00:00,1,L,HU,28.2,-96.8,80.0,,north_atlantic,AL011851,1851,UNNAMED


In [42]:
all_storms_detailed.shape 

(86823, 12)

## Get Geolocations for specified Cities

In [44]:
# import cities data
cities = pd.read_csv('data/Major_Gulf_cities.csv')

In [45]:
# Define the geolocator with a user-agent
geolocator = Nominatim(user_agent="City-Tracking")

def get_coordinates(city_name: str):
    """
    Get the latitude and longitude of a city by name.

    Args:
    - city_name (str): Name of the city to look up.

    Returns:
    - tuple: A tuple containing the latitude and longitude, or (None, None) if not found.
    """
    try:
        location = geolocator.geocode(city_name)
        if location:
            return location.latitude, location.longitude
        else:
            return None, None
    except Exception as e:
        print(f"Error for {city_name}: {e}")
        return None, None


In [46]:
# Apply the get_coordinates function to each city in the 'City' column
cities[["Latitude", "Longitude"]] = cities["City Name"].apply(
    lambda x: pd.Series(get_coordinates(x))
)

# Display the DataFrame with new latitude and longitude columns
cities.head()

Unnamed: 0,City Name,Country,Latitude,Longitude
0,New Orleans,USA,29.975998,-90.078213
1,Houston,USA,29.758938,-95.367697
2,Tampa,USA,27.94776,-82.458444
3,Miami,USA,25.774173,-80.19362
4,Corpus Christi,USA,27.76353,-97.403319


## Tag Records in `detailed_storm_data` with Cities
This block tags each record of storm data where it is within a certain radius of one of the cities based on a threshold distance.

In [47]:
def haversine(lat1, lon1, lat2, lon2):
    """Returns the distance between two geolocations. Source: ChatGPT"""
    # Radius of the Earth in km
    R = 6371.0
    # Convert coordinates to radians
    lat1, lon1, lat2, lon2 = map(radians, [lat1, lon1, lat2, lon2])

    dlat = lat2 - lat1
    dlon = lon2 - lon1

    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
    c = 2 * atan2(sqrt(a), sqrt(1 - a))

    distance = R * c
    return distance


# Set a distance threshold (in km) to consider a storm passing through a city
distance_threshold = 100


In [48]:
# Iterate over each hurricane observation and city to calculate distance
filtered_storms = []
for storm_index, storm in all_storms_detailed.iterrows():
    for _, city in cities.iterrows():
        distance = haversine(
            storm["lat"], storm["lon"], city["Latitude"], city["Longitude"]
        )
        if distance <= distance_threshold:
            filtered_storms.append(
                {
                    "idx": storm_index,  # Use storm_index here
                    "Storm ID": storm["storm_id"],
                    "Storm Name": storm["storm_name"],
                    "Datetime": storm["time"],
                    "City": city["City Name"],
                    "Distance (km)": distance,
                }
            )

# Convert results to a DataFrame
result_df = pd.DataFrame(filtered_storms)

# Optionally set the index from the original storm index
result_df.set_index("idx", inplace=True)

In [49]:
result_df

Unnamed: 0_level_0,Storm ID,Storm Name,Datetime,City,Distance (km)
idx,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
3,AL011851,UNNAMED,1851-06-25 18:00:00,Corpus Christi,96.307258
4,AL011851,UNNAMED,1851-06-25 21:00:00,Corpus Christi,76.584748
5,AL011851,UNNAMED,1851-06-26 00:00:00,Corpus Christi,62.641503
6,AL011851,UNNAMED,1851-06-26 06:00:00,Corpus Christi,62.698467
0,AL021851,UNNAMED,1851-07-05 12:00:00,Tampico,26.594421
...,...,...,...,...,...
16,AL092024,HELENE,2024-09-25 18:00:00,Cancun,97.492169
24,AL142024,MILTON,2024-10-10 00:00:00,Tampa,86.488635
11,AL152024,NADINE,2024-10-19 12:00:00,Belize City,97.941707
12,AL152024,NADINE,2024-10-19 18:00:00,Belize City,63.748467


In [50]:
# Merge with all_storms_detailed to tag with city
result = pd.merge(
    all_storms_detailed, 
    result_df[['Storm ID', 'City', 'Distance (km)']], 
    left_on='storm_id', 
    right_on='Storm ID', 
    how='left'
)


In [51]:
result 

Unnamed: 0,time,extra_obs,special,type,lat,lon,vmax,mslp,wmo_basin,storm_id,year,storm_name,Storm ID,City,Distance (km)
0,1851-06-25 00:00:00,0,,HU,28.0,-94.8,80.0,,north_atlantic,AL011851,1851,UNNAMED,AL011851,Corpus Christi,96.307258
1,1851-06-25 00:00:00,0,,HU,28.0,-94.8,80.0,,north_atlantic,AL011851,1851,UNNAMED,AL011851,Corpus Christi,76.584748
2,1851-06-25 00:00:00,0,,HU,28.0,-94.8,80.0,,north_atlantic,AL011851,1851,UNNAMED,AL011851,Corpus Christi,62.641503
3,1851-06-25 00:00:00,0,,HU,28.0,-94.8,80.0,,north_atlantic,AL011851,1851,UNNAMED,AL011851,Corpus Christi,62.698467
4,1851-06-25 06:00:00,0,,HU,28.0,-95.4,80.0,,north_atlantic,AL011851,1851,UNNAMED,AL011851,Corpus Christi,96.307258
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
151665,2024-11-06 18:00:00,0,,TD,13.6,-105.4,25.0,1006.0,east_pacific,EP142024,2024,FOURTEEN,,,
151666,2024-11-07 00:00:00,0,,TD,13.5,-105.1,25.0,1007.0,east_pacific,EP142024,2024,FOURTEEN,,,
151667,2024-11-07 06:00:00,0,,TD,13.5,-105.0,25.0,1007.0,east_pacific,EP142024,2024,FOURTEEN,,,
151668,2024-11-07 12:00:00,0,,TD,13.4,-104.6,25.0,1007.0,east_pacific,EP142024,2024,FOURTEEN,,,
