In [45]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
from geopy.distance import geodesic
from math import radians, sin, cos, sqrt, atan2
import openpyxl

In [46]:
tc_harold = gpd.read_file('TC_Harold_2020.geojson')

In [47]:
tc_harold = gpd.GeoDataFrame(tc_harold, crs="EPSG:4326")  # Assuming WGS84

In [48]:
# Set CRS to WGS84 (EPSG:4326)
tc_harold.set_crs(epsg=4326, inplace=True)

# Convert to UTM zone 59S (EPSG:32759)
# gdf_utm = tc_harold.to_crs(epsg=32759)
tc_harold = tc_harold.to_crs(epsg=32759)

In [49]:
tc_harold

Unnamed: 0,Serial_Num,Season,Num,Basin,Sub_basin,Name,ISO_time,Nature,Latitude,Longitude,Mean Wind Speed,Mean Central Pressure,Center(s) or Source,geometry
0,2020089S05147,2020,12,SP,EA,HAROLD,3/29/2020 12:00,NR,-4.9,146.6,15,1010,ASW,POINT (-2290192.703 9405158.365)
1,2020089S05147,2020,12,SP,EA,HAROLD,3/29/2020 18:00,NR,-5.3,147.3,15,1010,ASW,POINT (-2203387.024 9360154.711)
2,2020089S05147,2020,12,SP,EA,HAROLD,3/30/2020 0:00,NR,-5.8,148.2,15,1010,ASW,POINT (-2092516.423 9304572.791)
3,2020089S05147,2020,12,SP,EA,HAROLD,3/30/2020 6:00,NR,-6.4,149.5,15,1010,ASW,POINT (-1934317.539 9239768.528)
4,2020089S05147,2020,12,SP,EA,HAROLD,3/30/2020 12:00,NR,-7.1,150.6,15,1010,ASW,POINT (-1800627.378 9162886.701)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
107,,,,,,,,,,,,,,
108,,,,,,,,,,,,,,
109,,,,,,,,,,,,,,
110,,,,,,,,,,,,,,


In [50]:
distances = []

In [51]:
# Calculate distances between each point and the next point
tc_harold['next_geometry'] = tc_harold['geometry'].shift(-1)
tc_harold['distance_to_next'] = tc_harold.apply(
    lambda row: row['geometry'].distance(row['next_geometry']) if pd.notnull(row['next_geometry']) else None,
    axis=1
)

# Drop the next_geometry column for a cleaner output
# gdf.drop(columns=['next_geometry'], inplace=True)

print(tc_harold[['ISO_time', 'Mean Wind Speed', 'geometry', 'distance_to_next']])

            ISO_time Mean Wind Speed                          geometry  \
0    3/29/2020 12:00              15  POINT (-2290192.703 9405158.365)   
1    3/29/2020 18:00              15  POINT (-2203387.024 9360154.711)   
2     3/30/2020 0:00              15  POINT (-2092516.423 9304572.791)   
3     3/30/2020 6:00              15  POINT (-1934317.539 9239768.528)   
4    3/30/2020 12:00              15  POINT (-1800627.378 9162886.701)   
..               ...             ...                               ...   
107                                                               None   
108                                                               None   
109                                                               None   
110                                                               None   
111                                                               None   

     distance_to_next  
0        97778.089519  
1       124022.740175  
2       170957.536388  
3       154220.

In [52]:
print(tc_harold.columns)

Index(['Serial_Num', 'Season', 'Num', 'Basin', 'Sub_basin', 'Name', 'ISO_time',
       'Nature', 'Latitude', 'Longitude', 'Mean Wind Speed',
       'Mean Central Pressure', 'Center(s) or Source', 'geometry',
       'next_geometry', 'distance_to_next'],
      dtype='object')


In [53]:
# Create a GeoDataFrame
tc_harold['timestamp'] = pd.to_datetime(tc_harold['ISO_time'])
tc_harold = gpd.GeoDataFrame(tc_harold, geometry='geometry')

In [54]:
tc_harold

Unnamed: 0,Serial_Num,Season,Num,Basin,Sub_basin,Name,ISO_time,Nature,Latitude,Longitude,Mean Wind Speed,Mean Central Pressure,Center(s) or Source,geometry,next_geometry,distance_to_next,timestamp
0,2020089S05147,2020,12,SP,EA,HAROLD,3/29/2020 12:00,NR,-4.9,146.6,15,1010,ASW,POINT (-2290192.703 9405158.365),POINT (-2203387.024 9360154.711),97778.089519,2020-03-29 12:00:00
1,2020089S05147,2020,12,SP,EA,HAROLD,3/29/2020 18:00,NR,-5.3,147.3,15,1010,ASW,POINT (-2203387.024 9360154.711),POINT (-2092516.423 9304572.791),124022.740175,2020-03-29 18:00:00
2,2020089S05147,2020,12,SP,EA,HAROLD,3/30/2020 0:00,NR,-5.8,148.2,15,1010,ASW,POINT (-2092516.423 9304572.791),POINT (-1934317.539 9239768.528),170957.536388,2020-03-30 00:00:00
3,2020089S05147,2020,12,SP,EA,HAROLD,3/30/2020 6:00,NR,-6.4,149.5,15,1010,ASW,POINT (-1934317.539 9239768.528),POINT (-1800627.378 9162886.701),154220.214410,2020-03-30 06:00:00
4,2020089S05147,2020,12,SP,EA,HAROLD,3/30/2020 12:00,NR,-7.1,150.6,15,1010,ASW,POINT (-1800627.378 9162886.701),POINT (-1656307.772 9087342.978),162895.681179,2020-03-30 12:00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
107,,,,,,,,,,,,,,,,,NaT
108,,,,,,,,,,,,,,,,,NaT
109,,,,,,,,,,,,,,,,,NaT
110,,,,,,,,,,,,,,,,,NaT


In [55]:
# Convert the 'timestamp' column to datetime format
tc_harold['timestamp'] = pd.to_datetime(tc_harold['timestamp'])

# Calculate the time difference between the current row and the previous row
tc_harold['time_diff'] = tc_harold['timestamp'].diff()

# tc_harold = tc_harold.dropna['time_difference']

In [56]:
# Convert 'time_diff' to hours as a float
tc_harold['time_diff_hours'] = tc_harold['time_diff'].dt.total_seconds() / 3600

# fill empty values
tc_harold['time_diff_hours'] = tc_harold['time_diff_hours'].fillna(0)

# Optionally, if you need it as an integer (total hours rounded down)
tc_harold['time_diff_hours'] = tc_harold['time_diff_hours'].astype(int)

In [57]:
tc_harold

Unnamed: 0,Serial_Num,Season,Num,Basin,Sub_basin,Name,ISO_time,Nature,Latitude,Longitude,Mean Wind Speed,Mean Central Pressure,Center(s) or Source,geometry,next_geometry,distance_to_next,timestamp,time_diff,time_diff_hours
0,2020089S05147,2020,12,SP,EA,HAROLD,3/29/2020 12:00,NR,-4.9,146.6,15,1010,ASW,POINT (-2290192.703 9405158.365),POINT (-2203387.024 9360154.711),97778.089519,2020-03-29 12:00:00,NaT,0
1,2020089S05147,2020,12,SP,EA,HAROLD,3/29/2020 18:00,NR,-5.3,147.3,15,1010,ASW,POINT (-2203387.024 9360154.711),POINT (-2092516.423 9304572.791),124022.740175,2020-03-29 18:00:00,0 days 06:00:00,6
2,2020089S05147,2020,12,SP,EA,HAROLD,3/30/2020 0:00,NR,-5.8,148.2,15,1010,ASW,POINT (-2092516.423 9304572.791),POINT (-1934317.539 9239768.528),170957.536388,2020-03-30 00:00:00,0 days 06:00:00,6
3,2020089S05147,2020,12,SP,EA,HAROLD,3/30/2020 6:00,NR,-6.4,149.5,15,1010,ASW,POINT (-1934317.539 9239768.528),POINT (-1800627.378 9162886.701),154220.214410,2020-03-30 06:00:00,0 days 06:00:00,6
4,2020089S05147,2020,12,SP,EA,HAROLD,3/30/2020 12:00,NR,-7.1,150.6,15,1010,ASW,POINT (-1800627.378 9162886.701),POINT (-1656307.772 9087342.978),162895.681179,2020-03-30 12:00:00,0 days 06:00:00,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
107,,,,,,,,,,,,,,,,,NaT,NaT,0
108,,,,,,,,,,,,,,,,,NaT,NaT,0
109,,,,,,,,,,,,,,,,,NaT,NaT,0
110,,,,,,,,,,,,,,,,,NaT,NaT,0


In [58]:
# Calculate velocity and velocity difference 
tc_harold['velocity'] = tc_harold.apply(
    lambda row: row['distance_to_next'] / row['time_diff_hours'] if row['time_diff_hours'] != 0 else 0,
    axis=1
)

In [59]:
tc_harold['velocity_diff'] = tc_harold['velocity'].diff()

In [60]:
# Calculate velocity and velocity difference 
tc_harold['acceleration_ms-1'] = tc_harold.apply(
    lambda row: row['velocity_diff'] / row['time_diff_hours'] if row['time_diff_hours'] != 0 else 0,
    axis=1
)

In [61]:
tc_harold

Unnamed: 0,Serial_Num,Season,Num,Basin,Sub_basin,Name,ISO_time,Nature,Latitude,Longitude,...,Center(s) or Source,geometry,next_geometry,distance_to_next,timestamp,time_diff,time_diff_hours,velocity,velocity_diff,acceleration_ms-1
0,2020089S05147,2020,12,SP,EA,HAROLD,3/29/2020 12:00,NR,-4.9,146.6,...,ASW,POINT (-2290192.703 9405158.365),POINT (-2203387.024 9360154.711),97778.089519,2020-03-29 12:00:00,NaT,0,0.000000,,0.000000
1,2020089S05147,2020,12,SP,EA,HAROLD,3/29/2020 18:00,NR,-5.3,147.3,...,ASW,POINT (-2203387.024 9360154.711),POINT (-2092516.423 9304572.791),124022.740175,2020-03-29 18:00:00,0 days 06:00:00,6,20670.456696,20670.456696,3445.076116
2,2020089S05147,2020,12,SP,EA,HAROLD,3/30/2020 0:00,NR,-5.8,148.2,...,ASW,POINT (-2092516.423 9304572.791),POINT (-1934317.539 9239768.528),170957.536388,2020-03-30 00:00:00,0 days 06:00:00,6,28492.922731,7822.466035,1303.744339
3,2020089S05147,2020,12,SP,EA,HAROLD,3/30/2020 6:00,NR,-6.4,149.5,...,ASW,POINT (-1934317.539 9239768.528),POINT (-1800627.378 9162886.701),154220.214410,2020-03-30 06:00:00,0 days 06:00:00,6,25703.369068,-2789.553663,-464.925611
4,2020089S05147,2020,12,SP,EA,HAROLD,3/30/2020 12:00,NR,-7.1,150.6,...,ASW,POINT (-1800627.378 9162886.701),POINT (-1656307.772 9087342.978),162895.681179,2020-03-30 12:00:00,0 days 06:00:00,6,27149.280196,1445.911128,240.985188
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
107,,,,,,,,,,,...,,,,,NaT,NaT,0,0.000000,0.000000,0.000000
108,,,,,,,,,,,...,,,,,NaT,NaT,0,0.000000,0.000000,0.000000
109,,,,,,,,,,,...,,,,,NaT,NaT,0,0.000000,0.000000,0.000000
110,,,,,,,,,,,...,,,,,NaT,NaT,0,0.000000,0.000000,0.000000


In [39]:
tc_harold.to_excel('tc_harold.xlsx', sheet_name='Sheet1', index=False)

In [40]:
print(tc_harold.crs) 

EPSG:32759
