In [25]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
from geopy.distance import geodesic
from math import radians, sin, cos, sqrt, atan2

In [32]:
tc_harold = gpd.read_file('TC_Harold_2020.geojson')

In [33]:
tc_harold = gpd.GeoDataFrame(tc_harold, crs="EPSG:4326")  # Assuming WGS84

In [34]:
# Set CRS to WGS84 (EPSG:4326)
tc_harold.set_crs(epsg=4326, inplace=True)

# Convert to UTM zone 59S (EPSG:32759)
gdf_utm = tc_harold.to_crs(epsg=32759)


In [35]:
tc_harold

Unnamed: 0,Serial_Num,Season,Num,Basin,Sub_basin,Name,ISO_time,Nature,Latitude,Longitude,Mean Wind Speed,Mean Central Pressure,Center(s) or Source,geometry
0,2020089S05147,2020,12,SP,EA,HAROLD,3/29/2020 12:00,NR,-4.9,146.6,15,1010,ASW,POINT (146.60000 -4.90000)
1,2020089S05147,2020,12,SP,EA,HAROLD,3/29/2020 18:00,NR,-5.3,147.3,15,1010,ASW,POINT (147.30000 -5.30000)
2,2020089S05147,2020,12,SP,EA,HAROLD,3/30/2020 0:00,NR,-5.8,148.2,15,1010,ASW,POINT (148.20000 -5.80000)
3,2020089S05147,2020,12,SP,EA,HAROLD,3/30/2020 6:00,NR,-6.4,149.5,15,1010,ASW,POINT (149.50000 -6.40000)
4,2020089S05147,2020,12,SP,EA,HAROLD,3/30/2020 12:00,NR,-7.1,150.6,15,1010,ASW,POINT (150.60000 -7.10000)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
107,,,,,,,,,,,,,,
108,,,,,,,,,,,,,,
109,,,,,,,,,,,,,,
110,,,,,,,,,,,,,,


In [36]:
distances = []

In [37]:
# Calculate distances between each point and the next point
tc_harold['next_geometry'] = tc_harold['geometry'].shift(-1)
tc_harold['distance_to_next'] = tc_harold.apply(
    lambda row: row['geometry'].distance(row['next_geometry']) if pd.notnull(row['next_geometry']) else None,
    axis=1
)

# Drop the next_geometry column for a cleaner output
# gdf.drop(columns=['next_geometry'], inplace=True)

print(tc_harold[['ISO_time', 'Mean Wind Speed', 'geometry', 'distance_to_next']])

            ISO_time Mean Wind Speed                    geometry  \
0    3/29/2020 12:00              15  POINT (146.60000 -4.90000)   
1    3/29/2020 18:00              15  POINT (147.30000 -5.30000)   
2     3/30/2020 0:00              15  POINT (148.20000 -5.80000)   
3     3/30/2020 6:00              15  POINT (149.50000 -6.40000)   
4    3/30/2020 12:00              15  POINT (150.60000 -7.10000)   
..               ...             ...                         ...   
107                                                         None   
108                                                         None   
109                                                         None   
110                                                         None   
111                                                         None   

     distance_to_next  
0            0.806226  
1            1.029563  
2            1.431782  
3            1.303840  
4            1.389244  
..                ...  
107            

In [38]:
print(tc_harold.columns)

Index(['Serial_Num', 'Season', 'Num', 'Basin', 'Sub_basin', 'Name', 'ISO_time',
       'Nature', 'Latitude', 'Longitude', 'Mean Wind Speed',
       'Mean Central Pressure', 'Center(s) or Source', 'geometry',
       'next_geometry', 'distance_to_next'],
      dtype='object')


In [39]:
# Create a GeoDataFrame
tc_harold['timestamp'] = pd.to_datetime(tc_harold['ISO_time'])
tc_harold = gpd.GeoDataFrame(tc_harold, geometry='geometry')

In [40]:
tc_harold

Unnamed: 0,Serial_Num,Season,Num,Basin,Sub_basin,Name,ISO_time,Nature,Latitude,Longitude,Mean Wind Speed,Mean Central Pressure,Center(s) or Source,geometry,next_geometry,distance_to_next,timestamp
0,2020089S05147,2020,12,SP,EA,HAROLD,3/29/2020 12:00,NR,-4.9,146.6,15,1010,ASW,POINT (146.60000 -4.90000),POINT (147.30000 -5.30000),0.806226,2020-03-29 12:00:00
1,2020089S05147,2020,12,SP,EA,HAROLD,3/29/2020 18:00,NR,-5.3,147.3,15,1010,ASW,POINT (147.30000 -5.30000),POINT (148.20000 -5.80000),1.029563,2020-03-29 18:00:00
2,2020089S05147,2020,12,SP,EA,HAROLD,3/30/2020 0:00,NR,-5.8,148.2,15,1010,ASW,POINT (148.20000 -5.80000),POINT (149.50000 -6.40000),1.431782,2020-03-30 00:00:00
3,2020089S05147,2020,12,SP,EA,HAROLD,3/30/2020 6:00,NR,-6.4,149.5,15,1010,ASW,POINT (149.50000 -6.40000),POINT (150.60000 -7.10000),1.303840,2020-03-30 06:00:00
4,2020089S05147,2020,12,SP,EA,HAROLD,3/30/2020 12:00,NR,-7.1,150.6,15,1010,ASW,POINT (150.60000 -7.10000),POINT (151.80000 -7.80000),1.389244,2020-03-30 12:00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
107,,,,,,,,,,,,,,,,,NaT
108,,,,,,,,,,,,,,,,,NaT
109,,,,,,,,,,,,,,,,,NaT
110,,,,,,,,,,,,,,,,,NaT


In [41]:
# Work out time difference in seconds
for i in range(len(gdf_utm)):
    for j in range(i + 1, len(gdf_utm)):
        time1 = tc_harold.iloc[i]['timestamp']
        time2 = tc_harold.iloc[j]['timestamp']
        time_diff = abs((time2 - time1).total_seconds()) / 3600  # Time difference

In [42]:
# Create a GeoDataFrame
gdf = pd.DataFrame(tc_harold)

# Convert the 'timestamp' column to datetime
gdf['timestamp'] = pd.to_datetime(gdf['timestamp'], errors='coerce')

# Check for any NaT values in the 'timestamp' column
if gdf['timestamp'].isna().any():
    print("There are missing or invalid datetime values in the 'timestamp' column.")


There are missing or invalid datetime values in the 'timestamp' column.


In [43]:
gdf

Unnamed: 0,Serial_Num,Season,Num,Basin,Sub_basin,Name,ISO_time,Nature,Latitude,Longitude,Mean Wind Speed,Mean Central Pressure,Center(s) or Source,geometry,next_geometry,distance_to_next,timestamp
0,2020089S05147,2020,12,SP,EA,HAROLD,3/29/2020 12:00,NR,-4.9,146.6,15,1010,ASW,POINT (146.60000 -4.90000),POINT (147.30000 -5.30000),0.806226,2020-03-29 12:00:00
1,2020089S05147,2020,12,SP,EA,HAROLD,3/29/2020 18:00,NR,-5.3,147.3,15,1010,ASW,POINT (147.30000 -5.30000),POINT (148.20000 -5.80000),1.029563,2020-03-29 18:00:00
2,2020089S05147,2020,12,SP,EA,HAROLD,3/30/2020 0:00,NR,-5.8,148.2,15,1010,ASW,POINT (148.20000 -5.80000),POINT (149.50000 -6.40000),1.431782,2020-03-30 00:00:00
3,2020089S05147,2020,12,SP,EA,HAROLD,3/30/2020 6:00,NR,-6.4,149.5,15,1010,ASW,POINT (149.50000 -6.40000),POINT (150.60000 -7.10000),1.303840,2020-03-30 06:00:00
4,2020089S05147,2020,12,SP,EA,HAROLD,3/30/2020 12:00,NR,-7.1,150.6,15,1010,ASW,POINT (150.60000 -7.10000),POINT (151.80000 -7.80000),1.389244,2020-03-30 12:00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
107,,,,,,,,,,,,,,,,,NaT
108,,,,,,,,,,,,,,,,,NaT
109,,,,,,,,,,,,,,,,,NaT
110,,,,,,,,,,,,,,,,,NaT


In [44]:
tc_harold['time_difference'] = time_diff

In [45]:
tc_harold

Unnamed: 0,Serial_Num,Season,Num,Basin,Sub_basin,Name,ISO_time,Nature,Latitude,Longitude,Mean Wind Speed,Mean Central Pressure,Center(s) or Source,geometry,next_geometry,distance_to_next,timestamp,time_difference
0,2020089S05147,2020,12,SP,EA,HAROLD,3/29/2020 12:00,NR,-4.9,146.6,15,1010,ASW,POINT (146.60000 -4.90000),POINT (147.30000 -5.30000),0.806226,2020-03-29 12:00:00,
1,2020089S05147,2020,12,SP,EA,HAROLD,3/29/2020 18:00,NR,-5.3,147.3,15,1010,ASW,POINT (147.30000 -5.30000),POINT (148.20000 -5.80000),1.029563,2020-03-29 18:00:00,
2,2020089S05147,2020,12,SP,EA,HAROLD,3/30/2020 0:00,NR,-5.8,148.2,15,1010,ASW,POINT (148.20000 -5.80000),POINT (149.50000 -6.40000),1.431782,2020-03-30 00:00:00,
3,2020089S05147,2020,12,SP,EA,HAROLD,3/30/2020 6:00,NR,-6.4,149.5,15,1010,ASW,POINT (149.50000 -6.40000),POINT (150.60000 -7.10000),1.303840,2020-03-30 06:00:00,
4,2020089S05147,2020,12,SP,EA,HAROLD,3/30/2020 12:00,NR,-7.1,150.6,15,1010,ASW,POINT (150.60000 -7.10000),POINT (151.80000 -7.80000),1.389244,2020-03-30 12:00:00,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
107,,,,,,,,,,,,,,,,,NaT,
108,,,,,,,,,,,,,,,,,NaT,
109,,,,,,,,,,,,,,,,,NaT,
110,,,,,,,,,,,,,,,,,NaT,


In [48]:
tc_harold.to_csv("tc_harold_acceleration.csv")

In [18]:
# tc_harold['acceleration'] = tc_harold['