In [21]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import matplotlib.pyplot as plt

def gps_to_grid(lat, lon, lat_min, lat_max, lon_min, lon_max, grid_size):
    """Convert GPS coordinates to grid cell indices.

    Each axis is divided into cells of width ``grid_size`` whose lower edges
    are ``np.arange(axis_min, axis_max, grid_size)``. The index returned for a
    coordinate is the position of the last edge that is <= that coordinate.

    Args:
        lat, lon: Coordinate(s) to locate; scalars or array-likes accepted by
            ``np.digitize``.
        lat_min, lat_max: Latitude extent of the grid.
        lon_min, lon_max: Longitude extent of the grid.
        grid_size: Cell width, in the same units as the coordinates; must be
            greater than zero.

    Returns:
        ``(lat_idx, lon_idx)``: zero-based cell indices. Coordinates below the
        axis minimum yield -1; coordinates at or beyond the last edge fall
        into the last cell.

    Raises:
        ValueError: If ``grid_size`` is not positive.
    """
    # `not (x > 0)` also rejects NaN, which would otherwise slip past `<= 0`.
    if not grid_size > 0:
        raise ValueError("grid_size must be positive")

    def _cell_edges(lower, upper):
        # np.arange is empty when upper <= lower (a single point, or every
        # point sharing one coordinate). With no edges np.digitize returns 0,
        # which would turn every index into -1, so keep one cell at `lower`.
        edges = np.arange(lower, upper, grid_size)
        return edges if edges.size else np.array([lower], dtype=float)

    lat_idx = np.digitize(lat, _cell_edges(lat_min, lat_max)) - 1
    lon_idx = np.digitize(lon, _cell_edges(lon_min, lon_max)) - 1

    return lat_idx, lon_idx

def bin_time(timestamp, start_time, interval):
    """Return the index of the fixed-width interval containing ``timestamp``.

    Bin 0 begins at ``start_time`` and every bin is ``interval`` wide; the
    result is the elapsed time floor-divided by ``interval``.
    """
    return (timestamp - start_time) // interval

def split_trajectories(df, time_threshold=timedelta(minutes=5)):
    """Label each row with the trajectory segment it belongs to.

    Rows are ordered by ``timestamp`` and re-indexed from 0. A new segment
    begins at every row whose gap to the previous row is strictly greater
    than ``time_threshold``; segment labels are written to a
    ``trajectory_id`` column, counting up from 0.

    Returns the sorted, labelled frame.
    """
    ordered = df.sort_values(by='timestamp').reset_index(drop=True)

    # The first row has no predecessor (its gap is NaT), so it never opens a
    # new segment and the labels start at 0.
    gap_to_previous = ordered['timestamp'].diff()
    opens_new_segment = gap_to_previous > time_threshold

    ordered['trajectory_id'] = opens_new_segment.cumsum()
    return ordered

def process_gps_data(df, grid_size=0.01, time_interval=5):
    """Process GPS data into grid cells and time bins.

    Args:
        df: DataFrame with ``latitude``, ``longitude`` and ``timestamp``
            columns. ``timestamp`` may hold anything ``pd.to_datetime``
            accepts. The input frame is not modified.
        grid_size: Grid cell width, in the same units as the coordinates.
        time_interval: Width of a time bin, in minutes.

    Returns:
        A new DataFrame sorted by timestamp with these columns added:
        ``trajectory_id`` (from ``split_trajectories``), ``grid_x`` (the
        latitude cell index), ``grid_y`` (the longitude cell index) and
        ``time_bin`` (bins counted from the earliest timestamp).
    """
    # .assign builds a new frame, so the caller's DataFrame keeps its
    # original timestamp column instead of being converted in place.
    df = df.assign(timestamp=pd.to_datetime(df['timestamp']))

    # Split trajectories based on time threshold (also sorts and re-indexes)
    df = split_trajectories(df)

    if df.empty:
        # Nothing to bin: there is no extent to build a grid from, so just
        # return the expected columns with no rows.
        for column in ('grid_x', 'grid_y', 'time_bin'):
            df[column] = np.array([], dtype='int64')
        return df

    lat_min, lat_max = df['latitude'].min(), df['latitude'].max()
    lon_min, lon_max = df['longitude'].min(), df['longitude'].max()
    start_time = df['timestamp'].min()
    interval = timedelta(minutes=time_interval)

    # Convert GPS to grid in one vectorized call; a row-wise apply would
    # rebuild the bin-edge arrays once per row.
    # NOTE: grid_x is the latitude index and grid_y the longitude index.
    lat_idx, lon_idx = gps_to_grid(
        df['latitude'].to_numpy(), df['longitude'].to_numpy(),
        lat_min, lat_max, lon_min, lon_max, grid_size
    )
    df['grid_x'] = lat_idx
    df['grid_y'] = lon_idx

    # bin_time is plain arithmetic, so it applies to the whole column at once
    df['time_bin'] = bin_time(df['timestamp'], start_time, interval)

    return df

In [23]:
# Five sample GPS fixes, one [latitude, longitude, timestamp] row each
data = [
    [32.7157, -117.1611, '2025-03-06 08:00:00'],
    [32.9158, -117.1620, '2025-03-06 08:05:00'],
    [32.7160, -117.1650, '2025-03-06 08:20:00'],
    [32.7160, -117.1650, '2025-03-06 08:21:00'],
    [32.7200, -117.1700, '2025-03-06 08:40:00'],
]

# Build the raw frame, then assign grid cells (0.01 wide) and 5-minute time bins
column_names = ['latitude', 'longitude', 'timestamp']
df = pd.DataFrame(data=data, columns=column_names)
processed_df = process_gps_data(df, time_interval=5, grid_size=0.01)

print(processed_df)


   latitude  longitude           timestamp  trajectory_id  grid_x  grid_y  \
0   32.7157  -117.1611 2025-03-06 08:00:00              0       0       0   
1   32.9158  -117.1620 2025-03-06 08:05:00              0      20       0   
2   32.7160  -117.1650 2025-03-06 08:20:00              1       0       0   
3   32.7160  -117.1650 2025-03-06 08:21:00              1       0       0   
4   32.7200  -117.1700 2025-03-06 08:40:00              2       0       0   

   time_bin  
0         0  
1         1  
2         4  
3         4  
4         8  
