In [82]:
#!pip install openpyxl pandas matplotlib seaborn folium

In [83]:
import pandas as pd
import numpy as np
import folium
import os
import re
import math
from pathlib import Path
from datetime import datetime

# Configuration: every tunable value used by the cells below lives here.
CONFIG = {
    'filename': Path('./data/Marty_07_20_2025.xlsx'),
    'timezone': 'US/Central',
    'map_center': {'lat': 27.71357483718376, 'lon': -97.32477530414555},
    'zoom_level': 18,
    'stationary_params': {
        'max_speed_mps': 0,  # Speed ceiling in metres per second; 0 means only exactly-zero computed speed counts as stationary (for scale, 0.5 m/s = 1.8 km/h)
        'min_duration_seconds': 20,  # Minimum duration in seconds
    },
    'path_splitting': {
        'time_gap_minutes': 10
    }
}

def load_and_preprocess_data(filename, timezone='US/Central'):
    """Load an Excel sheet of GPS samples and normalise timestamps and coordinates.

    Args:
        filename: Path of the workbook, passed straight to ``pd.read_excel``.
        timezone: Name of the timezone the ``datetime`` column is converted to.

    Returns:
        A ``(df, nan_indices)`` tuple. ``df`` has stripped column names, a
        timezone-aware ``datetime`` column and numeric coordinate columns.
        ``nan_indices`` lists the index labels of rows whose latitude or
        longitude is missing or could not be parsed as a number.
    """
    print(f"Loading data from: {filename}")

    # skiprows=[1, 2] drops the two file lines directly after the header row
    # (presumably units/metadata rows -- confirm against the logger export).
    df = pd.read_excel(filename, skiprows=[1, 2])
    df.columns = df.columns.str.strip()

    # Timestamps are localised as UTC first, then converted to the target zone.
    df['datetime'] = pd.to_datetime(df['TIMESTAMP'])
    df['datetime'] = df['datetime'].dt.tz_localize('UTC').dt.tz_convert(timezone)

    # Unparseable coordinate values become NaN instead of raising.
    coord_cols = ['Full_DecLatitude', 'Full_DecLongitude']
    for col in coord_cols:
        df[col] = pd.to_numeric(df[col], errors='coerce')

    # Only the coordinate columns decide whether a row is a "NaN location";
    # a gap in an unrelated column must not be reported as one.
    missing_coord = df[coord_cols].isna().any(axis=1)
    nan_indices = df[missing_coord].index.tolist()

    print(f"Total rows: {len(df)}")
    print(f"NaN coordinate indices: {len(nan_indices)} locations")

    return df, nan_indices

# Load the workbook named in CONFIG; nan_indices holds the row labels
# that load_and_preprocess_data reports as NaN locations.
df, nan_indices = load_and_preprocess_data(CONFIG['filename'], CONFIG['timezone'])

Loading data from: data\Marty_07_20_2025.xlsx
Total rows: 12068
NaN coordinate indices: 25 locations
Total rows: 12068
NaN coordinate indices: 25 locations


In [84]:
def _month_number(month_name):
    """Return the month number for a full or abbreviated month name, else None."""
    for fmt in ("%B", "%b"):
        try:
            return datetime.strptime(month_name, fmt).month
        except ValueError:
            continue
    return None


def extract_date_from_filename(filename):
    """Derive an output-folder name from a date embedded in ``filename``.

    Two patterns are tried, in order, against ``str(filename)``:

    1. ``MM_DD_YYYY`` -> ``"YYYY-MM-DD"``.
    2. ``<MonthName><YY or YYYY>`` (e.g. ``July2025``, ``Jul25``) ->
       ``"YYYY-MM-<MonthName>"``; no day is available, so the month name as
       written is used as the last component.

    Every letters+digits token is considered for pattern 2, so an earlier
    token that is not a month (e.g. ``run12``) no longer hides a valid month
    token later in the string.

    Returns:
        The folder name; ``"UnknownDate"`` when letters+digits tokens exist
        but none starts with a recognisable month name; today's date
        (``YYYY-MM-DD``) when neither pattern occurs at all.
    """
    filename_str = str(filename)

    # Pattern 1: MM_DD_YYYY format
    numeric_match = re.search(r'(\d{2})_(\d{2})_(\d{4})', filename_str)
    if numeric_match:
        month_str, day_str, year_str = numeric_match.groups()
        return f"{year_str}-{month_str}-{day_str}"

    # Pattern 2: MonthYear format -- scan all candidates, not just the first.
    saw_candidate = False
    for candidate in re.finditer(r'([A-Za-z]+)(\d{2,4})', filename_str):
        saw_candidate = True
        month_str, year_str = candidate.groups()
        month_num = _month_number(month_str)
        if month_num is None:
            continue
        if len(year_str) == 2:
            # Two-digit years are taken to be in the 2000s.
            year_str = "20" + year_str
        return f"{year_str}-{month_num:02d}-{month_str}"

    if saw_candidate:
        return "UnknownDate"

    return datetime.now().strftime("%Y-%m-%d")

def setup_output_folder(filename):
    """Prepare ``individual_paths/<date>`` for a fresh run and return its path.

    The date part comes from ``extract_date_from_filename(filename)``. The
    folder is created if missing, and any ``*.html`` files already inside it
    are deleted. The returned value is the folder path as a string.
    """
    # Anchor on the script's directory when __file__ is defined; otherwise
    # (e.g. inside a notebook kernel) fall back to the working directory.
    if '__file__' in globals():
        root = Path(__file__).parent
    else:
        root = Path.cwd()

    target = root / 'individual_paths' / extract_date_from_filename(filename)
    target.mkdir(parents=True, exist_ok=True)

    # Remove HTML output left over from earlier runs.
    for stale_html in target.glob('*.html'):
        stale_html.unlink()

    print(f"Output folder: {target}")
    print(f"Absolute path: {target.absolute()}")
    return str(target)

# Create (and empty of *.html files) the output folder for this input file;
# output_folder is the folder path as a string.
output_folder = setup_output_folder(CONFIG['filename'])

Output folder: c:\Users\spagadala1\Documents\Heatwave\individual_paths\2025-07-20
Absolute path: c:\Users\spagadala1\Documents\Heatwave\individual_paths\2025-07-20


In [85]:
def split_data_at_nan(df, nan_indices):
    """Cut ``df`` into separate paths wherever a NaN row was recorded.

    ``nan_indices`` is used as a list of row positions. Each stretch between
    consecutive positions becomes a candidate path; rows with missing
    coordinates are dropped from it, and it is kept only if more than one
    point remains. With no NaN positions the whole (cleaned) frame is
    returned as a single path, or an empty list if nothing is left.
    """
    coord_cols = ['Full_DecLatitude', 'Full_DecLongitude']

    if not nan_indices:
        cleaned = df.dropna(subset=coord_cols).copy()
        return [] if len(cleaned) == 0 else [cleaned]

    print(f"Splitting at {len(nan_indices)} NaN locations")

    boundaries = [0] + nan_indices + [len(df)]
    paths = []

    for lower, upper in zip(boundaries[:-1], boundaries[1:]):
        # A boundary that is itself a NaN row is excluded from the segment.
        first = lower + 1 if lower in nan_indices else lower
        if first >= upper:
            continue

        piece = df.iloc[first:upper].copy().dropna(subset=coord_cols)
        if len(piece) <= 1:
            continue

        piece = piece.reset_index(drop=True)
        paths.append(piece)
        print(f"Created path with {len(piece)} points")

    return paths

def split_data_by_time_gaps(df, gap_threshold_minutes=10):
    """Cut ``df`` into paths wherever consecutive samples are far apart in time.

    Rows without coordinates are dropped and the rest sorted by ``datetime``.
    A new path starts at every row whose gap to the previous row exceeds
    ``gap_threshold_minutes``. Resulting paths with a single point are
    discarded; the helper ``time_diff`` column is never returned.
    """
    coord_cols = ['Full_DecLatitude', 'Full_DecLongitude']
    ordered = df.dropna(subset=coord_cols).copy()
    ordered = ordered.sort_values('datetime').reset_index(drop=True)

    n_rows = len(ordered)
    if n_rows < 2:
        return [] if n_rows == 0 else [ordered]

    ordered['time_diff'] = ordered['datetime'].diff().dt.total_seconds()
    exceeds_gap = ordered['time_diff'] > gap_threshold_minutes * 60
    break_indices = ordered[exceeds_gap].index.tolist()

    if len(break_indices) == 0:
        return [ordered.drop(columns='time_diff')]

    print(f"Found {len(break_indices)} time-based break points")

    # Consecutive edges delimit each segment; the last one runs to the end.
    edges = [0] + break_indices + [n_rows]
    paths = []
    for lo, hi in zip(edges, edges[1:]):
        if lo >= hi:
            continue
        chunk = ordered.iloc[lo:hi].copy().drop(columns='time_diff')
        if len(chunk) > 1:
            paths.append(chunk.reset_index(drop=True))

    return paths

# Build candidate path sets with both splitting strategies.
paths_nan = split_data_at_nan(df, nan_indices)
paths_time = split_data_by_time_gaps(df, CONFIG['path_splitting']['time_gap_minutes'])

# Keep whichever strategy yields more paths; ties go to the NaN-based split.
if len(paths_time) > len(paths_nan):
    paths, split_method = paths_time, "time-based"
else:
    paths, split_method = paths_nan, "NaN-based"

print(f"\nUsing {split_method} splitting: {len(paths)} paths found")
for i, path in enumerate(paths):
    # Path duration in minutes, from earliest to latest sample.
    duration = (path['datetime'].max() - path['datetime'].min()).total_seconds() / 60
    print(f"Path {i+1}: {len(path)} points, {duration:.1f} min")

Splitting at 25 NaN locations
Created path with 2426 points
Created path with 2136 points
Created path with 1955 points
Created path with 1815 points
Created path with 1854 points
Created path with 1857 points
Found 5 time-based break points

Using NaN-based splitting: 6 paths found
Path 1: 2426 points, 80.8 min
Path 2: 2136 points, 71.2 min
Path 3: 1955 points, 65.1 min
Path 4: 1815 points, 60.5 min
Path 5: 1854 points, 61.8 min
Path 6: 1857 points, 61.9 min


In [86]:
def calculate_speed(lat1, lon1, lat2, lon2, time_diff_seconds):
    """Return the average speed in metres per second between two GPS fixes.

    Distance is the great-circle (haversine) distance on a sphere of radius
    6,371,000 m; coordinates are in decimal degrees.

    Args:
        lat1, lon1: First fix.
        lat2, lon2: Second fix.
        time_diff_seconds: Elapsed time between the fixes.

    Returns:
        ``0`` when ``time_diff_seconds`` is zero or negative, otherwise
        distance divided by elapsed time. NaN inputs propagate to a NaN result.
    """
    if time_diff_seconds <= 0:
        return 0

    R = 6371000  # Earth's radius in meters
    lat1_rad, lon1_rad, lat2_rad, lon2_rad = map(math.radians, [lat1, lon1, lat2, lon2])

    dlat = lat2_rad - lat1_rad
    dlon = lon2_rad - lon1_rad

    a = (math.sin(dlat / 2) ** 2 +
         math.cos(lat1_rad) * math.cos(lat2_rad) * math.sin(dlon / 2) ** 2)
    # Rounding can push `a` a hair above 1 for (near-)antipodal points, which
    # would make sqrt/asin raise a domain error. The comparison is False for
    # NaN, so NaN inputs still propagate unchanged.
    if a > 1.0:
        a = 1.0
    c = 2 * math.asin(math.sqrt(a))
    distance_meters = R * c

    return distance_meters / time_diff_seconds

def find_stationary_locations_by_speed(path_df, max_speed_mps=0, min_duration_seconds=40,
                                       group_size_threshold=25):
    """Find stops in a path by grouping consecutive low-speed samples.

    Speed is computed between each sample and its predecessor with
    ``calculate_speed`` (the first sample gets speed 0.0). Runs of consecutive
    samples with ``speed <= max_speed_mps`` are collected; a run is kept only
    if it has MORE than ``group_size_threshold`` samples and spans at least
    ``min_duration_seconds``.

    Args:
        path_df: Frame with ``Full_DecLatitude``, ``Full_DecLongitude`` and
            ``datetime`` columns. It is copied, not modified.
        max_speed_mps: Highest speed still treated as stationary.
        min_duration_seconds: Minimum time span of a kept run.
        group_size_threshold: A run must contain more than this many samples.
            Paths shorter than this are skipped outright. Defaults to 25.

    Returns:
        A list of dicts, one per stop, with keys ``latitude``, ``longitude``
        (means over the run), ``location_number`` (1-based), ``duration``
        (seconds), ``start_time``, ``end_time``, ``point_count``, ``bearing``
        (from the mean position to the first sample after the run, or ``None``
        when the run ends the path), ``avg_speed_mps``, ``max_speed_mps``,
        ``avg_speed_kmh``, ``max_speed_kmh`` and ``group_indices``.
    """
    if len(path_df) < group_size_threshold:
        return []
    path_df = path_df.copy().reset_index(drop=True)

    # Pull the columns out once; per-row .iloc lookups build a full row Series
    # on every access and dominate the runtime on paths with thousands of rows.
    lats = path_df['Full_DecLatitude'].tolist()
    lons = path_df['Full_DecLongitude'].tolist()
    times = path_df['datetime'].tolist()
    n_points = len(path_df)

    speeds = [0.0] * n_points
    for i in range(1, n_points):
        elapsed = (times[i] - times[i - 1]).total_seconds()
        speeds[i] = float(calculate_speed(lats[i - 1], lons[i - 1], lats[i], lons[i], elapsed))
    path_df['speed_mps'] = speeds

    # Collect runs of consecutive stationary samples.
    zero_speed_groups = []
    current_group = []
    for i, speed in enumerate(speeds):
        if speed <= max_speed_mps:
            current_group.append(i)
        else:
            if len(current_group) > group_size_threshold:
                zero_speed_groups.append(current_group)
            current_group = []
    if len(current_group) > group_size_threshold:
        zero_speed_groups.append(current_group)

    print(f"Found {len(zero_speed_groups)} groups with >{group_size_threshold} consecutive zero-speed points")
    for i, group in enumerate(zero_speed_groups):
        print(f"  Group {i+1}: {len(group)} points (indices {group[0]}-{group[-1]})")

    stationary_locations = []
    location_counter = 1
    for group in zero_speed_groups:
        start_idx = group[0]
        end_idx = group[-1]
        segment_duration = (times[end_idx] - times[start_idx]).total_seconds()
        if segment_duration < min_duration_seconds:
            continue

        segment_data = path_df.iloc[start_idx:end_idx + 1]
        avg_lat = segment_data['Full_DecLatitude'].mean()
        avg_lon = segment_data['Full_DecLongitude'].mean()

        # Exit bearing points from the stop's mean position to the first
        # sample after the run; it stays None when the run ends the path.
        bearing = None
        next_moving_idx = end_idx + 1
        if next_moving_idx < n_points:
            bearing = calculate_bearing(avg_lat, avg_lon,
                                        lats[next_moving_idx], lons[next_moving_idx])

        avg_speed = segment_data['speed_mps'].mean()
        max_speed = segment_data['speed_mps'].max()
        stationary_locations.append({
            'latitude': avg_lat,
            'longitude': avg_lon,
            'location_number': location_counter,
            'duration': segment_duration,
            'start_time': times[start_idx],
            'end_time': times[end_idx],
            'point_count': end_idx - start_idx + 1,
            'bearing': bearing,
            'avg_speed_mps': avg_speed,
            'max_speed_mps': max_speed,
            'avg_speed_kmh': avg_speed * 3.6,
            'max_speed_kmh': max_speed * 3.6,
            'group_indices': group
        })
        location_counter += 1
    return stationary_locations

def calculate_bearing(lat1, lon1, lat2, lon2):
    """Return the initial compass bearing from point 1 to point 2.

    Inputs are decimal degrees; the result is in degrees, normalised to the
    range [0, 360) with 0 = north and 90 = east.
    """
    phi1, lam1, phi2, lam2 = (math.radians(v) for v in (lat1, lon1, lat2, lon2))
    delta_lam = lam2 - lam1

    east = math.sin(delta_lam) * math.cos(phi2)
    north = math.cos(phi1) * math.sin(phi2) - math.sin(phi1) * math.cos(phi2) * math.cos(delta_lam)

    heading_deg = math.degrees(math.atan2(east, north))
    # atan2 yields (-180, 180]; shift into the compass range.
    return (heading_deg + 360) % 360

def calculate_distance(lat1, lon1, lat2, lon2):
    """Return the straight-line (Euclidean) distance between two coordinate pairs.

    The result is in the same units as the inputs; no spherical correction
    is applied.
    """
    lat_gap = lat1 - lat2
    lon_gap = lon1 - lon2
    squared_sum = lat_gap ** 2 + lon_gap ** 2
    return squared_sum ** 0.5

# Report the detection settings, then list the stops found on each path.
print("=== STATIONARY LOCATION DETECTION (GROUPING METHOD) ===")
print(f"Max speed threshold: {CONFIG['stationary_params']['max_speed_mps']} m/s")
print(f"Minimum duration: {CONFIG['stationary_params']['min_duration_seconds']} seconds")
print("Method: Group consecutive zero-speed rows, require >25 entries per group")

for i, path in enumerate(paths):
    stops = find_stationary_locations_by_speed(
        path,
        CONFIG['stationary_params']['max_speed_mps'],
        CONFIG['stationary_params']['min_duration_seconds']
    )
    duration = (path['datetime'].max() - path['datetime'].min()).total_seconds() / 60
    print(f"\nPath {i+1}: {len(path)} points, {len(stops)} stops, {duration:.1f} min")
    for stop in stops:
        # Compare against None explicitly: a bearing of 0.0 (due north) is a
        # valid value and must still be printed.
        bearing_text = f", exit: {stop['bearing']:.1f}°" if stop['bearing'] is not None else ""
        print(f"  Stop {stop['location_number']}: {stop['point_count']} points, "
              f"{stop['duration']:.1f}s ({stop['duration']/60:.1f} min), "
              f"avg speed: {stop['avg_speed_kmh']:.2f} km/h{bearing_text}")

=== STATIONARY LOCATION DETECTION (GROUPING METHOD) ===
Max speed threshold: 0 m/s
Minimum duration: 20 seconds
Method: Group consecutive zero-speed rows, require >25 entries per group
Found 17 groups with >25 consecutive zero-speed points
  Group 1: 49 points (indices 16-64)
  Group 2: 26 points (indices 93-118)
  Group 3: 40 points (indices 583-622)
  Group 4: 26 points (indices 653-678)
  Group 5: 40 points (indices 821-860)
  Group 6: 28 points (indices 873-900)
  Group 7: 39 points (indices 1055-1093)
  Group 8: 75 points (indices 1187-1261)
  Group 9: 31 points (indices 1450-1480)
  Group 10: 29 points (indices 1657-1685)
  Group 11: 39 points (indices 1709-1747)
  Group 12: 38 points (indices 1851-1888)
  Group 13: 47 points (indices 1914-1960)
  Group 14: 42 points (indices 2011-2052)
  Group 15: 34 points (indices 2069-2102)
  Group 16: 41 points (indices 2222-2262)
  Group 17: 50 points (indices 2300-2349)

Path 1: 2426 points, 17 stops, 80.8 min
  Stop 1: 49 points, 96.0s (1

**Latitude** and **Longitude** columns (such as `Full_DecLatitude` and `Full_DecLongitude`) are typically used to find stationary points in GPS tracking data.

A stationary point in this context means a location where the device/person remains in the same place (within a small threshold) for a period of time. This is usually detected by:

- Calculating the speed between consecutive latitude/longitude points.
- If the speed is close to zero (below a threshold) for a sustained period, those points are considered stationary.

So, for movement or stop detection, use the latitude and longitude columns.