In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from geopy.distance import geodesic
import numpy as np
from math import radians, sin, cos, sqrt, atan2

%matplotlib inline

In [2]:
df = pd.read_csv('../data/gps.csv')

In [3]:
df['DateTime_TimeStamp']=pd.to_datetime(df['TimeStamp'], unit='ms')

In [4]:
def date_time_formating(series):
    return pd.to_datetime(series, format='%d/%m/%Y %I:%M:%S %p')

df['Date'] = date_time_formating(df['Date'])
df['DateTime'] = date_time_formating(df['DateTime'])
df['DeviceTime'] = date_time_formating(df['DeviceTime'])
df['DateTime_TimeStamp'] = date_time_formating(df['DateTime_TimeStamp'])

In [5]:
len(df)

166593

In [5]:
df.drop(columns=['Id'], axis=1, inplace=True)

In [6]:
df.drop_duplicates(ignore_index=True, inplace=True)

In [7]:
len(df)

29843

In [8]:
gps_data = df[['Longitude','Latitude','DeviceTime','DateTime_TimeStamp']]
gps_data.head(3)

Unnamed: 0,Longitude,Latitude,DeviceTime,DateTime_TimeStamp
0,80.5559,7.16898,2023-09-26 07:36:54,2023-09-26 02:06:52.000
1,80.55589,7.16899,2023-09-26 07:37:05,2023-09-26 02:07:05.000
2,80.55589,7.16899,2023-09-26 07:37:08,2023-09-26 02:07:07.676


In [9]:
counts = gps_data['DateTime_TimeStamp'].value_counts()

outliers = counts[counts > 1]
outliers

Series([], Name: count, dtype: int64)

In [49]:
def calculate_speed(df):
    distances = []
    times = []

    for i in range(1, len(df)):
        point_1 = (df.iloc[i-1]['Latitude'], df.iloc[i-1]['Longitude'])
        point_2 = (df.iloc[i]['Latitude'], df.iloc[i]['Longitude'])
        distance = geodesic(point_1, point_2).meters
        time = (df.iloc[i]['DateTime_TimeStamp'] - df.iloc[i-1]['DateTime_TimeStamp']).total_seconds()

        distances.append(distance)
        times.append(time)

    times = np.array(times)
    speeds = np.array(distances)/times

    df['Speed'] = 0.0
    df['Distance'] = 0.0
    df['TimeDiff'] = 0.0

    df['Speed'].iloc[1:] = speeds * 3.6
    df['Distance'].iloc[1:]  = distances
    df['TimeDiff'].iloc[1:]  = times
        
    return df
    
new_df = calculate_speed(gps_data)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Speed'] = 0.0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Distance'] = 0.0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['TimeDiff'] = 0.0
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the def

In [50]:
new_df.head()

Unnamed: 0,Longitude,Latitude,DeviceTime,DateTime_TimeStamp,Speed,Distance,TimeDiff
0,80.5559,7.16898,2023-09-26 07:36:54,2023-09-26 02:06:52.000,0.0,0.0,0.0
1,80.55589,7.16899,2023-09-26 07:37:05,2023-09-26 02:07:05.000,0.432841,1.563036,13.0
2,80.55589,7.16899,2023-09-26 07:37:08,2023-09-26 02:07:07.676,0.0,0.0,2.676
3,80.55589,7.16899,2023-09-26 07:37:08,2023-09-26 02:07:08.176,0.0,0.0,0.5
4,80.55586,7.16899,2023-09-26 07:37:09,2023-09-26 02:07:09.000,14.477111,3.31365,0.824


In [11]:
def detect_anomalies(df, speed_threshold=120, distance_threshold=1000):
    # Detect anomalies
    anomalies = df[(df['Speed'] > speed_threshold) | (df['Distance'] > distance_threshold)]
    non_anomalous_data = df[(df['Speed'] <= speed_threshold) & (df['Distance'] <= distance_threshold)]

    return non_anomalous_data, anomalies

def remove_anomalies(df, speed_threshold=120, distance_threshold=1000):
    non_anomalous_data, anomalies = detect_anomalies(df, speed_threshold, distance_threshold)
    return non_anomalous_data


cleaned_df = remove_anomalies(new_df)

# View the anomalies if needed
_, anomalies = detect_anomalies(new_df)
print("Detected Anomalies:")
anomalies

Detected Anomalies:


Unnamed: 0,Longitude,Latitude,DeviceTime,DateTime_TimeStamp,Speed,Distance,TimeDiff
4195,80.57464,7.15842,2023-09-26 08:48:09,2023-09-26 03:18:08.779,125.758552,18.130191,0.519
5861,80.57249,7.16236,2023-09-26 09:17:07,2023-09-26 03:47:06.637,7366.974369,1303.545187,0.637
5862,80.56648,7.17248,2023-09-26 09:17:08,2023-09-26 03:47:08.111,3178.092866,1301.252468,1.474
8648,80.57779,7.17844,2023-09-26 10:04:08,2023-09-26 04:34:08.080,145.298819,20.180392,0.5
12620,80.58533,7.1816,2023-09-26 11:14:26,2023-09-26 05:44:26.153,149.925187,833.001,20.002
12622,80.58858,7.18414,2023-09-26 11:14:46,2023-09-26 05:44:46.634,130.25624,36.182289,1.0
14200,80.59088,7.1867,2023-09-26 11:41:07,2023-09-26 06:11:06.978,2710.721588,736.412698,0.978
14201,80.59753,7.18714,2023-09-26 11:41:08,2023-09-26 06:11:08.128,2304.335192,736.107075,1.15
15183,80.59876,7.19995,2023-09-26 13:44:21,2023-09-26 08:14:21.000,0.824783,1469.259349,6413.0
15193,80.59871,7.19991,2023-09-26 13:47:07,2023-09-26 08:17:06.804,234.931363,47.312566,0.725


In [70]:
def detect_stay_points(df, distance_threshold=20, time_threshold=30):
    stay_points = []
    i = 0

    while i < len(df):
        j = i + 1
        while j < len(df):
            point1 = (df.iloc[i]['Latitude'], df.iloc[i]['Longitude'])
            point2 = (df.iloc[j]['Latitude'], df.iloc[j]['Longitude'])
            distance = geodesic(point1, point2).meters
            
            if distance > distance_threshold:
                break
            j += 1

        time_diff = (df.iloc[j-1]['DateTime_TimeStamp'] - df.iloc[i]['DateTime_TimeStamp']).total_seconds()
        if time_diff > time_threshold:
            centroid_lat = df.iloc[i:j]['Latitude'].mean()
            centroid_lon = df.iloc[i:j]['Longitude'].mean()
            stay_points.append((centroid_lat, centroid_lon, time_diff, df.iloc[i]['DateTime_TimeStamp'], df.iloc[j-1]['DateTime_TimeStamp']))
        
        i = j

    return stay_points

In [71]:
stay_points = detect_stay_points(cleaned_df)

In [72]:
import folium

def visualize_stay_points_and_path(df, stay_points):
    # Center the map around the first point in the dataframe
    center_lat = df.iloc[0]['Latitude']
    center_lon = df.iloc[0]['Longitude']
    mymap = folium.Map(location=[center_lat, center_lon], zoom_start=14)

    # Add the path to the map
    path = list(zip(df['Latitude'], df['Longitude']))
    folium.PolyLine(path, color="blue", weight=2.5, opacity=0.7).add_to(mymap)

    # Add stay points to the map
    for lat, lon, duration, start_time, end_time in stay_points:
        folium.Marker(
            location=[lat, lon],
            popup=f"Stayed from {start_time} to {end_time} for {duration:.2f} seconds",
            icon=folium.Icon(color='red', icon='info-sign')
        ).add_to(mymap)

    # Save the map to an HTML file
    mymap.save("stay_points_and_path_map.html")
    
    return mymap

# Example usage:
# Assuming 'df' is your DataFrame with 'Latitude', 'Longitude', and 'DateTime_TimeStamp' columns



# Step 2: Visualize the path and stay points on a map
map_with_path_and_stays = visualize_stay_points_and_path(cleaned_df, stay_points)

# If running in a Jupyter Notebook, display the map directly
map_with_path_and_stays

In [73]:
api_key = 'AIzaSyC3rnV7e1vSIedcUAhxRF5LrxDQHEezORA'

In [74]:
import requests
import logging

def snap_to_road(gps_coordinates, api_key, max_points=100):
    url = 'https://roads.googleapis.com/v1/snapToRoads'
    snapped_coords = []

    for i in range(0, len(gps_coordinates), max_points):
        batch = gps_coordinates[i:i + max_points]
        params = {
            'path': '|'.join([f"{lat},{lon}" for lat, lon in batch]),
            'interpolate': 'true',
            'key': api_key
        }
        try:
            response = requests.get(url, params=params)
            response.raise_for_status()  # Raise an HTTPError for bad responses
            if response.content:
                snapped_points = response.json().get('snappedPoints', [])
                snapped_coords.extend([(point['location']['latitude'], point['location']['longitude']) for point in snapped_points])
            else:
                logging.error(f"Empty response content from the API for batch: {batch}")
        except requests.exceptions.RequestException as e:
            logging.error(f"Request failed: {e}")
            continue
        except ValueError as e:
            logging.error(f"JSON decoding failed: {e}")
            continue

    return snapped_coords


def calculate_road_distance(snapped_coords):
    total_distance = 0.0
    for i in range(1, len(snapped_coords)):
        point1 = snapped_coords[i-1]
        point2 = snapped_coords[i]
        distance = geodesic(point1, point2).meters
        total_distance += distance
    return total_distance

In [75]:
import pandas as pd
from geopy.distance import geodesic

def segment_stay_and_move_points(df, distance_threshold=20, time_threshold=30):
    stay_points = []
    moving_segments = []
    i = 0
    previous_end_index = 0

    while i < len(df):
        j = i + 1
        while j < len(df):
            point1 = (df.iloc[i]['Latitude'], df.iloc[i]['Longitude'])
            point2 = (df.iloc[j]['Latitude'], df.iloc[j]['Longitude'])
            distance = geodesic(point1, point2).meters
            
            if distance > distance_threshold:
                break
            j += 1

        time_diff = (df.iloc[j-1]['DateTime_TimeStamp'] - df.iloc[i]['DateTime_TimeStamp']).total_seconds()
        if time_diff > time_threshold:
            # Calculate the centroid of the stay points
            centroid_lat = df.iloc[i:j]['Latitude'].mean()
            centroid_lon = df.iloc[i:j]['Longitude'].mean()
            stay_points.append((centroid_lat, centroid_lon, time_diff, df.iloc[i]['DateTime_TimeStamp'], df.iloc[j-1]['DateTime_TimeStamp']))

            # Append the segment before the current stay point as a moving segment
            if previous_end_index < i:
                moving_segment = df.iloc[previous_end_index:i]
                moving_segments.append(moving_segment)
            
            # Update the previous end index
            previous_end_index = j
        
        i = j

    # Append the last moving segment after the final stay point
    if previous_end_index < len(df):
        moving_segment = df.iloc[previous_end_index:]
        moving_segments.append(moving_segment)

    return stay_points, moving_segments

def visualize_stay_points_and_moving_segments(stay_points, moving_segments):
    # Initialize the map centered around the first stay point or the first moving segment
    if stay_points:
        center_lat = stay_points[0][0]
        center_lon = stay_points[0][1]
    elif moving_segments:
        center_lat = moving_segments[0]['Latitude'].iloc[0]
        center_lon = moving_segments[0]['Longitude'].iloc[0]
    else:
        raise ValueError("No data to plot")

    mymap = folium.Map(location=[center_lat, center_lon], zoom_start=14)

    # Add moving segments to the map
    for segment in moving_segments:
        path = list(zip(segment['Latitude'], segment['Longitude']))
        folium.PolyLine(path, color="blue", weight=2.5, opacity=0.7).add_to(mymap)

    # Add stay points to the map
    for lat, lon, duration, start_time, end_time in stay_points:
        folium.Marker(
            location=[lat, lon],
            popup=f"Stayed from {start_time} to {end_time} for {duration:.2f} seconds",
            icon=folium.Icon(color='red', icon='info-sign')
        ).add_to(mymap)

    # Save the map to an HTML file
    mymap.save("stay_points_and_moving_segments_map.html")
    
    return mymap

# Example usage:
# Assuming 'df' is your DataFrame with 'Latitude', 'Longitude', and 'DateTime_TimeStamp' columns

# Segment the data into stay points and moving segments
stay_points, moving_segments = segment_stay_and_move_points(cleaned_df)

# Visualize the stay points and moving segments on a map
map_with_stays_and_movements = visualize_stay_points_and_moving_segments(stay_points, moving_segments)

# If running in a Jupyter Notebook, display the map directly
map_with_stays_and_movements

In [86]:
import requests
from geopy.distance import geodesic
import logging

# Set up logging
# logging.basicConfig(level=logging.INFO)

def snap_to_road(gps_coordinates, api_key, max_points=100):
    url = 'https://roads.googleapis.com/v1/snapToRoads'
    snapped_coords = []

    # Process the coordinates in batches of 'max_points' to avoid exceeding the API limit
    for i in range(0, len(gps_coordinates), max_points):
        batch = gps_coordinates[i:i + max_points]
        params = {
            'path': '|'.join([f"{lat},{lon}" for lat, lon in batch]),
            'interpolate': 'true',
            'key': api_key
        }

        try:
            response = requests.get(url, params=params)
            response.raise_for_status()  # Check for HTTP errors

            # Log raw API response for debugging
            # logging.info(f"API response for batch {i // max_points + 1}: {response.text}")

            # Get snapped points from the response
            snapped_points = response.json().get('snappedPoints', [])
            snapped_coords.extend([(point['location']['latitude'], point['location']['longitude']) for point in snapped_points])

        except requests.exceptions.RequestException as e:
            logging.error(f"Request to Roads API failed: {e}")
            continue
        except ValueError as e:
            logging.error(f"JSON decoding failed: {e}")
            continue

    return snapped_coords

def calculate_road_distance(snapped_coords):
    if not snapped_coords:
        # logging.error("No snapped coordinates to calculate distance.")
        return 0.0

    total_distance = 0.0
    for i in range(1, len(snapped_coords)):
        point1 = snapped_coords[i-1]
        point2 = snapped_coords[i]
        distance = geodesic(point1, point2).meters
        total_distance += distance
    return total_distance

def calculate_segment_distance(segment, api_key):
    # Extract GPS coordinates from the segment
    gps_coordinates = list(zip(segment['Latitude'], segment['Longitude']))
    # logging.info(f"Original GPS Coordinates: {gps_coordinates[:5]}...")  # Print first 5 points for brevity

    # Snap the segment's GPS points to the road network in batches
    snapped_coords = snap_to_road(gps_coordinates, api_key)
    # logging.info(f"Snapped Coordinates: {snapped_coords[:5]}...")  # Print first 5 snapped points for brevity

    if not snapped_coords:
        # logging.error("No snapped coordinates returned, unable to calculate road distance.")
        return 0.0

    # Calculate the road distance using the snapped points
    road_distance = calculate_road_distance(snapped_coords)
    
    return road_distance

# Example usage with the API key and the selected segment

segment = moving_segments[53]  # Assuming 'moving_segments[15]' is the segment of interest

road_distance = calculate_segment_distance(segment, api_key)

print(f"Total road distance for the segment: {road_distance:.2f} meters")


Total road distance for the segment: 16.65 meters


In [87]:
import folium

def visualize_single_segment(segment):
    # Check if the segment is not empty
    if segment.empty:
        raise ValueError("The segment is empty and cannot be visualized.")
    
    # Center the map on the first point of the segment
    center_lat = segment['Latitude'].iloc[0]
    center_lon = segment['Longitude'].iloc[0]
    mymap = folium.Map(location=[center_lat, center_lon], zoom_start=14)

    # Plot the path of the segment
    path = list(zip(segment['Latitude'], segment['Longitude']))
    folium.PolyLine(path, color="blue", weight=2.5, opacity=0.7).add_to(mymap)

    # Add markers for the start and end points of the segment
    folium.Marker(
        location=[segment['Latitude'].iloc[0], segment['Longitude'].iloc[0]],
        popup="Start",
        icon=folium.Icon(color='green')
    ).add_to(mymap)
    
    folium.Marker(
        location=[segment['Latitude'].iloc[-1], segment['Longitude'].iloc[-1]],
        popup="End",
        icon=folium.Icon(color='red')
    ).add_to(mymap)

    # Save the map to an HTML file
    mymap.save("single_moving_segment_map.html")
    
    return mymap

# Example usage:
# Assuming 'segment' is a DataFrame containing 'Latitude', 'Longitude', and 'DateTime_TimeStamp' for the selected segment

# Visualize the specific segment
map_with_single_segment = visualize_single_segment(moving_segments[53])

# If running in a Jupyter Notebook, display the map directly
map_with_single_segment
