# In [21]:
import pandas as pd
import math
from typing import List, Tuple

class StoppageDetector:
    """Group consecutive GPS fixes that stay close to an anchor point.

    A "stoppage" is a run of consecutive rows whose positions all lie within
    ``distance_range`` of the first row of that run (the anchor).
    """

    def __init__(self, distance_range: float):
        """Store the stoppage radius.

        Args:
            distance_range: Maximum distance from the anchor point for a fix
                to count as part of the same stoppage. It is compared against
                the result of :meth:`distance`, which is in kilometres.
        """
        self.distance_range = distance_range

    def distance(self, lat1, lon1, lat2, lon2):
        """Return the great-circle distance in km between two GPS points.

        Uses the Haversine formula. All four arguments are in degrees.
        """
        R = 6371  # Earth radius in km
        lat1, lon1, lat2, lon2 = map(math.radians, [lat1, lon1, lat2, lon2])
        dlat = lat2 - lat1
        dlon = lon2 - lon1
        a = math.sin(dlat / 2)**2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2)**2
        # Rounding can push `a` marginally outside [0, 1] (e.g. near-antipodal
        # points), which would make sqrt(1 - a) raise ValueError.
        a = min(1.0, max(0.0, a))
        c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
        return R * c

    def find_stoppage_points(self, data: pd.DataFrame) -> List[Tuple[str, str, List[dict]]]:
        """Split the GPS track into stoppages.

        Args:
            data: Frame with ``ts``, ``latitude`` and ``longitude`` columns,
                one row per GPS fix, in chronological order.

        Returns:
            One ``[start_timestamp, end_timestamp, [anchor, last]]`` entry per
            stoppage, where ``anchor`` and ``last`` are ``{'lat', 'long'}``
            dicts for the first and last fix of the stoppage. An empty frame
            yields an empty list.
        """
        if data.empty:
            return []

        # Iterate the three columns in lockstep instead of building a row
        # Series with data.iloc[i] several times per iteration.
        fixes = zip(data['ts'], data['latitude'], data['longitude'])
        first_ts, first_lat, first_lon = next(fixes)

        start_timestamp = end_timestamp = first_ts
        anchor = {'lat': first_lat, 'long': first_lon}
        last_seen = {'lat': first_lat, 'long': first_lon}
        result = []

        for ts, lat, lon in fixes:
            # Test the *incoming* fix against the anchor before absorbing it,
            # so a fix outside the radius never ends up inside the stoppage.
            d = self.distance(anchor['lat'], anchor['long'], lat, lon)
            if d <= self.distance_range:
                end_timestamp = ts
                last_seen = {'lat': lat, 'long': lon}
            else:
                # The vehicle left the radius: close the current stoppage and
                # start a new one anchored at this fix.
                result.append([start_timestamp, end_timestamp, [anchor, last_seen]])
                start_timestamp = end_timestamp = ts
                anchor = {'lat': lat, 'long': lon}
                last_seen = {'lat': lat, 'long': lon}

        # Flush the stoppage that was still open when the data ended.
        result.append([start_timestamp, end_timestamp, [anchor, last_seen]])
        return result

    def predict(self, file_path: str) -> List[Tuple[str, str, List[dict]]]:
        """Read a CSV of GPS fixes from ``file_path`` and return its stoppages.

        The file is loaded with ``pandas.read_csv`` and passed unchanged to
        :meth:`find_stoppage_points`.
        """
        data = pd.read_csv(file_path)
        return self.find_stoppage_points(data)

# Detect stoppages in the sample track; 5 is the distance_range passed to the
# detector (compared against Haversine distances computed in km).
model = StoppageDetector(5)
result = model.predict('sample_gps1.csv')

# One row per stoppage: start time, end time, and the pair of location dicts.
df_stoppages = pd.DataFrame(
    result,
    columns=['start_timestamp', 'end_timestamp', '[ {lat1, long1}, {lat2, long2}] ] '],
)

# Persist the stoppages without the positional index column.
df_stoppages.to_csv("stoppages.csv", index=False)
