In [6]:
import json
import pandas as pd
import numpy as np
from typing import Tuple
import math

# Load the fake location export; each row of the resulting frame carries one
# record dict in its "locations" column.
LOCATION_JSON = "fakeLocationData.json"
read_data = pd.read_json(LOCATION_JSON)

# Quick sanity check of the parsed structure.
print(read_data.head())

                                           locations
0  {'coordinates': {'latitude': 37.23376399865852...
1  {'coordinates': {'latitude': 37.22510647241181...
2  {'coordinates': {'latitude': 37.23412164261865...
3  {'coordinates': {'latitude': 37.23467817316912...
4  {'coordinates': {'latitude': 37.22507253576990...


In [7]:
def haversine(
    coord1: Tuple[float, float],
    coord2: Tuple[float, float],
    radius: float = 6371.0,
) -> float:
    """
    Calculate the great-circle distance between two points using the Haversine formula.

    Args:
        coord1: (latitude, longitude) of the first point, in degrees.
        coord2: (latitude, longitude) of the second point, in degrees.
        radius: Radius of the sphere. The result is expressed in the same
            unit. Defaults to 6371.0, the Earth radius in kilometers.

    Returns:
        The great-circle distance between the two points, in the unit of
        ``radius`` (kilometers with the default).
    """
    lat1, lon1 = coord1
    lat2, lon2 = coord2

    # Convert latitude and longitude from degrees to radians
    phi1 = math.radians(lat1)
    phi2 = math.radians(lat2)
    delta_phi = math.radians(lat2 - lat1)
    delta_lambda = math.radians(lon2 - lon1)

    # Haversine formula
    a = (
        math.sin(delta_phi / 2.0) ** 2
        + math.cos(phi1) * math.cos(phi2) * math.sin(delta_lambda / 2.0) ** 2
    )
    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # (near-)antipodal points, which would make sqrt(1 - a) raise ValueError.
    a = min(1.0, max(0.0, a))
    c = 2.0 * math.atan2(math.sqrt(a), math.sqrt(1.0 - a))

    return radius * c


In [8]:
# Print the average distance (km) between consecutive location records.
# Print the average time gap (seconds) between consecutive location records.
from datetime import datetime

def calculate_average_spread(data):
    """
    Average distance and time gap between consecutive location records.

    Args:
        data: Object indexable by "locations" (e.g. a DataFrame or a dict)
            yielding an ordered collection of records such as a list or a
            pandas Series. Each record must provide
            ["coordinates"]["latitude"], ["coordinates"]["longitude"] and a
            "requestTime" string accepted by datetime.fromisoformat.

    Returns:
        A tuple (avg_distance_spread, avg_time_spread): the mean haversine
        distance (kilometers with haversine's default radius) and the mean
        absolute time difference in seconds between successive records.
        Returns (0.0, 0.0) when there are fewer than two records.
    """
    # Materialise the records so consecutive pairs are taken by position.
    # Indexing a pandas Series with locations[i] is label-based and fails (or
    # picks the wrong row) once the index is no longer 0..n-1, e.g. after
    # filtering the frame.
    records = list(data["locations"])
    num_gaps = len(records) - 1

    if num_gaps < 1:
        return 0.0, 0.0

    total_distance = 0.0
    total_time_diff = 0.0

    for current, following in zip(records, records[1:]):
        coord1 = (current["coordinates"]["latitude"], current["coordinates"]["longitude"])
        coord2 = (following["coordinates"]["latitude"], following["coordinates"]["longitude"])
        total_distance += haversine(coord1, coord2)

        time1 = datetime.fromisoformat(current["requestTime"])
        time2 = datetime.fromisoformat(following["requestTime"])
        # abs() keeps the gap positive even if records are not time-ordered.
        total_time_diff += abs((time2 - time1).total_seconds())

    avg_distance_spread = total_distance / num_gaps
    avg_time_spread = total_time_diff / num_gaps

    return avg_distance_spread, avg_time_spread

# Run the spread computation on the loaded frame and report both averages,
# one per line (units are part of the printed text).
avg_distance_spread, avg_time_spread = calculate_average_spread(read_data)
print(
    f"Average distance spread: {avg_distance_spread} km",
    f"Average time spread: {avg_time_spread} seconds",
    sep="\n",
)


Average distance spread: 1.721725970521828 km
Average time spread: 1227.2727272727273 seconds
