In [9]:
import numpy as np
import math
from scipy.optimize import least_squares

# Great-circle distance between two (lat, lon) points via the haversine formula
def haversine(lat1, lon1, lat2, lon2, radius_km=6371.0) -> float:
    """
    Returns haversine distance in *km*

    Parameters:
        lat1, lon1: latitude / longitude of the first point, in decimal degrees.
        lat2, lon2: latitude / longitude of the second point, in decimal degrees.
        radius_km:  radius of the sphere in kilometres; defaults to 6371, the
                    value this function has always used for the Earth.

    Returns:
        Great-circle distance between the two points, in the same unit as
        ``radius_km`` (kilometres by default). NaN inputs yield NaN.
    """
    # The trigonometric functions below work in radians
    phi1, lam1, phi2, lam2 = map(math.radians, (lat1, lon1, lat2, lon2))

    half_dphi = (phi2 - phi1) / 2
    half_dlam = (lam2 - lam1) / 2

    # `a` is the squared half-chord length between the points on a unit sphere
    a = (
        math.sin(half_dphi) ** 2
        + math.cos(phi1) * math.cos(phi2) * math.sin(half_dlam) ** 2
    )

    # For (near-)antipodal points floating-point rounding can leave `a` a hair
    # above 1, which would make asin() raise "math domain error". Only the upper
    # bound is clamped: the comparison is False for NaN, so NaN still propagates,
    # and a negative `a` (only possible for out-of-range latitudes) still raises.
    if a > 1.0:
        a = 1.0

    # Central angle in radians, then scale by the sphere radius
    c = 2 * math.asin(math.sqrt(a))
    return radius_km * c

# Residual function handed to the least-squares solver
def error_func(restaurant_loc, customer_locs, distances):
    """
    Residuals between modelled and reported distances for a candidate location.

    Parameters:
        restaurant_loc: (lat, lon) pair, in degrees, of the location being tried.
        customer_locs:  iterable of (lat, lon) pairs in degrees, one per observation.
        distances:      iterable of reported distances, paired positionally with
                        ``customer_locs``; must use the unit haversine() returns.

    Returns:
        A list with one entry per paired observation:
        ``haversine(observation, candidate) - reported distance``.
    """
    rest_lat, rest_lon = restaurant_loc
    return [
        haversine(cust_lat, cust_lon, rest_lat, rest_lon) - reported
        for (cust_lat, cust_lon), reported in zip(customer_locs, distances)
    ]

# Toy example: three observation points (lat, lon in degrees) with the distance
# reported from each one, in kilometres.
# Note: the points lie within roughly 50 m of each other while the listed
# distances differ by up to 0.8 km, so no single location matches all three
# exactly; the printed estimate is a least-squares compromise.
customer_locs = [
    (52.5200, 13.4050),
    (52.5201, 13.4052),
    (52.5198, 13.4048),
]
distances = [0.2, 1, 0.25]

# Starting point for the solver (same coordinates as the first sample point)
initial_guess = (52.5200, 13.4050)

# Minimise the squared residuals of error_func over (lat, lon)
result = least_squares(
    error_func,
    x0=initial_guess,
    args=(customer_locs, distances),
)
estimated_loc = result.x

print(f"Estimated restaurant location: {estimated_loc}")

Estimated restaurant location: [52.51678976 13.40012467]


In [None]:
import pandas as pd

# Extract data of customer locations and distances to restaurants when ordering.
#
# What the SQL does:
#   * MaxDate picks the most recent ingestion_date in daily_restaurants, and the
#     INNER JOIN keeps only rows from that latest snapshot.
#   * The EXISTS filter keeps only restaurants whose Id matches a lead id derived
#     (via row_id_to_lead_id) from left_row_id in the vendor-matching annotations.
#   * location.distance is returned as column `distance`, Id as `rival_rest_id`.
#
# NOTE(review): the unit of `distance` and whether latitude/longitude describe the
# customer-side location are not visible here — confirm before comparing against
# haversine(), which returns kilometres.
#
# Plain (non-f) string on purpose: the query has no placeholders, and an f-string
# would try to interpret any literal `{...}` added to the SQL later.
query = """
WITH MaxDate AS (
  SELECT
     MAX(ingestion_date) AS max_date
  FROM
    `dhub-yemek.brandzone_raw.daily_restaurants`
  
)

SELECT r.location_id , Id as rival_rest_id , latitude , longitude , name , isClosed , location.neighborhoodName , location.distance , ingestion_date
FROM `dhub-yemek.brandzone_raw.daily_restaurants` r
INNER JOIN MaxDate m
ON r.ingestion_date = m.max_date
where exists --extract location for leads used in training dataset, to focus checks here first
(select 1 from `dh-global-sales-data-dev.achilles_train_test_data_update.YS_TR_vendor_matching_annotations_v4_2`
  where `dh-global-sales-data.achilles.row_id_to_lead_id`(left_row_id) = Id
)
"""

# NOTE(review): pandas deprecated pd.read_gbq in 2.2.0 in favour of
# pandas_gbq.read_gbq; migrate once pandas_gbq is imported by this notebook.
# use_bqstorage_api=True requests the BigQuery Storage API for the download.
triangulation_data = pd.read_gbq(query, use_bqstorage_api=True).reset_index(drop=True)

In [None]:
# Compute estimations
def _estimate_restaurant_location(lead_rows):
    """
    Fit one restaurant's (lat, lon) from the rows observed for a single lead.

    Parameters:
        lead_rows: DataFrame slice for one lead; must contain the columns
                   "latitude", "longitude" and "distance".

    Returns:
        Length-2 array (lat, lon) taken from the solver's ``result.x``. If no row
        has all three values present, returns ``array([nan, nan])`` without
        calling the solver.
    """
    # A missing coordinate or distance makes the residuals non-finite, and
    # least_squares raises ValueError on that — which would abort the whole run
    # because of a single bad row. Drop such rows instead.
    observations = lead_rows[["latitude", "longitude", "distance"]].dropna()
    if observations.empty:
        return np.full(2, np.nan)

    lats = observations["latitude"].tolist()
    lons = observations["longitude"].tolist()
    customer_locs = list(zip(lats, lons))
    # NOTE(review): error_func compares these against haversine() output, which
    # is in km — confirm the `distance` column is in km as well.
    distances = observations["distance"].tolist()

    # Initial guess: mean of latitudes and longitudes
    initial_guess = (np.mean(lats), np.mean(lons))

    # NOTE(review): one residual per row against two unknowns — a lead with fewer
    # than three rows does not constrain a unique position; treat with caution.
    result = least_squares(error_func, initial_guess, args=(customer_locs, distances))
    return result.x


# Kept so that any later cell reading `lead_ids` continues to work
lead_ids = triangulation_data["rival_rest_id"].unique()

# One pass over the frame via groupby instead of re-filtering the whole frame
# for every lead; sort=False keeps leads in order of first appearance.
results_dict = {
    lead_id: _estimate_restaurant_location(lead_rows)
    for lead_id, lead_rows in triangulation_data.groupby("rival_rest_id", sort=False)
}