In [None]:
import pandas as pd
from math import asin, sqrt, cos, sin, radians

# Radius of the Earth in miles; haversine_distance multiplies the central
# angle by this value, so its result is expressed in miles.
EARTH_R_MI = 3963


In [None]:
def haversine_distance(lat_lon_1, lat_lon_2):
    """
    Calculate the distance between two points on a sphere (like Earth) in miles.

    https://en.wikipedia.org/wiki/Haversine_formula

    :param lat_lon_1: (latitude, longitude) pair of the first point, in degrees
    :param lat_lon_2: (latitude, longitude) pair of the second point, in degrees

    :return: distance in miles
    """

    lat1, lon1 = lat_lon_1
    lat2, lon2 = lat_lon_2

    # The trigonometric functions below work in radians.
    rlat1, rlon1, rlat2, rlon2 = map(radians, (lat1, lon1, lat2, lon2))

    # Haversine of the central angle between the two points.
    h = (
        sin((rlat2 - rlat1) / 2) ** 2
        + cos(rlat1) * cos(rlat2) * sin((rlon2 - rlon1) / 2) ** 2
    )

    # Floating-point rounding can push h marginally above 1 for (nearly)
    # antipodal points, which would make asin raise "math domain error".
    h = min(1.0, h)

    return 2 * EARTH_R_MI * asin(sqrt(h))

In [None]:
def match_grocery_stores(stores1_df, stores2_df, distance):
    """
    Link stores from two data sets that lie within ``distance`` feet of each
    other and return the full outer join of both data sets.

    Matching is greedy and one-to-one: stores in ``stores1_df`` are visited in
    row order and each is linked to the nearest store in ``stores2_df`` that is
    within range and not linked yet. Every linked pair shares a ``match_id``
    (1, 2, 3, ...). The input frames are not modified.

    :param stores1_df: first stores frame; needs a "location" column whose
        values are accepted by haversine_distance (it unpacks each value as a
        latitude/longitude pair)
    :param stores2_df: second stores frame, same requirements
    :param distance: maximum separation, in feet, for two stores to be linked

    :return: DataFrame with one row per linked pair plus one row per unlinked
        store from either input. ``match_id`` is <NA> on unlinked rows. Column
        names shared by both inputs carry the pandas default suffixes
        ``_x`` (stores1_df) and ``_y`` (stores2_df).
    """
    # haversine_distance reports miles; there are 5280 feet in a mile.
    max_miles = distance / 5280

    locations1 = list(stores1_df["location"])
    locations2 = list(stores2_df["location"])

    # Join key per row (by position); None means "not linked yet".
    keys1 = [None] * len(locations1)
    keys2 = [None] * len(locations2)
    match_id = 1

    for pos1, loc1 in enumerate(locations1):
        nearest_pos = None
        nearest_miles = None
        for pos2, loc2 in enumerate(locations2):
            if keys2[pos2] is not None:
                continue  # already linked to an earlier store
            miles = haversine_distance(loc1, loc2)
            if miles <= max_miles and (
                nearest_miles is None or miles < nearest_miles
            ):
                nearest_pos, nearest_miles = pos2, miles

        if nearest_pos is not None:
            keys1[pos1] = match_id
            keys2[nearest_pos] = match_id
            match_id += 1

    # pandas joins null keys to each other, which would pair every unlinked
    # store of one frame with every unlinked store of the other. Give unlinked
    # rows unique temporary keys instead and blank them out after the merge.
    first_placeholder = match_id
    placeholder = first_placeholder
    for keys in (keys1, keys2):
        for pos, key in enumerate(keys):
            if key is None:
                keys[pos] = placeholder
                placeholder += 1

    # assign() returns new frames, so the caller's data is left untouched.
    left = stores1_df.assign(match_id=keys1).astype({"match_id": "int64"})
    right = stores2_df.assign(match_id=keys2).astype({"match_id": "int64"})

    # full join linking on match_id
    merged_df = pd.merge(left, right, on="match_id", how="outer")

    # Only ids handed out during matching are real; the rest were placeholders.
    is_match = merged_df["match_id"] < first_placeholder
    merged_df["match_id"] = (
        merged_df["match_id"].where(is_match).astype("Int64")
    )

    return merged_df