In [1]:
import pandas as pd
import numpy as np
from geopy.distance import distance
from haversine import haversine_vector, Unit


def distance_of_home_third_places(row):
    """Return the geopy distance in kilometres between a row's home and third place.

    Parameters
    ----------
    row : mapping (e.g. a ``pandas.Series`` passed by ``DataFrame.apply(axis=1)``)
        Must provide the keys ``mean_lat_home``, ``mean_lon_home``,
        ``mean_lat_third`` and ``mean_lon_third``.

    Returns
    -------
    The ``.km`` attribute of ``distance(home, third)``.
    """
    # geopy points are (latitude, longitude). Passing (lon, lat) does not raise
    # for coordinates whose longitude is a valid latitude, it just silently
    # returns the wrong distance -- so the order here matters.
    home = (row["mean_lat_home"], row["mean_lon_home"])
    third = (row["mean_lat_third"], row["mean_lon_third"])
    return distance(home, third).km


def distance_of_home_third_places_haversine_vector(row):
    """Compute the home-to-third-place distance of one row with ``haversine_vector``.

    Parameters
    ----------
    row : mapping (e.g. a ``pandas.Series``)
        Must provide the keys ``mean_lat_home``, ``mean_lon_home``,
        ``mean_lat_third`` and ``mean_lon_third``.

    Returns
    -------
    Whatever ``haversine_vector`` returns for the two points, in kilometres.
    NOTE(review): for a single pair of points this is presumably a length-1
    array rather than a bare float -- confirm before using it in ``apply``.
    """
    # Both points are built as (latitude, longitude) tuples.
    home_point = (row["mean_lat_home"], row["mean_lon_home"])
    third_point = (row["mean_lat_third"], row["mean_lon_third"])
    return haversine_vector(home_point, third_point, unit=Unit.KILOMETERS)

## Generating random sample data

In [2]:
SIZE = 1_000_000
SEED = 42  # fixed seed so Restart & Run All reproduces the same sample

# A dedicated, seeded generator keeps the sample independent of any global
# NumPy random state touched elsewhere in the kernel.
rng = np.random.default_rng(SEED)

# Uniformly distributed sample coordinates: longitudes in [18.5, 19.25),
# latitudes in [47, 48), for both the home and the third place of each row.
df = pd.DataFrame({"mean_lon_home": rng.uniform(18.5, 19.25, size=SIZE),
                   "mean_lat_home": rng.uniform(47, 48, size=SIZE),
                   "mean_lon_third": rng.uniform(18.5, 19.25, size=SIZE),
                   "mean_lat_third": rng.uniform(47, 48, size=SIZE)})

## Run the old version with `geopy.distance`

In [3]:
# Baseline: one Python-level call of distance_of_home_third_places per row.
df["distance"] = df.apply(distance_of_home_third_places, axis="columns")

## Creating tuple vectors from the lon and the lat columns

In [11]:
# Row-wise construction: the lambda runs once per row and returns a
# (lat, lon) tuple, which is stored as a single object column.
df["home"] = df.apply(
    lambda row: (row["mean_lat_home"], row["mean_lon_home"]),
    axis="columns",
)
df["third"] = df.apply(
    lambda row: (row["mean_lat_third"], row["mean_lon_third"]),
    axis="columns",
)

### More efficient way

In [12]:
# Pair the latitude and longitude columns element by element into plain
# (lat, lon) tuples; index=False / name=None make itertuples yield bare
# tuples of just the two selected columns.
df["home2"] = list(
    df[["mean_lat_home", "mean_lon_home"]].itertuples(index=False, name=None)
)
df["third2"] = list(
    df[["mean_lat_third", "mean_lon_third"]].itertuples(index=False, name=None)
)

## Use haversine_vector from haversine package

In [10]:
# Single vectorised call over all rows: each argument is a list of (lat, lon)
# tuples, and the result is displayed as the cell output.
haversine_vector(
    df["home"].to_list(),
    df["third"].to_list(),
    unit=Unit.KILOMETERS,
)

array([49.69694367, 39.3017731 , 42.81560515, ..., 11.52711792,
       66.36881106, 50.62300573])