In [1]:
import numpy as np
import pandas as pd
import dask as dd
import ray as rd

# Single Threaded Benchmarks
This code comes from the following blog post: https://engineering.upside.com/a-beginners-guide-to-optimizing-pandas-code-for-speed-c09ef2c6a4d6

We use this as a starting point for our implementations as they provide benchmarked results of the two approaches we are comparing.

In [2]:
# Define a basic Haversine distance formula
def haversine(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in miles between two points.

    All four arguments are latitudes/longitudes in degrees. Only NumPy
    element-wise operations are used, so each argument may be a scalar,
    a NumPy array or a pandas Series; the usual broadcasting rules apply.

    Parameters
    ----------
    lat1, lon1 : latitude and longitude of the first point(s), in degrees.
    lat2, lon2 : latitude and longitude of the second point(s), in degrees.

    Returns
    -------
    Distance(s) in miles, shaped like the broadcast of the inputs.
    """
    MILES = 3959  # sphere radius used to convert the central angle to miles
    lat1, lon1, lat2, lon2 = map(np.deg2rad, [lat1, lon1, lat2, lon2])
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = np.sin(dlat/2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2)**2
    # Floating-point rounding can push `a` marginally above 1 for
    # (near-)antipodal points, which would make arcsin return NaN.
    a = np.minimum(a, 1.0)
    c = 2 * np.arcsin(np.sqrt(a))
    total_miles = MILES * c
    return total_miles

In [3]:
# Row-by-row baseline: walk the frame by position and collect one distance per row
def haversine_looping(df):
    """Return a list with the haversine distance of every row of ``df``.

    Each row's 'latitude' and 'longitude' are looked up positionally with
    ``df.iloc`` and measured against the fixed point (40.671, -73.985).
    This is the deliberately naive implementation used as the slow baseline.
    """
    origin_lat, origin_lon = 40.671, -73.985
    distances = []
    for pos in range(len(df)):
        row_lat = df.iloc[pos]['latitude']
        row_lon = df.iloc[pos]['longitude']
        distances.append(haversine(origin_lat, origin_lon, row_lat, row_lon))
    return distances

In [4]:
## Load Data
# Read the hotel CSV into a pandas DataFrame, decoding the file as cp1252.
# NOTE(review): this reads from ../blog_code/, while the wget cell further down
# saves new_york_hotels.csv into the working directory — confirm the file is
# present at this path on a fresh kernel.
df = pd.read_csv('../blog_code/new_york_hotels.csv', encoding='cp1252')

## Experiment 1: Looping

In [5]:
%%timeit

# Baseline: time the explicit positional loop (haversine_looping) over the
# pandas DataFrame and store the resulting list as the 'distance' column.
df['distance'] = haversine_looping(df)

642 ms ± 57 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## Experiment 2: Iterrows

In [6]:
%%timeit
# Time haversine called once per row while iterating with DataFrame.iterrows();
# the per-row distances are collected in a list and assigned as 'distance'.
haversine_series = []
for index, row in df.iterrows():
    haversine_series.append(haversine(40.671, -73.985,\
                                      row['latitude'], row['longitude']))
df['distance'] = haversine_series

154 ms ± 4.75 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


## Experiment 3: Apply 

In [7]:
%%timeit

# Time DataFrame.apply with axis=1: the lambda receives one row at a time and
# calls haversine with that row's latitude/longitude.
df['distance'] = df.apply(lambda row: haversine(40.671, -73.985, row['latitude'], row['longitude']), axis=1)

63.8 ms ± 3.79 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


## Experiment 4: Vectorize Series

In [17]:
%%timeit 

# Time a single vectorized haversine call: whole pandas Series are passed for
# latitude/longitude, so there is no Python-level loop over rows.
df['distance'] = haversine(40.671, -73.985, df['latitude'], df['longitude'])

1.42 ms ± 67.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


## Experiment 5: Vectorize Numpy ndarray

In [18]:
%%timeit 

# Vectorized implementation of Haversine applied on NumPy arrays: `.values`
# hands haversine the underlying ndarrays instead of the pandas Series.
df['distance'] = haversine(40.671, -73.985, df['latitude'].values, df['longitude'].values)

282 µs ± 13.6 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


# Ray

In [8]:
#import pandas as pd
import ray.dataframe as rd

Process STDOUT and STDERR is being redirected to /tmp/raylogs/.
Waiting for redis server at 127.0.0.1:41736 to respond...
Waiting for redis server at 127.0.0.1:20344 to respond...
Starting local scheduler with the following resources: {'CPU': 8, 'GPU': 0}.

View the web UI at http://localhost:8890/notebooks/ray_ui92446.ipynb?token=ed172dab16ecf7f4c9b512309619586a02e567ab8f5463df



In [9]:
## Load Data
# Load the same CSV through ray.dataframe's read_csv (`rd` was rebound to
# ray.dataframe by the preceding import cell), then preview the first rows.
rdf = rd.read_csv('../blog_code/new_york_hotels.csv', encoding='cp1252')
rdf.head()

   ean_hotel_id                                     name  \
0        269955       Hilton Garden Inn Albany/SUNY Area   
1        113431     Courtyard by Marriott Albany Thruway   
2        108151                    Radisson Hotel Albany   
3        254756  Hilton Garden Inn Albany Medical Center   
4        198232  CrestHill Suites SUNY University Albany   

                 address1    city state_province postal_code  latitude  \
0     1389 Washington Ave  Albany             NY       12206  42.68751   
1  1455 Washington Avenue  Albany             NY       12206  42.68971   
2             205 Wolf Rd  Albany             NY       12205  42.72410   
3     62 New Scotland Ave  Albany             NY       12208  42.65157   
4  1415 Washington Avenue  Albany             NY       12206  42.68873   

   longitude  star_rating  high_rate  low_rate  
0  -73.81643          3.0   154.0272  124.0216  
1  -73.82021          3.0   179.0100  134.0000  
2  -73.79822          3.0   134.1700   84.1600 

In [10]:
%%timeit

# Time the positional-loop baseline (haversine_looping) against the Ray
# DataFrame; the returned list is only bound to `result`, not stored on rdf.
result = haversine_looping(rdf)

16.4 s ± 340 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [14]:
%%timeit
# Time the iterrows() loop against the Ray DataFrame; per-row distances are
# collected in a plain list bound to `result`.
haversine_series = []
for index, row in rdf.iterrows():
    haversine_series.append(haversine(40.671, -73.985,\
                                      row['latitude'], row['longitude']))
result = haversine_series

830 ms ± 53.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [24]:
%%timeit

# Time row-wise apply (axis=1) on the Ray DataFrame.
# NOTE: the recorded run of this cell raised NotImplementedError, so there is
# no timing for this experiment on Ray.
results = rdf.apply(lambda row: haversine(40.671, -73.985, row['latitude'], row['longitude']), axis=1)

NotImplementedError: To contribute to Pandas on Ray, please visit github.com/ray-project/ray.

In [16]:
%%timeit 

# Time the vectorized haversine call with whole columns of the Ray DataFrame
# passed as the latitude/longitude arguments.
result = haversine(40.671, -73.985, rdf['latitude'], rdf['longitude'])

28.5 ms ± 725 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [19]:
%%timeit 

# Time the vectorized haversine call on the `.values` of the Ray DataFrame
# columns rather than on the column objects themselves.
result = haversine(40.671, -73.985, rdf['latitude'].values, rdf['longitude'].values)

27 ms ± 1.74 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [25]:
# Download the hotel CSV into the current working directory (shell escape).
!wget https://raw.githubusercontent.com/s-heisler/pycon2017-optimizing-pandas/master/new_york_hotels.csv

--2018-05-09 19:38:45--  https://raw.githubusercontent.com/s-heisler/pycon2017-optimizing-pandas/master/new_york_hotels.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.32.133
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.32.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 222663 (217K) [text/plain]
Saving to: ‘new_york_hotels.csv’


2018-05-09 19:38:45 (4.54 MB/s) - ‘new_york_hotels.csv’ saved [222663/222663]



In [26]:
# Write everything from line 2 onward (i.e. without the header line) to a second
# file, so it can be appended to the original repeatedly to build a bigger CSV.
!tail -n +2 new_york_hotels.csv > new_york_hotels_no_header.csv

In [27]:
# Build a larger benchmark file: the original CSV (with its header) followed by
# many header-less copies of the same rows. Written in plain Python instead of a
# hand-pasted `cat` argument list so the size is a single tunable number and the
# cell does not depend on a Unix shell.
# NOTE(review): N_COPIES is meant to match the number of header-less files in the
# original hand-written command — confirm the exact count before comparing
# against previously recorded timings.
N_COPIES = 469

with open('new_york_hotels.csv', 'rb') as src:
    csv_with_header = src.read()
with open('new_york_hotels_no_header.csv', 'rb') as src:
    csv_rows_only = src.read()

# Bytes are copied verbatim (binary mode), exactly as `cat` would concatenate them.
with open('big_new_york_hotels.csv', 'wb') as dst:
    dst.write(csv_with_header)
    for _ in range(N_COPIES):
        dst.write(csv_rows_only)

In [28]:
%%time
## Load Data
# Time a single read of the enlarged CSV into a pandas DataFrame; this rebinds
# `df`, replacing the small frame used in the earlier experiments.
df = pd.read_csv('big_new_york_hotels.csv', encoding='cp1252')

CPU times: user 2.23 s, sys: 309 ms, total: 2.54 s
Wall time: 2.34 s


In [32]:
%%time

# Vectorized implementation of Haversine applied on Pandas series (enlarged dataset).
# NOTE(review): the output recorded for this cell is in %%timeit format
# ("± ... per loop") although the cell uses %%time — presumably the magic was
# changed after the last run; re-run to refresh the output.
df['distance'] = haversine(40.671, -73.985, df['latitude'], df['longitude'])

38.4 ms ± 4.04 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [29]:
%time 

## Load Data
rdf = rd.read_csv('big_new_york_hotels.csv', encoding='cp1252')
rdf.head()

CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.01 µs


   ean_hotel_id                                     name  \
0        269955       Hilton Garden Inn Albany/SUNY Area   
1        113431     Courtyard by Marriott Albany Thruway   
2        108151                    Radisson Hotel Albany   
3        254756  Hilton Garden Inn Albany Medical Center   
4        198232  CrestHill Suites SUNY University Albany   

                 address1    city state_province postal_code  latitude  \
0     1389 Washington Ave  Albany             NY       12206  42.68751   
1  1455 Washington Avenue  Albany             NY       12206  42.68971   
2             205 Wolf Rd  Albany             NY       12205  42.72410   
3     62 New Scotland Ave  Albany             NY       12208  42.65157   
4  1415 Washington Avenue  Albany             NY       12206  42.68873   

   longitude  star_rating  high_rate  low_rate  
0  -73.81643          3.0   154.0272  124.0216  
1  -73.82021          3.0   179.0100  134.0000  
2  -73.79822          3.0   134.1700   84.1600 

In [33]:
%%time

# Vectorized haversine call on the columns of the Ray DataFrame (enlarged dataset).
# NOTE(review): the output recorded for this cell is in %%timeit format
# ("± ... per loop") although the cell uses %%time — presumably the magic was
# changed after the last run; re-run to refresh the output.
result = haversine(40.671, -73.985, rdf['latitude'], rdf['longitude'])

1.02 s ± 100 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [35]:
%%time
# Single timed run of the iterrows() loop over the enlarged Ray DataFrame;
# per-row distances are collected in a plain list bound to `result`.
haversine_series = []
for index, row in rdf.iterrows():
    haversine_series.append(haversine(40.671, -73.985,\
                                      row['latitude'], row['longitude']))
result = haversine_series

CPU times: user 5min 27s, sys: 25.6 s, total: 5min 53s
Wall time: 8min 21s


In [37]:
%%time
# Single timed run of the iterrows() loop over the enlarged pandas DataFrame,
# for comparison with the Ray run; the result is stored as the 'distance' column.
haversine_series = []
for index, row in df.iterrows():
    haversine_series.append(haversine(40.671, -73.985,\
                                      row['latitude'], row['longitude']))
df['distance'] = haversine_series

CPU times: user 1min 14s, sys: 311 ms, total: 1min 15s
Wall time: 1min 15s
