In [13]:
import numpy as np

In [42]:
#option 1: return matrix -> more complicated
def f(x,c):
    """Return ``x`` offset by ``c``, i.e. ``x + c``."""
    shifted = x + c
    return shifted
def g(x,c):
    """Return ``x`` scaled by ten and offset by ``c``, i.e. ``10*x + c``."""
    scaled = 10 * x
    return scaled + c
def matrix():
    """Build a function that evaluates a fixed 2x3 grid of f/g expressions.

    Returns
    -------
    callable
        A one-argument function ``y(x)``.  Calling it returns a NumPy array
        whose first row is ``f(x, 1), f(x, 2), f(x, 3)`` and whose second row
        is ``g(x, 4), g(x, 5), g(x, 6)``.
    """
    def y(x):
        # Evaluate each row separately, then stack them into one array.
        f_row = [f(x, c) for c in (1, 2, 3)]
        g_row = [g(x, c) for c in (4, 5, 6)]
        return np.array([f_row, g_row])
    return y
# matrix() returns its inner function y; calling that with x=42 evaluates every
# f/g entry and produces the 2x3 array that is printed below.
final_func=matrix()
z1=final_func(42)
print(z1)

#option 2 Vectorised function
def myfunc(a, b):
    """Combine ``a`` and ``b`` with a rule that depends on ``a``.

    Returns ``b + a`` when ``a < 4`` and ``10*b + a`` otherwise.
    """
    return b + a if a < 4 else 10 * b + a
# NOTE: this rebinds final_func, replacing the closure created for option 1.
# np.vectorize wraps the scalar myfunc so it can be called with an array:
# each array entry is passed as `a`, and the scalar 42 is passed as `b`
# for every entry.
final_func = np.vectorize(myfunc)
z2=final_func(np.array([[1, 2, 3],[4,5,6]]), 42)
print(z2)

[[ 43  44  45]
 [424 425 426]]
[[ 43  44  45]
 [424 425 426]]


# Another Example
[Blogpost](https://datascience.blog.wzb.eu/2018/02/02/vectorization-and-parallelization-in-python-with-numpy-and-pandas/)

In [21]:
import math
def haversine(row):
    """Great-circle distance in km between two points given in degrees.

    Scalar implementation of the haversine formula built on the ``math``
    module, so each call handles exactly one pair of points.

    Parameters
    ----------
    row : sequence of four numbers
        ``(a_lat, a_lng, b_lat, b_lng)``: latitude and longitude of point a
        followed by latitude and longitude of point b, all in degrees.

    Returns
    -------
    float
        Distance between a and b in kilometres on a sphere of radius 6371 km.
    """
    a_lat, a_lng, b_lat, b_lng = row
    R = 6371     # earth radius in km

    a_lat = math.radians(a_lat)
    a_lng = math.radians(a_lng)
    b_lat = math.radians(b_lat)
    b_lng = math.radians(b_lng)

    d_lat = b_lat - a_lat
    d_lng = b_lng - a_lng

    a = math.sin(d_lat / 2) ** 2 \
      + math.cos(a_lat) * math.cos(b_lat) * math.sin(d_lng / 2) ** 2

    # Mathematically 0 <= a <= 1, but floating-point rounding can push it
    # marginally outside that range (e.g. for near-antipodal points), which
    # would make math.sqrt raise "math domain error".  Explicit comparisons
    # are used instead of min()/max() so that a NaN input stays NaN.
    if a > 1.0:
        a = 1.0
    elif a < 0.0:
        a = 0.0

    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))

    return R * c
def vec_haversine(a_lat, a_lng, b_lat, b_lng):
    """Vectorised great-circle distance in km between points a and b.

    Same haversine formula as the scalar ``haversine`` function, but written
    entirely with NumPy ufuncs so that whole arrays of coordinates are
    processed in a single call.

    Parameters
    ----------
    a_lat, a_lng : scalar or array-like
        Latitude and longitude of the origin point(s), in degrees.
    b_lat, b_lng : scalar or array-like
        Latitude and longitude of the destination point(s), in degrees.

    Returns
    -------
    Same kind of object the NumPy ufuncs return for the inputs
        Distance(s) between a and b in kilometres on a sphere of radius
        6371 km.
    """
    R = 6371  # earth radius in km

    a_lat = np.radians(a_lat)
    a_lng = np.radians(a_lng)
    b_lat = np.radians(b_lat)
    b_lng = np.radians(b_lng)

    d_lat = b_lat - a_lat
    d_lng = b_lng - a_lng

    d_lat_sq = np.sin(d_lat / 2) ** 2
    d_lng_sq = np.sin(d_lng / 2) ** 2

    a = d_lat_sq + np.cos(a_lat) * np.cos(b_lat) * d_lng_sq

    # Mathematically 0 <= a <= 1, but rounding can push it marginally outside
    # that range (e.g. near-antipodal points); np.sqrt(1 - a) would then
    # silently produce NaN.  Plain ufuncs are used for the clamp so the
    # operation stays element-wise like the rest of the function and NaN
    # inputs still propagate as NaN.
    a = np.minimum(np.maximum(a, 0.0), 1.0)

    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))

    return R * c  # returns distance between a and b in km

In [22]:

# One row per route, four columns per row.  All values are in degrees; the
# haversine functions convert them to radians themselves.
coords = np.array([
    # orig_lat, orig_lng,  dest_lat,  dest_lng
    [52.516667, 13.388889, 51.507222, -0.1275],   # Berlin-London
    [52.516667, 13.388889, 55.75, 37.616667],     # Berlin-Moscow
    [55.75, 37.616667, 51.507222, -0.1275],       # Moscow-London
])

In [23]:
# apply_along_axis with axis=1 passes each row of coords to the scalar
# haversine function in turn, i.e. one Python-level call per row.
%time np.apply_along_axis(haversine, 1, coords) # NOT vectorised

CPU times: user 0 ns, sys: 0 ns, total: 0 ns
Wall time: 631 µs


array([  930.45355655,  1609.90936067,  2500.54316693])

In [25]:
# Each column of coords is passed as a whole 1-D array, so a single call to
# vec_haversine computes the distances for all rows.
%time vec_haversine(coords[:, 0], coords[:, 1], coords[:, 2], coords[:, 3]) # vectorised

CPU times: user 0 ns, sys: 0 ns, total: 0 ns
Wall time: 158 µs


array([  930.45355655,  1609.90936067,  2500.54316693])

In [28]:
#same test for pandas dataframe
import pandas as pd
# Route names, in the same order as the rows of coords; used as the row index.
labels = [
        'Berlin-London',
        'Berlin-Moscow',
        'Moscow-London',
     ]
# Wrap the coords array in a DataFrame with named columns (values in degrees).
df_coords = pd.DataFrame(coords, index=labels,
                             columns=['origin_lat', 'origin_lng',
                                      'destination_lat', 'destination_lng'])
# Bare last expression so the notebook displays the frame.
df_coords

Unnamed: 0,origin_lat,origin_lng,destination_lat,destination_lng
Berlin-London,52.516667,13.388889,51.507222,-0.1275
Berlin-Moscow,52.516667,13.388889,55.75,37.616667
Moscow-London,55.75,37.616667,51.507222,-0.1275


In [39]:
# apply with axis=1 hands each row to the scalar haversine function, which
# unpacks it into four values and uses the math module on them.
%time df_coords.apply(haversine,axis=1) # row-wise apply: one haversine call per row, so NOT vectorised

CPU times: user 4 ms, sys: 0 ns, total: 4 ms
Wall time: 3.06 ms


Berlin-London     930.453557
Berlin-Moscow    1609.909361
Moscow-London    2500.543167
dtype: float64

In [41]:
# Vectorised variant on the DataFrame: each argument is a whole column, and
# the result comes back as a Series indexed by route label.
%time vec_haversine(df_coords.origin_lat, df_coords.origin_lng, df_coords.destination_lat, df_coords.destination_lng)

CPU times: user 4 ms, sys: 0 ns, total: 4 ms
Wall time: 3.72 ms


Berlin-London     930.453557
Berlin-Moscow    1609.909361
Moscow-London    2500.543167
dtype: float64