In [6]:
import sys
sys.path.append('../')
import haversinevec as hv
import numpy as np

# Ex. 1: calculate a distance between a set of points and a reference point
This is used, for example, to calculate the distance between each point in a cluster and the median point of the cluster.

In [19]:
# Draw 10 random 2-column points: unit-normal noise added to the centre (55, 12).
# NOTE(review): columns are presumably (latitude, longitude) in degrees -- confirm against haversinevec.
points = np.random.randn(10,2)+(55, 12)
# A single reference point drawn the same way (a length-2 vector).
ref = np.random.randn(2)+(55, 12)
print 'Points: '
print points
print ''
print 'Reference: ', ref
print ''
print 'Distances: '
# Many-to-one call: the printed result holds one distance per row of `points`,
# each measured against the single `ref` point.
print hv.haversine(points, ref)

Points: 
[[ 53.92959389  11.14784068]
 [ 54.57559475  13.25844501]
 [ 54.39491123  10.37053309]
 [ 53.11800798  11.99267043]
 [ 55.01358938  13.02808719]
 [ 53.43024189  12.166042  ]
 [ 54.54267359  11.80501571]
 [ 54.48866451  12.33336138]
 [ 55.57376858  12.54324252]
 [ 55.11443969  10.20658839]]

Reference:  [ 54.52342104  13.06045042]

Distances: 
[ 140.92259325   14.04107049  174.63560765  171.45936476   54.60485386
  135.04436424   81.11736649   47.15346089  121.48649547  194.48897822]


# Ex. 2: calculate the distance between each pair of consecutive points in a list
This can be used, for example, to calculate the speed between consecutive location measurements.


In [20]:
# Draw 10 random 2-column points around the centre (55, 12).
points = np.random.randn(10,2)+(55, 12)
print 'Points: '
print points
print
print 'Distances: '
# points[:-1] drops the last row and points[1:] drops the first, so row i of the
# first argument lines up with row i+1 of the original list. The printed result
# has 9 entries for the 10 points: one per consecutive pair.
print hv.haversine(points[:-1], points[1:])


Points: 
[[ 55.25683998  11.40127582]
 [ 56.39122739  12.73955354]
 [ 55.89404112  13.01532699]
 [ 55.48794344  11.74450442]
 [ 54.95598141  13.31222948]
 [ 54.60386677  10.99465321]
 [ 54.30341301  12.12826663]
 [ 54.7578454   12.99074994]
 [ 54.60396952  11.52150872]
 [ 52.50591727  12.17902929]]

Distances: 
[ 151.48536336   57.92877942   91.65997256  115.82326136  153.85681642
   80.62704743   75.25131544   96.09321185  237.56425977]


# Ex. 3: calculate the distance between every pair of points in a list
This can be used to preprocess the list of points before clustering, where a distance matrix is needed

In [21]:
# Draw 10 random 2-column points around the centre (55, 12).
points = np.random.randn(10,2)+(55, 12)
print 'Points: '
print points
# Pairwise distances returned as a flat "condensed" array; the printed output
# has 45 entries for the 10 points, i.e. one per unordered pair.
pdist = hv.haversine_pdist(points)
print 'Condensed array: '
print pdist
print
# scipy's squareform converts the condensed vector into a square distance matrix.
from scipy.spatial.distance import squareform
print 'Square array: '
print squareform(pdist)

Points: 
[[ 54.81817672  12.89489088]
 [ 56.4577278   12.32384339]
 [ 55.62170887  11.42058169]
 [ 54.47171207  11.18330578]
 [ 54.35009004  11.72688846]
 [ 54.68634463  12.87974198]
 [ 55.938958    11.4461517 ]
 [ 53.22198266  12.39189546]
 [ 54.80177749  12.78012131]
 [ 53.95037502  13.53383694]]
Condensed array: 
[ 186.0054707   129.47623257  116.79793151   91.6088223    14.70771643
  154.78886303  180.70564895    7.58567522  105.1076493   108.69987167
  232.4956506   237.63320189  200.26734646   79.30456383  360.22786915
  186.55525062  289.49559594  128.90812263  142.90119835  139.47693515
   35.35227034  274.43639751  125.6417907   230.23002528   37.72846256
  112.02597778  164.18393266  160.16279485  109.23957024  163.64486386
   83.36579268  177.7710436   132.97685087   84.53579113  125.92378676
  166.40089059  166.11359344   14.35609356   92.28076572  308.53757773
  152.13226474  258.4714867   177.68529849  110.76232452  106.63400604]

Square array: 
[[   0.          186.00547

# Ex. 4: given two lists, calculate the distance between all combinations of points
For example: you have a trace and a list of weather stations. Calculate the distance from each trace point to every station and find the closest one.

In [27]:
# 10 random trace points and 3 random stations, all scattered around (55, 12).
points = np.random.randn(10,2)+(55, 12)
stations = np.random.randn(3,2)+(55, 12)
# Cross-distance matrix; the printed result has one row per trace point and one
# column per station (10 x 3 here).
cdist = hv.haversine_cdist(points, stations)
print 'Full distance matrix with 10 points in the trace and three stations: '
print cdist
print
# now, find the closest station for each point
print 'The closest station id for each point: '
# argmin along axis=1 picks, for every row (point), the column index (station)
# holding the smallest distance.
print np.argmin(cdist, axis=1)

Full distance matrix with 10 points in the trace and three stations: 
[[  98.84383691  112.36398194   46.83234064]
 [  94.65916568  190.35890088  127.42862051]
 [ 313.45956617  262.94636355  263.30213527]
 [ 103.03822328    9.31535769   91.42235313]
 [  53.7247292    60.4106995    81.86293184]
 [ 218.28132796  241.38929978  172.84770411]
 [ 216.12871368  167.04616404  167.18117801]
 [ 152.02125986   62.72634575  151.00285462]
 [  98.21680207  192.01044062  138.16234651]
 [ 132.17255353  225.47033192  148.13022948]]

The closest station id for each point: 
[2 0 1 1 0 2 1 1 0 0]


# Ex 5: is this faster than my default haversine?

In [41]:
from haversine import haversine
import timeit

def hv_default():
    """Loop-based baseline: distance from every point to the last point.

    Generates 10000 random 2-column points around (55, 12) and calls the
    scalar ``haversine`` function once per point, measuring each of the first
    N-1 points against the last one. This is the same workload that
    ``hv_vec`` performs in a single vectorised call.

    Returns
    -------
    numpy.ndarray
        float64 array of shape (N-1,) holding one distance per point.
        (The previous version returned only ``result[idx]``, i.e. the last
        scalar, and left an unused trailing slot in the array.)
    """
    points = np.random.randn(10000,2)+(55, 12)
    # One slot per point that is compared against the last point.
    n_dist = points.shape[0] - 1
    result = np.zeros((n_dist, ), dtype=np.float64)
    for idx in range(n_dist):
        result[idx] = haversine(points[idx], points[-1])
    return result

def hv_vec():
    """Vectorised counterpart of the loop baseline.

    Draws 10000 random 2-column points around (55, 12) and hands all but the
    last row, together with the last row as the reference, to
    ``hv.haversine`` in one call. Returns whatever that call returns.
    """
    sample = np.random.randn(10000, 2) + (55, 12)
    others, last = sample[:-1], sample[-1]
    return hv.haversine(others, last)

# first measure how much time it takes to generate the random points
# (both benchmark functions build an array of this size on every call, so this
# baseline is subtracted from their timings below to isolate the distance work)
point_timeit = timeit.timeit('np.random.randn(10000,2)+(55, 12)', number=1000, setup="import numpy as np")

print 'Default haversine: '
# 1000 calls of the per-point loop; the function is imported from __main__
# because timeit executes the statement in its own namespace.
hv_default_timeit = timeit.timeit('hv_default()', number=1000, setup = "from __main__ import hv_default")
print hv_default_timeit-point_timeit, 'seconds'

print
print 'haversine-vec: '
# 1000 calls of the vectorised version, timed the same way.
hv_vec_timeit = timeit.timeit('hv_vec()', number=1000, setup = "from __main__ import hv_vec")
print hv_vec_timeit-point_timeit, 'seconds'

# Ratio of the two baseline-corrected timings (loop / vectorised).
print 'Speedup factor: ', (hv_default_timeit-point_timeit)/(hv_vec_timeit-point_timeit)




Default haversine: 
51.4074697495 seconds

haversine-vec: 
0.493114948273 seconds
Speedup factor:  104.250479385
