# Benchmark of backend methods

This benchmark file will assess the performance of different possibilities to implement the backend.

In [1]:
import json
from coordinates import Coordinates
from service_provider import ServiceProvider

## Distance calculation

Below cell demonstrates the usage of the `Coordinates`-class:

In [2]:
coords1 = Coordinates(13.719, 51.06)
coords2 = Coordinates(13.7345, 51.0384)
print(coords1.distance_to(coords2))

2900.7583914770216


## Example stats of a worker

The below cell provides an example worker:

In [3]:
sp = ServiceProvider(id=1, lat=50.89849, lon=7.01926, max_driving_distance=97000, picture_score=2., description_score=2.)

TypeError: ServiceProvider.__init__() got an unexpected keyword argument 'lat'

## Parsing postcodes and service providers

Testing the postcode iterator:

In [5]:
from postcode import AllPostCodes

print([pc.postcode for pc in AllPostCodes('./dataset/postcode.json')][:10])

[1067, 1069, 1097, 1099, 1108, 1109, 1127, 1129, 1139, 1156]


Testing the service provider iterator:

In [6]:
from service_provider import AllServiceProviders

print([sp.id for sp in AllServiceProviders('./dataset/service_provider_profile.json', './dataset/quality_factor_score.json')][-10:])

[67267, 67268, 67269, 67270, 67271, 67272, 67273, 67274, 67275, 67276]


## Graph-based approach

At first, we will generate the graph.

In [7]:
from approaches import GraphBasedApproach
from postcode import AllPostCodes
from service_provider import AllServiceProviders
import os

apc = AllPostCodes('../dataset/postcode.json')
asp = AllServiceProviders('../dataset/service_provider_profile.json', '../dataset/quality_factor_score.json')

gba = GraphBasedApproach(asp.service_providers, apc.postcodes)

100%|██████████| 8168/8168 [05:17<00:00, 25.69it/s]


Here, we can have some statistics:

In [8]:
print(f'There are {len(asp.service_providers)} service providers in {len(apc.postcodes)} post codes.')

There are 67276 service providers in 8168 post codes.


Now, we can export the graph:

In [9]:
savefile = 'complete-germany.gml'
if not os.path.isfile(savefile):
    gba.graph.save(savefile)

And find out the highest-ranked craftsmen:

In [10]:
import bisect
from tqdm import tqdm
import time


craftsman_dict = dict()

i = 0
for postcode in tqdm([pc.postcode for pc in apc.postcodes]):
    craftsmen = []
    vertex = gba.id_to_vertex(postcode, True)
    for edge in gba.graph.es[gba.graph.incident(vertex)]:
        edge_connects = [edge.source, edge.target]
        edge_connects.remove(vertex)
        craftsman = gba.vertex_to_id(edge_connects.pop(), False)
        crafts_weight = (craftsman, edge['weight'])
        bisect.insort(craftsmen, crafts_weight, key=lambda x: -x[1])
    #craftsmen = [craftsman for craftsman, _ in craftsmen]
    craftsman_dict[postcode] = craftsmen

time.sleep(1)

100%|██████████| 8168/8168 [01:22<00:00, 99.23it/s] 


### Benchmarking

We can now time the access to the graph by randomly selecting a large amount of service provider lists:

In [19]:
import time
import random

elapsed = 0
num_tries = 1_000_000
for i in tqdm(range(num_tries)):
    access_postcode = random.randint(0, len(apc.postcodes)-1)
    start = time.time()
    craftsman_dict[apc.postcodes[access_postcode].postcode][:20]
    end = time.time()
    elapsed += end-start
print(f'Total time for {num_tries} tries: {elapsed}\nTime per access: {(elapsed/num_tries)*1_000_000} microseconds')

100%|██████████| 1000000/1000000 [00:03<00:00, 252859.90it/s]

Total time for 1000000 tries: 2.5200109481811523
Time per access: 2.5200109481811523 microseconds





## Results

As an example, we will now print the first 20 providers of the first postcode:

In [14]:
print(craftsman_dict[1067][:20])

[(30, 2.695622658255577), (15800, 2.6926411584339083), (3708, 2.6902867998542863), (44918, 2.679246026322894), (841, 2.6499046305485856), (23236, 2.6338415972869136), (49357, 2.622901597189316), (3726, 2.5735129606114375), (3009, 2.572696616754063), (41976, 2.5720523222769964), (9879, 2.3593262285224084), (15080, 2.3576070230208535), (808, 2.3566244213474947), (3266, 2.355475344838018), (56917, 2.354260993865051), (1442, 2.3529238087885087), (11199, 2.3516139272660244), (16825, 2.3512001563142526), (5890, 2.3499036086609992), (53559, 2.3472640670281995)]
