In [11]:
import sys
import os

# Resolve the build directory (../lib) and the data directory (../data)
# relative to the current working directory.
build_dir, data_dir = (os.path.abspath(rel) for rel in ('../lib', '../data'))

# Put both at the front of the module search path in a single step:
# data_dir ends up first and build_dir second.
sys.path[:0] = [data_dir, build_dir]

import pandas as pd
import numpy as np

import sorters
import probes

import list_generators as lg

In [12]:
# Collect the available probe names, then show each name on its own line
# followed by the explanation text that probes.explain_probe returns for it.
probe_list = probes.list_probes()
for probe in probe_list:
    print(probe, probes.explain_probe(probe), sep="\n")

dis
Displacement: Sum of distances each element must move to reach its sorted position
enc
Entropy: Measure of disorder in the sequence based on information theory
exc
Exchanges: Minimum number of exchanges needed to sort the sequence
ham
Hamming distance: Number of elements that are not in their sorted position
inv
Inversions: Number of pairs of elements that are in the wrong order
max
Max: Maximum distance an element must move to reach its sorted position
mono
Monotonicity: Number of adjacent elements that are not in order
osc
Oscillation: Measures how elements are oscillating around their final positions
rem
Rem: Minimum number of elements to remove to obtain a sorted sequence
runs
Runs: Number of monotonically increasing subsequences
sus
Suspension: Measure of how suspended elements are from their sorted positions


In [13]:
# Baseline: run every probe on a freshly generated list that has not been
# passed through lg.randomize_list.
n_elements = 1000
arr = lg.generate_list(n_elements)
probes.measure_all(arr)  # bare last expression so the notebook displays the result

{'dis': 0,
 'enc': 0,
 'exc': 0,
 'ham': 0,
 'inv': 0,
 'max': 0,
 'mono': 0,
 'osc': 0,
 'rem': 0,
 'runs': 0,
 'sus': 0}

In [14]:
# Input sizes for the experiment: 1, 1001, 2001, ..., 9001 elements.
sizes = list(range(1, 10000, 1000))

# One randomized list per size.
# NOTE(review): no random seed is set anywhere in view, so these lists (and the
# measurements printed below) presumably differ between runs -- seed whatever
# generator lg.randomize_list uses if reproducible output is required.
dataset = [lg.randomize_list(lg.generate_list(size)) for size in sizes]

# Result of probes.measure_all for each list, in the same order as `dataset`.
presortedness = [probes.measure_all(shuffled) for shuffled in dataset]

# A dedicated loop name is used here: reusing `arr` for these results would
# overwrite the list that an earlier cell bound to `arr` with an object of a
# different kind.
for measurements in presortedness:
    print(measurements)

{'dis': 0, 'enc': 0, 'exc': 0, 'ham': 0, 'inv': 0, 'max': 0, 'mono': 0, 'osc': 0, 'rem': 0, 'runs': 0, 'sus': 0}
{'dis': 999, 'enc': 39, 'exc': 990, 'ham': 1000, 'inv': 248300, 'max': 968, 'mono': 419, 'osc': 339719, 'rem': 940, 'runs': 501, 'sus': 54}
{'dis': 1999, 'enc': 55, 'exc': 1993, 'ham': 2000, 'inv': 1017339, 'max': 1955, 'mono': 832, 'osc': 1361861, 'rem': 1916, 'runs': 1003, 'sus': 79}
{'dis': 2994, 'enc': 71, 'exc': 2992, 'ham': 2999, 'inv': 2212704, 'max': 2983, 'mono': 1233, 'osc': 2996922, 'rem': 2899, 'runs': 1512, 'sus': 99}
{'dis': 3996, 'enc': 81, 'exc': 3993, 'ham': 4001, 'inv': 3996423, 'max': 3897, 'mono': 1658, 'osc': 5304632, 'rem': 3886, 'runs': 1980, 'sus': 122}
{'dis': 5000, 'enc': 90, 'exc': 4989, 'ham': 4998, 'inv': 6264539, 'max': 4921, 'mono': 2070, 'osc': 8358139, 'rem': 4868, 'runs': 2506, 'sus': 134}
{'dis': 5997, 'enc': 101, 'exc': 5991, 'ham': 6001, 'inv': 9017223, 'max': 5959, 'mono': 2462, 'osc': 12018673, 'rem': 5853, 'runs': 3023, 'sus': 145}
{'d

In [16]:
# times[k] holds the benchmark results of probe_list[k], one entry per list in
# `dataset` (same order as `dataset`).
times = []
for probe in probe_list:
    times.append([probes.benchmark_probe_by_name(probe, arr) for arr in dataset])

# Print each probe name next to its row of results.
for name, timings in zip(probe_list, times):
    print(name, ": ", timings)

dis :  [0.00075, 0.004166, 0.007042, 0.009917, 0.014, 0.01825, 0.017375, 0.018917, 0.021375, 0.023833]
enc :  [4.2e-05, 0.022833, 0.04175, 0.060292, 0.07875, 0.099625, 0.117833, 0.137583, 0.154292, 0.241167]
exc :  [0.000208, 0.060791, 0.126041, 0.203833, 0.275708, 0.371458, 0.471833, 0.56175, 0.874583, 0.7305]
ham :  [8.3e-05, 0.045667, 0.100709, 0.164459, 0.221791, 0.32275, 0.35725, 0.461084, 0.496292, 0.541459]
inv :  [0.000125, 0.071166, 0.142959, 0.20275, 0.277834, 0.357625, 0.438417, 0.522208, 0.625333, 0.869291]
max :  [8.3e-05, 0.115583, 0.249875, 0.402875, 0.536834, 0.638083, 0.764083, 0.915916, 1.091542, 1.233417]
mono :  [4.2e-05, 0.003292, 0.006125, 0.009583, 0.012875, 0.015958, 0.018834, 0.0215, 0.024584, 0.0255]
osc :  [0.0, 0.101958, 0.414542, 0.538084, 0.460083, 0.589834, 0.746709, 0.878667, 1.028, 1.310292]
rem :  [0.000125, 0.017917, 0.032958, 0.055958, 0.078166, 0.0835, 0.095333, 0.117375, 0.151625, 0.160583]
runs :  [4.1e-05, 0.002875, 0.006084, 0.008791, 0.012, 0.0