# Ranking elements

In [4]:
import numpy as np

In [5]:
# Sample data: six distinct values, each shifted by 0.1 so they are floats
base_values = (1, 9, 4, 3, 12, 6)
a = [value + 0.1 for value in base_values]
print(a)

[1.1, 9.1, 4.1, 3.1, 12.1, 6.1]


In [6]:
# Ugly, yet using numpy: argsort gives the sorting order (b), and inverting
# that permutation by hand gives the rank of every element (c)
b = np.argsort(np.array(a))
print(b)
c = [0] * len(b)
for position, source_index in enumerate(b):
    # the element that lands at `position` after sorting has rank `position`
    c[source_index] = position
c

[0 3 2 5 1 4]


[0, 4, 2, 1, 5, 3]

In [7]:
# Smarter, using numpy: the argsort of the argsort is the 0-based rank of each element
np.argsort(np.argsort(np.array(a)))

array([0, 4, 2, 1, 5, 3], dtype=int64)

In [8]:
# Without numpy, but with same logic, except it doesn't handle repetitions
def keysort(x):
    """Return, for each element of ``sorted(x)``, its index in ``x``.

    The lookup uses ``x.index``, which reports the first match only, so
    equal elements all map to the same index (repetitions are not handled).
    """
    return [x.index(value) for value in sorted(x)]
keysort(keysort(a))

[0, 4, 2, 1, 5, 3]

In [9]:
# With a function, and proper handling of repetitions
def rank(x):
    """Return the 0-based rank of every element of ``x``.

    ``out[i]`` is the position ``x[i]`` would occupy if ``x`` were sorted in
    ascending order. Repeated values receive consecutive ranks, assigned in
    order of appearance, so the result is always a permutation of
    ``range(len(x))``.

    Args:
        x: An indexable, sized sequence of mutually comparable items.
            It is not modified.

    Returns:
        A list of ints with the same length as ``x`` (``[]`` for empty input).
    """
    # Stable argsort: indices of x ordered by value. Ties keep their original
    # order, which is what hands repeated values their ranks first-come,
    # first-served. One O(n log n) sort replaces a linear search per element.
    order = sorted(range(len(x)), key=lambda i: x[i])
    # Invert the permutation: the element sitting at order[r] has rank r.
    out = [0] * len(x)
    for r, i in enumerate(order):
        out[i] = r
    return out

rank(a)

[0, 4, 2, 1, 5, 3]

In [10]:
# Alternative keysort (should be faster): no per-element index() search, and
# pairing each value with its own position keeps repeated values apart
def keysort(x):
    """Return the indices of ``x`` in the order that sorts ``x`` ascending.

    Each value is paired with its position and the pairs are sorted, so equal
    values are ordered by position.
    """
    decorated = [(x[position], position) for position in range(len(x))]
    decorated.sort()
    return [position for _, position in decorated]
keysort(keysort(a))

[0, 4, 2, 1, 5, 3]

In [16]:
# Same without double sort: sort (value, index) pairs once to get the order,
# then invert that permutation with an (ugly) explicit loop
t = [index for _, index in sorted((a[index], index) for index in range(len(a)))]
out = [0] * len(t)
for position, index in enumerate(t):
    out[index] = position
out

[0, 4, 2, 1, 5, 3]

In [19]:
# Same single sort, but the explicit loop is replaced by a nested comprehension:
# for every original index i, scan t for the position j that holds it; that
# position is the rank of a[i]. Note the scan makes this quadratic in len(t).
t = [index for _, index in sorted((a[index], index) for index in range(len(a)))]
[j for i in range(len(t)) for j, source in enumerate(t) if source == i]

[0, 4, 2, 1, 5, 3]

In [20]:
# Same, but with maps: zip pairs each value with its index, and map strips
# the values off again once the pairs are sorted
t = list(map(lambda pair: pair[1], sorted(zip(a, range(len(a))))))
[j for i in range(len(t)) for j, source in enumerate(t) if source == i]

[0, 4, 2, 1, 5, 3]

### Sorting one list by keys from the other

Now a test for a related but simpler problem: given two lists, we need to sort one of them according to the order of the values in the other.

In [15]:
import time

In [46]:
# Benchmark: two ways of sorting `item_list` by the keys in `value_list`.
#   LC  - pair each key with its index, sort, then look the items up by index
#   MAP - zip keys with items, sort the pairs, then strip the keys with map
# NOTE: besides comprehension-vs-map, the two variants also differ in how the
# keys are read (per-element indexing vs zip), so the measured gap reflects
# both effects together.
ntries = 10000
nelements = 1000
seed = 0  # fixed seed, so every run of this cell sorts the same inputs
rng = np.random.RandomState(seed)  # local generator: global numpy RNG state is left alone
time1 = time2 = 0.0
item_list = list(range(nelements))
for trial in range(ntries):
    value_list = rng.randint(size=nelements, low=0, high=1000)

    # perf_counter() is the clock intended for timing short durations;
    # time.time() can be too coarse for sections this brief.
    tic = time.perf_counter()
    out1 = [item_list[j] for _, j in sorted([(value_list[k], k) for k in range(len(value_list))])]
    time1 += time.perf_counter() - tic

    tic = time.perf_counter()
    out2 = list(map(lambda x: x[1], sorted(zip(value_list, item_list))))
    time2 += time.perf_counter() - tic

    # Both approaches must agree on the resulting order
    assert out1 == out2

print('Total times, for LC and MAP:', time1, time2)
print(f'Difference: {int((time1-time2)/time1*100)}%')

Total times, for LC and MAP: 7.173242568969727 6.424499034881592
Difference: 10%


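So, an interesting finding: the `list(map(lambda ...))`-based solution is consistently about 10% faster than the list-comprehension-based one! Note, however, that the two variants also pair values with items differently (per-element index lookups vs. `zip`), so part of the gap may come from that rather than from `map` itself.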