In [9]:
import numpy as np
from scipy import stats
import time

def percentile_ranks_scipy(arr):
    """Return the percentile rank of each element of ``arr`` via SciPy.

    Ranks are produced by ``scipy.stats.rankdata`` with ``method="average"``,
    so tied values share the mean of the ranks they occupy. Every rank is
    then divided by ``len(arr)``.
    """
    average_ranks = stats.rankdata(arr, method="average")
    return average_ranks / len(arr)

# Generate a large random array for testing.
# A fixed seed makes every run benchmark the same data, so timings stay
# comparable across kernel restarts.
arr = np.random.default_rng(seed=0).random(4000 * 4000)

# Time the scipy method.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() can jump if the system clock is adjusted mid-run.
start_time = time.perf_counter()
percentiles_scipy = percentile_ranks_scipy(arr)
print(f"Scipy method took {time.perf_counter() - start_time:0.4f} seconds")


Scipy method took 2.5931 seconds


In [10]:
import numpy as np
import time

def percentile_ranks_numpy(arr):
    """Return the percentile rank of each element of ``arr`` using argsort.

    The argsort permutation is inverted to obtain each element's zero-based
    position in sorted order; that position is shifted to start at 1 and
    divided by ``len(arr)``. Ties are not averaged: every element receives its
    own distinct rank, in whatever order ``np.argsort`` places equal values.
    """
    count = len(arr)
    order = np.argsort(arr)

    # Scatter 0..count-1 through the sort order to invert the permutation.
    zero_based = np.empty_like(order)
    zero_based[order] = np.arange(count)

    # Shift to 1-based ranks before scaling.
    return (zero_based + 1) / float(count)

# Time the numpy method.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() can jump if the system clock is adjusted mid-run.
start_time = time.perf_counter()
percentiles_numpy = percentile_ranks_numpy(arr)
print(f"NumPy method took {time.perf_counter() - start_time:0.4f} seconds")


NumPy method took 2.1269 seconds


In [11]:
import numpy as np

def percentile_ranks_fast(arr):
    """Return the percentile rank of each element of ``arr`` using argsort.

    One-based ranks are scattered directly through the argsort permutation
    and then divided by ``len(arr)``. Ties are not averaged: every element
    receives its own distinct rank, in whatever order ``np.argsort`` places
    equal values.
    """
    count = len(arr)
    order = np.argsort(arr)

    # Writing 1..count through the sort order yields each element's rank.
    one_based = np.empty_like(order)
    one_based[order] = np.arange(1, count + 1)

    return one_based / float(count)

# Time the argsort method (ranks written 1-based, no extra "+ 1" pass).
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() can jump if the system clock is adjusted mid-run.
start_time = time.perf_counter()
percentiles_numpy = percentile_ranks_fast(arr)
print(f"argsort method took {time.perf_counter() - start_time:0.4f} seconds")


argsort method took 2.2070 seconds


In [12]:
import numpy as np

def percentile_ranks_inplace(arr):
    """Compute percentile ranks, reusing ``arr``'s own buffer when possible.

    Each element is replaced by its one-based rank in sorted order divided by
    ``len(arr)``. Ties are not averaged: every element receives its own
    distinct rank, in whatever order ``np.argsort`` places equal values.

    Parameters
    ----------
    arr : array_like
        Values to rank. A floating-point ``numpy.ndarray`` is OVERWRITTEN with
        the result and returned, so no second full-size float array is
        allocated. Any other input (integer/bool arrays, lists, ...) cannot
        hold fractional percentiles, so it is left untouched and a new
        ``float64`` array is returned instead.

    Returns
    -------
    numpy.ndarray
        The percentile ranks; the same object as ``arr`` when the buffer was
        reused.
    """
    count = len(arr)
    sorted_indices = np.argsort(arr)

    # Only a floating-point ndarray can store the result without the ranks
    # being cast (and possibly wrapped or truncated) to the input dtype.
    reuse_buffer = isinstance(arr, np.ndarray) and np.issubdtype(arr.dtype, np.floating)
    out = arr if reuse_buffer else np.empty(np.shape(arr), dtype=np.float64)

    out[sorted_indices] = np.arange(1, count + 1)  # Ranks start from 1
    out /= count  # in-place scaling: no extra full-size temporary
    return out

# Time the in-place method.
# The function overwrites its argument, and `arr` is shared with the other
# cells, so run it on a scratch copy made outside the timed region.
arr_scratch = arr.copy()
start_time = time.perf_counter()
percentiles_inplace = percentile_ranks_inplace(arr_scratch)
print(f"inplace method took {time.perf_counter() - start_time:0.4f} seconds")


inplace method took 2.1712 seconds


In [14]:
import numpy as np
import timeit

def percentile_ranks_scipy(arr):
    """Return the percentile rank of each element of ``arr`` via SciPy.

    Ranks are produced by ``scipy.stats.rankdata`` with ``method="average"``,
    so tied values share the mean of the ranks they occupy. Every rank is
    then divided by ``len(arr)``.
    """
    # Imported locally, so this cell does not rely on an earlier cell's import.
    from scipy import stats

    average_ranks = stats.rankdata(arr, method="average")
    return average_ranks / len(arr)

def percentile_ranks_numpy(arr):
    """Return the percentile rank of each element of ``arr`` using argsort.

    One-based ranks are scattered directly through the argsort permutation
    and then divided by ``len(arr)``. Ties are not averaged: every element
    receives its own distinct rank, in whatever order ``np.argsort`` places
    equal values.
    """
    count = len(arr)
    order = np.argsort(arr)

    # Writing 1..count through the sort order yields each element's rank.
    one_based = np.empty_like(order)
    one_based[order] = np.arange(1, count + 1)

    return one_based / float(count)

def percentile_ranks_inplace(arr):
    """Compute percentile ranks, reusing ``arr``'s own buffer when possible.

    Each element is replaced by its one-based rank in sorted order divided by
    ``len(arr)``. Ties are not averaged: every element receives its own
    distinct rank, in whatever order ``np.argsort`` places equal values.

    Parameters
    ----------
    arr : array_like
        Values to rank. A floating-point ``numpy.ndarray`` is OVERWRITTEN with
        the result and returned, so no second full-size float array is
        allocated. Any other input (integer/bool arrays, lists, ...) cannot
        hold fractional percentiles, so it is left untouched and a new
        ``float64`` array is returned instead.

    Returns
    -------
    numpy.ndarray
        The percentile ranks; the same object as ``arr`` when the buffer was
        reused.
    """
    count = len(arr)
    sorted_indices = np.argsort(arr)

    # Only a floating-point ndarray can store the result without the ranks
    # being cast (and possibly wrapped or truncated) to the input dtype.
    reuse_buffer = isinstance(arr, np.ndarray) and np.issubdtype(arr.dtype, np.floating)
    out = arr if reuse_buffer else np.empty(np.shape(arr), dtype=np.float64)

    out[sorted_indices] = np.arange(1, count + 1)  # Ranks start from 1
    out /= count  # in-place scaling: no extra full-size temporary
    return out

# Generate a large random array for testing.
# A fixed seed makes every run benchmark the same data, so timings stay
# comparable across kernel restarts.
arr = np.random.default_rng(seed=0).random(4000 * 4000)

# Define a wrapper to test with timeit
def test_scipy():
    """Run the SciPy implementation once on a fresh copy of the shared ``arr``."""
    scratch = arr.copy()  # the copy is made inside the timed call
    percentile_ranks_scipy(scratch)

def test_numpy():
    """Run the argsort implementation once on a fresh copy of the shared ``arr``."""
    scratch = arr.copy()  # the copy is made inside the timed call
    percentile_ranks_numpy(scratch)

def test_inplace():
    """Run the in-place implementation once on a fresh copy of the shared ``arr``.

    The copy matters here: the function writes into its argument, so handing
    it ``arr`` directly would alter the data used by the other measurements.
    """
    scratch = arr.copy()  # the copy is made inside the timed call
    percentile_ranks_inplace(scratch)

# How many independent samples to collect per implementation
num_repeats = 10

# One call per sample (number=1); timeit accepts the wrapper callables directly,
# so no statement string or globals lookup is needed.
scipy_times = timeit.repeat(test_scipy, repeat=num_repeats, number=1)
numpy_times = timeit.repeat(test_numpy, repeat=num_repeats, number=1)
inplace_times = timeit.repeat(test_inplace, repeat=num_repeats, number=1)

# Report mean ± standard deviation over the collected samples
print(f"Scipy method: {np.mean(scipy_times):.6f} sec ± {np.std(scipy_times):.6f} sec")
print(f"Numpy method: {np.mean(numpy_times):.6f} sec ± {np.std(numpy_times):.6f} sec")
print(f"In-place method: {np.mean(inplace_times):.6f} sec ± {np.std(inplace_times):.6f} sec")


Scipy method: 2.555591 sec ± 0.038955 sec
Numpy method: 2.094897 sec ± 0.033403 sec
In-place method: 2.118528 sec ± 0.044289 sec
