Skip to content
Permalink
 
 
Cannot retrieve contributors at this time
executable file 150 lines (119 sloc) 4.28 KB
#!/usr/bin/env python
import os
import cProfile
import subprocess
import numpy as np
import json
import logging
from deepdiff import DeepDiff
from deepdiff import diff
from deepdiff.helper import pypy3, py_current_version
logging.basicConfig(level=logging.INFO)
current_pid = str(os.getpid())
def get_profile_of_usage(filename, t1, t2, **params):
def _run():
DeepDiff(t1, t2, log_frequency_in_sec=2, **params)
P = cProfile.Profile()
P.runcall(_run)
P.dump_stats(f'results/{filename}.profile')
def plot_resource_usage(filename, t1, t2, freq, **params):
subprocess.Popen(['psrecord', current_pid, '--interval', str(freq), '--plot', f'results/{filename}.png'], close_fds=True)
DeepDiff(t1, t2, log_frequency_in_sec=2, **params)
def get_file_name(func_name, **params):
suffix = 'pypy3' if pypy3 else py_current_version
filename = f"{func_name}__{suffix}"
for key, value in params.items():
filename = f"{filename}__{key}={value}"
print('-' * 20)
print(filename)
return filename
def benchmark_numpy_array(plot_usage, profile_usage):
"""
Benchmarking numpy arrays and different python versions and cache sizes etc.
"""
t1 = np.loadtxt('data/mat1.txt')
t2 = np.loadtxt('data/mat2.txt')
params = dict(
ignore_order=True,
cache_size=5000,
cache_tuning_sample_size=500,
)
filename = get_file_name('benchmark_numpy_array', **params)
if plot_usage:
plot_resource_usage(filename, t1, t2, freq='.1', **params)
elif profile_usage:
get_profile_of_usage(filename, t1, t2, **params)
def benchmark_array_no_numpy(plot_usage, profile_usage):
"""
Benchmarking numpy arrays and different python versions and cache sizes etc.
"""
t1 = np.loadtxt('data/mat1.txt').tolist()
t2 = np.loadtxt('data/mat2.txt').tolist()
params = dict(
ignore_order=True,
cache_size=10000,
cache_tuning_sample_size=20000,
)
filename = get_file_name('benchmark_array_no_numpy', **params)
try:
diff.np = None
if plot_usage:
plot_resource_usage(filename, t1, t2, freq='.1', **params)
elif profile_usage:
get_profile_of_usage(filename, t1, t2, **params)
finally:
diff.np = np
def benchmark_big_jsons(plot_usage, profile_usage):
"""
Benchmarking numpy arrays and different python versions and cache sizes etc.
"""
t1 = json.load(open('data/big_actual.json'))
t2 = json.load(open('data/big_expected.json'))
params = dict(
ignore_order=True,
cache_size=0,
cache_tuning_sample_size=0,
max_diffs=300000,
max_passes=40000,
)
filename = get_file_name('benchmark_big_jsons', **params)
if plot_usage:
plot_resource_usage(filename, t1, t2, freq='.1', **params)
elif profile_usage:
get_profile_of_usage(filename, t1, t2, **params)
def benchmark_deeply_nested_a(plot_usage, profile_usage):
params = dict(
ignore_order=True,
cache_size=500,
cache_tuning_sample_size=500,
)
filename = get_file_name('benchmark_deeply_nested_a', **params)
t1 = json.load(open('data/nested_a_t1.json'))
t2 = json.load(open('data/nested_a_t2.json'))
if plot_usage:
plot_resource_usage(filename, t1, t2, freq='.1', **params)
elif profile_usage:
get_profile_of_usage(filename, t1, t2, **params)
def benchmark_deeply_nested_b(plot_usage, profile_usage):
params = dict(
ignore_order=True,
cache_size=5000,
cache_tuning_sample_size=0,
)
filename = get_file_name('benchmark_deeply_nested_b', **params)
t1 = json.load(open('data/nested_b_t1.json'))
t2 = json.load(open('data/nested_b_t2.json'))
if plot_usage:
plot_resource_usage(filename, t1, t2, freq='.1', **params)
elif profile_usage:
get_profile_of_usage(filename, t1, t2, **params)
if __name__ == '__main__':
plot_usage = False
profile_usage = False
plot_usage = True
# profile_usage = True
# benchmark_array_no_numpy(plot_usage, profile_usage)
# benchmark_big_jsons(plot_usage, profile_usage)
# benchmark_deeply_nested_a(plot_usage, profile_usage)
# benchmark_deeply_nested_b(plot_usage, profile_usage)
benchmark_numpy_array(plot_usage, profile_usage)