# Timing vs Dataset Size (Range–CoMine vs Naïve vs RangeInc)
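This notebook measures, for each algorithm, the **runtime (ms)** and the **peak traced memory (KB, i.e. bytes / 1024, as reported by `tracemalloc`)**, each averaged over repeated runs, as we scale the number of features and the number of instances per feature.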

Use this to sanity-check complexity and compare algorithms on your machine.

In [None]:

from range_comine.synthetic import generate_synthetic
from range_comine.mining import range_comine
from range_comine.baselines import naive_range, range_inc_mining
import time, tracemalloc, statistics as stats
import matplotlib.pyplot as plt

# Algorithms under comparison: short key -> (label used in plot legends, callable).
ALGOS = dict(
    range=("Range–CoMine", range_comine),
    naive=("Naïve", naive_range),
    range_inc=("RangeInc-Mining", range_inc_mining),
)

def run_profile(fn, objs, d1, d2, min_prev):
    """Run ``fn`` once and measure its wall-clock time and peak traced memory.

    ``fn`` is invoked as ``fn(objs, d1=d1, d2=d2, min_prev=min_prev)``; its
    return value is not used.

    The timer runs while ``tracemalloc`` is tracing, so the reported runtime
    includes the tracing overhead. Tracing is unconditionally stopped before
    returning (also when ``fn`` raises), so a failed run cannot leave tracing
    switched on for subsequent measurements.

    Returns:
        A ``(elapsed_ms, peak_kb)`` tuple: elapsed time in milliseconds and
        the peak traced memory in units of 1024 bytes.

    Raises:
        Whatever ``fn`` raises; the exception propagates unchanged.
    """
    tracemalloc.start()
    try:
        t0 = time.perf_counter()
        # Keep the result referenced until both readings are taken so that
        # releasing it is not counted as part of the elapsed time.
        _result = fn(objs, d1=d1, d2=d2, min_prev=min_prev)
        ms = (time.perf_counter() - t0) * 1000.0
        _, peak = tracemalloc.get_traced_memory()
    finally:
        tracemalloc.stop()
    return ms, peak / 1024.0


In [None]:

# Benchmark configuration.
sizes = [(3, 5), (4, 6), (5, 6), (6, 6)]  # (n_features, instances_per_feature)
d1, d2, min_prev = 8.0, 30.0, 0.5
seed = 13
reps = 3  # number of profiled runs averaged per (algorithm, size) point

# results[key] is a list of (n_features * instances_per_feature,
# mean runtime in ms, mean peak memory in KB), one entry per element of
# `sizes`, in the same order.
results = {key: [] for key in ALGOS}

for nf, ni in sizes:
    # The dataset is generated once per size and passed to every algorithm.
    objs = generate_synthetic(n_features=nf, instances_per_feat=ni, seed=seed)
    for key, (_label, fn) in ALGOS.items():
        runs = [run_profile(fn, objs, d1, d2, min_prev) for _ in range(reps)]
        mean_ms = stats.mean(ms for ms, _ in runs)
        mean_kb = stats.mean(kb for _, kb in runs)
        results[key].append((nf * ni, mean_ms, mean_kb))

# Bare expression so the notebook displays the collected numbers.
results


In [None]:

# Runtime figure: one line per algorithm, x = dataset size, y = mean runtime.
plt.figure()
for algo_key, points in results.items():
    legend_label = ALGOS[algo_key][0]
    x_vals = [point[0] for point in points]
    y_vals = [point[1] for point in points]
    plt.plot(x_vals, y_vals, marker="o", label=legend_label)
plt.xlabel("Total #objects (n_features × instances_per_feature)")
plt.ylabel("Avg runtime (ms)")
plt.title("Runtime vs dataset size")
plt.legend()
plt.grid(True, linestyle="--", alpha=0.6)
plt.tight_layout()


In [None]:

# Memory figure: one line per algorithm, x = dataset size, y = mean peak memory.
plt.figure()
for algo_key, points in results.items():
    legend_label = ALGOS[algo_key][0]
    x_vals = [point[0] for point in points]
    y_vals = [point[2] for point in points]
    plt.plot(x_vals, y_vals, marker="o", label=legend_label)
plt.xlabel("Total #objects (n_features × instances_per_feature)")
plt.ylabel("Avg peak memory (KB)")
plt.title("Peak memory vs dataset size")
plt.legend()
plt.grid(True, linestyle="--", alpha=0.6)
plt.tight_layout()
