In [3]:
import logging
import pandas as pd
import multiprocessing as mp
import time
from remediator import get_remediation

# Configure the root logger so that only records at ERROR level or above are emitted.
logging.basicConfig(level=logging.ERROR)

In [4]:
# Load the (package, version) pairs listed in the license-incompatibility CSV.
# Both key columns are read as strings so that pandas' type inference cannot
# turn a numeric-looking version such as "1.10" into the float 1.1.
incompats = pd.read_csv(
    "res/license_incompatibilities.csv",
    dtype={"package": str, "version": str},
)
# A set de-duplicates the pairs, so each (package, version) is processed once.
incompat_set = set(zip(incompats.package, incompats.version))
mongo_uri = "mongodb://localhost:27017/"

# Serial equivalent of the parallel run, kept as a debugging aid:
# for pkg, ver in sorted(incompat_set):
#     print(pkg, ver)
#     print(get_remediation(mongo_uri, pkg, ver)["changes"])

def worker(mongo_uri, pkg, ver):
    """Run ``get_remediation`` for one package version and summarise the result.

    Args:
        mongo_uri: MongoDB connection URI, forwarded to ``get_remediation``.
        pkg: Package name, forwarded to ``get_remediation``.
        ver: Package version, forwarded to ``get_remediation``.

    Returns:
        On success, the dict returned by ``get_remediation`` with three keys
        added -- ``run_time`` (elapsed seconds), ``original_tree_size`` and
        ``new_tree_size`` -- and the ``original_tree`` entry removed.
        If anything raises, a dict with ``package``, ``version`` and
        ``error`` (the exception message) is returned instead of propagating
        the exception.
    """
    try:
        # perf_counter() is monotonic; time.time() follows the wall clock and
        # can jump (NTP, DST, manual changes), which would skew durations.
        started = time.perf_counter()
        res = get_remediation(mongo_uri, pkg, ver)
        elapsed = time.perf_counter() - started

        res["run_time"] = elapsed
        res["original_tree_size"] = len(res["original_tree"])
        # NOTE(review): the size is taken from element 0 of "new_tree" --
        # presumably the tree itself sits at index 0; confirm against
        # get_remediation's return format.
        res["new_tree_size"] = len(res["new_tree"][0])

        # Only the size of the original tree is kept; the tree itself is
        # dropped from the returned dict.
        del res["original_tree"]
        return res
    except Exception as ex:
        # Report the failure as data so the caller still gets one row per input.
        return {"package": pkg, "version": ver, "error": str(ex)}

# One argument tuple per distinct (package, version) pair.
params = [(mongo_uri, pkg, ver) for pkg, ver in incompat_set]

# Use half of the reported CPUs, but never fewer than one process:
# cpu_count() // 2 is 0 on a single-CPU machine and mp.Pool(0) raises ValueError.
pool_size = max(1, mp.cpu_count() // 2)
with mp.Pool(pool_size) as pool:
    result = pool.starmap(worker, params)

# Set iteration order is arbitrary, so sort for a reproducible CSV.
result = pd.DataFrame(result).sort_values(by=["package", "version"])
result.to_csv("res/remediation.csv", index=False)
# NOTE(review): this is not the last expression of the cell, so a notebook
# will presumably not display it; kept as in the original.
result.head()

# Rows produced by a failed worker carry no timing/size values; they show up
# as NaN and are skipped by the aggregates below, but are still counted in
# "Total packages".
print("Total time: ", result.run_time.sum())
print("Total packages: ", len(result))
print("max run time: ", result.run_time.max())
print("median run time: ", result.run_time.median())
print("Average original tree size: ", result.original_tree_size.mean())
print("Average new tree size: ", result.new_tree_size.mean())