In [1]:
import json
import time
from os import listdir, getcwd, path

import cabinetry
import numpy as np
import pyhf
import requests
from jsonschema import validate

import pruning.normsys_pruning as pruning
#from os.path import isfile, isdir, join

In [2]:
# Working directory of the notebook; used below as the base directory for every file read or written.
# NOTE(review): this rebinds `path`, hiding the `os.path` module imported in the first cell.
path = getcwd()

In [3]:
# List the JSON files found in the working directory. Each entry carries a
# leading "/" so that it can be concatenated directly onto `path`.
names = ["/" + entry for entry in listdir(path) if entry.endswith(".json")]

names

['/bottom-squarks_PA_results.json',
 '/workspace_Comb.json',
 '/workspace_1LOS.json',
 '/bottom-squarks.json']

In [4]:
# Workspace file to analyse; the leading "/" is required because the name is concatenated onto `path`.
filename = '/bottom-squarks.json'

In [5]:
# Parse the workspace specification. The context manager closes the file
# handle as soon as parsing is done instead of leaving it open until the
# garbage collector gets to it.
with open(path + filename, 'r') as spec_file:
    workspace_spec = json.load(spec_file)

# Workspace built from the specification as loaded (before any pruning).
workspace = pyhf.Workspace(workspace_spec)



In [6]:
# Model built from the workspace loaded above (no pruning applied).
original_model = workspace.model()
# Position of the parameter of interest in the model configuration ...
poi_index = original_model.config.poi_index
# ... and the parameter name found at that position.
poi_name = original_model.config.par_names()[poi_index]
# Bare expression: shown as the cell output.
poi_name

'mu_SIG'

In [7]:
# Pruning thresholds (eps) to scan: `num_thresholds` evenly spaced values
# from 0.0 to 0.1 inclusive, stored as plain Python floats.
# NOTE(review): the saved output of this cell lists five values, so it was
# recorded with a different `num_thresholds`; re-run the cell to refresh it.
num_thresholds = 10
pruning_thresholds = np.linspace(0.0, 0.1, num_thresholds).tolist()
pruning_thresholds

[0.0, 0.025, 0.05, 0.07500000000000001, 0.1]

In [8]:
# One pruned specification per threshold, in the same order as `pruning_thresholds`.
pruned_workspaces_specs = [pruning.prune_model(workspace_spec, eps) for eps in pruning_thresholds]

# NOTE(review): the return value of this call is discarded. It is kept because
# `prune_model` is defined outside this notebook and may have side effects on
# `workspace_spec` -- confirm, and delete the call if it is a leftover.
pruning.prune_model(workspace_spec, 0.0)

# Save the specification the pruned variants were derived from. The `with`
# block guarantees the file is flushed and closed even if serialisation fails.
with open(path + "/spec_original.json", "w") as output_file:
    json.dump(workspace_spec, output_file, indent=4)

In [9]:
# Write every pruned specification to "spec_<eps>.json" in the working directory.
# Iterating over the two lists directly (rather than over range(num_thresholds))
# keeps the loop correct even if `num_thresholds` was edited without re-running
# the cells that built the lists; `with` closes each file even on error.
for eps, pruned_spec in zip(pruning_thresholds, pruned_workspaces_specs):
    with open(path + "/spec_{}.json".format(eps), "w") as output_file:
        json.dump(pruned_spec, output_file, indent=4)

In [10]:
# Wrap each pruned specification in a pyhf workspace (order follows `pruned_workspaces_specs`).
pruned_workspaces = [pyhf.Workspace(pruned_spec) for pruned_spec in pruned_workspaces_specs]

In [11]:
# Build one model per pruned workspace, keeping the same order.
pruned_models = [ws.model() for ws in pruned_workspaces]

In [12]:
# Number of times each fit is repeated when measuring execution time.
# (`time` is imported in the import cell at the top of the notebook.)
num_executions = 1

In [13]:
# Select the "numpy" tensor backend together with the "minuit" optimizer for the fits below.
pyhf.set_backend("numpy", "minuit")

In [14]:
# Display the active (tensor backend, optimizer) pair to confirm the selection.
pyhf.get_backend()

(<pyhf.tensor.numpy_backend.numpy_backend at 0x7f319ee55d40>,
 <pyhf.optimize.minuit_optimizer at 0x7f319ee17be0>)

In [15]:
%time
average_exec_times = []
exec_times = []
output_params_all_specs = []

for l in range(num_thresholds):
    
    pruned_data = pruned_workspaces[l].data(pruned_models[l]) #, include_auxdata=False)

    exec_times_pruned = []
    output_params_sigle_spec = []

    for k in range(num_executions):
        t0 = time.time()
        output_params = pyhf.infer.mle.fit(data=pruned_data, pdf=pruned_models[l])
        t1 = time.time()
        exec_times_pruned.append(t1-t0)
        
        output_params_sigle_spec.append(dict(zip(pruned_models[l].config.par_names(), output_params)))
        print(k+1)
        print(output_params)
        
    print("eps = {} ----------------------------".format(pruning_thresholds[l]))
    
    exec_times.append(exec_times_pruned)
    
    exec_times_pruned = np.array(exec_times_pruned)
    
    average_exec_times.append(float(np.mean(exec_times_pruned)))
    
    output_params_all_specs.append(output_params_sigle_spec)
    

CPU times: user 4 µs, sys: 1 µs, total: 5 µs
Wall time: 7.39 µs
1
[ 9.99979738e-01  9.99027998e-01  6.62764299e-05  7.41803920e-05
 -1.31715649e-03 -5.53842414e-02 -7.42011570e-02 -6.32418871e-03
 -1.31715650e-03 -5.01928222e-03 -4.83300048e-03  9.02938927e-03
  3.68976045e-04  1.41909605e-02  2.07695721e-02 -5.15841088e-03
 -1.31715656e-03 -1.31715655e-03 -2.60817900e-03 -7.70732692e-03
 -3.42090156e-03 -1.24003918e-02 -2.19085151e-03 -1.69571592e-03
 -3.20220633e-03 -3.63108172e-03 -9.51828373e-04  2.53097775e-03
 -1.99389153e-03 -5.11635238e-03 -1.14738141e-03  1.36763905e-01
 -2.27775782e-01 -1.85442180e-01 -1.87509270e-02 -5.54853352e-02
 -1.27737533e-01  2.84851237e-02  1.65413846e-01  1.48681863e-02
  3.52207970e-02 -1.42752821e-02  4.57864519e-02  7.56467018e-03
 -1.71225189e-02 -9.05015959e-03  8.18995660e-03  4.44231310e-03
  1.10700590e-03 -1.31718550e-03 -2.56692003e-03  1.04374346e+00
 -1.47704443e-02  7.99560317e-01  8.34982259e-02 -9.10234252e-03
  4.48020616e-01  3.1880

In [16]:
# NOTE(review): nothing in the visible cells appends to this list -- presumably meant to
# collect POI differences between pruned and original fits; confirm or remove.
poi_difference = []

In [17]:
# Compare each pruned fit with the reference fit stored at index 0 (the first
# threshold, eps = 0.0): print |pruned - reference| for every parameter that
# is present in the pruned fit.
for k in range(num_executions):
    reference_params = output_params_all_specs[0][k]
    # Start at 1 to skip the reference itself. The upper bound is
    # `num_thresholds` (not `num_thresholds - 1`), otherwise the largest
    # threshold would silently be left out of the comparison.
    for l in range(1, num_thresholds):
        pruned_output_params_dict = output_params_all_specs[l][k]
        parameter_names = pruned_output_params_dict.keys()

        pruned_output_params = np.fromiter(pruned_output_params_dict.values(), float)
        # Look the reference values up by name so both arrays share the same order.
        original_output_params = np.array([reference_params[name] for name in parameter_names])
        print(np.abs(pruned_output_params - original_output_params))
    print("-------------------------------")

[1.69969031e-05 9.55258755e-06 6.47759770e-05 7.21676631e-05
 1.31715649e-03 4.05718311e-02 3.43568082e-02 4.98673515e-03
 1.31715648e-03 3.85448387e-03 1.65067903e-03 2.28217989e-03
 2.98986820e-02 1.48907850e-02 1.20115792e-02 7.18774584e-04
 1.31715658e-03 1.31715657e-03 2.32399213e-03 6.22731701e-03
 2.97444876e-03 9.91701063e-03 2.00502268e-03 1.61595507e-03
 2.65460878e-03 3.12110950e-03 2.57744156e-03 4.44620865e-03
 1.99389859e-03 5.00088905e-03 3.88618173e-04 7.05475671e-03
 4.95367370e-03 4.83920518e-03 5.19117009e-04 4.14603303e-03
 4.30008734e-02 2.88855668e-02 1.78932563e-02 2.64050234e-02
 1.05720576e-02 1.25929719e-03 3.15837779e-02 1.28032769e-03
 3.13074188e-04 8.27974141e-03 2.08990871e-02 7.11095599e-03
 4.40157039e-03 1.31716060e-03 2.87966124e-03 2.24723460e-05
 5.05089137e-04 1.81695038e-04 1.93088221e-03 7.03623503e-05
 3.30875587e-03 2.68271185e-03 7.10944622e-03 1.14732501e-03
 2.08255305e-04 7.84956396e-04 1.58515970e-04 3.55572906e-04
 9.83949517e-04 1.853442

In [18]:
# Bundle the scan configuration, the timings and the fitted parameters into
# one JSON-serialisable dictionary.
results = dict(
    num_executions=num_executions,
    num_thresholds=num_thresholds,
    pruning_thresholds=pruning_thresholds,
    average_exec_times=average_exec_times,
    exec_times=exec_times,
    output_params_all_specs=output_params_all_specs,
)

In [19]:
# Store the results next to the input workspace as "<workspace name>_PA_results.json"
# (filename[:-5] strips the ".json" extension). The `with` block closes the
# file even if serialisation raises.
with open(path + filename[:-5] + "_PA_results.json", "w") as output_file:
    json.dump(results, output_file, indent=4)