In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pickle
import dama as dm
#import tensorflow as tf
#import tensorflow_addons as tfa

from matplotlib.colors import LogNorm
from scipy import stats
from scipy.optimize import minimize
from scipy.optimize import curve_fit
from multiprocessing import Process, Pool

In [None]:
# Global matplotlib styling applied to every figure created below.
AXES_FONT_SIZE = 24
TICK_FONT_SIZE = 22
params = {
    'legend.fontsize': 17,
    'figure.figsize': (15, 9.3),
    'axes.labelsize': AXES_FONT_SIZE,
    'axes.titlesize': AXES_FONT_SIZE,
    'xtick.labelsize': TICK_FONT_SIZE,
    'ytick.labelsize': TICK_FONT_SIZE,
}
plt.rcParams.update(params)

# Parameter names, indexed like the columns of the truth/reco arrays.
par_names = ['x', 'y', 't', 'E', 'azi']
# Directories holding the toy data files and the produced plots.
data_path = '../../../freedom/resources/toy_data/'
plot_path = '../../../plots/toy_model/'

def correct_azi(azi):
    """Wrap an azimuth difference once towards the interval [-pi, pi].

    Values below -pi are shifted up by 2*pi, then values above pi are shifted
    down by 2*pi. Only a single turn is applied in each direction, so inputs
    further than one turn outside the interval stay outside it.
    """
    full_turn = 2*np.pi
    shifted_up = np.where(azi < -np.pi, azi + full_turn, azi)
    return np.where(shifted_up > np.pi, shifted_up - full_turn, shifted_up)

## Create events

In [None]:
# Number of test events to load (the arrays are truncated to the first N entries).
N = 10000
#events2, Truth2 = toy_experiment.generate_events(N, xlims=(-12,12), blims=(-12,12), N_lims=(3,40))
#Truth2 = np.insert(Truth2, 2, 0, axis=1)

# Use the shared data_path instead of repeating the directory literal.
# NOTE: allow_pickle=True unpickles arbitrary objects; only load trusted files.
events = np.load(data_path + 'toy_events_test.npy', allow_pickle=True)[:N]
Truth = np.load(data_path + 'toy_truth_test.npy')[:N]

In [None]:
#events = np.concatenate([events, events2])
#Truth = np.concatenate([Truth, Truth2])

#np.save('../../../freedom/resources/toy_data/toy_events_test', events)
#np.save('../../../freedom/resources/toy_data/toy_truth_test', Truth)

## Reco events

In [None]:
# Allowed [lower, upper] range per fit parameter, one row per entry of par_names.
bounds = np.array([
    [-12, 12],       # x
    [-12, 12],       # y
    [-5, 5],         # t
    [3, 40],         # E
    [0, 2*np.pi],    # azi
])

#### true LLH

In [None]:
from freedom.toy_model import advanced_toy_model
# 5x5 grid of detector positions spanning [-10, 10] in both coordinates;
# the first column varies slowest, the second column fastest.
grid_axis = np.linspace(-10, 10, 5)
detectors = np.vstack([np.repeat(grid_axis, 5), np.tile(grid_axis, 5)]).T
toy_experiment = advanced_toy_model.advanced_toy_experiment(
    detectors=detectors,
    isotrop=False,
)

def LLH(X, event, only_c=False, only_h=False, fix=[None], bounds=bounds):
    """Negative log-likelihood of one event under hypothesis X (true toy model).

    Parameters
    ----------
    X : array-like
        Hypothesis: hypo_x, hypo_y, hypo_t, hypo_N_src, hypo_ang. When ``fix``
        names a parameter, X holds only the remaining free parameters.
    event : sequence
        ``event[0]`` is passed to ``toy_experiment.charge_term`` and
        ``event[1]`` to ``toy_experiment.hit_term``.
    only_c, only_h : bool
        Return only the charge term / only the hit term. At most one may be set.
    fix : list
        ``[None]`` for a free fit, or ``[index, value]`` to insert ``value`` at
        position ``index`` of X before evaluating. The default list is never
        mutated.
    bounds : ndarray
        Per-parameter ``[lower, upper]`` limits.

    Returns
    -------
    float
        The requested negated term(s), or the penalty value 1e9 if any entry
        of the completed hypothesis lies outside ``bounds``.
    """
    #X: hypo_x, hypo_y, hypo_t, hypo_N_src, hypo_ang
    assert only_c + only_h < 2

    if fix[0] is not None:
        X = np.insert(X, fix[0], fix[1])

    # np.alltrue was removed in NumPy 2.0; np.all is the supported spelling.
    # `not` is used instead of `~` so the test stays correct for plain bools.
    inside = np.logical_and(bounds[:, 0] <= X, X <= bounds[:, 1])
    if not np.all(inside):
        return 1e9

    pos = np.array([X[0], X[1]])
    c_term = -toy_experiment.charge_term(event[0], pos, X[3], X[4])
    h_term = -toy_experiment.hit_term(event[1], pos, X[2], X[3], X[4])

    if only_c:
        return c_term
    if only_h:
        return h_term
    return c_term + h_term

In [None]:
def fit_event(event):
    """Fit one event with the true LLH and compute per-parameter delta-LLH values.

    Parameters
    ----------
    event : tuple
        ``(event_data, truth)`` pair as produced by ``zip(events, Truth)``.

    Returns
    -------
    list
        The best-fit parameters of the free fit followed by, for each
        parameter, the LLH difference between a fit with that parameter fixed
        to its true value and the free fit.
    """
    observation, truth = event
    # Build the array once instead of once per fit.
    observation = np.array(observation)

    # seeding
    # A fresh Generator is seeded from OS entropy. The global np.random state
    # is copied into every forked Pool worker, which would make all workers
    # draw the same perturbation sequence.
    rng = np.random.default_rng()
    seed = rng.normal(truth, [2,2,1,5,0.5])
    seed = np.maximum(seed, bounds[:, 0])
    seed = np.minimum(seed, bounds[:, 1])

    # free fit
    free_args = (observation, False, False, [None])
    fit_res = minimize(LLH, seed, method='Nelder-Mead', args=free_args)

    # 1 param fixed fit, started from the free-fit result
    n_params = len(fit_res.x)
    dllhs = []
    for i in range(n_params):
        free_idx = [x for x in range(n_params) if x != i]
        fixed_args = (observation, False, False, [i, truth[i]])
        m = minimize(LLH, fit_res.x[free_idx], method='Nelder-Mead', args=fixed_args)
        dllhs.append(m.fun - fit_res.fun)

    return list(np.append(fit_res.x, dllhs))

In [None]:
%%time
# Fit every (event, truth) pair with the true LLH using six worker processes.
event_truth_pairs = zip(events, Truth)
with Pool(6) as worker_pool:
    outs = worker_pool.map(fit_event, event_truth_pairs)

In [None]:
#np.save('../../../freedom/resources/toy_data/toy_reco_trueLLH_NM_randomSeed', outs)

#### NN LLH

In [None]:
import math

from functools import partial
from freedom.llh_service.llh_service import LLHService
from freedom.llh_service.llh_client import LLHClient
from freedom.reco import crs_reco

In [None]:
# Directory containing the trained toy-model networks.
loc = '../../../freedom/resources/models/toy/'

# Configuration passed to each LLH service process.
service_conf = dict(
    poll_timeout=1,
    flush_period=1,
    n_hypo_params=5,
    n_hit_features=3,
    n_evt_features=2,
    batch_size=dict(n_hypos=200, n_observations=6000),
    send_hwm=10000,
    recv_hwm=10000,
    hitnet_file=f'{loc}hitNet.hdf5',
    chargenet_file=f'{loc}chargeNet_new.hdf5',
)

In [None]:
n_gpus = 4

base_req = "ipc:///tmp/recotestreq"
base_ctrl = "ipc:///tmp/recotestctrl"

# One request address and one control address per service, numbered 0..n_gpus-1.
req_addrs = [f'{base_req}{k}' for k in range(n_gpus)]
ctrl_addrs = [f'{base_ctrl}{k}' for k in range(n_gpus)]

# Launch one LLH service process per address pair; the index is forwarded
# to start_service as its last argument.
procs = []
for i in range(n_gpus):
    service_args = (service_conf, ctrl_addrs[i], req_addrs[i], i)
    proc = Process(target=crs_reco.start_service, args=service_args)
    proc.start()
    procs.append(proc)

In [None]:
def fit_events(events, index, Truths, ctrl_addrs, truth_seed=False, method='Nelder-Mead'):
    """Fit a batch of events with the NN LLH served at ``ctrl_addrs[index]``.

    Parameters
    ----------
    events : sequence
        Events to fit; ``event[1][:, :3]`` and ``event[0]`` are sent to the
        LLH client.
    index : int
        Position in ``ctrl_addrs`` of the service this worker talks to.
    Truths : sequence
        True parameters, aligned with ``events``.
    ctrl_addrs : sequence of str
        Control addresses of the running LLH services.
    truth_seed : bool
        Seed the minimizer at the truth instead of a perturbed truth.
    method : str
        ``scipy.optimize.minimize`` method name.

    Returns
    -------
    list of ndarray
        Per event: the free-fit parameters followed by, for each parameter,
        the LLH difference between a fit with that parameter fixed to its
        true value and the free fit.
    """
    client = LLHClient(ctrl_addr=ctrl_addrs[index], conf_timeout=60000)
    # A per-call Generator seeded from OS entropy. Forked Pool workers all
    # inherit the same global np.random state and would otherwise draw
    # identical seed perturbations.
    rng = np.random.default_rng()

    def eval_llh(params, event, fix=[None]):
        """Evaluate the NN LLH; returns 1e9 for out-of-bounds hypotheses."""
        if fix[0] is not None:
            params = np.insert(params, fix[0], fix[1])

        # np.alltrue was removed in NumPy 2.0; use np.all with a plain `not`.
        inside = np.logical_and(bounds[:, 0] <= params, params <= bounds[:, 1])
        if not np.all(inside):
            return 1e9

        return client.eval_llh(event[1][:, :3], event[0], params)

    outputs = []
    for j, event in enumerate(events):
        truth = Truths[j]

        # seeding
        if truth_seed:
            seed = truth
        else:
            seed = rng.normal(truth, [2,2,1,5,0.5])
        seed = np.maximum(seed, bounds[:, 0])
        seed = np.minimum(seed, bounds[:, 1])

        #free fit
        fit_res = minimize(eval_llh, seed, method=method, args=(event, [None]))

        # 1 param fixed fit, started from the free-fit result
        n_params = len(fit_res.x)
        dllhs = []
        for i in range(n_params):
            free_idx = [x for x in range(n_params) if x != i]
            fixed_args = (event, [i, truth[i]])
            m = minimize(eval_llh, fit_res.x[free_idx], method=method, args=fixed_args)
            dllhs.append(m.fun - fit_res.fun)

        outputs.append(np.append(fit_res.x, dllhs))

    return outputs

In [None]:
events_to_process = len(events) #1000
pool_size = 200
evts_per_proc = int(math.ceil(events_to_process/pool_size))

# Consecutive, equally sized chunks of events and truths, one per worker.
evt_splits, true_splits = [], []
for chunk in range(pool_size):
    start, stop = chunk*evts_per_proc, (chunk+1)*evts_per_proc
    evt_splits.append(events[start:stop])
    true_splits.append(Truth[start:stop])
print(sum(len(l) for l in evt_splits))

# Assign workers to services round-robin.
gpu_inds = np.arange(pool_size) % n_gpus

fit_events_partial = partial(
        fit_events,
        ctrl_addrs=ctrl_addrs,
        truth_seed=True,
        #method='Powell'
)

In [None]:
%%time
# reconstruct with a worker pool; one LLH client per worker
with Pool(pool_size) as p:
    outs = p.starmap(fit_events_partial, zip(evt_splits, gpu_inds, true_splits))

# Flatten the per-worker result lists in a single pass
# (sum(..., []) re-copies the accumulated list for every worker).
all_outs = [out for worker_outs in outs for out in worker_outs]
# One row per event: 5 fitted parameters followed by 5 delta-LLH values.
all_outs = np.array(all_outs).reshape((events_to_process, 10))

In [None]:
#np.save('../../../freedom/resources/toy_data/toy_reco_nnLLH_NM_truthSeed', all_outs)

In [None]:
# kill all the services
import zmq
zmq_context = zmq.Context.instance()
for service_proc, service_addr in zip(procs, ctrl_addrs):
    # Send "die" on the control address and wait, with the socket still open,
    # for the service process to exit.
    with zmq_context.socket(zmq.REQ) as ctrl_sock:
        ctrl_sock.connect(service_addr)
        ctrl_sock.send_string("die")
        service_proc.join()

#### plots

In [None]:
# Each saved reco array holds the fitted parameters in its first half of the
# columns and the delta-LLH values in the second half.
Reco, dLLH = np.split(np.load(data_path+'toy_reco_trueLLH_NM_truthSeed.npy'), 2, axis=1)
#Reco, dLLH = np.split(np.load(data_path+'toy_reco_nnLLH_PW_truthSeed.npy'), 2, axis=1)
Reco_NN, dLLH_NN = np.split(np.load(data_path+'toy_reco_nnLLH_NM_truthSeed.npy'), 2, axis=1)
# Use the shared data_path instead of repeating the directory literal;
# truncate the truth to the number of NN-reconstructed events.
Truth = np.load(data_path+'toy_truth_test.npy')[:len(Reco_NN)]

In [None]:
def plot_diff(diff, bins, label):
    """Histogram residuals on the current axes with spread statistics in the legend.

    Parameters
    ----------
    diff : array-like
        Residuals to histogram.
    bins : int or sequence
        Bin specification forwarded to ``plt.hist``.
    label : str
        Legend label; the standard deviation and interquartile range of
        ``diff`` are appended to it.
    """
    # The former `o, c` offset/colour branch was never used and has been removed.
    spread = ' (std=%.2f, iqr=%.2f)'%(np.std(diff), stats.iqr(diff))
    plt.hist(diff, bins, label=label + spread, alpha=0.5)

In [None]:
fig = plt.figure(figsize=(20, 12))
plt.suptitle('Reco - Truth', size=20, y=.91)
label1, label2 = 'true LLH', 'NN LLH'

# Half-width of the symmetric histogram range for each parameter (x, y, t, E, azi).
half_widths = [3, 3, 1, 20, 1]

# One panel per parameter, replacing five copy-pasted stanzas.
for col, (name, half_width) in enumerate(zip(par_names, half_widths)):
    edges = np.linspace(-half_width, half_width, 50)
    plt.subplot(2, 3, col+1)
    plt.axvline(0, color='black', linestyle='--')
    for reco, label in ((Reco, label1), (Reco_NN, label2)):
        residual = reco[:, col]-Truth[:, col]
        if name == 'azi':
            # azimuth residuals are wrapped before histogramming
            residual = correct_azi(residual)
        plot_diff(residual, edges, label)
    plt.legend()
    plt.xlabel(name)

#plt.savefig(plot_path+'/reco_dists/reco_dists_NM_truthSeed', bbox_inches='tight')

In [None]:
# Time residuals of the true-LLH reco, keeping only those larger than 1e-2 in magnitude.
diff = Reco[:, 2]-Truth[:, 2]
diff_tail = diff[np.abs(diff)>1e-2]
plt.hist(diff_tail, 100)
np.std(diff_tail)

In [None]:
fig = plt.figure(figsize=(20, 12))
label1, label2 = 'true LLH', 'NN LLH'

for i in range(5):
    truth_col = Truth[:, i]
    lo, hi = min(truth_col), max(truth_col)
    plt.subplot(2,3,i+1)
    # dashed diagonal: reco equal to truth
    plt.plot([lo, hi], [lo, hi], color='black', linestyle='--')
    for reco, label in ((Reco, label1), (Reco_NN, label2)):
        plt.scatter(truth_col, reco[:, i], label=label)
    plt.legend()
    plt.title(par_names[i])
    if i == 4:
        plt.xlabel('Truth')
#plt.text('Reco')

#plt.savefig(plot_path+'/dLLH/reco_truth_NM_randomSeed', bbox_inches='tight')

## dLLH

In [None]:
# Index of the parameter studied in the cells below.
i = 0
var = par_names[i]

# Absolute residual and delta-LLH of the NN reco for that parameter.
resi = np.abs(Reco_NN[:,i]-Truth[:,i])
dllh = dLLH_NN[:,i]

# Percentiles are taken over the strictly positive delta-LLH values only.
dllh_positive = dllh[dllh > 0]
dllh_50 = np.percentile(dllh_positive, 50)
dllh_68 = np.percentile(dllh_positive, 68)

# fraction of events with positive delta-LLH
print(len(dllh_positive)/len(dllh))

In [None]:
# Distribution of delta-LLH with the 68th percentile marked.
ax = plt.gca()
ax.set_title(var)
ax.hist(dllh, 100) #np.linspace(-20,20,100)
ax.axvline(dllh_68, color='red', linestyle='--', label='68 percentile %.2f'%(dllh_68))
ax.legend()
ax.set_xlabel(r'$LLH_{4p} - LLH_{5p}$')
ax.set_ylabel('#events')
ax.set_yscale('log')
#plt.savefig('../../../plots/toy_model/dLLH/dLLH_'+var+'_nnLLH', bbox='tight') #+'_truthSeeded'

In [None]:
# Logarithmic delta-LLH bin edges and the arithmetic centre of each bin.
dllh_range = np.logspace(-2, 0.6, 10) #-2, 0.3
dllh_center = (dllh_range[1:]+dllh_range[:-1])/2
medi, mean = [], []
# Iterate over edge pairs rather than an index named `i`, so the notebook-level
# parameter index `i` chosen earlier is not overwritten by this cell.
for lo_edge, hi_edge in zip(dllh_range[:-1], dllh_range[1:]):
    in_bin = resi[(dllh > lo_edge) & (dllh < hi_edge)]
    # Empty bins contribute a residual of 0 to both summaries.
    if len(in_bin) == 0:
        in_bin = np.zeros(1)
    mean.append(np.mean(in_bin))
    medi.append(np.median(in_bin))

In [None]:
def func(x, a):
    """Square-root model a * sqrt(x) used for the residual-vs-dLLH fits."""
    root = np.sqrt(x)
    return a * root

# Fit a*sqrt(x) to the binned medians and evaluate it at the 50th-percentile dLLH.
popt_medi, pcov = curve_fit(func, dllh_center, medi)
unc_50_medi = func(dllh_50, *popt_medi)

# Same for the binned means, evaluated at the 68th-percentile dLLH.
popt_mean, pcov = curve_fit(func, dllh_center, mean)
unc_68_mean = func(dllh_68, *popt_mean)

# Show the two fitted scale factors.
popt_medi[0], popt_mean[0]

In [None]:
# Residual versus delta-LLH for every event, with the binned means and their fit.
ax = plt.gca()
ax.scatter(dllh, resi)

fit_label = r'$a\cdot\sqrt{x}$ fit to means'
percentile_label = '68 percentile (%.2f, %.2f)'%(dllh_68, unc_68_mean)
ax.scatter(dllh_center, mean, color='r', label='means')
ax.plot(dllh_center, func(dllh_center, popt_mean[0]), color='r', label=fit_label)
ax.axvline(dllh_68, color='red', linestyle='--', label=percentile_label)
ax.axhline(unc_68_mean, color='red', linestyle='--')
'''
plt.scatter(dllh_center, medi, color='black', label='medians')
plt.plot(dllh_center, func(dllh_center, popt_medi[0]), color='black', label=r'$a\cdot\sqrt{x}$ fit to medians')
plt.axvline(dllh_50, color='black', linestyle='--', label='50 percentile (%.2f, %.2f)'%(dllh_50, unc_50_medi))
plt.axhline(unc_50_medi, color='black', linestyle='--')
'''
#plt.axvline(dllh_range[0], color='black')
#plt.axvline(dllh_range[-1], color='black')

ax.set_title(var)
ax.legend()
ax.loglog()
ax.set_xlim(1e-3, 1e2)
ax.set_xlabel(r'$LLH_{4p} - LLH_{5p}$')
ax.set_ylim(1e-3, 15)
ax.set_ylabel('|reco - true|')
#plt.savefig('../../../plots/toy_model/dLLH/unc_'+var+'_nnLLH', bbox='tight') #+'_truthSeeded'