In [1]:
import os
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
import torch.nn.functional as F
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import scipy.optimize
import scipy.stats
from collections import Counter

Found Intel OpenMP ('libiomp') and LLVM OpenMP ('libomp') loaded at
the same time. Both libraries are known to be incompatible and this
can cause random crashes or deadlocks on Linux when loaded in the
same Python program.
Using threadpoolctl may cause crashes or deadlocks. For more
information and possible workarounds, please see
    https://github.com/joblib/threadpoolctl/blob/master/multiple_openmp.md



In [2]:
# Input file that the Dataset class below parses.
PATH = "tracks_100k_updated.txt"

# Seed NumPy and PyTorch (CPU and every CUDA device) with the same value so
# that a fresh "Restart & Run All" is repeatable.
SEED = 172
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

In [3]:
'''
Plan:
1. Take a set of track parameters and plot the actual points those parameters produce.
2. Rescale the parameters, add some noise, restore the original scale, and plot the points again.
3. Calculate the distance between the two sets of points.
'''

class Dataset(torch.utils.data.Dataset):
    """Track-parameter dataset read from an ``EOT``-delimited text file.

    The file content is split into records on the literal marker ``EOT``.
    The first line of every non-empty record is parsed as comma-separated
    floats and becomes that record's target vector; the targets are then
    min-max scaled column by column.

    The class keeps the name ``Dataset`` because the rest of the notebook
    refers to it, which shadows the name imported from ``torch.utils.data``.
    The base class is therefore spelled out in full, so re-running this cell
    does not make the class inherit from its own previous definition.

    Attributes:
        original_targets: array with one row of raw targets per record.
        scaler: ``MinMaxScaler`` fitted on ``original_targets``.
        rescaled_targets: ``original_targets`` transformed by ``scaler``.
    """

    def __init__(self, path):
        """Read ``path``, extract the target rows and fit the scaler.

        Raises:
            ValueError: if the file contains no records, or a target line
                holds a cell that cannot be converted to ``float``.
        """
        # Read everything first so the file handle is released before parsing.
        with open(path, 'r') as file:
            content = file.read()

        records = [chunk.strip() for chunk in content.split('EOT')]
        # Only the first line of a record is used as the target, so the
        # remaining lines are not converted to floats at all.
        # float() ignores surrounding whitespace, so both "a,b" and "a, b" parse.
        targets = [
            [float(cell) for cell in record.split('\n', 1)[0].split(',')]
            for record in records
            if record
        ]
        if not targets:
            raise ValueError(f'no track records found in {path!r}')

        self.original_targets = np.array(targets)
        self.scaler = MinMaxScaler()
        self.rescaled_targets = self.scaler.fit_transform(self.original_targets)

    def __len__(self):
        """Number of records (rows of the rescaled targets)."""
        return len(self.rescaled_targets)

    def __getitem__(self, idx):
        """Return the ``(original, rescaled)`` target rows at position ``idx``."""
        original = self.original_targets[idx]
        rescaled = self.rescaled_targets[idx]
        return original, rescaled

In [4]:
# Parse the track file at PATH and fit the min-max scaler on its target rows.
d = Dataset(PATH)

In [5]:
# Show the first sample as an (original, rescaled) pair of parameter vectors.
d[0]

(array([8.5000e-03, 3.1300e+00, 1.5818e+02, 5.0000e-02, 3.7000e-01]),
 array([0.16634051, 0.49840764, 0.76101492, 0.47786606, 0.62909091]))

In [6]:
# Column-wise minimum, maximum and span of the raw (unscaled) targets.
original_mins = d.original_targets.min(axis=0)
original_maxs = d.original_targets.max(axis=0)
print(original_mins, original_maxs, original_maxs - original_mins, sep='\n')

[ 0.    0.   25.01 -4.16 -1.36]
[5.11e-02 6.28e+00 2.00e+02 4.65e+00 1.39e+00]
[5.1100e-02 6.2800e+00 1.7499e+02 8.8100e+00 2.7500e+00]


In [7]:
# Column-wise minimum, maximum and span of the min-max scaled targets.
rescaled_mins = d.rescaled_targets.min(axis=0)
rescaled_maxs = d.rescaled_targets.max(axis=0)
print(rescaled_mins, rescaled_maxs, rescaled_maxs - rescaled_mins, sep='\n')

[0. 0. 0. 0. 0.]
[1. 1. 1. 1. 1.]
[1. 1. 1. 1. 1.]


In [8]:
# Layer radii used by make_hits() and chisq(): `nlayers` values evenly spaced
# from `min_r0` to `max_r0` (both ends included, via np.linspace).
min_r0=1.0
max_r0=10.0
nlayers=10
# NOTE(review): `sigma` is not referenced by any function visible in this part
# of the notebook — confirm whether it is still needed.
sigma=0.01

def track(phi, d0,phi0,pt,dz,tanl):
    """Evaluate the track model at angle ``phi`` and return ``(x, y, z)``.

    The radius term is ``rho = alpha / (q / pt)`` with the fixed values
    ``alpha = 1/2`` and ``q = 1``. At ``phi = 0`` the result reduces to
    ``(d0*cos(phi0), d0*sin(phi0), dz)``.

    Args:
        phi: angle at which the track is evaluated (scalar or NumPy array).
        d0, phi0, pt, dz, tanl: the five track parameters.

    Returns:
        Tuple ``(x, y, z)``.
    """
    half = 1/2  # 1/cB
    charge = 1
    curvature = charge/pt
    radius = half/curvature

    cos0, sin0 = np.cos(phi0), np.sin(phi0)
    swept = phi0+phi
    x = d0*cos0 + radius*(cos0-np.cos(swept))
    y = d0*sin0 + radius*(sin0-np.sin(swept))
    z = dz - radius*tanl*phi
    return x,y,z

def dr(phi, r02,d0,phi0,pt,dz,tanl):
    """Return ``|x^2 + y^2 - r02|`` for the track point at angle ``phi``.

    ``r02`` is a squared radius: it is compared directly against
    ``x*x + y*y`` of the point returned by ``track``. This is the objective
    that ``find_phi`` minimises.
    """
    # Only x and y enter the radius; z is not needed here.
    x, y, _ = track(phi, d0, phi0, pt, dz, tanl)
    return np.fabs(x*x + y*y - r02)

def find_phi(r0, d0,phi0,pt,dz,tanl):
    """Find the angle ``phi`` at which the track reaches a target radius.

    Despite its name, ``r0`` is forwarded to ``dr`` as the *squared* target
    radius (the callers in this notebook pass ``r0*r0``).

    Returns:
        The first component of the minimiser's solution vector.
    """
    # Lazy approach: instead of inverting the track equations, minimise the
    # distance numerically with an unbounded Nelder-Mead search from phi = 0.
    objective_args = (r0, d0, phi0, pt, dz, tanl)
    fit = scipy.optimize.minimize(dr, 0, args=objective_args, method='Nelder-Mead')
    return fit.x[0]


# Find the intersections with the detector layers for these track parameters.
# NOTE: no noise is added to the hits in this function.
def make_hits(params):
    """Compute the track point on every layer radius.

    For each of the ``nlayers`` radii between ``min_r0`` and ``max_r0`` the
    matching angle is located with ``find_phi`` and the point is evaluated
    with ``track``.

    Args:
        params: the track parameters, unpacked positionally into
            ``find_phi`` and ``track`` (``d0, phi0, pt, dz, tanl``).

    Returns:
        Three lists ``(xs, ys, zs)`` with one entry per layer.
    """
    xs, ys, zs = [], [], []

    for radius in np.linspace(min_r0, max_r0, nlayers):
        # find_phi expects the squared radius.
        phi_hit = find_phi(radius*radius, *params)
        x_hit, y_hit, z_hit = track(phi_hit, *params)
        xs.append(x_hit)
        ys.append(y_hit)
        zs.append(z_hit)

    return xs, ys, zs

def chisq(params,x,y,z):
    """Sum of squared distances between predicted and observed hits.

    The predicted hit on each layer is computed from ``params`` (same layer
    radii as ``make_hits``) and compared with the observed hit at the same
    position in ``x``, ``y`` and ``z``.

    Args:
        params: track parameters (``d0, phi0, pt, dz, tanl``).
        x, y, z: observed hit coordinates, indexable by layer number.

    Returns:
        The accumulated squared deviation over all layers.
    """
    total = 0

    for ihit, radius in enumerate(np.linspace(min_r0, max_r0, nlayers)):
        # predicted hit for these track parameters on this layer
        phi_hit = find_phi(radius*radius, *params)
        x0, y0, z0 = track(phi_hit, *params)
        # deviation from the observed hit; assume equal uncertainty in x,y,z
        total = total + (x0-x[ihit])**2 + (y0-y[ihit])**2 + (z0-z[ihit])**2

    return total

In [9]:
def calculate_chisq_distance(original_parameters, changed_parameters):
    """Chi-square of ``original_parameters`` against the hits of ``changed_parameters``.

    The hits are generated from ``changed_parameters`` with ``make_hits`` and
    then scored against ``original_parameters`` with ``chisq``.
    """
    hits = make_hits(changed_parameters)
    return chisq(original_parameters, *hits)

def test_parameter_importance(index, dataset: Dataset, noise=0.01):
    '''
    get original parameter
    minmax scale it
    for each parameter, add noise
    inverse the noise
    calculate the chisq distance
    rank based on importance
    '''
    original_parameters, rescaled_parameters = dataset[index]
    parameter_names = ['d0','phi0','pt','dz','tanl']
    distances = []
    for i in range(len(parameter_names)):
        temp = rescaled_parameters.copy()
        temp[i] += noise
        if temp[i] > 1:
            raise ValueError('out of bound')
        scale_restored_temp = dataset.scaler.inverse_transform([temp])[0]
        distance = calculate_chisq_distance(original_parameters, scale_restored_temp)
        distances.append(distance)
    return sorted(zip(distances, parameter_names), reverse=True)

In [10]:
# Dataset position of the track used for the single-sample example.
INDEX = 1

In [11]:
# Show the (original, rescaled) parameter vectors of the example track.
d[INDEX]

(array([ 4.000e-03,  1.080e+00,  5.175e+01, -2.130e+00,  1.700e-01]),
 array([0.07827789, 0.17197452, 0.15280873, 0.23041998, 0.55636364]))

In [13]:
# Rank the parameters for the example track with a shift of 0.1 in scaled space.
test_parameter_importance(INDEX, d, 0.1)

[(146.91319778402934, 'phi0'),
 (29.35336308583485, 'tanl'),
 (7.761610000000012, 'dz'),
 (0.6176208585327747, 'pt'),
 (0.00025021688112549347, 'd0')]

In [14]:
# Count how often each importance ordering occurs over the first tracks.
# 2002 covers indices 0..2001, the same range the earlier `i > 2000` cut-off
# covered; bounding the range itself means a skipped track can no longer let
# the loop run past the cut-off.
MAX_TRACKS = 2002

counter = Counter()
skipped = 0  # tracks whose shifted parameter left the scaled range

for i in range(min(len(d), MAX_TRACKS)):
    try:
        result = test_parameter_importance(i, d, 0.1)
    except ValueError:
        # Only the expected "out of bound" case is skipped. Anything else
        # (including a kernel interrupt) propagates instead of being hidden.
        skipped += 1
        continue
    ordering = tuple(name for _, name in result)
    counter[ordering] += 1
    

In [15]:
# Display how many tracks produced each importance ordering.
counter

Counter({('phi0', 'tanl', 'dz', 'pt', 'd0'): 1619})