In [6]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

from sklearn.model_selection import RandomizedSearchCV, cross_val_score
from scipy.stats import uniform

import weakref 



import uproot


# To save memory, unused variables are deleted once they are no longer needed.
class TestClass:
    """Tiny probe object that announces when it is alive and when it is finalized."""

    def check(self) -> None:
        """Print a message confirming the instance still exists."""
        print("object is alive!")

    def __del__(self) -> None:
        """Print a message when the instance is being finalized."""
        print("object deleted")
        
from concurrent.futures import ThreadPoolExecutor

# Module-level thread pool capped at 8 worker threads.
executor = ThreadPoolExecutor(max_workers=8)

from sklearn.metrics import classification_report, accuracy_score, confusion_matrix, f1_score


from sklearn.preprocessing import StandardScaler

import gc

In [8]:
# Open object 't1' inside 'scaled_data_uproot.root' with uproot and read its
# arrays using the pandas backend (library='pd').
df_scaled = uproot.open('scaled_data_uproot.root:t1').arrays(library='pd')

In [26]:
# Input columns used to decide whether a reconstructed candidate is a lambda particle.
# Reduced alternative: cuts = ['chi2geo', 'chi2primneg', 'chi2primpos', 'distance', 'ldl']
cuts = [
    'chi2geo', 'chi2primneg', 'chi2primpos', 'chi2topo',
    'cosineneg', 'cosinepos', 'cosinetopo',
    'distance', 'l', 'ldl',
]

# Feature matrix: the selected columns converted to a NumPy array.
x = df_scaled[cuts].to_numpy()

# Target vector: the MC information stored in the 'issignal' column.
y = df_scaled['issignal'].to_numpy()

In [27]:
# Split features and labels into equally sized train and test parts (test_size=0.5);
# random_state pins the split so it is repeatable.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.5, random_state=324)

In [32]:
from DirectRanker import DirectRanker
# Build the ranker with one input per feature column (len of the first training row),
# 2 epochs and validation_size=0.2.
# NOTE(review): the fit log reports assets written under "None/<timestamp>_save_model_epoch_N",
# so a save-path option appears to be left unset — confirm against DirectRanker's constructor.
aa = DirectRanker(num_features = len(x_train[0]), epoch=2, validation_size=0.2, verbose=10)

In [33]:
# Train the ranker on the training split.
aa.fit(x_train,y_train)

Epoch 1/2


2021-10-26 19:29:41.879976: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.


INFO:tensorflow:Assets written to: None/2021-10-26 19:29:05.984168_save_model_epoch_1/assets




Epoch 2/2
INFO:tensorflow:Assets written to: None/2021-10-26 19:29:05.984168_save_model_epoch_2/assets




In [34]:
# Model scores for the test split, as returned by predict_proba.
# NOTE(review): bb is not referenced again in the cells visible here — confirm it is still needed.
bb = aa.predict_proba(x_test)

In [35]:
from sklearn.metrics import roc_auc_score

In [37]:
from sklearn.model_selection import StratifiedKFold, train_test_split, PredefinedSplit, GridSearchCV

In [41]:
def weighted_auc(estimator, x, y_true, w_0=1, w_1=1):
    """Class-weighted ROC AUC with the ``(estimator, x, y)`` scorer signature.

    Parameters
    ----------
    estimator : object
        Fitted model exposing ``predict_proba(x)``; its return value is passed
        unchanged to ``roc_auc_score`` as the score.
    x : array-like
        Samples handed to ``estimator.predict_proba``.
    y_true : iterable
        True labels. Entries equal to 1 are treated as class 1, every other
        value as class 0.
    w_0 : number, default 1
        Sample weight given to entries of ``y_true`` that are not 1.
    w_1 : number, default 1
        Sample weight given to entries of ``y_true`` that equal 1.

    Returns
    -------
    The value returned by ``roc_auc_score``.
    """
    y_score = estimator.predict_proba(x)
    # w_1 weights the label-1 samples and w_0 all others, matching the
    # parameter names (with equal weights the result does not depend on this).
    sample_weight = [w_1 if label == 1 else w_0 for label in y_true]
    return roc_auc_score(y_true, y_score, sample_weight=sample_weight)

In [42]:
def auc(estimator, x, y_true):
    """Unweighted ROC AUC scorer: ``weighted_auc`` with both class weights set to 1."""
    unit_weights = {"w_0": 1, "w_1": 1}
    return weighted_auc(estimator, x, y_true, **unit_weights)

In [43]:
# Hyper-parameter grid: two hidden-layer layouts times two learning rates.
parameters = {
    'hidden_layers_dr': [[32, 16], [64, 32, 16]],
    'learning_rate': [0.001, 0.01],
}

# One named scorer; `refit` selects which scorer picks the final model.
scoring = {'auc': auc}
refit = 'auc'

# Two stratified, shuffled folds with a fixed seed.
cv_split = StratifiedKFold(n_splits=2, shuffle=True, random_state=32)

# Exhaustive search over the grid, run with 6 parallel jobs.
cv = GridSearchCV(
    estimator=aa,
    param_grid=parameters,
    scoring=scoring,
    refit=refit,
    cv=cv_split,
    n_jobs=6,
    verbose=10,
    return_train_score=True,
)
cv.fit(x_train, y_train)
best_estimator = cv.best_estimator_

Fitting 2 folds for each of 4 candidates, totalling 8 fits


[Parallel(n_jobs=6)]: Using backend LokyBackend with 6 concurrent workers.
2021-10-26 19:42:41.100409: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE3 SSE4.1 SSE4.2 AVX
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2021-10-26 19:42:41.101596: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 8. Tune using inter_op_parallelism_threads for best performance.
2021-10-26 19:42:41.219443: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE3 SSE4.1 SSE4.2 AVX
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2021-10-26

[Parallel(n_jobs=6)]: Done   2 out of   8 | elapsed:   16.0s remaining:   48.0s
[Parallel(n_jobs=6)]: Done   3 out of   8 | elapsed:   17.0s remaining:   28.4s
[Parallel(n_jobs=6)]: Done   4 out of   8 | elapsed:   18.6s remaining:   18.6s
[Parallel(n_jobs=6)]: Done   5 out of   8 | elapsed:   20.9s remaining:   12.5s
[Parallel(n_jobs=6)]: Done   6 out of   8 | elapsed:   20.9s remaining:    7.0s
[Parallel(n_jobs=6)]: Done   8 out of   8 | elapsed:   27.4s remaining:    0.0s
[Parallel(n_jobs=6)]: Done   8 out of   8 | elapsed:   27.4s finished


Epoch 1/2
INFO:tensorflow:Assets written to: None/2021-10-26 19:43:06.024789_save_model_epoch_1/assets




Epoch 2/2
INFO:tensorflow:Assets written to: None/2021-10-26 19:43:06.024789_save_model_epoch_2/assets




In [46]:
# Score the grid search's refit best estimator on the test split with the unweighted AUC scorer.
print("AUC: {}".format(auc(best_estimator, x_test, y_test)))

AUC: 0.9880386540561741
[CV] hidden_layers_dr=[32, 16], learning_rate=0.001 ..................
Epoch 1/2
Epoch 2/2
[CV]  hidden_layers_dr=[32, 16], learning_rate=0.001, auc=(train=0.987, test=0.987), total=  12.8s
[CV] hidden_layers_dr=[32, 16], learning_rate=0.01 ...................
Epoch 1/2
Epoch 2/2
[CV]  hidden_layers_dr=[32, 16], learning_rate=0.01, auc=(train=0.964, test=0.964), total=  13.9s
[CV] hidden_layers_dr=[64, 32, 16], learning_rate=0.001 ..............
Epoch 1/2
Epoch 2/2
[CV]  hidden_layers_dr=[64, 32, 16], learning_rate=0.001, auc=(train=0.982, test=0.981), total=  16.3s
[CV] hidden_layers_dr=[64, 32, 16], learning_rate=0.001 ..............
Epoch 1/2
Epoch 2/2
[CV]  hidden_layers_dr=[64, 32, 16], learning_rate=0.001, auc=(train=0.988, test=0.989), total=  16.4s
[CV] hidden_layers_dr=[32, 16], learning_rate=0.001 ..................
Epoch 1/2
Epoch 2/2
[CV]  hidden_layers_dr=[32, 16], learning_rate=0.001, auc=(train=0.988, test=0.989), total=  12.4s
[CV] hidden_layers_