From d70d7abc0d51f01b5bdd43a1bd01ce624d2141d0 Mon Sep 17 00:00:00 2001 From: Michela Paganini Date: Tue, 25 Oct 2016 00:06:15 -0700 Subject: [PATCH] Minor changes --- bbyy_jet_classifier/plotting/plot_asimov.py | 5 +++-- bbyy_jet_classifier/strategies/root_tmva.py | 2 +- bbyy_jet_classifier/strategies/skl_BDT.py | 12 ++++++++---- evaluate_event_performance.py | 4 ++-- requirements.txt | 6 ++++++ 5 files changed, 20 insertions(+), 9 deletions(-) create mode 100644 requirements.txt diff --git a/bbyy_jet_classifier/plotting/plot_asimov.py b/bbyy_jet_classifier/plotting/plot_asimov.py index adea08b..043ed6d 100644 --- a/bbyy_jet_classifier/plotting/plot_asimov.py +++ b/bbyy_jet_classifier/plotting/plot_asimov.py @@ -2,9 +2,10 @@ import matplotlib import cPickle import numpy as np +import os import plot_atlas -def bdt_old_ratio(data, strategy, baseline_strategy, lower_bound): +def bdt_old_ratio(data, category, strategy, baseline_strategy, lower_bound): plot_atlas.set_style() figure = plt.figure(figsize=(6, 6), dpi=100) @@ -34,5 +35,5 @@ def bdt_old_ratio(data, strategy, baseline_strategy, lower_bound): plt.ylim(ymin=0.2, ymax=2.8) plt.legend(loc='upper left') - plt.savefig('threshold_ratio_{}.pdf'.format(strategy)) + plt.savefig(os.path.join('output', 'threshold_ratio_{}_{}.pdf'.format(strategy, category))) plt.close(figure) diff --git a/bbyy_jet_classifier/strategies/root_tmva.py b/bbyy_jet_classifier/strategies/root_tmva.py index b228676..cc484fd 100644 --- a/bbyy_jet_classifier/strategies/root_tmva.py +++ b/bbyy_jet_classifier/strategies/root_tmva.py @@ -47,7 +47,7 @@ def train(self, train_data, classification_variables, variable_dict, sample_name #-- Define methods: # ["NTrees=200", "MinNodeSize=0.1", "MaxDepth=6", "BoostType=Grad", "SeparationType=GiniIndex", "NegWeightTreatment=IgnoreNegWeightsInTraining"] factory.BookMethod(TMVA.Types.kBDT, "BDT", ":".join( - ["NTrees=300", "MinNodeSize=0.01", "MaxDepth=8", "BoostType=Grad", "SeparationType=GiniIndex", "NegWeightTreatment=Pray"] + ["NTrees=300", "MinNodeSize=0.01", "MaxDepth=15", "BoostType=Grad", "SeparationType=GiniIndex", "NegWeightTreatment=IgnoreNegWeightsInTraining"]# "NegWeightTreatment=Pray"] )) # -- Have we considered using a Fisher classifier? diff --git a/bbyy_jet_classifier/strategies/skl_BDT.py b/bbyy_jet_classifier/strategies/skl_BDT.py index 8d67ccc..b41f8b6 100644 --- a/bbyy_jet_classifier/strategies/skl_BDT.py +++ b/bbyy_jet_classifier/strategies/skl_BDT.py @@ -43,8 +43,12 @@ def train(self, train_data, classification_variables, variable_dict, sample_name fit_params = {"sample_weight":train_data["w"]} # Run grid search over provided ranges logging.getLogger("skl_BDT").info("Running grid search parameter optimisation...") - grid_search = GridSearchCV(estimator=GradientBoostingClassifier(learning_rate=0.2, min_samples_leaf=50, max_features="sqrt", subsample=0.8, random_state=10), - param_grid=parameters, fit_params=fit_params, scoring="roc_auc", n_jobs=1, iid=False, cv=3, verbose=1) + grid_search = GridSearchCV( + estimator=GradientBoostingClassifier( + learning_rate=0.2, min_samples_leaf=50, max_features="sqrt", subsample=0.8, random_state=10 + ), + param_grid=parameters, fit_params=fit_params, scoring="roc_auc", n_jobs=-1, iid=False, cv=3, verbose=1 + ) grid_search.fit(train_data["X"], train_data["y"]) for param_name in parameters.keys(): if grid_search.best_params_[param_name] in [ parameters[param_name][0], parameters[param_name][-1] ]: @@ -55,8 +59,8 @@ def train(self, train_data, classification_variables, variable_dict, sample_name else: classifier = GradientBoostingClassifier( - n_estimators=5, # was n_estimators=300 - max_depth=6, # was max_depth=15 + n_estimators=300, # was n_estimators=300 + max_depth=10, # was max_depth=15 min_samples_leaf=40, # was min_samples_split=0.5 * len(train_data["y"]) verbose=1 ) diff --git a/evaluate_event_performance.py b/evaluate_event_performance.py index a216abc..e0b1d5d 100755 --- a/evaluate_event_performance.py +++ b/evaluate_event_performance.py @@ -93,11 +93,11 @@ def main(strategy, category, lower_bound, intervals): # -- Write dictionary of Asimov significances to disk utils.ensure_directory(os.path.join("output", "pickles")) - with open(os.path.join("output", "pickles", "multi_proc_{}.pkl".format(strategy)), "wb") as f: + with open(os.path.join("output", "pickles", "multi_proc_{}_{}.pkl".format(strategy, category)), "wb") as f: cPickle.dump(asimov_dict, f) # -- Plot Z_BDT/Z_old for different threshold values - plot_asimov.bdt_old_ratio(asimov_dict, strategy, 'mHmatch', lower_bound) + plot_asimov.bdt_old_ratio(asimov_dict, category, strategy, 'mHmatch', lower_bound) # -- Print Asimov significance for different strategies and different samples in tabular form # Each table corresponds to a different threshold value diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..7df5f15 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,6 @@ +joblib==0.10.2 +matplotlib==1.5.2 +numpy==1.11.1 +root_numpy==4.4.0 +rootpy==0.8.1 +scikit_learn==0.18