Minor changes

mickypaganini · Oct 25, 2016 · d70d7ab
1 parent a4b0605
commit d70d7ab
Show file tree

Hide file tree

Showing 5 changed files with 20 additions and 9 deletions.
diff --git a/bbyy_jet_classifier/plotting/plot_asimov.py b/bbyy_jet_classifier/plotting/plot_asimov.py
@@ -2,9 +2,10 @@
 import matplotlib
 import cPickle
 import numpy as np
+import os
 import plot_atlas
 
-def bdt_old_ratio(data, strategy, baseline_strategy, lower_bound):
+def bdt_old_ratio(data, category, strategy, baseline_strategy, lower_bound):
 
     plot_atlas.set_style()
     figure = plt.figure(figsize=(6, 6), dpi=100)
@@ -34,5 +35,5 @@ def bdt_old_ratio(data, strategy, baseline_strategy, lower_bound):
     plt.ylim(ymin=0.2, ymax=2.8)
 
     plt.legend(loc='upper left')
-    plt.savefig('threshold_ratio_{}.pdf'.format(strategy))
+    plt.savefig(os.path.join('output', 'threshold_ratio_{}_{}.pdf'.format(strategy, category)))
     plt.close(figure)
diff --git a/bbyy_jet_classifier/strategies/root_tmva.py b/bbyy_jet_classifier/strategies/root_tmva.py
@@ -47,7 +47,7 @@ def train(self, train_data, classification_variables, variable_dict, sample_name
         #-- Define methods:
         # ["NTrees=200", "MinNodeSize=0.1", "MaxDepth=6", "BoostType=Grad", "SeparationType=GiniIndex",  "NegWeightTreatment=IgnoreNegWeightsInTraining"]
         factory.BookMethod(TMVA.Types.kBDT, "BDT", ":".join(
-            ["NTrees=300", "MinNodeSize=0.01", "MaxDepth=8", "BoostType=Grad", "SeparationType=GiniIndex",  "NegWeightTreatment=Pray"]
+            ["NTrees=300", "MinNodeSize=0.01", "MaxDepth=15", "BoostType=Grad", "SeparationType=GiniIndex", "NegWeightTreatment=IgnoreNegWeightsInTraining"]#  "NegWeightTreatment=Pray"]
         ))
 
         # -- Have we considered using a Fisher classifier?

diff --git a/bbyy_jet_classifier/strategies/skl_BDT.py b/bbyy_jet_classifier/strategies/skl_BDT.py
@@ -43,8 +43,12 @@ def train(self, train_data, classification_variables, variable_dict, sample_name
             fit_params = {"sample_weight":train_data["w"]}
             # Run grid search over provided ranges
             logging.getLogger("skl_BDT").info("Running grid search parameter optimisation...")
-            grid_search = GridSearchCV(estimator=GradientBoostingClassifier(learning_rate=0.2, min_samples_leaf=50, max_features="sqrt", subsample=0.8, random_state=10),
-                                       param_grid=parameters, fit_params=fit_params, scoring="roc_auc", n_jobs=1, iid=False, cv=3, verbose=1)
+            grid_search = GridSearchCV(
+                estimator=GradientBoostingClassifier(
+                    learning_rate=0.2, min_samples_leaf=50, max_features="sqrt", subsample=0.8, random_state=10
+                ),
+                param_grid=parameters, fit_params=fit_params, scoring="roc_auc", n_jobs=-1, iid=False, cv=3, verbose=1
+            )
             grid_search.fit(train_data["X"], train_data["y"])
             for param_name in parameters.keys():
                 if grid_search.best_params_[param_name] in [ parameters[param_name][0], parameters[param_name][-1] ]:
@@ -55,8 +59,8 @@ def train(self, train_data, classification_variables, variable_dict, sample_name
 
         else:
             classifier = GradientBoostingClassifier(
-                n_estimators=5, # was n_estimators=300
-                max_depth=6, # was max_depth=15
+                n_estimators=300, # was n_estimators=300
+                max_depth=10, # was max_depth=15
                 min_samples_leaf=40, # was min_samples_split=0.5 * len(train_data["y"])
                 verbose=1
                 )

diff --git a/evaluate_event_performance.py b/evaluate_event_performance.py
@@ -93,11 +93,11 @@ def main(strategy, category, lower_bound, intervals):
 
     # -- Write dictionary of Asimov significances to disk
     utils.ensure_directory(os.path.join("output", "pickles"))
-    with open(os.path.join("output", "pickles", "multi_proc_{}.pkl".format(strategy)), "wb") as f:
+    with open(os.path.join("output", "pickles", "multi_proc_{}_{}.pkl".format(strategy, category)), "wb") as f:
         cPickle.dump(asimov_dict, f)
 
     # -- Plot Z_BDT/Z_old for different threshold values
-    plot_asimov.bdt_old_ratio(asimov_dict, strategy, 'mHmatch', lower_bound)
+    plot_asimov.bdt_old_ratio(asimov_dict, category, strategy, 'mHmatch', lower_bound)
 
     # -- Print Asimov significance for different strategies and different samples in tabular form
     #    Each table corresponds to a different threshold value

diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,6 @@
+joblib==0.10.2
+matplotlib==1.5.2
+numpy==1.11.1
+root_numpy==4.4.0
+rootpy==0.8.1
+scikit_learn==0.18