Skip to content

Commit

Permalink
Minor changes
Browse files (browse the repository at this point in the history)
  • Loading branch information
Michela Paganini committed Oct 25, 2016
1 parent a4b0605 commit d70d7ab
Show file tree
Hide file tree
Showing 5 changed files with 20 additions and 9 deletions.
5 changes: 3 additions & 2 deletions bbyy_jet_classifier/plotting/plot_asimov.py
Expand Up @@ -2,9 +2,10 @@
import matplotlib
import cPickle
import numpy as np
import os
import plot_atlas

def bdt_old_ratio(data, strategy, baseline_strategy, lower_bound):
def bdt_old_ratio(data, category, strategy, baseline_strategy, lower_bound):

plot_atlas.set_style()
figure = plt.figure(figsize=(6, 6), dpi=100)
Expand Down Expand Up @@ -34,5 +35,5 @@ def bdt_old_ratio(data, strategy, baseline_strategy, lower_bound):
plt.ylim(ymin=0.2, ymax=2.8)

plt.legend(loc='upper left')
plt.savefig('threshold_ratio_{}.pdf'.format(strategy))
plt.savefig(os.path.join('output', 'threshold_ratio_{}_{}.pdf'.format(strategy, category)))
plt.close(figure)
2 changes: 1 addition & 1 deletion bbyy_jet_classifier/strategies/root_tmva.py
Expand Up @@ -47,7 +47,7 @@ def train(self, train_data, classification_variables, variable_dict, sample_name
#-- Define methods:
# ["NTrees=200", "MinNodeSize=0.1", "MaxDepth=6", "BoostType=Grad", "SeparationType=GiniIndex", "NegWeightTreatment=IgnoreNegWeightsInTraining"]
factory.BookMethod(TMVA.Types.kBDT, "BDT", ":".join(
["NTrees=300", "MinNodeSize=0.01", "MaxDepth=8", "BoostType=Grad", "SeparationType=GiniIndex", "NegWeightTreatment=Pray"]
["NTrees=300", "MinNodeSize=0.01", "MaxDepth=15", "BoostType=Grad", "SeparationType=GiniIndex", "NegWeightTreatment=IgnoreNegWeightsInTraining"]# "NegWeightTreatment=Pray"]
))

# -- Have we considered using a Fisher classifier?
Expand Down
12 changes: 8 additions & 4 deletions bbyy_jet_classifier/strategies/skl_BDT.py
Expand Up @@ -43,8 +43,12 @@ def train(self, train_data, classification_variables, variable_dict, sample_name
fit_params = {"sample_weight":train_data["w"]}
# Run grid search over provided ranges
logging.getLogger("skl_BDT").info("Running grid search parameter optimisation...")
grid_search = GridSearchCV(estimator=GradientBoostingClassifier(learning_rate=0.2, min_samples_leaf=50, max_features="sqrt", subsample=0.8, random_state=10),
param_grid=parameters, fit_params=fit_params, scoring="roc_auc", n_jobs=1, iid=False, cv=3, verbose=1)
grid_search = GridSearchCV(
estimator=GradientBoostingClassifier(
learning_rate=0.2, min_samples_leaf=50, max_features="sqrt", subsample=0.8, random_state=10
),
param_grid=parameters, fit_params=fit_params, scoring="roc_auc", n_jobs=-1, iid=False, cv=3, verbose=1
)
grid_search.fit(train_data["X"], train_data["y"])
for param_name in parameters.keys():
if grid_search.best_params_[param_name] in [ parameters[param_name][0], parameters[param_name][-1] ]:
Expand All @@ -55,8 +59,8 @@ def train(self, train_data, classification_variables, variable_dict, sample_name

else:
classifier = GradientBoostingClassifier(
n_estimators=5, # was n_estimators=300
max_depth=6, # was max_depth=15
n_estimators=300, # was n_estimators=300
max_depth=10, # was max_depth=15
min_samples_leaf=40, # was min_samples_split=0.5 * len(train_data["y"])
verbose=1
)
Expand Down
4 changes: 2 additions & 2 deletions evaluate_event_performance.py
Expand Up @@ -93,11 +93,11 @@ def main(strategy, category, lower_bound, intervals):

# -- Write dictionary of Asimov significances to disk
utils.ensure_directory(os.path.join("output", "pickles"))
with open(os.path.join("output", "pickles", "multi_proc_{}.pkl".format(strategy)), "wb") as f:
with open(os.path.join("output", "pickles", "multi_proc_{}_{}.pkl".format(strategy, category)), "wb") as f:
cPickle.dump(asimov_dict, f)

# -- Plot Z_BDT/Z_old for different threshold values
plot_asimov.bdt_old_ratio(asimov_dict, strategy, 'mHmatch', lower_bound)
plot_asimov.bdt_old_ratio(asimov_dict, category, strategy, 'mHmatch', lower_bound)

# -- Print Asimov significance for different strategies and different samples in tabular form
# Each table corresponds to a different threshold value
Expand Down
6 changes: 6 additions & 0 deletions requirements.txt
@@ -0,0 +1,6 @@
joblib==0.10.2
matplotlib==1.5.2
numpy==1.11.1
root_numpy==4.4.0
rootpy==0.8.1
scikit_learn==0.18

0 comments on commit d70d7ab

Please sign in to comment.