In [16]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np
from deap import base, creator, tools, algorithms
import pandas as pd
from sklearn.ensemble import AdaBoostClassifier
import warnings
from pathos.multiprocessing import ProcessingPool as Pool
warnings.filterwarnings("ignore")

In [17]:
dataset = pd.read_csv('sim_with_razor2.csv')

X = dataset[['MET', 'Rsq']].values  
y = dataset['Dark Photon Produced'].astype(int).values  

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)

In [18]:
base_cls = DecisionTreeClassifier(max_depth=2)
def evalModel(individual):
    n_estimators, learning_rate = individual
    learning_rate = learning_rate / 10

    ada_boost = AdaBoostClassifier(base_estimator=base_cls, n_estimators=int(n_estimators), learning_rate=learning_rate)
    
    ada_boost.fit(X_train_scaled, y_train)
    
    predictions = ada_boost.predict(X_val_scaled)

    accuracy = accuracy_score(y_val, predictions)
    return (accuracy,)

In [19]:
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", list, fitness=creator.FitnessMax)

toolbox = base.Toolbox()
toolbox.register("attr_n_estimators", np.random.randint, 50, 400)
toolbox.register("attr_learning_rate", np.random.randint, 1, 10)
toolbox.register("individual", tools.initCycle, creator.Individual,
                 (toolbox.attr_n_estimators, toolbox.attr_learning_rate), n=1)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

toolbox.register("evaluate", evalModel)
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", tools.mutUniformInt, low=[10, 1], up=[400, 50], indpb=0.1)
toolbox.register("select", tools.selTournament, tournsize=3)

# toolbox.register("select", tools.selRoulette) # option to make things more interesting if needed

In [20]:
population = toolbox.population(n=50)
hof = tools.HallOfFame(1)  # Hall of Fame to store the best individual
stats = tools.Statistics(lambda ind: ind.fitness.values)
stats.register("avg", np.mean)
stats.register("std", np.std)
stats.register("min", np.min)
stats.register("max", np.max)


result = algorithms.eaSimple(population, toolbox, cxpb=0.5, mutpb=0.2, ngen=10, stats=stats, halloffame=hof, verbose=True)

gen	nevals	avg     	std        	min     	max     
0  	2     	0.999951	9.80392e-06	0.999941	0.999961
1  	0     	0.999961	0          	0.999961	0.999961
2  	2     	0.999961	0          	0.999961	0.999961
3  	2     	0.999956	4.90196e-06	0.999951	0.999961


In [21]:
best_hyperparams = hof.items[0]
print(f"Optimized Hyperparameters for AdaBoost:")
print(f"  n_estimators: {best_hyperparams[0]}, learning_rate: {best_hyperparams[1]/10}")

Optimized Hyperparameters for AdaBoost:
  n_estimators: 391, learning_rate: 0.1


In [22]:
ada_boost = AdaBoostClassifier(base_estimator=base_cls, n_estimators=int(best_hyperparams[0]), learning_rate=(best_hyperparams[1]/10))
ada_boost.fit(X_train_scaled, y_train)

In [23]:
X_random_sample = dataset[['MET', 'Rsq']].values
y_random_sample = dataset['Dark Photon Produced'].astype(int).values

X_random_sample_scaled = scaler.transform(X_random_sample)

random_sample_predictions = ada_boost.predict(X_random_sample_scaled)

accuracy_random_sample = accuracy_score(y_random_sample, random_sample_predictions)
print(f"Accuracy of AdaBoost model on entire data set: {accuracy_random_sample}")

Accuracy of AdaBoost model on entire data set: 0.9999921568627451


In [24]:
dark_photon_instances = dataset[dataset['Dark Photon Produced'] == 1]
X_dark_photon = dark_photon_instances[['MET', 'Rsq']].values
y_dark_photon = dark_photon_instances['Dark Photon Produced'].astype(int).values

X_dark_photon_scaled = scaler.transform(X_dark_photon)
dark_photon_predictions = ada_boost.predict(X_dark_photon_scaled)

accuracy_dark_photon = accuracy_score(y_dark_photon, dark_photon_predictions)
print(f"Accuracy of AdaBoost model on dark photon produced instances: {accuracy_dark_photon}")
print("Predictions on dark photon produced instances:", dark_photon_predictions)

Accuracy of AdaBoost model on dark photon produced instances: 0.8518518518518519
Predictions on dark photon produced instances: [1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 0 1 1 1 1 1 0 1 1 0 1 1]


In [25]:
for i, (features, label) in enumerate(zip(X_dark_photon, y_dark_photon)):
    features_scaled = scaler.transform([features])
    prediction = ada_boost.predict(features_scaled)
    prediction_result = "Yes" if prediction[0] == 1 else "No"
    actual_result = "Yes" if label == 1 else "No"    
    print(f"Data Point {i+1}:")
    print(f"  Prediction for Dark Photon Produced: {prediction_result}")
    print(f"  Actual: {actual_result}\n")

Data Point 1:
  Prediction for Dark Photon Produced: Yes
  Actual: Yes

Data Point 2:
  Prediction for Dark Photon Produced: Yes
  Actual: Yes

Data Point 3:
  Prediction for Dark Photon Produced: Yes
  Actual: Yes

Data Point 4:
  Prediction for Dark Photon Produced: Yes
  Actual: Yes

Data Point 5:
  Prediction for Dark Photon Produced: Yes
  Actual: Yes

Data Point 6:
  Prediction for Dark Photon Produced: Yes
  Actual: Yes

Data Point 7:
  Prediction for Dark Photon Produced: Yes
  Actual: Yes

Data Point 8:
  Prediction for Dark Photon Produced: Yes
  Actual: Yes

Data Point 9:
  Prediction for Dark Photon Produced: Yes
  Actual: Yes

Data Point 10:
  Prediction for Dark Photon Produced: No
  Actual: Yes

Data Point 11:
  Prediction for Dark Photon Produced: Yes
  Actual: Yes

Data Point 12:
  Prediction for Dark Photon Produced: Yes
  Actual: Yes

Data Point 13:
  Prediction for Dark Photon Produced: Yes
  Actual: Yes

Data Point 14:
  Prediction for Dark Photon Produced: Yes
  A