In [15]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
import tensorflow as tf
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np
from deap import base, creator, tools
import random
import pandas as pd
from deap import algorithms

In [16]:
# Columns read from the CSV: two input features and the label to predict.
FEATURE_COLUMNS = ['MET', 'Rsq']
TARGET_COLUMN = 'Dark Photon Produced'

dataset = pd.read_csv('sim_with_razor.csv')

# Feature matrix and integer-cast label vector as plain NumPy arrays.
X = dataset[FEATURE_COLUMNS].values
y = dataset[TARGET_COLUMN].astype(int).values

# Hold out 20% of the rows for validation; the fixed seed keeps the split stable.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to both partitions.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_val_scaled = scaler.transform(X_val)

In [17]:
def decode_individual(individual):
    """Translate a two-gene individual into a model description.

    Parameters
    ----------
    individual : sequence
        ``individual[0]`` is the model-type gene (0 = logistic regression,
        1 = decision tree, 2 = random forest, 3 = neural network) and
        ``individual[1]`` is the integer hyper-parameter gene.

    Returns
    -------
    dict
        ``{'type': <model name or None>, 'params': {...}}``. ``type`` stays
        ``None`` and ``params`` stays empty when the model-type gene is not
        one of 0-3.

    Notes
    -----
    The hyper-parameter gene is clamped to the range ``create_individual``
    draws from for the selected model type. Crossover and mutation operate on
    the raw genes without knowing the model type, so a gene that was valid
    for one model (e.g. 78 estimators) can end up paired with another model
    (e.g. logistic regression, where it would decode to C ~ 855).
    """
    # Inclusive (low, high) bounds of the parameter gene per model type.
    param_bounds = {
        0: (1, 10),    # encoded C
        1: (1, 15),    # max_depth
        2: (10, 100),  # n_estimators
        3: (1, 3),     # number of hidden layers
    }

    model_type = individual[0]
    parameter = individual[1]
    model_info = {'type': None, 'params': {}}

    if model_type in param_bounds:
        low, high = param_bounds[model_type]
        parameter = min(max(parameter, low), high)

    if model_type == 0:  # Logistic Regression
        model_info['type'] = 'LogisticRegression'
        # Map the gene linearly from [1, 10] onto C in [0.01, 100].
        C_value = 0.01 + (parameter - 1) * (100 - 0.01) / (10 - 1)
        model_info['params']['C'] = C_value

    elif model_type == 1:  # Decision Tree
        model_info['type'] = 'DecisionTreeClassifier'
        model_info['params']['max_depth'] = parameter

    elif model_type == 2:  # Random Forest
        model_info['type'] = 'RandomForestClassifier'
        model_info['params']['n_estimators'] = parameter
        model_info['params']['max_depth'] = 5  # Example fixed value

    elif model_type == 3:  # Neural Network
        model_info['type'] = 'NeuralNetworkClassifier'
        # The gene selects how many hidden layers the network gets.
        layer_presets = {1: [64], 2: [64, 32]}
        model_info['params']['layers'] = layer_presets.get(parameter, [64, 32, 16])

    return model_info


In [18]:
def train_model(decoded_individual):
    """Fit the described model on the training split and score it on validation.

    Reads the module-level ``X_train``/``X_train_scaled``/``y_train`` and
    ``X_val``/``X_val_scaled``/``y_val`` arrays.

    Parameters
    ----------
    decoded_individual : dict
        Model description with a ``'type'`` name and a ``'params'`` dict, in
        the shape returned by ``decode_individual``.

    Returns
    -------
    float
        Accuracy of the fitted model on the validation split.

    Raises
    ------
    ValueError
        If ``'type'`` is not one of the four supported model names (this
        includes ``None``).
    """
    model_type = decoded_individual['type']
    params = decoded_individual['params']

    if model_type == 'LogisticRegression':
        # Linear model: trained and evaluated on the standardised features.
        model = LogisticRegression(**params)
        model.fit(X_train_scaled, y_train)
        # predict() already returns class labels, so no thresholding is needed.
        predictions = model.predict(X_val_scaled)

    elif model_type == 'DecisionTreeClassifier':
        # Tree models are trained and evaluated on the unscaled features.
        model = DecisionTreeClassifier(**params)
        model.fit(X_train, y_train)
        predictions = model.predict(X_val)

    elif model_type == 'RandomForestClassifier':
        model = RandomForestClassifier(**params)
        model.fit(X_train, y_train)
        predictions = model.predict(X_val)

    elif model_type == 'NeuralNetworkClassifier':
        # This function is called once per evaluated individual; drop the
        # global Keras state left by earlier models so it does not pile up.
        tf.keras.backend.clear_session()

        model = tf.keras.Sequential()
        model.add(tf.keras.layers.Dense(params['layers'][0], input_shape=(X_train_scaled.shape[1],), activation='relu'))
        for neurons in params['layers'][1:]:
            model.add(tf.keras.layers.Dense(neurons, activation='relu'))
        model.add(tf.keras.layers.Dense(1, activation='sigmoid'))  # Output layer
        model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
        model.fit(X_train_scaled, y_train, epochs=100, batch_size=10, verbose=0)

        # The sigmoid output is a probability; threshold at 0.5 to get labels.
        # verbose=0 keeps prediction as quiet as training.
        probabilities = model.predict(X_val_scaled, verbose=0)
        predictions = (probabilities > 0.5).astype(int).flatten()

    else:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError(f"Unsupported model type: {model_type!r}")

    return accuracy_score(y_val, predictions)

In [19]:
def eval_model(individual):
    """Fitness function: validation accuracy of the model an individual encodes.

    The individual is decoded with ``decode_individual`` and trained/scored
    with ``train_model``. The accuracy is returned as a one-element tuple.
    """
    model_spec = decode_individual(individual)
    return (train_model(model_spec),)

In [20]:
# Single-objective maximisation: the fitness tuple has one entry with weight +1.
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", list, fitness=creator.FitnessMax)

toolbox = base.Toolbox()


def create_individual():
    """Draw a random ``[model_type, parameter]`` individual.

    The model type is drawn uniformly from 0-3, then the parameter gene is
    drawn uniformly from the inclusive range associated with that type.
    """
    # Inclusive (low, high) bounds of the parameter gene, indexed by model type.
    parameter_bounds = (
        (1, 10),    # 0: Logistic Regression -- C parameter encoded
        (1, 15),    # 1: Decision Tree -- max depth
        (10, 100),  # 2: Random Forest -- number of estimators
        (1, 3),     # 3: Neural Network -- simplified to number of layers
    )
    kind = random.randint(0, len(parameter_bounds) - 1)
    low, high = parameter_bounds[kind]
    return creator.Individual([kind, random.randint(low, high)])


# Population construction: a list filled with repeated create_individual() calls.
toolbox.register("individual", create_individual)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

# Genetic operators.
toolbox.register("mate", tools.cxTwoPoint)
# Mutation bounds are given per gene: model type in [0, 3] and parameter in
# [1, 100]. NOTE: the parameter bounds do not depend on the model type, so a
# mutated parameter can fall outside the range create_individual() uses for
# that model.
toolbox.register("mutate", tools.mutUniformInt, low=[0,1], up=[3,100], indpb=0.2)
toolbox.register("select", tools.selTournament, tournsize=3)
toolbox.register("evaluate", eval_model)



In [24]:
# Seed every random source the search relies on so that a fresh
# "Restart & Run All" reproduces the same evolution: DEAP's operators and
# create_individual() draw from `random`, scikit-learn estimators created
# without a random_state use NumPy's global generator, and TensorFlow's global
# seed covers the Keras model. 42 matches the seed of the train/validation split.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

population = toolbox.population(n=2)
hof = tools.HallOfFame(1)  # Store the best individual

# Per-generation summary of the population's fitness values.
stats = tools.Statistics(lambda ind: ind.fitness.values)
stats.register("avg", np.mean)
stats.register("min", np.min)
stats.register("max", np.max)

# eaSimple returns the final population together with the logbook of statistics.
result = algorithms.eaSimple(population, toolbox, cxpb=0.5, mutpb=0.2, ngen=5, stats=stats, halloffame=hof, verbose=True)

gen	nevals	avg     	min   	max    
0  	2     	0.999925	0.9999	0.99995
1  	2     	0.99995 	0.99995	0.99995
2  	2     	0.99995 	0.99995	0.99995
3  	2     	0.99995 	0.99995	0.99995
4  	2     	0.99995 	0.99995	0.99995
5  	1     	0.99995 	0.99995	0.99995


In [26]:
# Take the top hall-of-fame entry and decode it into a readable model
# description; the bare last expression displays it as the cell output.
best_individual = hof[0]
best_model_info = decode_individual(best_individual)
best_model_info

{'type': 'RandomForestClassifier',
 'params': {'n_estimators': 78, 'max_depth': 5}}