In [None]:
import kagglehub
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, make_scorer
import time
import random
from sklearn.model_selection import cross_val_score
from sklearn.metrics import make_scorer, f1_score
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation

In [2]:
path = kagglehub.dataset_download("wenruliu/adult-income-dataset")
import os
csv_file_path = None
expected_file = 'adult.csv'
for root, g, files in os.walk(path):
    if expected_file in files:
        csv_file_path = os.path.join(root, expected_file)
        break

In [3]:
df = pd.read_csv(csv_file_path, na_values=['?'])
df.shape

(48842, 15)

In [4]:
df.head()

Unnamed: 0,age,workclass,fnlwgt,education,educational-num,marital-status,occupation,relationship,race,gender,capital-gain,capital-loss,hours-per-week,native-country,income
0,25,Private,226802,11th,7,Never-married,Machine-op-inspct,Own-child,Black,Male,0,0,40,United-States,<=50K
1,38,Private,89814,HS-grad,9,Married-civ-spouse,Farming-fishing,Husband,White,Male,0,0,50,United-States,<=50K
2,28,Local-gov,336951,Assoc-acdm,12,Married-civ-spouse,Protective-serv,Husband,White,Male,0,0,40,United-States,>50K
3,44,Private,160323,Some-college,10,Married-civ-spouse,Machine-op-inspct,Husband,Black,Male,7688,0,40,United-States,>50K
4,18,,103497,Some-college,10,Never-married,,Own-child,White,Female,0,0,30,United-States,<=50K


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48842 entries, 0 to 48841
Data columns (total 15 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   age              48842 non-null  int64 
 1   workclass        46043 non-null  object
 2   fnlwgt           48842 non-null  int64 
 3   education        48842 non-null  object
 4   educational-num  48842 non-null  int64 
 5   marital-status   48842 non-null  object
 6   occupation       46033 non-null  object
 7   relationship     48842 non-null  object
 8   race             48842 non-null  object
 9   gender           48842 non-null  object
 10  capital-gain     48842 non-null  int64 
 11  capital-loss     48842 non-null  int64 
 12  hours-per-week   48842 non-null  int64 
 13  native-country   47985 non-null  object
 14  income           48842 non-null  object
dtypes: int64(6), object(9)
memory usage: 5.6+ MB


In [6]:
df.isnull().sum()

age                   0
workclass          2799
fnlwgt                0
education             0
educational-num       0
marital-status        0
occupation         2809
relationship          0
race                  0
gender                0
capital-gain          0
capital-loss          0
hours-per-week        0
native-country      857
income                0
dtype: int64

In [7]:
df.duplicated().sum()

np.int64(52)

In [8]:
df_processed = df.copy()

SUBSET_SIZE = 5000

df_processed = df_processed.sample(n=SUBSET_SIZE, random_state=42).reset_index(drop=True)

TARGET_COLUMN = 'income'
NEW_TARGET_NAME = 'Outcome'

y = df_processed[TARGET_COLUMN]
X = df_processed.drop(TARGET_COLUMN, axis=1)
COLUMNS_TO_DROP = ['fnlwgt']
if 'ID' in X.columns:
    COLUMNS_TO_DROP.append('ID')
if 'policy_id' in X.columns:
    COLUMNS_TO_DROP.append('policy_id')

X = X.drop(columns=COLUMNS_TO_DROP, errors='ignore')

y = y.rename(NEW_TARGET_NAME)

if y.dtype == 'object':
    le = LabelEncoder()
    y = pd.Series(le.fit_transform(y), name=NEW_TARGET_NAME, index=y.index)

numerical_features = X.select_dtypes(include=np.number).columns.tolist()
categorical_features = X.select_dtypes(include=['object', 'category']).columns.tolist()

all_identified_features = numerical_features + categorical_features
if len(all_identified_features) != X.shape[1]:
    print("Warning: Not all columns were classified as numerical or categorical!")
    print("Unclassified columns:", [col for col in X.columns if col not in all_identified_features])


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)
print("X_test shape:", X_test.shape)
print("y_test shape:", y_test.shape)


numerical_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())
])

categorical_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

transformers = []
if numerical_features:
    transformers.append(('num', numerical_pipeline, numerical_features))
if categorical_features:
    transformers.append(('cat', categorical_pipeline, categorical_features))

if not transformers:
     raise ValueError("No numerical or categorical features identified for preprocessing.")


preprocessor = ColumnTransformer(transformers=transformers, remainder='passthrough')

X_train shape: (4000, 13)
y_train shape: (4000,)
X_test shape: (1000, 13)
y_test shape: (1000,)


In [9]:
def evaluate_classifier(model, X, y):
    y_pred = model.predict(X)
    if y.dtype not in [np.number, np.int64, np.float64]:
         print(f"Warning: Target labels are not numerical (dtype is {y.dtype}). Cannot calculate ROC AUC.")
         auc = np.nan
    else:
        try:
            if hasattr(model, 'predict_proba'):
                 y_prob = model.predict_proba(X)[:, 1]
                 if len(np.unique(y)) == 2:
                     auc = roc_auc_score(y, y_prob)
                 else:
                     auc = np.nan

            else:
                 auc = np.nan

        except Exception as e:
            print(f"Warning: Could not calculate ROC AUC. Error: {e}")
            auc = np.nan

    metrics = {
        'accuracy': accuracy_score(y, y_pred),
        'precision': precision_score(y, y_pred),
        'recall': recall_score(y, y_pred),
        'f1_score': f1_score(y, y_pred),
        'roc_auc': auc
    }
    return metrics

In [10]:
svm_baseline_estimator = SVC(random_state=42, probability=True)

baseline_pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('svc', svm_baseline_estimator)
])

print(baseline_pipeline.named_steps['svc'].get_params())

start_time = time.time()
baseline_pipeline.fit(X_train, y_train)
train_time_baseline = time.time() - start_time

print(f"\nBaseline model training time: {train_time_baseline:.4f} seconds")

baseline_metrics = evaluate_classifier(baseline_pipeline, X_test, y_test)

for metric, value in baseline_metrics.items():
    print(f"{metric}: {value:.4f}")

{'C': 1.0, 'break_ties': False, 'cache_size': 200, 'class_weight': None, 'coef0': 0.0, 'decision_function_shape': 'ovr', 'degree': 3, 'gamma': 'scale', 'kernel': 'rbf', 'max_iter': -1, 'probability': True, 'random_state': 42, 'shrinking': True, 'tol': 0.001, 'verbose': False}

Baseline model training time: 5.0983 seconds
accuracy: 0.8700
precision: 0.7865
recall: 0.6292
f1_score: 0.6991
roc_auc: 0.9041


  if y.dtype not in [np.number, np.int64, np.float64]:


In [11]:
print("\n--- Traditional Tuning: Grid Search for SVM Parameters ---")

svm_gs_estimator = SVC(random_state=42, probability=True)

gs_pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('svc', svm_gs_estimator)
])

param_grid = {
    'svc__C': [0.1, 1, 10],
    'svc__gamma': [0.01, 0.1, 'scale'],
    'svc__kernel': ['rbf']
}

print(param_grid)

cv_strategy = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)

grid_search = GridSearchCV(
    estimator=gs_pipeline,
    param_grid=param_grid,
    cv=cv_strategy,
    scoring='f1',
    n_jobs=-1,
    verbose=2
)

start_time = time.time()
grid_search.fit(X_train, y_train)
tuning_time_gs = time.time() - start_time

best_params_gs = grid_search.best_params_
best_svm_model_gs = grid_search.best_estimator_

print(best_params_gs)
print(f"Best cross-validation score ({grid_search.scorer_}): {grid_search.best_score_:.4f}")

tuned_metrics_gs = evaluate_classifier(best_svm_model_gs, X_test, y_test)

for metric, value in tuned_metrics_gs.items():
    print(f"{metric}: {value:.4f}")


--- Traditional Tuning: Grid Search for SVM Parameters ---
{'svc__C': [0.1, 1, 10], 'svc__gamma': [0.01, 0.1, 'scale'], 'svc__kernel': ['rbf']}
Fitting 3 folds for each of 9 candidates, totalling 27 fits
{'svc__C': 10, 'svc__gamma': 0.01, 'svc__kernel': 'rbf'}
Best cross-validation score (make_scorer(f1_score, response_method='predict', average=binary)): 0.6572


  if y.dtype not in [np.number, np.int64, np.float64]:


accuracy: 0.8660
precision: 0.7650
recall: 0.6375
f1_score: 0.6955
roc_auc: 0.9080


In [12]:
print("Metric         | Baseline SVM | Grid Search Tuned SVM")
print("---------------------------------------------------")
all_metrics = sorted(list(set(baseline_metrics.keys()) | set(tuned_metrics_gs.keys())))

for metric in all_metrics:
    baseline_val = baseline_metrics.get(metric, np.nan)
    tuned_gs_val = tuned_metrics_gs.get(metric, np.nan)

    baseline_str = f"{baseline_val:.4f}" if pd.notna(baseline_val) else "N/A     "
    tuned_gs_str = f"{tuned_gs_val:.4f}" if pd.notna(tuned_gs_val) else "N/A     "

    print(f"{metric:<14} | {baseline_str}     | {tuned_gs_str}")

print(f"\nTuning Time (seconds): Baseline: {train_time_baseline:.4f}, Grid Search: {tuning_time_gs:.4f}")

Metric         | Baseline SVM | Grid Search Tuned SVM
---------------------------------------------------
accuracy       | 0.8700     | 0.8660
f1_score       | 0.6991     | 0.6955
precision      | 0.7865     | 0.7650
recall         | 0.6292     | 0.6375
roc_auc        | 0.9041     | 0.9080

Tuning Time (seconds): Baseline: 5.0983, Grid Search: 43.5658


In [34]:
N_POPULATION = 20
N_ITERATIONS = 10
MUTATION_RATE = 0.5
MIN_VAL_C = 0.1
MAX_VAL_C = 100
MIN_VAL_GAMMA = 0.0001
MAX_VAL_GAMMA = 1
MUTATION_STRENGTH = .1

In [14]:
print(random.random())

0.3965703784017728


In [15]:
def random_chromosome():
    return [random.uniform(MIN_VAL_C, MAX_VAL_C), random.uniform(MIN_VAL_GAMMA, MAX_VAL_GAMMA)] 

In [37]:
chromosome = random_chromosome()
print( chromosome)

[58.286414808827644, 0.5589024143552049]


In [None]:
def fitness(chromosome, X, y):
    C, gamma = chromosome[0], chromosome[1]

    model = Pipeline([
        ('preprocessor', preprocessor),
        ('svc', SVC(C=C, gamma=gamma, kernel='rbf'))
    ])
    scores = cross_val_score(model, X, y, cv=3, scoring='f1_macro')

    return scores.mean()


In [19]:
def create_population(N_POPULATION):
    pop = []
    for _ in range(N_POPULATION): 
        individual = random_chromosome()
        pop.append(individual)
    return pop

In [20]:
def evaluate_population(pop, X, y):
    evaluated = []
    for indiv in pop:
        fit = fitness(indiv, X, y)
        evaluated.append([indiv[0], indiv[1], fit])
    return evaluated


In [49]:
pop_evaluated = evaluate_population(pop, X_train, y_train)

In [121]:
pop_evaluated

[[21.228110741887594, 0.7082897999156881, np.float64(0.79125)],
 [49.50892985889862, 0.1869475922769497, np.float64(0.8192499999999999)],
 [45.03372230631889, 0.8538861509445128, np.float64(0.786)],
 [15.317167896224023, 0.4965294888390049, np.float64(0.80625)],
 [21.515737555957617, 0.018493573351651815, np.float64(0.8525)],
 [77.5070096017225, 0.46616730340731694, np.float64(0.7982500000000001)],
 [46.72013201175253, 0.6868134961036066, np.float64(0.7932499999999999)],
 [71.91639037101667, 0.2360559305109132, np.float64(0.81225)],
 [4.785157692471482, 0.28627143697648566, np.float64(0.8352499999999999)],
 [99.74242166956603, 0.05512590376546943, np.float64(0.8227499999999999)]]

In [21]:
def tournament_selection(pop, tournament_size=5):
    
    tournament = random.sample(pop, tournament_size)
    
    winner = max(tournament, key=lambda x: x[2])
    
    return winner

In [57]:
winn = tournament_selection(pop_evaluated)
winn

[87.93661433899456, 0.33453418484075664, np.float64(0.8074999999999999)]

In [22]:
def roulette_wheel_selection(pop):
    total_fitness = sum(individual[2] for individual in pop)
    
    pick = random.uniform(0, total_fitness)
    current = 0
    
    for individual in pop:
        current += individual[2]
        if current > pick:
            return individual


In [96]:
ind = roulette_wheel_selection(pop_evaluated)
ind

[91.7640111568729, 0.5088838376373818, np.float64(0.79775)]

In [23]:
def rank_selection(pop):
    ranked_pop = sorted(pop, key=lambda x: x[2], reverse=True)
    
    total_rank = sum(range(1, len(pop) + 1))
    pick = random.uniform(0, total_rank)
    current_rank = 0
    
    for i, individual in enumerate(ranked_pop):
        current_rank += (i + 1)
        if current_rank >= pick:
            return individual


In [84]:
indd = rank_selection(pop_evaluated)
indd

[44.54608996572458, 0.388042596694855, np.float64(0.8067499999999999)]

In [24]:
def stochastic_universal_sampling(pop, num_parents=2):
    total_fitness = sum(individual[2] for individual in pop)
    
    # Divide the fitness sum into equal intervals
    distance = total_fitness / num_parents
    start_point = random.uniform(0, distance)
    
    selected_parents = []
    current_point = start_point
    for _ in range(num_parents):
        total = 0
        for individual in pop:
            total += individual[2]
            if total >= current_point:
                selected_parents.append(individual)
                current_point += distance
                break
    
    return selected_parents


In [98]:
inddd = stochastic_universal_sampling(pop_evaluated)
inddd[1]

[91.7640111568729, 0.5088838376373818, np.float64(0.79775)]

In [25]:
def uniform_mutation(individual):
    c, gamma, fitness = individual

    if random.random() < MUTATION_RATE:
        c = random.uniform(MIN_VAL_C, MAX_VAL_C)
        gamma = random.uniform(MIN_VAL_GAMMA, MAX_VAL_GAMMA)
        return [c, gamma, 0]

    return individual 

In [100]:
mutated = uniform_mutation(indd)

In [101]:
mutated

[35.1105400227239, 0.38665807434407556, 0]

In [26]:
def creep_mutation(individual):
    c, gamma, fitness = individual

    if random.random() < MUTATION_RATE:
        c += random.uniform(-0.05, 0.05)
        gamma += random.uniform(-0.05, 0.05)
        
        c = min(max(c, MIN_VAL_C), MAX_VAL_C)
        gamma = min(max(gamma, MIN_VAL_GAMMA), MAX_VAL_GAMMA)
        return [c, gamma, 0]
    
    return individual


In [110]:
print(creep_mutation(indd))

[44.54608996572458, 0.388042596694855, np.float64(0.8067499999999999)]


In [27]:
def gaussian_mutation(individual):
    c, gamma, fitness = individual

    if random.random() < MUTATION_RATE:
        c = min(max(c + random.gauss(0, MUTATION_STRENGTH), MIN_VAL_C), MAX_VAL_C)
        gamma = min(max(gamma + random.gauss(0, MUTATION_STRENGTH), MIN_VAL_GAMMA), MAX_VAL_GAMMA)
        return [c, gamma, 0]

    return individual


In [114]:
print(gaussian_mutation(indd))

[44.39748879297673, 0.35228535610403444, 0]


In [28]:
def uniform_crossover(parent1, parent2):
    c1, gamma1, _ = parent1
    c2, gamma2, _ = parent2
    
    new_c = c1 if random.random() < 0.5 else c2
    new_gamma = gamma1 if random.random() < 0.5 else gamma2
    
    return [new_c, new_gamma, 0]  


In [29]:
def arithmetic_crossover(parent1, parent2, alpha=0.5):
    c1, gamma1, _ = parent1
    c2, gamma2, _ = parent2

    new_c = alpha * c1 + (1 - alpha) * c2
    new_gamma = alpha * gamma1 + (1 - alpha) * gamma2

    return [new_c, new_gamma, 0]


In [30]:
def calculate_crowding_distance(pop):
    # Sort by fitness ascending
    sorted_pop = sorted(pop, key=lambda x: x[2])
    distances = [0] * len(pop)
    for i in range(1, len(pop) - 1):
        distances[i] = abs(sorted_pop[i + 1][2] - sorted_pop[i - 1][2])
    # Boundary individuals get max distance to keep diversity
    distances[0] = distances[1]
    distances[-1] = distances[-2]
    # Return dictionary: {chromosome_tuple: distance} for quick lookup
    dist_dict = {tuple(indiv[:2]): dist for indiv, dist in zip(sorted_pop, distances)}
    return dist_dict

In [None]:
def genetic_algorithm(
    X, y,
    selection_method,
    mutation_method,
    crossover_method,
    n_population=N_POPULATION,
    n_iterations=N_ITERATIONS,
    seed=None
):
    if seed is not None:
        random.seed(seed)
        np.random.seed(seed)

    all_individuals = []
    population = create_population(n_population)
    population = evaluate_population(population, X, y)

    # Save initial generation with diversity
    crowd_distances = calculate_crowding_distance(population)
    for indiv in population:
        all_individuals.append({
            'generation': 0,
            'C': indiv[0],
            'gamma': indiv[1],
            'fitness': indiv[2],
            'diversity': crowd_distances.get((indiv[0], indiv[1]), 0),
            'seed': seed
        })

    best_solution = max(population, key=lambda x: x[2])

    for gen in range(1, n_iterations + 1):
        new_population = []
        while len(new_population) < n_population:
            # Parent selection
            if selection_method == 'tournament':
                parent1 = tournament_selection(population)
                parent2 = tournament_selection(population)
            elif selection_method == 'roulette':
                parent1 = roulette_wheel_selection(population)
                parent2 = roulette_wheel_selection(population)
            elif selection_method == 'rank':
                parent1 = rank_selection(population)
                parent2 = rank_selection(population)
            elif selection_method == 'sus':
                parents = stochastic_universal_sampling(population, 2)
                parent1, parent2 = parents[0], parents[1]
            else:
                raise ValueError("Unknown selection method")

            # Crossover
            if crossover_method == 'uniform':
                child = uniform_crossover(parent1, parent2)
            elif crossover_method == 'arithmetic':
                child = arithmetic_crossover(parent1, parent2)
            else:
                raise ValueError("Unknown crossover method")

            # Mutation
            if mutation_method == 'uniform':
                child = uniform_mutation(child)
            elif mutation_method == 'creep':
                child = creep_mutation(child)
            elif mutation_method == 'gaussian':
                child = gaussian_mutation(child)
            else:
                raise ValueError("Unknown mutation method")

            
            child[2] = fitness(child, X, y)
            new_population.append(child)

        population = new_population

       
        crowd_distances = calculate_crowding_distance(population)

        
        for indiv in population:
            all_individuals.append({
                'generation': gen,
                'C': indiv[0],
                'gamma': indiv[1],
                'fitness': indiv[2],
                'diversity': crowd_distances.get((indiv[0], indiv[1]), 0),
                'seed': seed
            })

        current_best = max(population, key=lambda x: x[2])
        if current_best[2] > best_solution[2]:
            best_solution = current_best

        print(f"Gen {gen} | Best Fitness: {best_solution[2]:.4f}")

    return best_solution, all_individuals


In [32]:
def run_experiments(X, y, n_runs=30):
    seeds = list(range(1000, 1000 + n_runs))
    experiment_data = []
    for run_seed in seeds:
        print(f"\nRun with seed {run_seed}")
        best, all_indivs = genetic_algorithm(
            X, y,
            selection_method='tournament',
            mutation_method='creep',
            crossover_method='uniform',
            n_population=N_POPULATION,
            n_iterations=N_ITERATIONS,
            seed=run_seed
        )
        experiment_data.extend(all_indivs)
    return experiment_data

In [None]:
run_experiments(X_train, y_train)


Run with seed 1000
Gen 1 | Best Fitness: 0.8492
Gen 2 | Best Fitness: 0.8492
Gen 3 | Best Fitness: 0.8540
Gen 4 | Best Fitness: 0.8540
Gen 5 | Best Fitness: 0.8540
Gen 6 | Best Fitness: 0.8540
Gen 7 | Best Fitness: 0.8540
Gen 8 | Best Fitness: 0.8545
Gen 9 | Best Fitness: 0.8545
Gen 10 | Best Fitness: 0.8545

Run with seed 1001
Gen 1 | Best Fitness: 0.8538
Gen 2 | Best Fitness: 0.8538
Gen 3 | Best Fitness: 0.8538
Gen 4 | Best Fitness: 0.8538
Gen 5 | Best Fitness: 0.8538
Gen 6 | Best Fitness: 0.8538
Gen 7 | Best Fitness: 0.8538
Gen 8 | Best Fitness: 0.8538
Gen 9 | Best Fitness: 0.8538
Gen 10 | Best Fitness: 0.8538

Run with seed 1002
Gen 1 | Best Fitness: 0.8417
Gen 2 | Best Fitness: 0.8538
Gen 3 | Best Fitness: 0.8548
Gen 4 | Best Fitness: 0.8548
Gen 5 | Best Fitness: 0.8548
Gen 6 | Best Fitness: 0.8548
Gen 7 | Best Fitness: 0.8548
Gen 8 | Best Fitness: 0.8548
Gen 9 | Best Fitness: 0.8548
Gen 10 | Best Fitness: 0.8548

Run with seed 1003
Gen 1 | Best Fitness: 0.8545
Gen 2 | Best Fitne

In [None]:
run_experiments(X_train, y_train)

In [121]:
def genetic_algorithm_1(X, y):
    all_generations = []
    population = create_population(N_POPULATION)
    population = evaluate_population(population, X, y)
    for individual in population:
        all_generations.append({
            'generation': 0,
            'C': individual[0],
            'gamma': individual[1],
            'fitness': individual[2]
        })
    best_solution = max(population, key=lambda x: x[2])
    
    for iteration in range(N_ITERATIONS):
        new_population = []

        while len(new_population) < N_POPULATION:
            
            parent1 = tournament_selection(population)
            parent2 = tournament_selection(population)

            child = uniform_crossover(parent1, parent2) 

            child = creep_mutation(child)

            child[2] = fitness(child, X, y)

            new_population.append(child)

        population = new_population
        for individual in population:
            all_generations.append({
                'generation': iteration + 1,
                'C': individual[0],
                'gamma': individual[1],
                'fitness': individual[2]
            })

        current_best = max(population, key=lambda x: x[2])
        if current_best[2] > best_solution[2]:
            best_solution = current_best

        print(f"Generation {iteration+1}, Best Accuracy: {best_solution[2]:.4f}")

    return best_solution, all_generations


In [123]:
import pandas as pd

def genetic_algorithm_1(X, y, run_number):
    all_generations = []

    population = create_population(N_POPULATION)
    population = evaluate_population(population, X, y)

    best_solution = max(population, key=lambda x: x[2])

    current_best = max(population, key=lambda x: x[2])
    all_generations.append({
        'run': run_number,
        'generation': 0,
        'C': current_best[0],
        'gamma': current_best[1],
        'fitness': current_best[2]
    })

    for iteration in range(N_ITERATIONS):
        new_population = []

        while len(new_population) < N_POPULATION:
            parent1 = tournament_selection(population)
            parent2 = tournament_selection(population)

            child = uniform_crossover(parent1, parent2) 
            child = creep_mutation(child)
            child[2] = fitness(child, X, y)

            new_population.append(child)

        population = new_population

        current_best = max(population, key=lambda x: x[2])
        if current_best[2] > best_solution[2]:
            best_solution = current_best

        all_generations.append({
            'run': run_number,
            'generation': iteration + 1,
            'C': current_best[0],
            'gamma': current_best[1],
            'fitness': current_best[2]
        })

        print(f"[Run {run_number}] Generation {iteration+1}, Best Accuracy: {current_best[2]:.4f}")

    return best_solution, all_generations


In [108]:
def genetic_algorithm_2(X, y):
    
    population = create_population(N_POPULATION)
    population = evaluate_population(population, X, y)

    best_solution = max(population, key=lambda x: x[2])
    
    for iteration in range(N_ITERATIONS):
        new_population = []

        while len(new_population) < N_POPULATION:
            
            parent1 = roulette_wheel_selection(population)
            parent2 = roulette_wheel_selection(population)

            child = uniform_crossover(parent1, parent2)

            child = gaussian_mutation(child)

            child[2] = fitness(child, X, y)

            new_population.append(child)

        population = new_population

        current_best = max(population, key=lambda x: x[2])
        if current_best[2] > best_solution[2]:
            best_solution = current_best

        print(f"Generation {iteration+1}, Best Accuracy: {best_solution[2]:.4f}")

    return best_solution


In [208]:
def genetic_algorithm_3(X, y):
    
    population = create_population(N_POPULATION)
    population = evaluate_population(population, X, y)

    best_solution = max(population, key=lambda x: x[2])
    
    for iteration in range(N_ITERATIONS):
        new_population = []

        while len(new_population) < N_POPULATION:

            parent1 ,parent2 = stochastic_universal_sampling(population)

            child = uniform_crossover(parent1, parent2)  

            child = creep_mutation(child)

            child[2] = fitness(child, X, y)

            new_population.append(child)

        population = new_population

        current_best = max(population, key=lambda x: x[2])
        if current_best[2] > best_solution[2]:
            best_solution = current_best

        print(f"Generation {iteration+1}, Best Accuracy: {best_solution[2]:.4f}")

    return best_solution


In [110]:
def genetic_algorithm_4(X, y):
    
    population = create_population(N_POPULATION)
    population = evaluate_population(population, X, y)

    best_solution = max(population, key=lambda x: x[2])
    
    for iteration in range(N_ITERATIONS):
        new_population = []

        while len(new_population) < N_POPULATION:

            parent1 = rank_selection(population)
            parent2 = rank_selection(population)

            child = uniform_crossover(parent1, parent2)  

            child = gaussian_mutation(child)

            child[2] = fitness(child, X, y)

            new_population.append(child)

        population = new_population

        current_best = max(population, key=lambda x: x[2])
        if current_best[2] > best_solution[2]:
            best_solution = current_best

        print(f"Generation {iteration+1}, Best Accuracy: {best_solution[2]:.4f}")

    return best_solution


In [203]:
def genetic_algorithm_5(X, y):
    
    population = create_population(N_POPULATION)
    population = evaluate_population(population, X, y)

    best_solution = max(population, key=lambda x: x[2])
    
    for iteration in range(N_ITERATIONS):
        new_population = []

        while len(new_population) < N_POPULATION:

            parent1 = roulette_wheel_selection(population)
            parent2 = rank_selection(population)

            child = uniform_crossover(parent1, parent2)  

            child = gaussian_mutation(child)

            child[2] = fitness(child, X, y)

            new_population.append(child)

        population = new_population

        current_best = max(population, key=lambda x: x[2])
        if current_best[2] > best_solution[2]:
            best_solution = current_best

        print(f"Generation {iteration+1}, Best Accuracy: {best_solution[2]:.4f}")

    return best_solution

In [126]:
all_runs_data = []

for run in range(1, 31):
    _, run_data = genetic_algorithm_1(X_train, y_train, run)
    all_runs_data.extend(run_data)


df = pd.DataFrame(all_runs_data)
df.to_csv("Best_Per_Generation_30_Runs.csv", index=False)

[Run 1] Generation 1, Best Accuracy: 0.8538
[Run 1] Generation 2, Best Accuracy: 0.8538
[Run 1] Generation 3, Best Accuracy: 0.8543
[Run 1] Generation 4, Best Accuracy: 0.8543
[Run 1] Generation 5, Best Accuracy: 0.8543
[Run 2] Generation 1, Best Accuracy: 0.8492
[Run 2] Generation 2, Best Accuracy: 0.8530
[Run 2] Generation 3, Best Accuracy: 0.8557
[Run 2] Generation 4, Best Accuracy: 0.8540
[Run 2] Generation 5, Best Accuracy: 0.8530
[Run 3] Generation 1, Best Accuracy: 0.8538
[Run 3] Generation 2, Best Accuracy: 0.8543
[Run 3] Generation 3, Best Accuracy: 0.8543
[Run 3] Generation 4, Best Accuracy: 0.8550
[Run 3] Generation 5, Best Accuracy: 0.8550
[Run 4] Generation 1, Best Accuracy: 0.8535
[Run 4] Generation 2, Best Accuracy: 0.8538
[Run 4] Generation 3, Best Accuracy: 0.8540
[Run 4] Generation 4, Best Accuracy: 0.8545
[Run 4] Generation 5, Best Accuracy: 0.8545
[Run 5] Generation 1, Best Accuracy: 0.8538
[Run 5] Generation 2, Best Accuracy: 0.8538
[Run 5] Generation 3, Best Accur

In [114]:
best = genetic_algorithm_2(X_train, y_train)
print(best)

Generation 1, Best Accuracy: 0.8432
Generation 2, Best Accuracy: 0.8432
Generation 3, Best Accuracy: 0.8432
Generation 4, Best Accuracy: 0.8432
Generation 5, Best Accuracy: 0.8432
Generation 6, Best Accuracy: 0.8432
Generation 7, Best Accuracy: 0.8432
Generation 8, Best Accuracy: 0.8432
Generation 9, Best Accuracy: 0.8432
Generation 10, Best Accuracy: 0.8432
Generation 11, Best Accuracy: 0.8432
Generation 12, Best Accuracy: 0.8432
Generation 13, Best Accuracy: 0.8432
Generation 14, Best Accuracy: 0.8432
Generation 15, Best Accuracy: 0.8432
Generation 16, Best Accuracy: 0.8432
Generation 17, Best Accuracy: 0.8432
Generation 18, Best Accuracy: 0.8432
Generation 19, Best Accuracy: 0.8432
Generation 20, Best Accuracy: 0.8432
Generation 21, Best Accuracy: 0.8432
Generation 22, Best Accuracy: 0.8432
Generation 23, Best Accuracy: 0.8432
Generation 24, Best Accuracy: 0.8432
Generation 25, Best Accuracy: 0.8432
Generation 26, Best Accuracy: 0.8432
Generation 27, Best Accuracy: 0.8432
Generation

In [209]:
best = genetic_algorithm_3(X_train, y_train)
print(best)

Generation 1, Best Accuracy: 0.8218
Generation 2, Best Accuracy: 0.8218
Generation 3, Best Accuracy: 0.8218
Generation 4, Best Accuracy: 0.8218
Generation 5, Best Accuracy: 0.8218
Generation 6, Best Accuracy: 0.8218
Generation 7, Best Accuracy: 0.8218
Generation 8, Best Accuracy: 0.8218
Generation 9, Best Accuracy: 0.8218
Generation 10, Best Accuracy: 0.8218
Generation 11, Best Accuracy: 0.8218
Generation 12, Best Accuracy: 0.8218
Generation 13, Best Accuracy: 0.8218
Generation 14, Best Accuracy: 0.8218
Generation 15, Best Accuracy: 0.8218
Generation 16, Best Accuracy: 0.8218
Generation 17, Best Accuracy: 0.8218
Generation 18, Best Accuracy: 0.8218


KeyboardInterrupt: 

In [116]:
best = genetic_algorithm_4(X_train, y_train)
print(best)

Generation 1, Best Accuracy: 0.8465
Generation 2, Best Accuracy: 0.8465
Generation 3, Best Accuracy: 0.8465
Generation 4, Best Accuracy: 0.8465
Generation 5, Best Accuracy: 0.8465
Generation 6, Best Accuracy: 0.8465
Generation 7, Best Accuracy: 0.8465
Generation 8, Best Accuracy: 0.8465
Generation 9, Best Accuracy: 0.8465
Generation 10, Best Accuracy: 0.8465
Generation 11, Best Accuracy: 0.8465
Generation 12, Best Accuracy: 0.8465
Generation 13, Best Accuracy: 0.8465
Generation 14, Best Accuracy: 0.8465
Generation 15, Best Accuracy: 0.8465
Generation 16, Best Accuracy: 0.8465
Generation 17, Best Accuracy: 0.8465
Generation 18, Best Accuracy: 0.8465
Generation 19, Best Accuracy: 0.8465
Generation 20, Best Accuracy: 0.8465
Generation 21, Best Accuracy: 0.8465
Generation 22, Best Accuracy: 0.8465
Generation 23, Best Accuracy: 0.8465
Generation 24, Best Accuracy: 0.8465
Generation 25, Best Accuracy: 0.8465
Generation 26, Best Accuracy: 0.8465
Generation 27, Best Accuracy: 0.8465
Generation

In [118]:
best = genetic_algorithm_5(X_train, y_train)
print(best)

Generation 1, Best Accuracy: 0.8497
Generation 2, Best Accuracy: 0.8497
Generation 3, Best Accuracy: 0.8547
Generation 4, Best Accuracy: 0.8547
Generation 5, Best Accuracy: 0.8547
Generation 6, Best Accuracy: 0.8547
Generation 7, Best Accuracy: 0.8547
Generation 8, Best Accuracy: 0.8547
Generation 9, Best Accuracy: 0.8547
Generation 10, Best Accuracy: 0.8547
Generation 11, Best Accuracy: 0.8547
Generation 12, Best Accuracy: 0.8547
Generation 13, Best Accuracy: 0.8547
Generation 14, Best Accuracy: 0.8547
Generation 15, Best Accuracy: 0.8547
Generation 16, Best Accuracy: 0.8547
Generation 17, Best Accuracy: 0.8547
Generation 18, Best Accuracy: 0.8547
Generation 19, Best Accuracy: 0.8547
Generation 20, Best Accuracy: 0.8547
Generation 21, Best Accuracy: 0.8547
Generation 22, Best Accuracy: 0.8547
Generation 23, Best Accuracy: 0.8547
Generation 24, Best Accuracy: 0.8547
Generation 25, Best Accuracy: 0.8547
Generation 26, Best Accuracy: 0.8547
Generation 27, Best Accuracy: 0.8547
Generation