# Running Hill Climbing

In [1]:
import pandas as pd
import numpy as np
import random
from copy import deepcopy
from itertools import product
import os
import json

In [2]:
import sys

sys.path.append("..")

from functions.utils import *
from functions.crossover import *
from functions.selection_algos import *
from functions.mutations import *
from functions.algorithms import *
from functions.solutions import *

In [3]:
# Load the seating score table; the first CSV column becomes the row index.
# NOTE(review): presumably a guest-by-guest relationship matrix — confirm against the data file.
scores = pd.read_csv("data/seating_data(in).csv", index_col=0)
# NumPy form of the table; this is what gets handed to Wedding_HC_Solution as `scores`.
scores_array = scores.to_numpy()

> To compare algorithms we want to run about 10_000 fitness evaluations (FE). <br>
In every iteration the HC evaluates 1792 neighbors, so we allow a maximum of 6 iterations (6*1792 = 10_752 FE).

> We will also run for 100 iterations to see the evolution of the model

In [4]:
# Iteration budgets to evaluate: 6 stays near the ~10_000 fitness-evaluation
# budget used to compare algorithms, 100 shows the longer-run evolution.
max_iters = [6, 100]
# Neighborhood operators that define the second axis of the config grid.
# NOTE(review): the run loop only records the function's __name__ in the
# results; it is not passed to hill_climbing, whose neighborhood is built into
# Wedding_HC_Solution.
neighbor_ops = [swap_mutation]

In [5]:
# Location of the accumulated hill-climbing results (one row per configuration).
results_file = "results/hc_results.csv"

# Start from "nothing tested yet" and only override when a results file is
# already on disk, so previously evaluated configurations can be skipped.
df_existing = pd.DataFrame()
tested_configs = set()

if os.path.exists(results_file):
    df_existing = pd.read_csv(results_file)
    # Each stored row is identified by its (max_iter, neighbor name) pair.
    tested_configs = {
        (record["max_iter"], record["neighbor"])
        for record in df_existing.to_dict("records")
    }

In [6]:
# Run every not-yet-tested (max_iter, neighbor) configuration: repeat hill
# climbing N_RUNS times from fresh solutions, summarise the runs, and append
# one row per configuration to `results_file`.
N_RUNS = 100  # independent hill-climbing runs per configuration

# Make sure the output directory exists *before* any expensive work, so a
# missing folder cannot discard a finished batch of runs at write time.
results_dir = os.path.dirname(results_file)
if results_dir:
    os.makedirs(results_dir, exist_ok=True)

for max_iter, neighbor_fn in product(max_iters, neighbor_ops):
    # neighbor_fn is only used as a label here; it is not passed to hill_climbing.
    config_key = (max_iter, neighbor_fn.__name__)

    if config_key in tested_configs:
        print(f"Skipping tested config: {config_key}")
        continue

    print(f"Running HC with config: {config_key}")
    fitnesses, conv_iters, conv_times, fitness_hists = [], [], [], []

    for _ in range(N_RUNS):
        sol = Wedding_HC_Solution(scores=scores_array)

        # Exhaustive neighborhood is built into Wedding_HC_Solution
        best_sol, fitness_hist, conv_iter, conv_time = hill_climbing(
            initial_solution=sol, max_iter=max_iter, maximization=True, verbose=False
        )

        fitnesses.append(best_sol.fitness())
        fitness_hists.append(fitness_hist)
        conv_iters.append(conv_iter)
        conv_times.append(conv_time)

    avg_fit = round(np.mean(fitnesses), 2)
    std_fit = round(np.std(fitnesses), 2)
    avg_iter = round(np.mean(conv_iters), 2)
    avg_time = round(np.mean(conv_times), 2)

    # Element-wise median across runs.
    # NOTE(review): assumes every run returns a fitness history of the same
    # length — confirm against hill_climbing.
    fitness_hists_median = np.median(fitness_hists, axis=0)

    print(
        f"Avg fitness: {avg_fit}, Std: {std_fit}, Iter: {avg_iter}, Time: {avg_time}s"
    )

    df_new = pd.DataFrame(
        [
            {
                "max_iter": max_iter,
                "neighbor": neighbor_fn.__name__,
                "avg_fitness": avg_fit,
                "std_fitness": std_fit,
                "avg_iter": avg_iter,
                "avg_time": avg_time,
                "fitness_scores": json.dumps([float(f) for f in fitnesses]),
                "fitness_hists": json.dumps([float(f) for f in fitness_hists_median]),
            }
        ]
    )

    # Append to the results file; the header is written only when the file is
    # being created by this call.
    write_header = not os.path.exists(results_file)
    df_new.to_csv(results_file, mode="a", index=False, header=write_header)

    # Record the config as done so re-running this cell in the same kernel
    # does not repeat the runs or append a duplicate row.
    tested_configs.add(config_key)


Skipping tested config: (6, 'swap_mutation')
Skipping tested config: (100, 'swap_mutation')
