In [10]:
import mlrose_hiive as mlrose
import pandas as pd
import numpy as np
import joblib
from tqdm import tqdm
import time

In [2]:
import sklearn

In [3]:
import matplotlib.pyplot as plt
import seaborn as sns

In [4]:
from mlrose_hiive.fitness import MaxKColor
from mlrose_hiive.generators import MaxKColorGenerator

In [6]:
from IPython.core.display import display, HTML 

#### define maxkcolors with generator set - same initial state seed, same size for nodes

In [7]:
maxKColor_problem_100 = MaxKColorGenerator.generate(seed=428, number_of_nodes=100, maximize=True)
maxKColor_problem_50 = MaxKColorGenerator.generate(seed=428, number_of_nodes=50, maximize=True)
maxKColor_problem_25 = MaxKColorGenerator.generate(seed=428, number_of_nodes=25, maximize=True)

#### run SA, GA and RHC with seed=55 for apples-to-apples comparison

## Simulated Annealing (SA)

In [8]:
from mlrose_hiive.runners import SARunner

#### use SARunner class to determine ideal temperature for nodes = 50

In [11]:
sa_runner = mlrose.SARunner(problem=maxkcolor_problem_50,
                            experiment_name="SA_maxKColors50_Experiment",
                            output_directory=None,  
                            seed=55,
                            iteration_list=2**np.arange(12),  
                            max_attempts=200,
                            temperature_list=[0.1, 0.5, 0.75, 1.0, 2.0, 5.0, 10.0, 25.0, 50.0, 75.0, 100.0, 250.0, 500.0, 750.0, 1000.0],
                            decay_list=[mlrose_hiive.GeomDecay])
# sa_runner.run()

# Run the experiment
sa_run_stats, sa_run_curves = sa_runner.run()
sa_run_stats

NameError: name 'mlrose_hiive' is not defined

In [None]:
sa_run_curves.describe()

In [None]:
sa_run_curves.to_csv('sa_maxKColor50_temp_results.csv', index=False)

In [None]:
def extract_initial_temperature(temp):
    """Extracts initial temperature from a GeomDecay object or returns the numeric value."""
    if isinstance(temp, str) and 'GeomDecay' in temp:
        # Assuming the temperature is embedded in a string like 'GeomDecay(init_temp=10.0)'
        import re
        match = re.search(r'init_temp=([\d.]+)', temp)
        if match:
            return float(match.group(1))
    elif isinstance(temp, float):
        return temp
    else:
        raise ValueError(f"Unexpected temperature format: {temp}")

def plot_fitness_iterations(csv_path):
    # Load the CSV file
    df_temp_size100 = pd.read_csv('sa_maxKColor50_temp_results.csv')
    
    # Extract and handle temperature values correctly
    df_temp_size100['Temperature'] = df_temp_size100['Temperature'].apply(extract_initial_temperature)
    
    # Plot fitness over iterations for each temperature
    plt.figure(figsize=(12, 8))
    
    temperatures = df_temp_size100['Temperature'].unique()
    
    for temp in temperatures:
        temp_df = df_temp_size100[df_temp_size100['Temperature'] == temp]
        plt.plot(temp_df['Iteration'], temp_df['Fitness'], label=f'Temperature {temp}')
    
    plt.xlabel('Iterations')
    plt.ylabel('Fitness')
    plt.title('Max K Colors(SA): Fitness vs Iterations for Different Temperatures (Problem Size=50)')
    plt.legend(title='Temperature')
    plt.grid(True)
    plt.show()

# Call the function with the CSV file path
plot_fitness_iterations('/mnt/data/sa_maxKColor50_temp_results.csv')

In [None]:
df_temp_size50 = pd.read_csv('sa_maxKColor50_temp_results.csv')

best_fitness = df_temp_size50['Fitness'].max()
best_runs = df_temp_size50[df_temp_size50['Fitness'] == best_fitness]

HTML(best_runs.to_html())

In [None]:
maximum_evaluations = best_runs['FEvals'].max()

best_curve_run = best_runs[best_runs['FEvals'] == maximum_evaluations]
HTML(best_curve_run.to_html())

Best initial temperature for a problem size of 50 is either 750 or 1000. Either option is fine because it returns the same fitness and FEval

#### determine ideal temp for nodes=100

In [None]:
sa_runner = mlrose.SARunner(problem=maxKColor_problem_100,
                            experiment_name="SA_maxKColors100_Experiment",
                            output_directory=None,  
                            seed=55,
                            iteration_list=2**np.arange(12),  
                            max_attempts=200,
                            temperature_list=[0.1, 0.5, 0.75, 1.0, 2.0, 5.0, 10.0, 25.0, 50.0, 75.0, 100.0, 250.0, 500.0, 750.0, 1000.0],
                            decay_list=[mlrose_hiive.GeomDecay])
# sa_runner.run()

# Run the experiment
sa_run_stats, sa_run_curves = sa_runner.run()
sa_run_stats

In [None]:
sa_run_curves.describe()

In [None]:
sa_run_curves.to_csv('sa_maxKColor100_temp_results.csv', index=False)

In [None]:
def extract_initial_temperature(temp):
    """Extracts initial temperature from a GeomDecay object or returns the numeric value."""
    if isinstance(temp, str) and 'GeomDecay' in temp:
        # Assuming the temperature is embedded in a string like 'GeomDecay(init_temp=10.0)'
        import re
        match = re.search(r'init_temp=([\d.]+)', temp)
        if match:
            return float(match.group(1))
    elif isinstance(temp, float):
        return temp
    else:
        raise ValueError(f"Unexpected temperature format: {temp}")

def plot_fitness_iterations(csv_path):
    # Load the CSV file
    df_temp_size100 = pd.read_csv('sa_maxKColor100_temp_results.csv')
    
    # Extract and handle temperature values correctly
    df_temp_size100['Temperature'] = df_temp_size100['Temperature'].apply(extract_initial_temperature)
    
    # Plot fitness over iterations for each temperature
    plt.figure(figsize=(12, 8))
    
    temperatures = df_temp_size100['Temperature'].unique()
    
    for temp in temperatures:
        temp_df = df_temp_size100[df_temp_size100['Temperature'] == temp]
        plt.plot(temp_df['Iteration'], temp_df['Fitness'], label=f'Temperature {temp}')
    
    plt.xlabel('Iterations')
    plt.ylabel('Fitness')
    plt.title('Max K Colors(SA): Fitness vs Iterations for Different Temperatures (Problem Size=100)')
    plt.legend(title='Temperature')
    plt.grid(True)
    plt.show()

# Call the function with the CSV file path
plot_fitness_iterations('/mnt/data/sa_maxKColor100_temp_results.csv')

In [None]:
df_temp_size50 = pd.read_csv('sa_maxKColor100_temp_results.csv')

best_fitness = df_temp_size50['Fitness'].max()
best_runs = df_temp_size50[df_temp_size50['Fitness'] == best_fitness]

HTML(best_runs.to_html())

In [None]:
maximum_evaluations = best_runs['FEvals'].max()

best_curve_run = best_runs[best_runs['FEvals'] == maximum_evaluations]
HTML(best_curve_run.to_html())

Best initial temperature for a problem size of 50 is either 750 or 1000. Either option is fine because it returns the same fitness and FEval

#### determine ideal temp for nodes=25

In [None]:
sa_runner = mlrose.SARunner(problem=maxKColor_problem_25,
                            experiment_name="SA_maxKColors25_Experiment",
                            output_directory=None,  
                            seed=55,
                            iteration_list=2**np.arange(12),  
                            max_attempts=200,
                            temperature_list=[0.1, 0.5, 0.75, 1.0, 2.0, 5.0, 10.0, 25.0, 50.0, 75.0, 100.0, 250.0, 500.0, 750.0, 1000.0],
                            decay_list=[mlrose_hiive.GeomDecay])
# sa_runner.run()

# Run the experiment
sa_run_stats, sa_run_curves = sa_runner.run()
sa_run_stats

In [None]:
sa_run_curves.describe()

In [None]:
sa_run_curves.to_csv('sa_maxKColor25_temp_results.csv', index=False)

In [None]:
def extract_initial_temperature(temp):
    """Extracts initial temperature from a GeomDecay object or returns the numeric value."""
    if isinstance(temp, str) and 'GeomDecay' in temp:
        # Assuming the temperature is embedded in a string like 'GeomDecay(init_temp=10.0)'
        import re
        match = re.search(r'init_temp=([\d.]+)', temp)
        if match:
            return float(match.group(1))
    elif isinstance(temp, float):
        return temp
    else:
        raise ValueError(f"Unexpected temperature format: {temp}")

def plot_fitness_iterations(csv_path):
    # Load the CSV file
    df_temp_size100 = pd.read_csv('sa_maxKColor25_temp_results.csv')
    
    # Extract and handle temperature values correctly
    df_temp_size100['Temperature'] = df_temp_size100['Temperature'].apply(extract_initial_temperature)
    
    # Plot fitness over iterations for each temperature
    plt.figure(figsize=(12, 8))
    
    temperatures = df_temp_size100['Temperature'].unique()
    
    for temp in temperatures:
        temp_df = df_temp_size100[df_temp_size100['Temperature'] == temp]
        plt.plot(temp_df['Iteration'], temp_df['Fitness'], label=f'Temperature {temp}')
    
    plt.xlabel('Iterations')
    plt.ylabel('Fitness')
    plt.title('Max K Colors(SA): Fitness vs Iterations for Different Temperatures (Problem Size=25)')
    plt.legend(title='Temperature')
    plt.grid(True)
    plt.show()

# Call the function with the CSV file path
plot_fitness_iterations('/mnt/data/sa_maxKColor25_temp_results.csv')

In [None]:
df_temp_size50 = pd.read_csv('sa_maxKColor25_temp_results.csv')

best_fitness = df_temp_size50['Fitness'].max()
best_runs = df_temp_size50[df_temp_size50['Fitness'] == best_fitness]

HTML(best_runs.to_html())

In [None]:
maximum_evaluations = best_runs['FEvals'].max()

best_curve_run = best_runs[best_runs['FEvals'] == maximum_evaluations]
HTML(best_curve_run.to_html())

Best initial temperature for a problem size of 50 is either 750 or 1000. Either option is fine because it returns the same fitness and FEval

## Genetic Algorithms (GA)

In [None]:
from mlrose_hiive.runners import GARunner

## Randomized Hill Climbing (RHC)

In [None]:
from mlrose_hiive.runners import RHCRunner