# Genetic Algorithm

## Hyperparams Optimization

### Setup
Change directory to the root folder to be able to import modules.

In [None]:
import os

# "__file__" is intentionally a string literal here: realpath() resolves the
# relative name against the current working directory, so THIS_FOLDER ends up
# being the directory the notebook is executed from.
_here, _ = os.path.split(os.path.realpath("__file__"))
THIS_FOLDER = _here
# The project root is one level above; move there so package imports resolve.
# NOTE: re-running this cell moves up one more level each time.
ROOT_FOLDER = os.path.dirname(THIS_FOLDER)
os.chdir(ROOT_FOLDER)

### Load Players Data

In [None]:
from tests.helper import load_players

# Player set shared by every cell below (algo_factory reads it as a global).
players = load_players()
print(f"There are {len(players)} players in this set.")

### Metric
Hyperparameter optimization needs a metric that reflects the goal.
The goal is to have the algorithm run long enough to find the best result, but no longer than necessary.

The 3D plot confirms that the metric is higher (darker) where points are higher, variance is lower and time elapsed is lower.

In [None]:
import itertools

import numpy as np
import pandas as pd
import plotly.express as px

MAX_POINTS_MEAN = 125  # Approx. value extracted from max_value.ipynb
MAX_STD_MEAN = 1
MAX_TIME_ELAPSED = 1   # Seconds


def metric(points_mean, points_std, time_elapsed):
    """Optimization metric (higher is better).

    The score is the product of three factors:

    * ``sinh(points_mean / MAX_POINTS_MEAN)`` - grows with the mean points;
    * ``tanh(MAX_STD_MEAN / points_std)`` - grows as the std shrinks;
    * ``tanh(MAX_TIME_ELAPSED / time_elapsed)`` - grows as the time shrinks.

    Args:
        points_mean: Mean of the points (scalar or array-like).
        points_std: Standard deviation of the points (scalar or array-like).
        time_elapsed: Time elapsed per run, in seconds (scalar or array-like).

    Returns:
        The element-wise product of the three factors: a NumPy scalar for
        scalar inputs, an array for array-like inputs.
    """
    points_mean = np.asarray(points_mean, dtype=float)
    points_std = np.asarray(points_std, dtype=float)
    time_elapsed = np.asarray(time_elapsed, dtype=float)

    # A zero std (all runs equal) or a zero elapsed time is the best possible
    # case for that factor: the division yields inf and tanh(inf) == 1.
    # Working on float arrays avoids ZeroDivisionError for plain Python zeros.
    with np.errstate(divide="ignore"):
        std_factor = np.tanh(MAX_STD_MEAN / points_std)
        time_factor = np.tanh(MAX_TIME_ELAPSED / time_elapsed)

    # np.prod replaces np.product, which was removed in NumPy 2.0.
    return np.prod(
        (
            np.sinh(points_mean / MAX_POINTS_MEAN),
            std_factor,
            time_factor,
        ),
        axis=0,
    )


# Sample every axis from its maximum down towards zero. endpoint=False keeps
# zero itself out of the grid.
points_mean_arr, points_std_arr, time_elapsed_arr = (
    np.linspace(start=upper, stop=0, num=9, endpoint=False)
    for upper in (MAX_POINTS_MEAN, MAX_STD_MEAN, MAX_TIME_ELAPSED)
)

# One row per combination of the three axes.
data = list(itertools.product(points_mean_arr, points_std_arr, time_elapsed_arr))
df = pd.DataFrame(data, columns=["Points Mean", "Points Std", "Time Elapsed"])
df["Metric"] = metric(df["Points Mean"], df["Points Std"], df["Time Elapsed"])

# Color encodes the metric value of each grid point.
fig = px.scatter_3d(
    df,
    x="Points Mean",
    y="Points Std",
    z="Time Elapsed",
    color="Metric",
    color_continuous_scale=px.colors.sequential.Oranges,
    template="plotly_white",
)
fig.update_traces(marker={"size": 15, "line": {"width": 0}})

# Hide the background panes of the three axes.
no_background = {"showbackground": False}
fig.update_layout(
    scene={
        "xaxis": dict(no_background),
        "yaxis": dict(no_background),
        "zaxis": dict(no_background),
    },
)
fig.show()


### Tuning

In [None]:
import time

import optuna
import optuna.logging

from cartola_draft import Scheme
from cartola_draft.algorithm.genetic import Genetic
from tests.helper import SCHEMES_COUNTING

# Silence optuna messages below the ERROR level.
optuna.logging.set_verbosity(optuna.logging.ERROR)

# Arguments passed to every algo.draft() call below.
BUDGET = 50
SCHEME = Scheme(SCHEMES_COUNTING[433])  # presumably the 4-3-3 formation - TODO confirm


def algo_factory(
    n_generations,
    n_individuals,
    tournament_size_ratio,
    n_tournament_winners_ratio,
    max_n_mutations,
):
    """Create a genetic algorithm instance.

    The two ratio arguments are converted into absolute counts:

    * ``tournament_size`` is ``tournament_size_ratio * n_individuals``,
      rounded and clamped to ``[2, n_individuals - 1]``;
    * ``n_tournament_winners`` is ``n_tournament_winners_ratio *
      tournament_size``, rounded and clamped to ``[1, tournament_size]``.

    The remaining arguments are forwarded to ``Genetic`` unchanged, together
    with the global ``players``.
    """

    def scaled_count(ratio, base, lowest, highest):
        # Scale, round to the nearest whole number and keep it within bounds.
        return int(np.clip(round(ratio * base, 0), lowest, highest))

    tournament_size = scaled_count(
        tournament_size_ratio, n_individuals, 2, n_individuals - 1
    )
    n_tournament_winners = scaled_count(
        n_tournament_winners_ratio, tournament_size, 1, tournament_size
    )

    return Genetic(
        players=players,
        n_generations=n_generations,
        n_individuals=n_individuals,
        tournament_size=tournament_size,
        n_tournament_winners=n_tournament_winners,
        max_n_mutations=max_n_mutations,
    )


def objective(trial):
    """Function to be optimized by optuna.

    Samples the genetic algorithm hyperparameters from ``trial``, builds the
    algorithm with ``algo_factory``, drafts ``n_times`` line-ups and scores
    them with ``metric``.

    Args:
        trial: The optuna trial used to suggest the hyperparameter values.

    Returns:
        The ``metric`` value computed from the mean and standard deviation of
        the drafted points and the average time per draft, in seconds.
    """

    n_generations = trial.suggest_int("n_generations", 8, 512)
    n_individuals = trial.suggest_int("n_individuals", 8, 512)
    tournament_size_ratio = trial.suggest_float("tournament_size_ratio", 0, 1)
    n_tournament_winners_ratio = trial.suggest_float("n_tournament_winners_ratio", 0, 1)
    max_n_mutations = trial.suggest_int("max_n_mutations", 1, 12)

    algo = algo_factory(
        n_generations,
        n_individuals,
        tournament_size_ratio,
        n_tournament_winners_ratio,
        max_n_mutations,
    )

    n_times = 10

    # Repeat the draft several times so that run-to-run variance does not
    # bias the final decision.
    # perf_counter() is monotonic and high-resolution, which makes it the
    # right clock for timing short runs (time.time() can jump or tick coarsely).
    start = time.perf_counter()
    results = [algo.draft(BUDGET, SCHEME).points for _ in range(n_times)]
    end = time.perf_counter()

    # Average time per draft, in seconds.
    time_elapsed = (end - start) / n_times

    return metric(np.mean(results), np.std(results), time_elapsed)


# The study maximizes the value returned by objective().
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=500, show_progress_bar=True)

# Last expression of the cell, so its value is shown as the cell output.
study.best_params

In [None]:
# Rebuild the algorithm with the best hyperparameters found by the study.
# The keys of best_params are the names given to trial.suggest_*() in
# objective(), which match algo_factory's parameter names one to one.
algo = algo_factory(**study.best_params)

# Time a single draft; perf_counter() is the monotonic, high-resolution clock
# meant for measuring short durations.
start = time.perf_counter()
line_up = algo.draft(BUDGET, SCHEME)
end = time.perf_counter()
print(f"Points = {line_up.points}")
print(f"Price = {line_up.price}")
print(f"Time = {end - start}")

### Analyze Results

The next plot is used to check whether there were enough trials for the objective value to converge.

In [None]:
import optuna.visualization

# Optimization history of the study, used to judge convergence.
fig = optuna.visualization.plot_optimization_history(study)
fig.show()

The importances plot helps to understand which params really matter.

In [None]:
# Relative importance of each hyperparameter in the study.
fig = optuna.visualization.plot_param_importances(study)
fig.show()

The parallel coordinates plot can be confusing at first glance, but it is really helpful to quickly visualize whether a param works better with higher or lower values.

In [None]:
# Last expression of the cell, so the returned figure is shown as the cell output.
optuna.visualization.plot_parallel_coordinate(study)