# Static tuning

In [1]:
import glob
import itertools
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
from scipy import stats
import tomli

In [2]:
# Root directory of the experiment data. `read` looks up the
# `<group>/<exp>.txt` results and `<group>/<exp>.toml` config relative to it.
DATA_DIR = Path("data/")

In [11]:
def res2data(where: Path):
    """
    Parses one results file into ``(instance, seed, cost, iters)`` tuples.

    The seed is the integer in the file name (``<seed>.txt``). Only lines
    that contain ``_2_`` or ``_4_`` are parsed. Each of those lines must
    split into exactly five whitespace-separated fields, unpacked as
    instance, feas, cost, iters and time; feas and time are discarded.
    """
    with open(where, "r") as fh:
        lines = fh.readlines()

    seed = int(where.name.removesuffix(".txt"))
    records = []

    for line in lines:
        # Only rows with instance names (we use GH200 and GH400).
        if "_2_" not in line and "_4_" not in line:
            continue

        inst, feas, cost, iters, time = line.strip().split()
        records.append((inst, seed, float(cost), int(iters)))

    return records

def get_size(instance: str):
    """
    Returns the size group of an instance: 200 when its name contains
    ``_2_``, and 400 for every other name.
    """
    if "_2_" in instance:
        return 200

    return 400

def read(group: str, exp: int):
    """
    Loads the results and configuration of one experiment into a data frame.

    Results come from ``DATA_DIR/<group>/<exp>.txt``, one row per parsed
    instance, with ``exp`` and ``size_group`` columns added. Every key in the
    ``[<group>]`` table of ``DATA_DIR/<group>/<exp>.toml`` is then stored as a
    column of its own, holding the same value in every row.

    Returns an empty, column-less frame when the mean cost exceeds 1,000,000,
    which this notebook treats as a sign of infeasible solutions.
    """
    result_files = DATA_DIR.glob(f"{group}/{exp}.txt")
    records = [record for path in result_files for record in res2data(path)]

    df = pd.DataFrame(records, columns=["instance", "seed", "cost", "iters"])
    df["exp"] = exp
    df["size_group"] = df["instance"].apply(get_size)

    config_path = DATA_DIR / f"{group}/{exp}.toml"
    with open(config_path, "rb") as fh:
        config = tomli.load(fh)

    # Attach the parameter values of this experiment to each of its rows.
    settings = config[group]
    for param in settings:
        df[param] = settings[param]

    # Ignore scenarios with infeasible solutions.
    is_infeasible = df["cost"].mean() > 1_000_000
    return pd.DataFrame() if is_infeasible else df

## Population parameters

In [13]:
# Load experiments 1 through 500 of the "population" group. Scenarios that
# `read` rejects come back as empty frames and contribute no rows.
dfs = [read("population", exp) for exp in range(1, 501)]

# Every frame from `read` numbers its rows from zero, so without
# `ignore_index` the combined frame would carry duplicated row labels.
df = pd.concat(dfs, ignore_index=True)
df.head()

data/population/1.txt
data/population/2.txt
data/population/3.txt
data/population/4.txt
data/population/5.txt
data/population/6.txt
data/population/7.txt
data/population/8.txt
data/population/9.txt
data/population/10.txt
data/population/11.txt
data/population/12.txt
data/population/13.txt
data/population/14.txt
data/population/15.txt
data/population/16.txt
data/population/17.txt
data/population/18.txt
data/population/19.txt
data/population/20.txt
data/population/21.txt
data/population/22.txt
data/population/23.txt
data/population/24.txt
data/population/25.txt
data/population/26.txt
data/population/27.txt
data/population/28.txt
data/population/29.txt
data/population/30.txt
data/population/31.txt
data/population/32.txt
data/population/33.txt
data/population/34.txt
data/population/35.txt
data/population/36.txt
data/population/37.txt
data/population/38.txt
data/population/39.txt
data/population/40.txt
data/population/41.txt
data/population/42.txt
data/population/43.txt
data/population/44.t

Unnamed: 0,instance,seed,cost,iters,exp,size_group,min_pop_size,generation_size,nb_elite,nb_close,lb_diversity,ub_diversity
0,C1_2_1,1,26986.0,284,1,200,25,40,4,5,0.1,0.5
1,C1_2_10,1,26557.0,264,1,200,25,40,4,5,0.1,0.5
2,C1_2_2,1,26943.0,309,1,200,25,40,4,5,0.1,0.5
3,C1_2_3,1,26784.0,311,1,200,25,40,4,5,0.1,0.5
4,C1_2_4,1,26487.0,205,1,200,25,40,4,5,0.1,0.5


In [None]:
# Parameter columns of `df` to analyse; these are the keys of the
# `[population]` config table that `read` stores as columns.
fields = [
    "min_pop_size",
    "generation_size",
    "nb_elite",
    "nb_close",
    "lb_diversity",
    "ub_diversity",
]

In [None]:
# One figure per parameter: the mean cost for each value of the parameter,
# with a separate panel per instance size group.
for field in fields:
    mean_cost = df.cost.groupby([df[field], df["size_group"]]).mean()

    # Title and y-label make each figure readable on its own; the x-axis is
    # labelled by pandas with the name of the grouped column.
    mean_cost.unstack().plot(
        subplots=True,
        title=f"Mean cost by {field}",
        ylabel="mean cost",
    )

### Analysis

- `min_pop_size` and `generation_size` are important (which makes a lot of sense).
- `nb_elite` and `nb_close` don't matter too much, but I have chosen bad parameter bounds. (This is due to some error when nb_elite and nb_close are smaller than the population size.)
- `lb_diversity` and `ub_diversity` don't seem to matter.

In [9]:
# Baseline: mean cost of experiment 1, which the printout labels "default".
mean_value = df.loc[df["exp"] == 1, "cost"].mean()
print(f"{1:>3}  ----  {mean_value:.0f} (default)")

for exp in range(2, 501):
    data = df.loc[df["exp"] == exp]

    # Experiments without any rows have nothing to test.
    if len(data) == 0:
        continue

    # One-sided, one-sample t-test: is the mean cost of this experiment
    # below the baseline mean?
    mean = data["cost"].mean()
    test = stats.ttest_1samp(data["cost"], mean_value, alternative="less")

    # NOTE(review): the other sections of this notebook use 0.05 as the
    # significance level; confirm that 0.10 is intended here.
    is_significant = test.pvalue < 0.10
    if is_significant and mean <= mean_value:
        print(f"{exp:>3}  {test.pvalue:.2f}  {mean:.0f}")

NameError: name 'df' is not defined

Let's go for experiment 378.

```
[population]
min_pop_size = 5
generation_size = 3
nb_elite = 2
nb_close = 2
lb_diversity = 0.11698396793587175
ub_diversity = 0.34994989979959923
```

## Neighbourhood parameters

In [10]:
# Load experiments 1 through 500 of the "neighbourhood" group. Scenarios that
# `read` rejects come back as empty frames and contribute no rows.
dfs = [read("neighbourhood", exp) for exp in range(1, 501)]

# Every frame from `read` numbers its rows from zero, so without
# `ignore_index` the combined frame would carry duplicated row labels.
df = pd.concat(dfs, ignore_index=True)
df.head()

Unnamed: 0,instance,seed,cost,iters,exp,size_group,weight_wait_time,weight_time_warp,nb_granular,symmetric_proximity,symmetric_neighbours
0,C1_2_1,1,26986.0,323,1,200,0.2,1.0,20,True,False
1,C1_2_10,1,26419.0,267,1,200,0.2,1.0,20,True,False
2,C1_2_2,1,26943.0,300,1,200,0.2,1.0,20,True,False
3,C1_2_3,1,26776.0,282,1,200,0.2,1.0,20,True,False
4,C1_2_4,1,26322.0,237,1,200,0.2,1.0,20,True,False


In [None]:
# Parameter columns of `df` to analyse; these are the keys of the
# `[neighbourhood]` config table that `read` stores as columns.
fields = [
    "weight_wait_time",
    "weight_time_warp",
    "nb_granular",
    "symmetric_proximity",
    "symmetric_neighbours",
]

In [None]:
# One figure per parameter: the mean cost for each value of the parameter,
# with a separate panel per instance size group.
for field in fields:
    mean_cost = df.cost.groupby([df[field], df["size_group"]]).mean()

    # Title and y-label make each figure readable on its own; the x-axis is
    # labelled by pandas with the name of the grouped column.
    mean_cost.unstack().plot(
        subplots=True,
        title=f"Mean cost by {field}",
        ylabel="mean cost",
    )

In [None]:
# Baseline: mean cost of experiment 1, which the printout labels "default".
mean_value = df.loc[df["exp"] == 1, "cost"].mean()
print(f"{1:>3}  ----  {mean_value:.0f} (default)")

for exp in range(2, 501):
    data = df.loc[df["exp"] == exp]

    # Experiments without any rows have nothing to test.
    if len(data) == 0:
        continue

    # One-sided, one-sample t-test: is the mean cost of this experiment
    # below the baseline mean?
    mean = data["cost"].mean()
    test = stats.ttest_1samp(data["cost"], mean_value, alternative="less")

    is_significant = test.pvalue < 0.05
    if is_significant and mean <= mean_value:
        print(f"{exp:>3}  {test.pvalue:.2f}  {mean:.0f}")

No significant improvements. But it's pretty clear that decreasing the number of neighbours to 20 is beneficial for the 200-customer instances, and doesn't hurt the 400-customer instances.

## Penalty parameters

In [None]:
# Load experiments 1 through 500 of the "penalty" group. Scenarios that
# `read` rejects come back as empty frames and contribute no rows.
dfs = [read("penalty", exp) for exp in range(1, 501)]

# Every frame from `read` numbers its rows from zero, so without
# `ignore_index` the combined frame would carry duplicated row labels.
df = pd.concat(dfs, ignore_index=True)
df.head()

In [None]:
# Parameter columns of `df` to analyse; expected to be the keys of the
# `[penalty]` config table that `read` stores as columns.
fields = [
    "init_capacity_penalty",
    "init_time_warp_penalty",
    "repair_booster",
    "num_registrations_between_penalty_updates",
    "penalty_increase",
    "penalty_decrease",
    "target_feasible",
]

In [None]:
# One figure per parameter: the mean cost for each value of the parameter,
# with a separate panel per instance size group.
for field in fields:
    mean_cost = df.cost.groupby([df[field], df["size_group"]]).mean()

    # Title and y-label make each figure readable on its own; the x-axis is
    # labelled by pandas with the name of the grouped column.
    mean_cost.unstack().plot(
        subplots=True,
        title=f"Mean cost by {field}",
        ylabel="mean cost",
    )

Is there any significant improvement?

In [None]:
# Baseline: mean cost of experiment 1, which the printout labels "default".
mean_value = df.loc[df["exp"] == 1, "cost"].mean()
print(f"{1:>3}  ----  {mean_value:.0f} (default)")

for exp in range(2, 501):
    data = df.loc[df["exp"] == exp]

    # Experiments without any rows have nothing to test.
    if len(data) == 0:
        continue

    # One-sided, one-sample t-test: is the mean cost of this experiment
    # below the baseline mean?
    mean = data["cost"].mean()
    test = stats.ttest_1samp(data["cost"], mean_value, alternative="less")

    is_significant = test.pvalue < 0.05
    if is_significant and mean <= mean_value:
        print(f"{exp:>3}  {test.pvalue:.2f}  {mean:.0f}")

Nothing significant here, so we'll keep the default values.

## Node operators

In [None]:
# Load experiments 1 through 500 of the "node_ops" group. Scenarios that
# `read` rejects come back as empty frames and contribute no rows.
dfs = [read("node_ops", exp) for exp in range(1, 501)]

# Every frame from `read` numbers its rows from zero, so without
# `ignore_index` the combined frame would carry duplicated row labels.
df = pd.concat(dfs, ignore_index=True)
df.head()

In [None]:
# Node operator columns of `df` to analyse; expected to be the keys of the
# `[node_ops]` config table that `read` stores as columns.
fields = [
    "Exchange10",
    "Exchange11",
    "Exchange20",
    "MoveTwoClientsReversed",
    "Exchange21",
    "Exchange22",
    "Exchange30",
    "Exchange31",
    "Exchange32",
    "Exchange33",
    "TwoOpt",
]

In [None]:
# One figure per operator: the mean cost for each value of the operator
# column, with a separate panel per instance size group.
for field in fields:
    mean_cost = df.cost.groupby([df[field], df["size_group"]]).mean()

    # Title and y-label make each figure readable on its own; the x-axis is
    # labelled by pandas with the name of the grouped column.
    mean_cost.unstack().plot(
        subplots=True,
        title=f"Mean cost by {field}",
        ylabel="mean cost",
    )

In [None]:
# Baseline: mean cost of experiment 1, which the printout labels "default".
mean_value = df.loc[df["exp"] == 1, "cost"].mean()
print(f"{1:>3}  ----  {mean_value:.0f} (default)")

for exp in range(2, 501):
    data = df.loc[df["exp"] == exp]

    # Experiments without any rows have nothing to test.
    if len(data) == 0:
        continue

    # One-sided, one-sample t-test: is the mean cost of this experiment
    # below the baseline mean?
    mean = data["cost"].mean()
    test = stats.ttest_1samp(data["cost"], mean_value, alternative="less")

    is_significant = test.pvalue < 0.05
    if is_significant and mean <= mean_value:
        print(f"{exp:>3}  {test.pvalue:.2f}  {mean:.0f}")

None of the scenarios is significant. But only the (1, 0)-exchange (`Exchange10`) and `TwoOpt` seem useful, so I'll just run those.

## Route operators

In [None]:
# Load experiments 1 through 5 of the "route_ops" group. Scenarios that
# `read` rejects come back as empty frames and contribute no rows.
dfs = [read("route_ops", exp) for exp in range(1, 6)]

# Every frame from `read` numbers its rows from zero, so without
# `ignore_index` the combined frame would carry duplicated row labels.
df = pd.concat(dfs, ignore_index=True)
df.head()

In [None]:
# Route operator columns of `df` to analyse; expected to be the keys of the
# `[route_ops]` config table that `read` stores as columns.
fields = [
    "RelocateStar",
    "SwapStar",
]

In [None]:
# One figure per operator: the mean cost for each value of the operator
# column, with a separate panel per instance size group.
for field in fields:
    mean_cost = df.cost.groupby([df[field], df["size_group"]]).mean()

    # Title and y-label make each figure readable on its own; the x-axis is
    # labelled by pandas with the name of the grouped column.
    mean_cost.unstack().plot(
        subplots=True,
        title=f"Mean cost by {field}",
        ylabel="mean cost",
    )

In [None]:
# Baseline: mean cost of experiment 1, which the printout labels "default".
mean_value = df[df.exp == 1].cost.mean()
print(f"{1:>3}  ----  {mean_value:.0f} (default)")

# Only experiments 1 through 5 are loaded for the route operators, so the
# comparison covers experiments 2 through 5 rather than scanning up to 500.
for exp in range(2, 6):
    data = df[df.exp == exp]

    # Experiments without any rows have nothing to test.
    if data.empty:
        continue

    # One-sided, one-sample t-test: is the mean cost of this experiment
    # below the baseline mean?
    test = stats.ttest_1samp(data.cost, mean_value, alternative='less')
    mean = data.cost.mean()

    # With so few experiments every result is printed, significant or not.
    print(f"{exp:>3}  {test.pvalue:.2f}  {mean:.0f}")