# Imports

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
import glob
import graphical_sampling as gs
import pandas as pd
import numpy as np
import itertools
from tqdm import tqdm
from package_sampling.utils import inclusion_probabilities



# Loading and Determining Population

In [3]:
# Load every population CSV found in DATA_DIR.
#
# File names are expected to look like "<coords>_<probs>[_...].csv". The first
# name part selects the population ('clust' is spelled out as 'cluster'); the
# second selects the probability scheme ('eq' -> 'equal', anything else ->
# 'unequal'). In each file the first line is skipped (presumably a header),
# the first two columns are read as coordinates and the last column as
# probabilities.
DATA_DIR = "populations"

# glob returns paths in an arbitrary, filesystem-dependent order. Sorting keeps
# the dict key order -- and which file's coordinates end up in coords_dict when
# several files share a population name -- identical from run to run.
csv_paths = sorted(glob.glob(os.path.join(DATA_DIR, "*.csv")))
if not csv_paths:
    # Fail here with a clear message instead of a KeyError further down.
    raise FileNotFoundError(f"No CSV files found in {DATA_DIR!r}")

coords_dict = {}  # population name -> coordinate columns
probs_dict = {}   # population name -> {'equal' | 'unequal' -> probability column}

for fp in csv_paths:
    name = os.path.splitext(os.path.basename(fp))[0]
    name_parts = name.split("_")
    if len(name_parts) < 2:
        raise ValueError(
            f"Cannot parse {fp!r}: expected a file name like '<coords>_<probs>.csv'"
        )
    coord_name, prob_name = name_parts[:2]
    coord_name = 'cluster' if coord_name == 'clust' else coord_name
    prob_name = 'equal' if prob_name == 'eq' else 'unequal'

    # ndmin=2 keeps the array two-dimensional even when the file holds a single
    # data row, so the column slicing below always works.
    data = np.loadtxt(fp, delimiter=",", skiprows=1, ndmin=2)
    coords = data[:, :2]
    probs = data[:, -1]

    # Overwritten for every file of the same population: the last file wins.
    coords_dict[coord_name] = coords
    probs_dict.setdefault(coord_name, {})[prob_name] = probs

print(coords_dict.keys())
print(probs_dict.keys())
print(probs_dict['random'].keys())

dict_keys(['swiss', 'cluster', 'meuse', 'random', 'grid'])
dict_keys(['swiss', 'cluster', 'meuse', 'random', 'grid'])
dict_keys(['equal', 'unequal'])


In [24]:
# Study setup: pick one loaded population and one probability scheme, then
# build the gs.Population that the following cells work on.
# NOTE(review): N is not used in the cells shown here -- presumably the
# population size; confirm it matches len(coords).
N, n = 100, 8
population_key, scheme_key = 'random', 'equal'

coords = coords_dict[population_key]
probs = probs_dict[population_key][scheme_key]

# The population is built from probabilities derived from `probs` and the
# sample size n, not from the raw column.
modified_probs = inclusion_probabilities(probs, n=n)
pop = gs.Population(coords, modified_probs)

# Building Initial Designs

In [25]:
# Ordering names tried below as both `units_order` and `zones_order` of
# gs.sampling.KMeansSampler (every pair of entries is evaluated).
orders = [
    "lexico-yx", "lexico-xy", "random",
    "angle_0", "distance_0", "projection",
    "center", "spiral", "max",
    "snake", "hilbert",
]

In [26]:
# Build the initial designs: for every (units_order, zones_order) pair, keep
# the best of `num_trials` samplers for each zone configuration, so that two
# designs are appended per pair (first 'sweep', then 'cluster').
num_trials = 5
combines = list(itertools.product(orders, orders))


def _best_sampler(n_zones, zone_builder, units_order, zones_order, trials):
    """Return the lowest-scoring KMeansSampler out of `trials` attempts.

    Each attempt builds a fresh ``gs.sampling.KMeansSampler`` on the notebook's
    global ``pop`` with sample size ``n`` and is scored once with
    ``expected_moran_score()``; a lower score is treated as better.

    Parameters
    ----------
    n_zones, zone_builder, units_order, zones_order
        Forwarded unchanged to ``gs.sampling.KMeansSampler``.
    trials : int
        Number of samplers to build and compare.

    Returns
    -------
    The sampler with the smallest score, or ``None`` when no attempt scored
    below ``np.inf`` (for example when ``trials`` is 0).
    """
    # The search state starts fresh on every call, so the winner for one zone
    # configuration can never leak into the result for another one.
    best, best_score = None, np.inf
    for _ in range(trials):
        ks = gs.sampling.KMeansSampler(
            population=pop,
            n=n,
            n_zones=n_zones,
            zone_builder=zone_builder,
            units_order=units_order,
            zones_order=zones_order,
            split_size=0.001,
        )
        # Score once and reuse the value for both the comparison and the record.
        score = ks.expected_moran_score()
        if score < best_score:
            best, best_score = ks, score
    return best


# (n_zones, zone_builder) pairs, in the order their designs are appended.
zone_configs = (((2, 2), 'sweep'), (4, 'cluster'))

initial_designs = []
for units_order, zones_order in tqdm(combines, desc="Generating initial designs", total=len(combines), unit="orders"):
    for n_zones, zone_builder in zone_configs:
        best = _best_sampler(n_zones, zone_builder, units_order, zones_order, num_trials)
        initial_designs.append(gs.NewDesign(best))


Generating initial designs: 100%|██████████| 121/121 [11:51<00:00,  5.88s/orders]


# Run

In [27]:
# Criteria object that is handed to gs.search.AStar in the next cell.
moran_criteria = gs.criteria.MoranCriteria()

In [28]:
# Build the search object from all initial designs and the criteria object.
# As the cell output shows, construction reports the best initial criteria value.
astar = gs.search.AStar(initial_designs, moran_criteria)

best initial criteria value -0.32535592415203857


In [29]:
# Settings for the search run, gathered in one place so they are easy to tweak.
# NOTE(review): 'None' is passed as a string, not the None object -- confirm
# that this is what astar.run expects.
run_settings = dict(
    max_iterations=1000,
    num_new_nodes=5,
    max_open_set_size=1000,
    n_clusters_to_change_order_zone='None',
    n_clusters_to_change_order_units='all',
    n_zones_to_change_order_units='all',
    n_changes_in_order_of_units=1,
    n_changes_in_order_of_zones=1,
)

# Kept as the last expression so the cell still displays the call's result.
astar.run(**run_settings)

Criteria of current node: -0.32535592415203857

New best criteria value: -0.3316931945159151

New best criteria value: -0.33344992986895167
Criteria of current node: -0.33344992986895167
Criteria of current node: -0.3316931945159151
Criteria of current node: -0.32535592415203857
Criteria of current node: -0.3210586564780824
Criteria of current node: -0.3210586564780824
Criteria of current node: -0.3183134272018159
Criteria of current node: -0.31743199685281154
Criteria of current node: -0.31743199685281154
Criteria of current node: -0.330943574148691
Criteria of current node: -0.323545439385002
Criteria of current node: -0.31829298078432594
Criteria of current node: -0.31450180722488164
Criteria of current node: -0.31450180722488164
Criteria of current node: -0.3322427619948967
Criteria of current node: -0.32570320030533506
Criteria of current node: -0.311146885984879
Criteria of current node: -0.30863330212043727
Criteria of current node: -0.30829040692874854
Criteria of current node:

1000

In [30]:
# Display the best criteria value held by the search object after the run.
astar.best_criteria_value

-0.40223063284406996