# Imports

In [1]:
# Enable IPython's autoreload extension in mode 2 so that imported modules are
# re-imported before each cell executes; edits to the sampling packages are
# then picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import glob
import graphical_sampling as gs
import pandas as pd
import numpy as np
import itertools
from tqdm import tqdm
from package_sampling.utils import inclusion_probabilities



# Loading and Determining Population

In [3]:
DATA_DIR = "populations"
# glob yields paths in a filesystem-dependent order. Sorting makes the
# dictionaries below fill in the same order on every machine and run, which
# matters because coords_dict keeps the coordinates of the LAST file read for
# each population.
csv_paths = sorted(glob.glob(os.path.join(DATA_DIR, "*.csv")))

# population name -> array with the first two CSV columns (used as coordinates)
coords_dict = {}
# population name -> {'equal' | 'unequal' -> array with the last CSV column}
probs_dict = {}

for fp in csv_paths:
    # File stems are expected to look like "<population>_<prob kind>[_...]";
    # a stem without an underscore makes the unpacking below raise ValueError.
    name = os.path.splitext(os.path.basename(fp))[0]
    # skiprows=1 drops the header line of each CSV.
    data = np.loadtxt(fp, delimiter=",", skiprows=1)
    coords = data[:, :2]
    probs  = data[:, -1]

    coord_name, prob_name, *rest = name.split("_")
    # Normalise the abbreviations used in the file names.
    coord_name = 'cluster' if coord_name == 'clust' else coord_name
    # Anything that is not tagged 'eq' is treated as the unequal scheme.
    prob_name = 'equal' if prob_name == 'eq' else 'unequal'

    coords_dict[coord_name] = coords
    probs_dict.setdefault(coord_name, {})[prob_name] = probs

print(coords_dict.keys())
print(probs_dict.keys())
print(probs_dict['random'].keys())

dict_keys(['swiss', 'cluster', 'meuse', 'random', 'grid'])
dict_keys(['swiss', 'cluster', 'meuse', 'random', 'grid'])
dict_keys(['equal', 'unequal'])


In [4]:
# Experiment settings. n is the sample size handed to the sampler below.
# NOTE(review): N is not referenced again in the visible cells; presumably it
# records the population size -- confirm or drop.
N, n = 100, 4

# Keys into coords_dict / probs_dict selecting the population under study.
pop_str, prob_str = 'random', 'equal'

coords = coords_dict[pop_str]
probs = probs_dict[pop_str][prob_str]

# Convert the raw per-unit values into inclusion probabilities for a sample of
# size n (presumably rescaled to sum to n -- see package_sampling.utils).
modified_probs = inclusion_probabilities(probs, n=n)

# Population object shared by every sampler built later in the notebook.
pop = gs.Population(coords, modified_probs)

# Building Initial Designs

In [5]:
# Ordering strategies tried for both the units and the zones; every ordered
# pair of these is evaluated when the initial designs are built.
# Currently disabled options: "lexico-yx", "random", "angle_0", "center".
orders = [
    "lexico-xy", "distance_0", "projection",
    "spiral", "max", "snake", "hilbert",
]

In [6]:
initial_designs = []
# Every ordered pair (units_order, zones_order) drawn from `orders`.
combines = list(itertools.product(orders, orders))
# Number of samplers built per zone layout; the best-scoring one is kept.
num_trials = 2

# Zone layouts tried, in this order, for every ordering pair:
#   (n_zones, zone_builder, record the running best after this layout?)
# The flags reproduce what the cell has always done: a design is recorded after
# the first, third and fourth layouts, but not after the 4-zone 'cluster' one.
zone_configs = [
    ((2, 2), 'sweep', True),
    (4, 'cluster', False),
    ((1, 1), 'sweep', True),
    (3, 'cluster', True),
]

for units_order, zones_order in tqdm(combines, desc="Generating initial designs", total=len(combines), unit="orders"):
    # NOTE(review): best / best_score are carried across layouts and never
    # reset, so each recorded design is the best seen SO FAR for this ordering
    # pair. When a later layout does not improve on an earlier one, the same
    # sampler is wrapped and appended again, giving duplicate initial designs.
    # Confirm whether a per-layout best (reset here per layout) was intended.
    best = None
    best_score = np.inf
    for n_zones, zone_builder, record in zone_configs:
        for _ in range(num_trials):
            ks = gs.sampling.KMeansSampler(
                population=pop,
                n=n,
                n_zones=n_zones,
                zone_builder=zone_builder,
                units_order=units_order,
                zones_order=zones_order,
                split_size=0.001
            )
            # Score each candidate once and reuse the value; lower is better.
            score = ks.expected_moran_score()
            if score < best_score:
                best = ks
                best_score = score

        if record:
            initial_designs.append(gs.NewDesign(best))


Generating initial designs: 100%|██████████| 49/49 [01:01<00:00,  1.25s/orders]


# Running the A* Search

In [7]:
# Callable criterion applied to a design to obtain its score; the cells below
# take the minimum over designs, i.e. lower values are treated as better.
moran_criteria = gs.criteria.MoranCriteria()

In [8]:
# Score every initial design with the Moran criterion.
initial_criteria_value = np.array([moran_criteria(design) for design in initial_designs])

# Lower is better here (the search reports a "new best" whenever the value
# decreases), so the best design is the one at the MINIMUM. The design and its
# value are taken from the same index so they can never disagree; previously
# the design was picked with argmax while the value used min.
best_index = int(np.argmin(initial_criteria_value))
best_design = initial_designs[best_index]
best_criteria_value = initial_criteria_value[best_index]
best_criteria_value

np.float64(-0.38479718690192966)

In [12]:
# Build the A* search over the initial designs, scored with the Moran criterion.
astar = gs.search.AStar(initial_designs, moran_criteria)

best initial criteria value -0.38479718690192966


In [None]:
# Options for the search run, gathered in one place so they are easy to tune.
# NOTE(review): the meaning of each knob is defined by gs.search.AStar.run; the
# grouping comments below are inferred from the parameter names only.
run_options = dict(
    # overall search budget
    max_iterations=1000,
    num_new_nodes=30,
    max_open_set_size=1000,
    # zone-order changes (all zero here)
    n_clusters_to_change_order_zone=0,
    n_changes_in_order_of_zones=0,
    # unit-order changes
    n_clusters_to_change_order_units=1,
    n_zones_to_change_order_units=1,
    n_changes_in_order_of_units=1,
    # -1 presumably means "use all available cores" -- confirm
    n_jobs=-1,
)

astar.run(**run_options)


parent node: -0.38479718690192966




child node: -0.38423723411872407
child node: -0.38183367409655367
child node: -0.38564900070186187

New best criteria value: -0.38564900070186187
child node: -0.38230174686423474
child node: -0.38489307613663143
child node: -0.3846425138801379
child node: -0.3850522921006348
child node: -0.38365090017338616
child node: -0.38091669024229824
child node: -0.37976556615980134
child node: -0.38139721362987755
child node: -0.38205200090158764
child node: -0.3820544811421237
child node: -0.38704361349600513

New best criteria value: -0.38704361349600513
child node: -0.3849571337556049
child node: -0.3854375736389345
child node: -0.37804375487491654
child node: -0.3846480739680969
child node: -0.3836324379134489
child node: -0.37945033585055266
child node: -0.385657391152081
child node: -0.3858294147757168
child node: -0.3836655941051219
child node: -0.3842085461115503
child node: -0.3764296704644623
child node: -0.3821748846840967

parent node: -0.38704361349600513
child node: -0.383567753597

In [None]:
# Display the best criterion value held by the search object.
astar.best_criteria_value

In [None]:
# Display the `all_samples` attribute of the best design's sampler
# (presumably the samples the design can produce -- confirm against the library).
astar.best_design.kmeans.all_samples

In [None]:
# Sanity check: total of the best design's sample probabilities
# (presumably expected to be 1 if they form a full distribution -- TODO confirm).
np.sum(astar.best_design.kmeans.all_samples_probs)

In [None]:
# Mean absolute difference between the best design's `fips` and the target
# inclusion probabilities (`fips` is presumably the design's first-order
# inclusion probabilities -- confirm); smaller means the targets are matched better.
np.mean(np.abs(astar.best_design.kmeans.fips - modified_probs))