# Imports

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
import os
import glob
import graphical_sampling as gs
import pandas as pd
import numpy as np
import itertools
from tqdm import tqdm
from package_sampling.utils import inclusion_probabilities



# Loading and Determining Population

In [4]:
DATA_DIR = "populations"
csv_paths = glob.glob(os.path.join(DATA_DIR, "*.csv"))

coords_dict = {}
probs_dict = {}

for fp in csv_paths:
    name = os.path.splitext(os.path.basename(fp))[0]
    data = np.loadtxt(fp, delimiter=",", skiprows=1)
    coords = data[:, :2]
    probs  = data[:, -1]

    coord_name, prob_name, *rest = name.split("_")
    coord_name = 'cluster' if coord_name == 'clust' else coord_name
    prob_name = 'equal' if prob_name == 'eq' else 'unequal'

    coords_dict[coord_name] = coords
    probs_dict[coord_name] = probs_dict.get(coord_name, {})
    probs_dict[coord_name][prob_name] = probs

print(coords_dict.keys())
print(probs_dict.keys())
print(probs_dict['random'].keys())

dict_keys(['swiss', 'cluster', 'meuse', 'random', 'grid'])
dict_keys(['swiss', 'cluster', 'meuse', 'random', 'grid'])
dict_keys(['equal', 'unequal'])


In [5]:
N = 100
n = 15
coords = coords_dict['meuse']
probs = probs_dict['meuse']['equal']
modified_probs = inclusion_probabilities(probs, n=n)
pop = gs.Population(coords, modified_probs)

# Building Initial Designs

In [9]:
orders = [
    # "lexico-yx",
    "lexico-xy",
    # "random",
    # "angle_0",
    "distance_0",
    "projection",
    # "center",
    "spiral",
    "max",
    "snake",
    "hilbert",
]

In [None]:
initial_designs = []
combines = list(itertools.product(orders, orders))
num_trials = 2
for units_order, zones_order in tqdm(combines, desc="Generating initial designs", total=len(combines), unit="orders"):
    best = None
    best_score = np.inf
    for _ in range(num_trials):
        ks = gs.sampling.KMeansSampler(
            population=pop,
            n=n,
            n_zones=(2, 2),
            zone_builder='sweep',
            units_order=units_order,
            zones_order=zones_order,
            split_size=0.001
        )
        if ks.expected_moran_score() < best_score:
            best = ks
            best_score = ks.expected_moran_score()

    initial_designs.append(gs.NewDesign(best))

    for _ in range(num_trials):
        ks = gs.sampling.KMeansSampler(
            population=pop,
            n=n,
            n_zones=4,
            zone_builder='cluster',
            units_order=units_order,
            zones_order=zones_order,
            split_size=0.001
        )
        if ks.expected_moran_score() < best_score:
            best = ks
            best_score = ks.expected_moran_score()

    for _ in range(num_trials):
        ks = gs.sampling.KMeansSampler(
            population=pop,
            n=n,
            n_zones=(1, 1),
            zone_builder='sweep',
            units_order=units_order,
            zones_order=zones_order,
            split_size=0.001
        )
        if ks.expected_moran_score() < best_score:
            best = ks
            best_score = ks.expected_moran_score()

    initial_designs.append(gs.NewDesign(best))

    for _ in range(num_trials):
        ks = gs.sampling.KMeansSampler(
            population=pop,
            n=n,
            n_zones=3,
            zone_builder='cluster',
            units_order=units_order,
            zones_order=zones_order,
            split_size=0.001
        )
        if ks.expected_moran_score() < best_score:
            best = ks
            best_score = ks.expected_moran_score()

    initial_designs.append(gs.NewDesign(best))


Generating initial designs:   2%|▏         | 1/49 [00:15<12:14, 15.30s/orders]

In [65]:
for design in initial_designs:
    print(design.kmeans.all_samples.shape, design.kmeans.expected_moran_score())

(8, 15) -0.39707465639616435
(8, 15) -0.39707465639616435
(8, 15) -0.39707465639616435
(10, 15) -0.3557123143767697
(8, 15) -0.36751426386191205
(8, 15) -0.36751426386191205
(8, 15) -0.29755027097419867
(8, 15) -0.3730747089819494
(8, 15) -0.3730747089819494
(9, 15) -0.3929755333256541
(9, 15) -0.3929755333256541
(9, 15) -0.3929755333256541
(6, 15) -0.24211979291518318
(8, 15) -0.37280822807561403
(8, 15) -0.37280822807561403
(8, 15) -0.39707465639616435
(8, 15) -0.39707465639616435
(8, 15) -0.39707465639616435
(8, 15) -0.27007561498221755
(8, 15) -0.33923474783055985
(8, 15) -0.33923474783055985
(10, 15) -0.40645880331091766
(10, 15) -0.40645880331091766
(10, 15) -0.40645880331091766
(10, 15) -0.35803559126953166
(9, 15) -0.36035021935386685
(9, 15) -0.36035021935386685
(11, 15) -0.38887723440056926
(11, 15) -0.38887723440056926
(11, 15) -0.38887723440056926
(9, 15) -0.3949806730318942
(9, 15) -0.3949806730318942
(9, 15) -0.3949806730318942
(10, 15) -0.3429148581118533
(8, 15) -0.3638

In [66]:
ks: gs.sampling.KMeansSampler = initial_designs[-1].kmeans
ks.all_samples

array([[128,  97,  34, 101,  38, 104, 137,  11, 110,  46, 147, 115, 122,
         91,  94],
       [129, 153,   1,   5, 136,  40,  74, 107, 140,  79,  48, 114,  52,
        121,  27],
       [ 96,  34, 105,  10,  74, 110, 142,  15, 145,  92,  19, 154, 124,
         61, 126],
       [102,   7,  73,  10,  42, 111,  82, 116, 149,  22, 119,  54, 152,
         94,  31],
       [ 96,  34, 105,  74, 110, 142,  15, 145,  29,  19,  92, 154, 124,
         61, 126],
       [ 32, 128,  97,  68, 101,  38, 104, 137,  11,  46, 147, 115, 122,
         91,  94],
       [102,   7,  73,  10,  42, 111,  49,  82, 116, 149,  20,  54, 152,
         94,  31],
       [102,   7,  73,  10,  42, 111,  82, 116, 149,  20, 119,  54, 152,
         94,  31],
       [129, 153,   1,   5, 136,  40,  74, 107, 140,  79,  48,  84, 121,
         27, 124],
       [102,   7,  73,  42,  11, 111,  49,  82, 116, 149,  20,  54, 152,
         94,  31],
       [129, 153,   1,   5, 136,  40,  74, 107, 140,  79,  48,  52,  84,
       

In [68]:
x = 1
for s, p in zip(ks.all_samples, ks.all_samples_probs):


[128  97  34 101  38 104 137  11 110  46 147 115 122  91  94] 0.16162288000000002
[129 153   1   5 136  40  74 107 140  79  48 114  52 121  27] 0.16062520800000005
[ 96  34 105  10  74 110 142  15 145  92  19 154 124  61 126] 0.16162288
[102   7  73  10  42 111  82 116 149  22 119  54 152  94  31] 0.080894579
[ 96  34 105  74 110 142  15 145  29  19  92 154 124  61 126] 0.08837711999999999
[ 32 128  97  68 101  38 104 137  11  46 147 115 122  91  94] 0.08837711999999999
[102   7  73  10  42 111  49  82 116 149  20  54 152  94  31] 0.015962754
[102   7  73  10  42 111  82 116 149  20 119  54 152  94  31] 0.08081144000000001
[129 153   1   5 136  40  74 107 140  79  48  84 121  27 124] 0.08081144000000001
[102   7  73  42  11 111  49  82 116 149  20  54 152  94  31] 0.072331227
[129 153   1   5 136  40  74 107 140  79  48  52  84 121  27] 0.008480213
[129 153   1   5 136  40  74 107 140  79  48 114  52 121  26] 8.3139e-05


# Run

In [49]:
moran_criteria = gs.criteria.MoranCriteria()

In [50]:
astar = gs.search.AStar(
    initial_designs,
    moran_criteria
)

best initial criteria value -0.5175787246740949


In [51]:
astar.run(
    max_iterations = 1000,
    num_new_nodes = 10,
    max_open_set_size = 1000,
    n_clusters_to_change_order_zone = 'None',
    n_clusters_to_change_order_units = 'all',
    n_zones_to_change_order_units = 'all',
    n_changes_in_order_of_units = 1,
    n_changes_in_order_of_zones = 1,
)

Criteria of current node: -0.5175787246740949
Criteria of current node: -0.5175787246740949
Criteria of current node: -0.5175787246740949
Criteria of current node: -0.5175787246740949
Criteria of current node: -0.5175787246740949
Criteria of current node: -0.5175787246740949
Criteria of current node: -0.5175787246740949

New best criteria value: -0.5252990012754026
Criteria of current node: -0.5252990012754026
Criteria of current node: -0.5175787246740949
Criteria of current node: -0.5175787246740949
Criteria of current node: -0.5175787246740949
Criteria of current node: -0.5175787246740949
Criteria of current node: -0.5175787246740949
Criteria of current node: -0.5175787246740949
Criteria of current node: -0.5175787246740949
Criteria of current node: -0.502300616098327
Criteria of current node: -0.48401532242622414
Criteria of current node: -0.4733015224859966
Criteria of current node: -0.47131112712576895
Criteria of current node: -0.4659844674194494
Criteria of current node: -0.4659

1000

In [60]:
astar.best_criteria_value

-0.5261627262670661

In [61]:
astar.best_design.kmeans.all_samples

array([[132, 134,   9,  41, 139, 108,  13, 144,  18, 114,  86, 121, 123,
         92, 127]])

In [62]:
astar.best_design.kmeans.all_samples_probs

array([1.])