# DPI Setting

In [17]:
import matplotlib as mpl
# Default figure resolution for every matplotlib figure created after this cell.
mpl.rcParams['figure.dpi'] = 100  # alternative value kept for reference: 300 (presumably for high-resolution export; confirm)

In [18]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [19]:
from package_sampling.sampling import up_tille, up_brewer, up_systematic, up_max_entropy
from package_sampling.utils import inclusion_probabilities


In [20]:
import os

# Show the kernel's working directory; the relative "data_samples/..." paths used further down resolve against it.
print(os.getcwd())

/home/divar/projects/graphical-sampling


In [21]:
import os
# NOTE(review): machine-specific absolute path; this cell fails on any other checkout location.
# The relative "data_samples/..." paths used further down rely on this being the working directory.
os.chdir('/home/divar/projects/graphical-sampling')

### Dependencies

### Python

In [22]:
# !pip install -q git+https://github.com/mehdimhb/geometric-sampling@dev
# !pip install git+https://github.com/mehdimhb/geometric-sampling@legacy-measure

In [23]:
!apt-get install -y r-base
#!pip install -q rpy2 tqdm


E: Could not open lock file /var/lib/dpkg/lock-frontend - open (13: Permission denied)
E: Unable to acquire the dpkg frontend lock (/var/lib/dpkg/lock-frontend), are you root?


In [24]:
from rpy2.robjects.packages import importr

import numpy as np
from rpy2.robjects import numpy2ri, default_converter, globalenv
from rpy2.robjects.conversion import localconverter

In [25]:
%load_ext rpy2.ipython

The rpy2.ipython extension is already loaded. To reload it, use:
  %reload_ext rpy2.ipython


### R

In [26]:
%%R

# Optional packages, currently disabled:
#library(spcosa)
#library(spsurvey)

# Attach every required package; when one is missing, install it and then attach it.
# The order (WaveSampling, sampling, BalancedSampling) is kept so that name masking
# between the packages is the same as when they are attached one by one.
invisible(lapply(
    c("WaveSampling", "sampling", "BalancedSampling"),
    function(pkg) {
        if (!require(pkg, character.only = TRUE)) {
            install.packages(pkg)
            library(pkg, character.only = TRUE)
        }
    }
))

### Imports

In [27]:
import matplotlib as mpl
import numpy as np
from rpy2.robjects import r, numpy2ri
import rpy2.robjects as ro
from matplotlib import pyplot as plt
from itertools import combinations
from collections import OrderedDict
import pandas as pd
import geometric_sampling as gs
from tqdm import tqdm
from tqdm.contrib import tenumerate
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

#numpy2ri.activate()

# Random generator object obtained from geometric_sampling.
# NOTE(review): no seed is passed here; confirm whether gs.random.rng() is seeded by default
# if reproducible draws are required.
rng = gs.random.rng()

## Functions

### Plotting helpers

In [28]:
# Colormap objects sampled by get_autumn_colors / get_plasma_colors in this cell.
autumn_cmap = plt.get_cmap('autumn')
plasma_cmap = plt.get_cmap('plasma')

def get_autumn_colors(n_clusters):
    """Sample ``n_clusters`` colours from the module-level autumn colormap.

    The colormap is evaluated at ``n_clusters`` evenly spaced positions from
    0.15 to 0.95 (both ends included); the result of that call is returned as is.
    """
    sample_positions = np.linspace(0.15, 0.95, n_clusters)
    return autumn_cmap(sample_positions)

def get_plasma_colors(n_clusters):
    """Sample ``n_clusters`` colours from the module-level plasma colormap.

    The colormap is evaluated at ``n_clusters`` evenly spaced positions from
    0.15 to 0.95 (both ends included); the result of that call is returned as is.
    """
    sample_positions = np.linspace(0.15, 0.95, n_clusters)
    return plasma_cmap(sample_positions)

# Number of palette colours (one per cluster) generated in the rest of this cell.
# NOTE: `n` is a notebook-wide name; later cells reassign it (n = 4) as the sample size,
# so `n_clusters` is the name to rely on for the palette size.
n = 10  # Number of clusters
n_clusters = n
import matplotlib.pyplot as plt
import numpy as np

def lighten_color(color, amount=0.5):
    """Blend a colour towards white.

    Parameters
    ----------
    color : str or sequence
        Either a key of ``matplotlib.colors.cnames`` or any colour specification
        accepted by ``matplotlib.colors.to_rgb`` (for example an RGB/RGBA row
        taken from a colormap).
    amount : float, default 0.5
        Blend weight of white: 0 returns the colour unchanged, 1 returns white.

    Returns
    -------
    tuple
        Three floats ``(r, g, b)``; ``to_rgb`` discards any alpha channel.
    """
    import matplotlib.colors as mc

    try:
        resolved = mc.cnames[color]
    except (KeyError, TypeError):
        # KeyError: not a named colour.
        # TypeError: unhashable input such as a numpy RGBA row or a list.
        # Anything else (e.g. KeyboardInterrupt) is no longer swallowed.
        resolved = color
    rgb = np.array(mc.to_rgb(resolved))
    white = np.array([1, 1, 1])
    return tuple((1 - amount) * rgb + amount * white)

# Palettes with `n_clusters` entries each (n_clusters is set earlier in this cell).
# Plasma colours are sampled over the full [0, 1] range and blended 50% towards white.
base_colors = plt.cm.plasma(np.linspace(0, 1, n_clusters))
light_colors = [lighten_color(col, 0.5) for col in base_colors]

autumn_colors = get_autumn_colors(n_clusters)
# The lightened palette is the effective `plasma_colors`. The earlier direct
# get_plasma_colors(n_clusters) assignment was overwritten immediately, so it is dropped.
plasma_colors = light_colors

In [29]:
def plot_convex_hull(points, ax, color, alpha=0.33, edge_color="gray", line_width=0.6):
    """Draw the convex hull of ``points`` on ``ax`` as a filled polygon.

    Parameters
    ----------
    points : array-like of shape (k, 2)
        Point coordinates. With fewer than three points nothing is drawn.
    ax : matplotlib axes
        Axes that receives the polygon patch.
    color : colour specification
        Face colour of the polygon.
    alpha : float, default 0.33
        Face transparency.
    edge_color : colour specification, default "gray"
        Outline colour.
    line_width : float, default 0.6
        Outline width.

    Returns
    -------
    (ax, hull)
        ``hull`` is the ``scipy.spatial.ConvexHull`` that was drawn, or ``None``
        when there are fewer than three points or Qhull cannot build a hull
        (for example, for degenerate input).
    """
    if len(points) < 3:
        return ax, None

    # Imported locally: the notebook's import cells do not bring these names into
    # scope, so on a fresh kernel the function would otherwise raise NameError.
    from matplotlib.patches import Polygon
    from scipy.spatial import ConvexHull
    try:
        from scipy.spatial import QhullError
    except ImportError:  # older SciPy releases expose it only in the qhull submodule
        from scipy.spatial.qhull import QhullError

    # Fancy indexing with hull.vertices below needs an ndarray, not a list.
    points = np.asarray(points)
    try:
        hull = ConvexHull(points)
    except QhullError:
        # Can't make a hull for this input; skip drawing.
        return ax, None

    polygon = Polygon(
        points[hull.vertices],
        closed=True,
        facecolor=color,
        alpha=alpha,
        edgecolor=edge_color,
        lw=line_width,
        zorder=1,
    )
    ax.add_patch(polygon)
    return ax, hull


# Simulations

### Methods

In [30]:
def scores(coords, probs, n, N, sample_idx, split_size, density_measure=None):
    """Score one sample with three spatial-balance indices.

    Parameters
    ----------
    coords : ndarray of shape (N, d)
        Unit coordinates.
    probs : ndarray of shape (N,)
        Inclusion probabilities.
    n : int
        Sample size; must equal ``len(sample_idx)``.
    N : int
        Population size; must equal ``len(coords)`` and ``len(probs)``.
    sample_idx : array-like of int
        0-based indices of the sampled units.
    split_size : float
        ``split_size`` used when a density measure has to be built here.
        A non-numeric value falls back to 0.001, the value that was
        previously hard-coded. For compatibility with the positional call
        ``scores(coords, probs, n, N, s, density_measure)`` used in this
        notebook, an object with a ``score`` method passed in this slot is
        treated as ``density_measure``.
    density_measure : optional
        Prebuilt ``gs.measure.Density``. Pass it when scoring many samples of
        the same population so that it is not rebuilt on every call.

    Returns
    -------
    tuple
        ``(density_score, IB_value, SBLB_value)``: the density score from
        ``density_measure.score`` plus the values of the R functions ``IB``
        and ``sblb``. Either R value is ``Inf`` when its R call raised an error.

    Raises
    ------
    AssertionError
        If the lengths of the inputs disagree with ``n`` / ``N``.
    """
    assert len(coords) == len(probs) == N
    assert len(sample_idx) == n

    # Compatibility shim: a prebuilt measure handed over positionally lands in
    # the `split_size` slot; use it instead of silently rebuilding the measure.
    if density_measure is None and hasattr(split_size, "score"):
        density_measure = split_size

    # Accept lists as well as arrays (the `+ 1` and `.reshape` below need an ndarray).
    sample_idx = np.asarray(sample_idx)

    sample_mask = np.zeros(N, dtype=int)
    sample_mask[sample_idx] = 1

    with localconverter(default_converter + numpy2ri.converter):
        globalenv['sample_mask'] = sample_mask
        globalenv['sample_idx'] = sample_idx + 1  # R indices are 1-based
        globalenv['coords'] = coords
        globalenv['probs'] = probs
        globalenv['n'] = n
        globalenv['N'] = N

    # NOTE: `sb_value` is still computed on the R side but is not part of the return value.
    r_code = """
W <- wpik(coords,probs)
W <- W - diag(diag(W))
diag(W) <- 0

ib_value <- tryCatch({
  IB(W, sample_mask)
}, error = function(e) { Inf })

sb_value <- tryCatch({
  sb(probs, coords, sample_idx)
}, error = function(e) { Inf })

sblb_value <- tryCatch({
  sblb(probs, coords, sample_idx)
}, error = function(e) { Inf })
"""
    ro.r(r_code)
    IB_value = ro.r("ib_value")[0]
    SBLB_value = ro.r("sblb_value")[0]

    if density_measure is None:
        is_numeric = (
            isinstance(split_size, (int, float, np.integer, np.floating))
            and not isinstance(split_size, bool)
        )
        effective_split_size = split_size if is_numeric else 0.001
        # Min-max scale every coordinate axis to [0, 1] before building the measure.
        scaled_coords = (coords - np.min(coords, axis=0)) / np.ptp(coords, axis=0)
        density_measure = gs.measure.Density(
            scaled_coords, probs, n, split_size=effective_split_size
        )

    scores_val = density_measure.score(sample_idx.reshape(1, -1))
    return scores_val[0], IB_value, SBLB_value

In [31]:
import numpy as np
import rpy2.robjects as ro
from rpy2.robjects import default_converter, numpy2ri
from rpy2.robjects.conversion import localconverter

def local_pivotal_samples(coords, probs, n, num_samples):
    """Draw ``num_samples`` samples with R's ``lpm(probs, coords)``.

    Parameters
    ----------
    coords : ndarray
        Unit coordinates, passed to R as ``coords``.
    probs : ndarray
        Inclusion probabilities, passed to R as ``probs``.
    n : int
        Number of units per sample (row width of the result).
    num_samples : int
        Number of independent draws.

    Returns
    -------
    ndarray of int, shape (num_samples, n)
        0-based unit indices, one row per draw. The values returned by R are
        shifted by -1 because R indices are 1-based.
    """
    # The inputs do not change between draws, so transfer them to R once
    # instead of converting the arrays on every iteration.
    with localconverter(default_converter + numpy2ri.converter):
        ro.globalenv['coords'] = coords
        ro.globalenv['probs'] = probs

    r_code = """
s <- lpm(probs, coords) # Local Pivotal Method (BalancedSampling)
"""
    samples_idx = np.zeros((num_samples, n), dtype=int)
    for i in range(num_samples):
        ro.r(r_code)
        with localconverter(default_converter + numpy2ri.converter):
            sample_idx = np.array(list(ro.r("s"))) - 1
        samples_idx[i] = sample_idx

    return samples_idx

def k_means_samples(coords, probs, n, num_samples, n_zones, sort_method):
    """Draw ``num_samples`` samples with ``gs.sampling.KMeansSpatialSamplingSimple``.

    The sampler is built with ``tolerance=2`` and ``split_size=0.001``;
    ``n``, ``n_zones`` and ``sort_method`` are forwarded unchanged.
    """
    sampler = gs.sampling.KMeansSpatialSamplingSimple(
        coords,
        probs,
        n=n,
        n_zones=n_zones,
        sort_method=sort_method,
        tolerance=2,
        split_size=0.001,
    )
    return sampler.sample(num_samples)

def random_samples(coords, probs, n, num_samples):
    """Draw ``num_samples`` samples of size ``n`` with ``gs.sampling.RandomSampling``."""
    sampler = gs.sampling.RandomSampling(coords, probs, n=n)
    return sampler.sample(num_samples)

def upmaxentropy_samples(probs, num_samples):
    """Draw ``num_samples`` samples with R's ``UPmaxentropy(probs)``.

    The sample size is ``round(sum(probs))``. Each R call yields a selection
    mask; the 0-based positions of its non-zero entries form one row of the
    returned ``(num_samples, n)`` integer array.
    """
    with localconverter(default_converter + numpy2ri.converter):
        ro.globalenv['probs'] = probs
    n = int(round(np.sum(probs)))

    draw_code = """
mask <- UPmaxentropy(probs)
"""
    drawn = np.zeros((num_samples, n), dtype=int)
    for draw in range(num_samples):
        ro.r(draw_code)
        with localconverter(default_converter + numpy2ri.converter):
            raw_mask = np.array(ro.r("mask"))
        # R hands back a numeric 0/1 vector; turn it into booleans when needed.
        selected = raw_mask if raw_mask.dtype == np.bool_ else raw_mask.astype(bool)
        drawn[draw] = np.nonzero(selected)[0]

    return drawn

def wave_samples(coords, probs, n, num_samples):
    """Draw ``num_samples`` samples with R's ``wave(coords, probs)``.

    Each R call yields a selection mask; the 0-based positions of its non-zero
    entries form one row of the returned ``(num_samples, n)`` integer array.
    """
    with localconverter(default_converter + numpy2ri.converter):
        ro.globalenv['coords'] = coords
        ro.globalenv['probs'] = probs

    draw_code = """
wave_mask <- wave(coords, probs)
"""
    drawn = np.zeros((num_samples, n), dtype=int)
    for draw in range(num_samples):
        ro.r(draw_code)
        with localconverter(default_converter + numpy2ri.converter):
            raw_mask = np.array(ro.r("wave_mask"))
        # R hands back a numeric 0/1 vector; turn it into booleans when needed.
        selected = raw_mask if raw_mask.dtype == np.bool_ else raw_mask.astype(bool)
        drawn[draw] = np.nonzero(selected)[0]

    return drawn

def spcosa_samples(coords, n, num_samples):
    """Draw ``num_samples`` samples through the R package ``spcosa``.

    For every draw the R side is re-seeded with a fresh integer from
    ``np.random.randint``, the coordinates are stratified into ``n`` strata
    (``nTry = 50``) and one sample is taken from the stratification. The row
    names of the sampled points are read back as indices and shifted to 0-based.

    Returns an integer array of shape ``(num_samples, n)``.
    """
    with localconverter(default_converter + numpy2ri.converter):
        ro.globalenv['coords'] = coords
    N = coords.shape[0]  # population size; not used further
    drawn = np.zeros((num_samples, n), dtype=int)
    for draw in range(num_samples):
        r_code = f"""
library(spcosa)
set.seed({np.random.randint(1, 1e8)})
df <- data.frame(x = coords[,1], y = coords[,2])
coordinates(df) <- ~x + y
gridded(df) <- TRUE
stratification <- stratify(df, nStrata = {n}, nTry = 50)
samples <- spsample(stratification)
# Extract row indices in original data
whichrow <- as.integer(rownames(as(samples, "data.frame")))
"""
        ro.r(r_code)
        with localconverter(default_converter + numpy2ri.converter):
            one_based_rows = np.array(ro.r("whichrow"))
        drawn[draw] = one_based_rows - 1  # zero-based for Python
    return drawn

def find_samples(coords, probs, n, num_samples, ep_mode="auto"):
    """Draw ``num_samples`` samples with each sampling method.

    Returns a dict keyed by method name ("Local Pivotal", "Random",
    "UPmaxentropy", "Wave"); each value is whatever the corresponding
    ``*_samples`` helper returns. The helpers are called in that order.

    ``ep_mode`` is accepted but not used by the current implementation.
    """
    result = {}
    result["Local Pivotal"] = local_pivotal_samples(coords, probs, n, num_samples)
    result["Random"] = random_samples(coords, probs, n, num_samples)
    result["UPmaxentropy"] = upmaxentropy_samples(probs, num_samples)
    result["Wave"] = wave_samples(coords, probs, n, num_samples)
    return result

### Gathering Data

In [32]:
# Target sample size. The data-gathering cell that follows assigns n = 4 again before using it.
n = 4

In [None]:
import os
import numpy as np
import pandas as pd
from tqdm import tqdm
# Output folder for the per-data-set score tables.
folder = "data_samples/results"
os.makedirs(folder, exist_ok=True)
all_coords = []
all_probs = []
n = 4
names = [
    "grid_eq", "grid_uneq",
    "clust_eq", "clust_uneq",
    "random_eq", "random_uneq"
]

# --- Load every data set and rescale its probabilities in R ---------------
for name in names:
    df = pd.read_csv(f"data_samples/coords_probs/{name}.csv")
    coords = df[["x", "y"]].values
    probs = df["prob"].values
    with localconverter(default_converter + numpy2ri.converter):
        globalenv['probs'] = probs
        globalenv['n'] = n
        # Overwrites the R variable `probs` with the output of inclusionprobabilities(probs, n)
        r('probs <- inclusionprobabilities(probs, n)')
        # Convert the rescaled vector back into a NumPy array
        new_probs = np.array(globalenv['probs'])

    # Replace the raw column with the rescaled probabilities
    probs = new_probs

    all_coords.append(coords)
    all_probs.append(probs)

# Sanity check on the last data set loaded: sum of probabilities and population size.
print(sum(probs))
print(len(probs))

sample_cnt = 1000
split_size = 0.001
columns = ["Method", "Density", "Moran", "Local Balance"]

# --- Sample and score every data set --------------------------------------
for coords, probs, name in zip(all_coords, all_probs, names):
    print('name:', name)
    n = int(np.round(np.sum(probs)))
    N = len(probs)

    sample_methods = find_samples(coords, probs, n, sample_cnt)

    # Build the density measure once per data set and hand it to scores() by keyword.
    # Passed positionally it would land in the `split_size` slot of scores().
    scaled_coords = (coords - np.min(coords, axis=0)) / np.ptp(coords, axis=0)
    density_measure = gs.measure.Density(scaled_coords, probs, n, split_size=split_size)
    print('density_measure:', density_measure)

    # One record per (method, sample); kept as a plain list so the numeric
    # scores are not coerced to strings by a mixed-type ndarray.
    rows = [
        [method, *scores(coords, probs, n, N, s, split_size, density_measure=density_measure)]
        for method, samples in sample_methods.items()
        if samples is not None  # skip methods that produced no samples
        for s in tqdm(samples, total=sample_cnt, desc=method)
    ]
    df = pd.DataFrame(rows, columns=columns)
    # The third score column is named "Local Balance"; there is no "Voronoi" column in `columns`.
    df = df.astype({
        "Density": float,
        "Moran": float,
        "Local Balance": float
    })
    filename = os.path.join(folder, f"final_results_{name}_n={n}.csv")
    df.to_csv(filename, index=False)
    print(f"Saved: {filename}")

3.9999999999999782
400
name: grid_eq


Exception ignored from cffi callback <function _processevents at 0x76e11c4d28e0>:
Traceback (most recent call last):
  File "/home/divar/projects/graphical-sampling/.venv/lib/python3.12/site-packages/rpy2/rinterface_lib/callbacks.py", line 308, in _processevents
    @ffi_proxy.callback(ffi_proxy._processevents_def,
    
KeyboardInterrupt: 


In [None]:
import os
import pandas as pd

# Prepend the rows of "initial_results_<name>.csv" to "results_<name>.csv" for every data set.
#
# NOTE(review): this cell is NOT idempotent. It reads results_<name>.csv and overwrites the
# same file with initial + existing rows, so every re-run prepends the initial rows again.
# NOTE(review): neither input file is written by the cells visible above (the gathering cell
# saves "final_results_<name>_n=<n>.csv"); confirm where these files come from.
folder = "data_samples/results"
names = [
    "clust_eq", "clust_uneq",
    "grid_eq", "grid_uneq",
    "random_eq", "random_uneq"
]

for name in names:
    # Input (initial results) and input/output (results) paths for this data set
    initial_result_path = os.path.join(folder, f"initial_results_{name}.csv")
    result_path = os.path.join(folder, f"results_{name}.csv")

    # Read initial results; its header row supplies the column names
    initial_df = pd.read_csv(initial_result_path)

    # Read simulation results: drop the file's first line and impose the
    # column names of the initial table so that both frames line up
    result_df = pd.read_csv(result_path, skiprows=1, names=initial_df.columns)

    # Stack both tables, initial rows first, with a fresh 0..k-1 index
    combined_df = pd.concat([initial_df, result_df], ignore_index=True)

    # Overwrite the results file in place (single header row, no index column)
    combined_df.to_csv(result_path, index=False)
    print(f"Overwritten: {result_path}")

Overwritten: data_samples/results/results_clust_eq.csv
Overwritten: data_samples/results/results_clust_uneq.csv
Overwritten: data_samples/results/results_grid_eq.csv
Overwritten: data_samples/results/results_grid_uneq.csv
Overwritten: data_samples/results/results_random_eq.csv
Overwritten: data_samples/results/results_random_uneq.csv


In [4]:
import pandas as pd

# 1) File-name stems; each results file is "<base_dir>/results_<stem>.csv"
names = [
    "grid_eq", "grid_uneq",
    "clust_eq", "clust_uneq",
    "random_eq", "random_uneq",
]

# 2) Folder holding the result CSVs
base_dir = "/home/divar/projects/graphical-sampling/data_samples/results"

# 3) Per file: drop the K-Means rows, average the three indexes per method,
#    and tag the table with the scenario and design encoded in the stem
mean_dfs = []
for stem in names:
    path = f"{base_dir}/results_{stem}.csv"
    df = pd.read_csv(path)
    df = df[~df["Method"].str.contains("K-Means")]

    # "<design>_<eq|uneq>" -> design ("grid", "clust" or "random") and scenario
    scenario = "equal" if stem.endswith("_eq") else "unequal"
    design = stem.split("_")[0]

    mdf = (
        df
        .groupby("Method", as_index=False)[["Density", "Moran", "Local Balance"]]
        .mean()
        .assign(Scenario=scenario, Design=design)
    )
    mean_dfs.append(mdf)

# 4) One table for all files
result = pd.concat(mean_dfs, ignore_index=True)

# 5) Identifying columns first, then the three indexes
result = result.loc[
    :,
    ["Design", "Scenario", "Method", "Density", "Moran", "Local Balance"],
]

# 6) Print as a markdown table
print(result.to_markdown(index=False))


| Design   | Scenario   | Method        |     Density |      Moran |   Local Balance |
|:---------|:-----------|:--------------|------------:|-----------:|----------------:|
| grid     | equal      | Local Pivotal | -0.103637   | -0.122935  |        0.382222 |
| grid     | equal      | Random        | -0.396706   | -0.0434621 |        0.496185 |
| grid     | equal      | UPmaxentropy  | -0.395446   | -0.0449588 |        0.492392 |
| grid     | equal      | cube          | -0.412348   | -0.0397319 |        0.501449 |
| grid     | equal      | wave          |  0.0266841  | -0.197509  |        0.349212 |
| grid     | unequal    | Local Pivotal | -0.263398   | -0.101291  |        0.735206 |
| grid     | unequal    | Random        | -0.352008   | -0.0335465 |        8.97953  |
| grid     | unequal    | UPmaxentropy  | -0.482192   | -0.028425  |        0.839956 |
| grid     | unequal    | cube          | -0.339907   | -0.0389685 |       15.8776   |
| grid     | unequal    | wave          | -