# Packages

In [1]:
import matplotlib.pyplot as plt
import rpy2.robjects as ro
from rpy2.robjects.packages import importr
import numpy as np
from rpy2.robjects import r, pandas2ri

%load_ext autoreload
%autoreload 2
%load_ext rpy2.ipython


import sys
sys.path.append('/home/divar/projects/geometric-sampling')
import geometric_sampling

import geometric_sampling as gs
from geometric_sampling.search.astar import AStar
from geometric_sampling.design import Design
from geometric_sampling.criteria.var_nht import VarNHT



In [2]:
%%R
#install.packages("sampling")
library(sampling)

# Reading Data

In [3]:
# Python cell ----

import pandas as pd

# Read config file
params = pd.read_csv("config_origin.csv")

# Extract all columns as regular Python variables (from row 0)
for col in params.columns:
    globals()[col] = params.at[0, col]

# Now you have: bardia_balance_method, num_new_nodes, ... etc. as plain Python variables

In [4]:
%%R
# Read regular data (returns data.frame)

df <- read.csv("cor_zy_7.5_py_9.5.csv")

y   <- df$y
z   <- df$z
pik <- df$pik

# Read "extra" data (returns data.frame of one row; use as.list to extract)
extra <- read.csv("cor_zy_7.5_py_9.5_extra.csv")
 list2env(extra, .GlobalEnv)


<environment: R_GlobalEnv>


# Astar

In [5]:
y = np.array(ro.r['y'])
z = np.array(ro.r['z'])
n = np.array(ro.r['n'])
N = np.array(ro.r['N'])

inclusions = np.array(ro.r['pik'])

threshold_z = float(ro.r['var_ht_z'][0])
threshold_y = float(ro.r['var_ht_y'][0])

threshold_z_cube = float(ro.r['var_cube_z'][0])
threshold_y_cube = float(ro.r['var_cube_y'][0])

inclusions = np.array(inclusions)



rng = np.random.default_rng()  # Recommended for controlled/stable random runs

print(max_iterations, num_new_nodes, max_open_set_size, switch_lower, switch_upper)

criterion = VarNHT(main_variable=y, auxiliary_variable=z, 
                   inclusion_probability=inclusions, balance_method=bardia_balance_method)

astar = AStar(
    inclusions              = inclusions,
    num_initial_nodes       = num_initial_nodes,
    initial_design_to_use   = initial_design_to_use,
    criteria                = criterion,
    z                       = z,
    y                       = y,
    threshold_z             = threshold_z,
    threshold_y             = threshold_y,
    threshold_z_cube             = threshold_z_cube,
    threshold_y_cube             = threshold_y_cube,
    switch_lower            = switch_lower,
    switch_upper            = switch_upper,
    num_changes_lower       = num_changes_lower,
    num_changes_upper       = num_changes_upper,
    show_results            = show_results,
    var_percent_exected     = var_percent_exected,
    swap_iterations         = swap_iterations,
    swap_distance           = swap_distance,
    swap_units              = swap_units,
    rng                     = rng,                         
)

result = astar.run(
    max_iterations             = max_iterations,
    num_new_nodes              = num_new_nodes,
    max_open_set_size          = max_open_set_size,
    random_restart_period      = random_restart_period,
    random_injection_count     = random_injection_count,
    prune_fraction             = prune_fraction,
    num_top_restart_nodes      = num_top_restart_nodes,
    stuck_fraction             = stuck_fraction,

)

print("Best criteria value:", astar.best_criteria_value)
print("Iterations run:", result)
print("Best criteria value:", astar.best_criteria_value)
print("Iterations run:", result)
# print(np.round(var_z_0, 1))
# print(np.round(var_y_0, 1))
astar.initial_design.show()
astar.best_design.show()

5000 30 200000 0.7 0.9


Generating initial designs:   0%|                      | 0/3000 [00:00<?, ?it/s]

Initial design 0: method = Original, efficiency z = 0.1843, y=0.7838
New best at idx=0: method = Original, efficiency z = 0.1843 and efficiency y = 0.7838
New best at idx=2: method = z/pi, efficiency z = 0.1843 and efficiency y = 0.7838
New best at idx=3: method = swap, efficiency z = 0.1997 and efficiency y = 0.7048
New best at idx=4: method = swap, efficiency z = 0.2032 and efficiency y = 0.7575
New best at idx=5: method = swap, efficiency z = 0.2155 and efficiency y = 0.7487


Generating initial designs:   2%|▏           | 51/3000 [00:00<00:23, 126.28it/s]

New best at idx=21: method = swap, efficiency z = 0.2159 and efficiency y = 0.5860
New best at idx=23: method = swap, efficiency z = 0.2191 and efficiency y = 0.8001
New best at idx=37: method = swap, efficiency z = 0.2267 and efficiency y = 0.9474
New best at idx=42: method = swap, efficiency z = 0.2433 and efficiency y = 0.8376


Generating initial designs:  18%|██▏         | 551/3000 [00:06<00:26, 93.78it/s]

New best at idx=529: method = swap, efficiency z = 0.2917 and efficiency y = 0.9090


Generating initial designs: 100%|███████████| 3000/3000 [00:44<00:00, 66.85it/s]



Progress: 0.0% 
=== Best Solution Updated at Iteration 0 ===
  Balancing Method:    origine
  Real and E(.) z,y:   50480.78→50480.78, 10043.62→10043.62
  Best Cost (z):       253968.803
  Best Cost (y):       10440.6984
  number of new noded: 30
  rho (z, y):          0.792
  rho (z_hat, y_hat):  0.515
  rho (p, y):          0.926
  Criteria Value:      253968.803
  Efficiency z (0→f):  0.184 → 0.292 → dsd 0.2924→ cube 0.9644
  Efficiency y (0→f):  0.784 → 0.909 → dsd 0.9531→ cube 0.9871
  Efficiency z (srs):  10.204 → 0.292 → 16.1889
  Efficiency y (srs):  6.994 → 0.909 → 8.5057
  Alpha:               0.7216944166255966
  Num changes:         2
  Design Depth:        2
  Design Size (|D|):   104
  Open set size:       116

Progress: 0.0% 
=== Best Solution Updated at Iteration 1 ===
  Balancing Method:    origine
  Real and E(.) z,y:   50480.78→50480.78, 10043.62→10043.62
  Best Cost (z):       233238.805
  Best Cost (y):       9791.5104
  number of new noded: 30
  rho (z, y):       

KeyboardInterrupt: 

In [6]:
import pandas as pd
print("size of init design",len(list(astar.initial_design)))
print("size of best design", len(list(astar.best_design)))
#print(astar.initial_design.criteria_value())
print(astar.criteria(astar.initial_design))
print(astar.criteria(astar.best_design))
print(N)
sum_inclusion_last = 0
sum_inclusion_init = 0
for k in range(int(N)):
    size_last = 0
    prob_last = 0
    len_last = 0
    prob_last_second = 0
    prob = 0
    z_ests = []
    y_ests = []
    probs  = []
    for item in astar.best_design:
        prob += item.probability
        size_last += 1
        len_last += len(item.ids)
        probs.append(item.probability)
        ids = np.array(list(item.ids), dtype=int)
        z_ests.append(np.sum(z[ids]/inclusions[ids]))
        y_ests.append(np.sum(y[ids]/inclusions[ids]))
            
        if k in item.ids:
            
            #print(np.round(np.sum(y[ids]/inclusions[ids])), np.round(np.sum(z[ids]/inclusions[ids])))
            prob_last += item.probability
            if k+1 in item.ids:
                prob_last_second += item.probability
    sum_inclusion_last += prob_last
        #print(item)
    size_ini = 0
    prob_ini = 0
    len_ini = 0
    prob_ini_second = 0
    for item in astar.initial_design:
        len_ini += len(item.ids)
        size_ini += 1
        if k in item.ids:
            #print(item.probability)
            prob_ini += item.probability
            if k+1 in item.ids:
                prob_ini_second += item.probability
    sum_inclusion_init += prob_ini
    
        #print(item)
    #print(np.round(prob_last, 3), np.round(prob_ini, 3), np.round(inclusions[k], 3))
    if  not (len_last%n == 0 and len_ini%n == 0 and 
        np.round(prob_last, 3) == np.round(prob_ini, 3) and 
        np.round(prob_last, 3) == np.round(inclusions[k], 3)):
        print('yessssss')
print('sum of inclusions, last ini and original', sum_inclusion_last, sum_inclusion_init, sum(inclusions))
print(prob)
print('n = ', n)
#print(pd.concat([pd.Series(z_ests), pd.Series(y_ests), pd.Series(probs)], axis=1))
y_ests = np.array(y_ests)
z_ests = np.array(z_ests)
probs = np.array(probs)
print(np.sum(y), np.sum(z), np.sum(y_ests*probs), np.sum(z_ests*probs))
print(np.sum(y), np.sum(z), np.sum(y_ests*probs), np.sum(z_ests*probs))
print('var z estimates', np.round(np.sum(z_ests**2*probs)-np.sum(z_ests*probs)**2, 3))
print('var y estimates', np.round(np.sum(y_ests**2*probs)-np.sum(y_ests*probs)**2, 3))

size of init design 100
size of best design 2268
254549.70661918845
63276.84269422789
[100]


  for k in range(int(N)):


sum of inclusions, last ini and original 10.000000020000005 10.000000020000005 10.0
1.0000000020000002
n =  [10]
10043.617183059718 50480.78213352054 10043.617202861926 50480.782232546604
10043.617183059718 50480.78213352054 10043.617202861926 50480.782232546604
var z estimates 63271.746
var y estimates 9415.568
