# Packages

In [1]:
import matplotlib.pyplot as plt
import rpy2.robjects as ro
from rpy2.robjects.packages import importr
import numpy as np
from rpy2.robjects import r, pandas2ri

%load_ext autoreload
%autoreload 2
%load_ext rpy2.ipython


import sys
sys.path.append('/home/divar/projects/geometric-sampling')
import geometric_sampling

import geometric_sampling as gs
from geometric_sampling.search.astar import AStar
from geometric_sampling.design import Design
from geometric_sampling.criteria.var_nht import VarNHT



In [2]:
%%R
#install.packages("sampling")
library(sampling)

# Reading Data

In [3]:
# Python cell ----

import pandas as pd

# Read config file
params = pd.read_csv("config_origin.csv")

# Extract all columns as regular Python variables (from row 0)
for col in params.columns:
    globals()[col] = params.at[0, col]

# Now you have: bardia_balance_method, num_new_nodes, ... etc. as plain Python variables

In [4]:
%%R
# Read regular data (returns data.frame)

df <- read.csv("cor_zy_9.5_py_9.5.csv")

y   <- df$y
z   <- df$z
pik <- df$pik

# Read "extra" data (returns data.frame of one row; use as.list to extract)
extra <- read.csv("cor_zy_9.5_py_9.5_extra.csv")
 list2env(extra, .GlobalEnv)


<environment: R_GlobalEnv>


# Astar

In [5]:
y = np.array(ro.r['y'])
z = np.array(ro.r['z'])
n = np.array(ro.r['n'])
N = np.array(ro.r['N'])

inclusions = np.array(ro.r['pik'])

threshold_z = float(ro.r['var_ht_z'][0])
threshold_y = float(ro.r['var_ht_y'][0])

threshold_z_cube = float(ro.r['var_cube_z'][0])
threshold_y_cube = float(ro.r['var_cube_y'][0])

inclusions = np.array(inclusions)



rng = np.random.default_rng()  # Recommended for controlled/stable random runs

print(max_iterations, num_new_nodes, max_open_set_size, switch_lower, switch_upper)

criterion = VarNHT(main_variable=y, auxiliary_variable=z, 
                   inclusion_probability=inclusions, balance_method=bardia_balance_method)

astar = AStar(
    inclusions              = inclusions,
    num_initial_nodes       = num_initial_nodes,
    initial_design_to_use   = initial_design_to_use,
    criteria                = criterion,
    z                       = z,
    y                       = y,
    threshold_z             = threshold_z,
    threshold_y             = threshold_y,
    threshold_z_cube             = threshold_z_cube,
    threshold_y_cube             = threshold_y_cube,
    switch_lower            = switch_lower,
    switch_upper            = switch_upper,
    num_changes_lower       = num_changes_lower,
    num_changes_upper       = num_changes_upper,
    show_results            = show_results,
    var_percent_exected     = var_percent_exected,
    swap_iterations         = swap_iterations,
    swap_distance           = swap_distance,
    swap_units              = swap_units,
    rng                     = rng,                         
)

result = astar.run(
    max_iterations             = max_iterations,
    num_new_nodes              = num_new_nodes,
    max_open_set_size          = max_open_set_size,
    random_restart_period      = random_restart_period,
    random_injection_count     = random_injection_count,
    prune_fraction             = prune_fraction,
    num_top_restart_nodes      = num_top_restart_nodes,
    stuck_fraction             = stuck_fraction,

)

print("Best criteria value:", astar.best_criteria_value)
print("Iterations run:", result)
print("Best criteria value:", astar.best_criteria_value)
print("Iterations run:", result)
# print(np.round(var_z_0, 1))
# print(np.round(var_y_0, 1))
astar.initial_design.show()
astar.best_design.show()

5000 30 200000 0.7 0.9


Generating initial designs:   0%|                      | 0/3000 [00:00<?, ?it/s]

Initial design 0: method = Original, efficiency z = 0.2523, y=1.0293
New best at idx=0: method = Original, efficiency z = 0.2523 and efficiency y = 1.0293
New best at idx=2: method = z/pi, efficiency z = 0.2523 and efficiency y = 1.0293
New best at idx=3: method = swap, efficiency z = 0.2659 and efficiency y = 0.7536


Generating initial designs:   1%|             | 23/3000 [00:00<00:56, 52.82it/s]

New best at idx=6: method = swap, efficiency z = 0.2738 and efficiency y = 1.2302
New best at idx=12: method = swap, efficiency z = 0.2832 and efficiency y = 1.1019
New best at idx=17: method = swap, efficiency z = 0.2841 and efficiency y = 0.6509
New best at idx=27: method = swap, efficiency z = 0.2907 and efficiency y = 1.6004


Generating initial designs:   1%|▏            | 40/3000 [00:00<00:48, 60.99it/s]

New best at idx=32: method = swap, efficiency z = 0.3704 and efficiency y = 0.6209


Generating initial designs:  26%|███         | 770/3000 [00:12<00:40, 55.36it/s]

New best at idx=762: method = swap, efficiency z = 0.4181 and efficiency y = 0.9086


Generating initial designs:  49%|█████▍     | 1477/3000 [00:25<00:28, 53.74it/s]

New best at idx=1470: method = swap, efficiency z = 0.4556 and efficiency y = 1.3140


Generating initial designs: 100%|███████████| 3000/3000 [00:51<00:00, 58.77it/s]



Progress: 0.0% 
=== Best Solution Updated at Iteration 0 ===
  Balancing Method:    origine
  Real and E(.) z,y:   50171.04→50171.04, 10043.62→10043.62
  Best Cost (z):       62644.221
  Best Cost (y):       4092.0394
  number of new noded: 30
  rho (z, y):          0.951
  rho (z_hat, y_hat):  0.755
  rho (p, y):          0.926
  Criteria Value:      62644.221
  Efficiency z (0→f):  0.252 → 0.456 → dsd 0.4559→ cube 1.1782
  Efficiency y (0→f):  1.029 → 1.314 → dsd 1.4332→ cube 1.5992
  Efficiency z (srs):  22.636 → 0.456 → 40.8958
  Efficiency y (srs):  15.585 → 1.314 → 21.702
  Alpha:               0.7699889233946461
  Num changes:         1
  Design Depth:        1
  Design Size (|D|):   102
  Open set size:       100


=== Best Solution Updated at Iteration 0 ===
  Balancing Method:    origine
  Real and E(.) z,y:   50171.04→50171.04, 10043.62→10043.62
  Best Cost (z):       61769.533
  Best Cost (y):       4230.8673
  number of new noded: 30
  rho (z, y):          0.951
  rho (z_

KeyboardInterrupt: 

In [6]:
import pandas as pd
print("size of init design",len(list(astar.initial_design)))
print("size of best design", len(list(astar.best_design)))
#print(astar.initial_design.criteria_value())
print(astar.criteria(astar.initial_design))
print(astar.criteria(astar.best_design))
print(N)
sum_inclusion_last = 0
sum_inclusion_init = 0
for k in range(int(N)):
    size_last = 0
    prob_last = 0
    len_last = 0
    prob_last_second = 0
    prob = 0
    z_ests = []
    y_ests = []
    probs  = []
    for item in astar.best_design:
        prob += item.probability
        size_last += 1
        len_last += len(item.ids)
        probs.append(item.probability)
        ids = np.array(list(item.ids), dtype=int)
        z_ests.append(np.sum(z[ids]/inclusions[ids]))
        y_ests.append(np.sum(y[ids]/inclusions[ids]))
            
        if k in item.ids:
            
            #print(np.round(np.sum(y[ids]/inclusions[ids])), np.round(np.sum(z[ids]/inclusions[ids])))
            prob_last += item.probability
            if k+1 in item.ids:
                prob_last_second += item.probability
    sum_inclusion_last += prob_last
        #print(item)
    size_ini = 0
    prob_ini = 0
    len_ini = 0
    prob_ini_second = 0
    for item in astar.initial_design:
        len_ini += len(item.ids)
        size_ini += 1
        if k in item.ids:
            #print(item.probability)
            prob_ini += item.probability
            if k+1 in item.ids:
                prob_ini_second += item.probability
    sum_inclusion_init += prob_ini
    
        #print(item)
    #print(np.round(prob_last, 3), np.round(prob_ini, 3), np.round(inclusions[k], 3))
    if  not (len_last%n == 0 and len_ini%n == 0 and 
        np.round(prob_last, 3) == np.round(prob_ini, 3) and 
        np.round(prob_last, 3) == np.round(inclusions[k], 3)):
        print('yessssss')
print('sum of inclusions, last ini and original', sum_inclusion_last, sum_inclusion_init, sum(inclusions))
print(prob)
print('n = ', n)
#print(pd.concat([pd.Series(z_ests), pd.Series(y_ests), pd.Series(probs)], axis=1))
y_ests = np.array(y_ests)
z_ests = np.array(z_ests)
probs = np.array(probs)
print(np.sum(y), np.sum(z), np.sum(y_ests*probs), np.sum(z_ests*probs))
print(np.sum(y), np.sum(z), np.sum(y_ests*probs), np.sum(z_ests*probs))
print('var z estimates', np.round(np.sum(z_ests**2*probs)-np.sum(z_ests*probs)**2, 3))
print('var y estimates', np.round(np.sum(y_ests**2*probs)-np.sum(y_ests*probs)**2, 3))

size of init design 100
size of best design 2150
62675.19812545414
16149.744664962844
[100]


  for k in range(int(N)):


sum of inclusions, last ini and original 10.000000010000008 10.000000010000006 10.0
1.000000000999997
n =  [10]
10043.617183059718 50171.04438153942 10043.617193015132 50171.04443165533
10043.617183059718 50171.04438153942 10043.617193015132 50171.04443165533
var z estimates 16147.228
var y estimates 5336.008
