# Packages

In [2]:
import matplotlib.pyplot as plt
import rpy2.robjects as ro
from rpy2.robjects.packages import importr
import numpy as np
from rpy2.robjects import r, pandas2ri

%load_ext autoreload
%autoreload 2
%load_ext rpy2.ipython


import sys
sys.path.append('/home/divar/projects/geometric-sampling')
import geometric_sampling

import geometric_sampling as gs
from geometric_sampling.search.astar import AStar
from geometric_sampling.design import Design
from geometric_sampling.criteria.var_nht import VarNHT

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
The rpy2.ipython extension is already loaded. To reload it, use:
  %reload_ext rpy2.ipython


In [3]:
%%R
#install.packages("sampling")
library(sampling)

# Reading Data

In [4]:
# Run configuration ----

import pandas as pd

# Load the run-configuration table from disk.
params = pd.read_csv("config_origin.csv")

# Publish every column of the table as a notebook-level variable whose value
# is that column's entry in row 0.
globals().update({col: params.at[0, col] for col in params.columns})

# Later cells read these names directly (e.g. bardia_balance_method, num_new_nodes).

In [5]:
%%R
# Population table, read into a data.frame.
df <- read.csv("cor_zy_8.5_py_8.5.csv")

# Pull the three columns used downstream out into top-level vectors.
y   <- df[["y"]]
z   <- df[["z"]]
pik <- df[["pik"]]

# Companion "extra" table (a one-row data.frame according to the original
# note). list2env() copies each of its columns into the global environment
# as a variable named after the column.
# NOTE(review): the population file stem is "cor_zy_8.5_py_8.5" but this file's
# stem is "cor_zy_7_py_7" -- confirm both files describe the same population.
extra <- read.csv("cor_zy_7_py_7_extra.csv")
list2env(extra, envir = .GlobalEnv)


<environment: R_GlobalEnv>


# Astar

In [None]:
# --- Bring the population data over from the embedded R session -------------
y = np.array(ro.r['y'])
z = np.array(ro.r['z'])
inclusions = np.array(ro.r['pik'])

# Kept as NumPy arrays exactly as before so later cells that read `n` / `N`
# keep working (presumably length-1, since they come from R vectors -- TODO confirm).
n = np.array(ro.r['n'])
N = np.array(ro.r['N'])

# Reference values read from the R globals var_ht_* and var_cube_*; the first
# element of each R vector is taken and converted to a Python float.
threshold_z = float(ro.r['var_ht_z'][0])
threshold_y = float(ro.r['var_ht_y'][0])

threshold_z_cube = float(ro.r['var_cube_z'][0])
threshold_y_cube = float(ro.r['var_cube_y'][0])

# --- Random number generator -------------------------------------------------
# Seeded explicitly: an unseeded default_rng() draws fresh OS entropy on every
# run, so results could not be reproduced. Change SEED for a different run.
# NOTE(review): this only makes the search repeatable if AStar takes all of its
# randomness from `rng` -- confirm against the library.
SEED = 42
rng = np.random.default_rng(SEED)

# Echo the key search settings (loaded from the config file) for the run log.
print(max_iterations, num_new_nodes, max_open_set_size, switch_lower, switch_upper)

# --- Criterion and search set-up --------------------------------------------
criterion = VarNHT(
    main_variable=y,
    auxiliary_variable=z,
    inclusion_probability=inclusions,
    balance_method=bardia_balance_method,
)

# Every keyword below other than the data/threshold arguments is a variable
# created from the config file in an earlier cell.
astar = AStar(
    inclusions              = inclusions,
    num_initial_nodes       = num_initial_nodes,
    initial_design_to_use   = initial_design_to_use,
    criteria                = criterion,
    z                       = z,
    y                       = y,
    threshold_z             = threshold_z,
    threshold_y             = threshold_y,
    threshold_z_cube        = threshold_z_cube,
    threshold_y_cube        = threshold_y_cube,
    switch_lower            = switch_lower,
    switch_upper            = switch_upper,
    num_changes_lower       = num_changes_lower,
    num_changes_upper       = num_changes_upper,
    show_results            = show_results,
    var_percent_exected     = var_percent_exected,  # keyword spelling as in the original AStar call
    swap_iterations         = swap_iterations,
    swap_distance           = swap_distance,
    swap_units              = swap_units,
    rng                     = rng,
)

# --- Run the search ----------------------------------------------------------
result = astar.run(
    max_iterations             = max_iterations,
    num_new_nodes              = num_new_nodes,
    max_open_set_size          = max_open_set_size,
    random_restart_period      = random_restart_period,
    random_injection_count     = random_injection_count,
    prune_fraction             = prune_fraction,
    num_top_restart_nodes      = num_top_restart_nodes,
    stuck_fraction             = stuck_fraction,
)

# --- Report ------------------------------------------------------------------
print("Best criteria value:", astar.best_criteria_value)
print("Iterations run:", result)
astar.initial_design.show()
astar.best_design.show()

5000 30 200000 0.7 0.9


Generating initial designs:   0%|                      | 0/3000 [00:00<?, ?it/s]

Initial design 0: method = Original, efficiency z = 0.2145, y=0.3669
New best at idx=0: method = Original, efficiency z = 0.2145 and efficiency y = 0.3669
New best at idx=2: method = z/pi, efficiency z = 0.2145 and efficiency y = 0.3669
New best at idx=4: method = swap, efficiency z = 0.2147 and efficiency y = 0.4066


Generating initial designs:   0%|              | 6/3000 [00:00<00:59, 50.48it/s]

New best at idx=6: method = swap, efficiency z = 0.2274 and efficiency y = 0.3923
New best at idx=8: method = swap, efficiency z = 0.2435 and efficiency y = 0.6179


Generating initial designs:   2%|▏            | 49/3000 [00:01<00:58, 50.54it/s]

New best at idx=42: method = swap, efficiency z = 0.2615 and efficiency y = 0.3786


Generating initial designs:   7%|▊           | 203/3000 [00:03<00:40, 69.37it/s]

New best at idx=189: method = swap, efficiency z = 0.2636 and efficiency y = 0.4145


Generating initial designs:  18%|██          | 530/3000 [00:09<00:44, 55.07it/s]

New best at idx=520: method = swap, efficiency z = 0.2680 and efficiency y = 0.5043


Generating initial designs:  24%|██▉         | 724/3000 [00:11<00:29, 75.90it/s]

New best at idx=708: method = swap, efficiency z = 0.2729 and efficiency y = 0.4145


Generating initial designs:  69%|███████▌   | 2077/3000 [00:31<00:12, 74.79it/s]

New best at idx=2059: method = swap, efficiency z = 0.2741 and efficiency y = 0.5071


Generating initial designs: 100%|███████████| 3000/3000 [00:44<00:00, 66.67it/s]



Progress: 0.0% 
=== Best Solution Updated at Iteration 0 ===
  Balancing Method:    origine
  Real and E(.) z,y:   49886.93→49886.93, 9968.83→9968.83
  Best Cost (z):       765904.025
  Best Cost (y):       62744.6366
  number of new noded: 30
  rho (z, y):          0.706
  rho (z_hat, y_hat):  0.763
  rho (p, y):          0.668
  Criteria Value:      765904.025
  Efficiency z (0→f):  0.214 → 0.274 → dsd 0.2743→ cube 0.7112
  Efficiency y (0→f):  0.367 → 0.507 → dsd 0.5058→ cube 0.5133
  Efficiency z (srs):  3.526 → 0.274 → 4.5087
  Efficiency y (srs):  0.771 → 0.507 → 1.0628
  Alpha:               0.7771396447883352
  Num changes:         2
  Design Depth:        2
  Design Size (|D|):   104
  Open set size:       109

Progress: 0.0% 
=== Best Solution Updated at Iteration 1 ===
  Balancing Method:    origine
  Real and E(.) z,y:   49886.93→49886.93, 9968.83→9968.83
  Best Cost (z):       677370.735
  Best Cost (y):       61995.1704
  number of new noded: 30
  rho (z, y):          0.

In [None]:
import pandas as pd


def _summarize_design(design, z, y, inclusions):
    """Collect per-sample and per-unit statistics of a design in a single pass.

    Parameters
    ----------
    design : iterable
        Iterated once. Each item must expose ``ids`` (an iterable of integer
        unit indices valid for ``z``, ``y`` and ``inclusions``) and
        ``probability`` (a number).
    z, y, inclusions : numpy.ndarray
        1-D arrays indexed by unit id.

    Returns
    -------
    dict
        ``size``       number of items in the design
        ``total_ids``  sum of ``len(ids)`` over all items
        ``total_prob`` sum of the item probabilities
        ``probs``      array of item probabilities
        ``z_ests``     array of ``sum(z[ids] / inclusions[ids])`` per item
        ``y_ests``     array of ``sum(y[ids] / inclusions[ids])`` per item
        ``unit_prob``  array (length ``len(inclusions)``) holding, for each
                       unit, the total probability of the items containing it
    """
    unit_prob = np.zeros(len(inclusions))
    probs, z_ests, y_ests = [], [], []
    size = 0
    total_ids = 0
    total_prob = 0
    for item in design:
        ids = np.array(list(item.ids), dtype=int)
        # Fancy-indexed += adds the value once per distinct index, which is
        # exactly the "unit in item.ids" membership rule.
        unit_prob[ids] += item.probability
        size += 1
        total_ids += len(ids)
        total_prob += item.probability
        probs.append(item.probability)
        z_ests.append(np.sum(z[ids] / inclusions[ids]))
        y_ests.append(np.sum(y[ids] / inclusions[ids]))
    return {
        "size": size,
        "total_ids": total_ids,
        "total_prob": total_prob,
        "probs": np.array(probs),
        "z_ests": np.array(z_ests),
        "y_ests": np.array(y_ests),
        "unit_prob": unit_prob,
    }


# One pass over each design instead of re-scanning both designs for every unit.
best_stats = _summarize_design(astar.best_design, z, y, inclusions)
init_stats = _summarize_design(astar.initial_design, z, y, inclusions)

print("size of init design", init_stats["size"])
print("size of best design", best_stats["size"])
print(astar.criteria(astar.initial_design))
print(astar.criteria(astar.best_design))
print(N)

# `N` and `n` may be length-1 arrays (they are built with np.array in the
# search cell); unwrap them so range()/modulo operate on plain scalars.
population_size = int(np.asarray(N).reshape(-1)[0])
sample_size = np.asarray(n).reshape(-1)[0]

# Check 1: the total number of selected ids in each design is a multiple of n.
if not (best_stats["total_ids"] % sample_size == 0
        and init_stats["total_ids"] % sample_size == 0):
    print(
        f"sample-size check failed: total ids best={best_stats['total_ids']}, "
        f"initial={init_stats['total_ids']} are not both multiples of n={sample_size}"
    )

# Check 2: every unit's inclusion probability agrees between the best design,
# the initial design and the target `inclusions`. An absolute tolerance is used
# instead of comparing 3-decimal roundings, which raised false alarms when two
# nearly equal values fell on opposite sides of a rounding boundary.
INCLUSION_TOL = 1e-3
pi_best = best_stats["unit_prob"][:population_size]
pi_init = init_stats["unit_prob"][:population_size]
pi_target = inclusions[:population_size]
mismatch = ~(
    np.isclose(pi_best, pi_init, rtol=0, atol=INCLUSION_TOL)
    & np.isclose(pi_best, pi_target, rtol=0, atol=INCLUSION_TOL)
)
for k in np.flatnonzero(mismatch):
    print(
        f"inclusion mismatch at unit {k}: best={pi_best[k]:.4f}, "
        f"initial={pi_init[k]:.4f}, target={pi_target[k]:.4f}"
    )

# Names kept from the original cell in case later cells read them.
sum_inclusion_last = pi_best.sum()
sum_inclusion_init = pi_init.sum()
prob = best_stats["total_prob"]
probs = best_stats["probs"]
z_ests = best_stats["z_ests"]
y_ests = best_stats["y_ests"]

print('sum of inclusions, last ini and original', sum_inclusion_last, sum_inclusion_init, sum(inclusions))
print(prob)
print('n = ', n)

# Population totals next to the probability-weighted means of the per-sample estimates.
print(np.sum(y), np.sum(z), np.sum(y_ests*probs), np.sum(z_ests*probs))
# Variance of the per-sample estimates under the best design: E[X^2] - E[X]^2.
print('var z estimates', np.round(np.sum(z_ests**2*probs)-np.sum(z_ests*probs)**2, 3))
print('var y estimates', np.round(np.sum(y_ests**2*probs)-np.sum(y_ests*probs)**2, 3))