In [11]:
import pandas as pd
import numpy as np
df = pd.read_csv("hypercube_sampling_parameters_250711.csv")
df.head(5)


Unnamed: 0,potential1 (mV vs Hg/HgO),hold1 (s),potential3 (mV vs Hg/HgO),hold2 (s),potential2 (mV vs Hg/HgO),sweep speed (mV/s),cycle (P2-P3),duration (s),duration (h),duration (min)
0,-1197,926,-693,0,900,119,100,2268.890756,0.630247,38.0
1,-501,653,-1401,0,900,419,100,1204.312649,0.334531,20.0
2,-544,741,365,0,900,574,100,835.789199,0.232164,14.0
3,-956,141,-1719,0,900,955,100,416.039791,0.115567,7.0
4,-1804,1618,-1761,0,900,821,100,1942.169306,0.539491,32.0


In [12]:
potential1_range = [-2000, 2000]  
potential3_range = [-2000, 400]  
potential2_range = [900]  
hold1_range = [0, 1800]  
hold2_range = [0]  
sweep_speed_range = [10, 1000]  
cycle_range = [100]
bounds = [
    potential1_range,
    potential3_range,
    hold1_range,
    sweep_speed_range,
]

In [65]:
def get_bins(bound, nob):
    return np.linspace(bound[0],bound[1], nob +1 )

def get_occupied_bins(data, bins):
    print(data)
    nob = len(bins-1)
    occupied = []
    for i,b in enumerate(bins[:-1]):
        for d in data:
            if d>=bins[i] and d<=bins[i+1]:
                occupied.append(i)
                
                
    return list(set(occupied))

np.random.seed(42)            
n = 15
n_samples = 10
final_res = {}
for bound, col in zip(bounds,[0,2,1,5]):
    bins = get_bins(bound,n)
    occ = get_occupied_bins(df[df.columns[col]].tolist()[:5], bins)

    bins_idx = np.arange(0,n)
    free_bins = np.setdiff1d(bins_idx, occ)
    
    values = np.random.choice(free_bins, size=n_samples, replace=False)
    np.random.shuffle(values)
    res = []
    for v in values:
        res.append(int(np.round((bins[v+1]-bins[v])*np.random.random_sample()+bins[v],0)))
    final_res.update({col:res})
    print(col, occ,bound,free_bins, values, res)
    print(bins)

[-1197, -501, -544, -956, -1804]
0 [0, 3, 5] [-2000, 2000] [ 1  2  4  6  7  8  9 10 11 12 13 14] [13  7  2 11 12  8  4  1 10 14] [1467, 131, -1302, 1096, 1202, 139, -793, -1627, 679, 1993]
[-2000.         -1733.33333333 -1466.66666667 -1200.
  -933.33333333  -666.66666667  -400.          -133.33333333
   133.33333333   400.           666.66666667   933.33333333
  1200.          1466.66666667  1733.33333333  2000.        ]
[-693, -1401, 365, -1719, -1761]
2 [8, 1, 3, 14] [-2000, 400] [ 0  2  4  5  6  7  9 10 11 12 13] [13 11  6 10  9  2  7 12  0  5] [129, -224, -931, -330, -540, -1601, -874, 65, -1959, -1094]
[-2000. -1840. -1680. -1520. -1360. -1200. -1040.  -880.  -720.  -560.
  -400.  -240.   -80.    80.   240.   400.]
[926, 653, 741, 141, 1618]
1 [1, 5, 6, 7, 13] [0, 1800] [ 0  2  3  4  8  9 10 11 12 14] [ 8 10  2  0  9 12 14  4  3 11] [1061, 1290, 305, 70, 1196, 1513, 1713, 516, 380, 1322]
[   0.  120.  240.  360.  480.  600.  720.  840.  960. 1080. 1200. 1320.
 1440. 1560. 1680. 1

In [66]:
final_res


{0: [1467, 131, -1302, 1096, 1202, 139, -793, -1627, 679, 1993],
 2: [129, -224, -931, -330, -540, -1601, -874, 65, -1959, -1094],
 1: [1061, 1290, 305, 70, 1196, 1513, 1713, 516, 380, 1322],
 5: [249, 692, 344, 889, 757, 58, 316, 663, 503, 150]}

In [71]:
import math
potential1 = np.array(final_res[0])
potential3 = np.array(final_res[2])
hold1 = np.array(final_res[1])
sweep_speed = np.array(final_res[5])

num_samples = len(potential1)
potential2 = potential2_range * num_samples
hold2 = hold2_range * num_samples
#cycles = [100] * num_samples

interval1 = hold1 + np.abs(potential1 - potential3)/sweep_speed
interval2 = np.abs(potential3 - potential2)/sweep_speed + hold2
cycles = [min(100, math.floor((3600 - interval1[i]) / interval2[i])) for i in range(len(interval1))]
duration = interval1 + cycles*interval2

sample_df = pd.DataFrame({
    'potential1 (mV vs Hg/HgO)': potential1,
    'hold1 (s)': hold1,
    'potential3 (mV vs Hg/HgO)': potential3,
    'hold2 (s)': hold2,
    'potential2 (mV vs Hg/HgO)': potential2,
    'sweep speed (mV/s)': sweep_speed,
    'cycle (P2-P3)': cycles,
    'duration (s)': duration,
    'duration (h)': duration/3600,
    'duration (min)': np.round(duration/60),
})
sample_df.to_csv("10_additional_samples_chosen_so_that_the_first_5_samples_are_not_used.csv")
sample_df

Unnamed: 0,potential1 (mV vs Hg/HgO),hold1 (s),potential3 (mV vs Hg/HgO),hold2 (s),potential2 (mV vs Hg/HgO),sweep speed (mV/s),cycle (P2-P3),duration (s),duration (h),duration (min)
0,1467,1061,129,0,900,249,100,1376.012048,0.382226,23.0
1,131,1290,-224,0,900,692,100,1452.940751,0.403595,24.0
2,-1302,305,-931,0,900,344,100,838.34593,0.232874,14.0
3,1096,70,-330,0,900,889,100,209.961755,0.058323,3.0
4,1202,1196,-540,0,900,757,100,1388.52576,0.385702,23.0
5,139,1513,-1601,0,900,58,47,3569.672414,0.991576,59.0
6,-793,1713,-874,0,900,316,100,2274.648734,0.631847,38.0
7,-1627,516,65,0,900,663,100,644.494721,0.179026,11.0
8,679,380,-1959,0,900,503,100,953.634195,0.264898,16.0
9,1993,1322,-1094,0,900,150,100,2671.913333,0.742198,45.0


In [72]:
interval2

array([ 3.09638554,  1.62427746,  5.32267442,  1.38357705,  1.90224571,
       43.12068966,  5.61392405,  1.25942685,  5.68389662, 13.29333333])

In [73]:
interval1

array([1066.37349398, 1290.51300578,  306.07848837,   71.60404949,
       1198.3011889 , 1543.        , 1713.25632911,  518.5520362 ,
        385.2445328 , 1342.58      ])