In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

In [2]:
# Import BinarySpaceSearch from a local checkout of ratchet-search by
# temporarily switching the working directory to that checkout.
# The checkout location can be overridden with the RATCHET_SEARCH_ROOT
# environment variable; the default is the original hard-coded path.
current_dir = os.getcwd()
ROOT_PATH = os.environ.get(
    'RATCHET_SEARCH_ROOT',
    '/Users/robinburke/Documents/research/ratchet-search',
)
os.chdir(ROOT_PATH)
try:
    from ratchet_search import BinarySpaceSearch
finally:
    # Always return to the notebook's directory, even when the import fails,
    # so the relative CSV paths used by later cells still resolve.
    os.chdir(current_dir)

In [58]:
# Load the probability table and split it into a top group and a bottom
# group according to the 'yes_hp_en' column (highest values first).
data_file = "hp_ret_pg_probs.csv"
N_TOP_HP = 1125  # number of highest-'yes_hp_en' rows that form the top group

data = pd.read_csv(data_file)
data['UNIQUE_ID'] = data['UNIQUE_ID'].astype('int')

# Deliberately not named `sorted`: binding that name would shadow the Python
# builtin for every later cell in the kernel session.
data_sorted = (
    data.sort_values('yes_hp_en', ascending=False)
        .reset_index(drop=True)
)
top_hp = data_sorted[:N_TOP_HP]
bot_hp = data_sorted[N_TOP_HP:]

In [27]:
def nodes2df(nodes, labels):
    """Convert an iterable of search nodes into a DataFrame, one row per node.

    Parameters
    ----------
    nodes : iterable
        Objects exposing an ``id`` attribute and an indexable ``features``
        sequence.
    labels : sequence of str
        Column names: ``labels[0]`` names the id column and ``labels[i + 1]``
        names the i-th feature.

    Returns
    -------
    pandas.DataFrame
        One row per node with a fresh 0..n-1 index. When ``nodes`` is empty,
        an empty frame with ``labels`` as its columns is returned.

    Raises
    ------
    IndexError
        If a node has more features than there are feature labels.
    """
    records = []
    for node in nodes:
        record = {labels[0]: node.id}
        for i, value in enumerate(node.features):
            record[labels[i + 1]] = value
        records.append(record)

    if not records:
        # Nothing was enclosed: return an empty, correctly-labelled frame so
        # callers can still select columns by name.
        return pd.DataFrame(columns=list(labels))

    # Build the frame once from all records instead of creating and
    # concatenating a one-row frame per node.
    return pd.DataFrame(records)

# Compute candidates to drop from the top half

In [28]:
# Per-axis shape handed to the search. In the recorded run below, the returned
# boundary is proportional to this shape (same ratio on all three axes).
shape_top = (8.3, 14.2, 4.6)

# NOTE(review): the meaning of the third argument (18) is not visible in this
# notebook -- presumably a target count or iteration limit; confirm against
# ratchet_search.
search_top = BinarySpaceSearch(top_hp, shape_top, 18)
boundary_top = search_top.search()
# `enclosed` is read only after search() has run; the resulting rows are the
# top-half candidates to drop (used later to set Hire = False).
drop_df = nodes2df(search_top.enclosed, ['ID', 'hp', 'ret', 'pgp'])

print(f'Shape Top: {shape_top}')
print(f'Boundary: {boundary_top}')

Shape Top: (8.3, 14.2, 4.6)
Boundary: [0.41553408 0.71091372 0.230296  ]


In [9]:
# Append this run's shape and the boundary it produced as one row of the
# top-half results log. The header row is written only when the log file
# does not exist yet.
exp_info_file = "top_results.csv"
df = pd.DataFrame(
    {
        'shape_x': shape_top[0],
        'shape_y': shape_top[1],
        'shape_z': shape_top[2],
        'bound_x': boundary_top[0],
        'bound_y': boundary_top[1],
        'bound_z': boundary_top[2],
    },
    index=[1],
)

df.to_csv(
    exp_info_file,
    mode='a',
    index=False,
    header=not os.path.exists(exp_info_file),
)

# Compute candidates to include from the bottom half
Since we want the best candidates in this set, we invert the probabilities (each value p is replaced by 1 - p).

In [23]:
# Work on a copy so that bot_hp keeps the original, un-inverted values.
bot_hp_xform = bot_hp.copy()
# Replace the columns at positions 1-3 with 1 - value.
# NOTE(review): presumably these are the three probability columns that
# follow UNIQUE_ID -- confirm the column order of the input CSV.
bot_hp_xform.iloc[:, 1:4] = 1 - bot_hp.iloc[:, 1:4]

# Summary statistics, shown as the cell's output.
bot_hp_xform.describe()

Unnamed: 0,UNIQUE_ID,yes_hp_en,yes_ret_yhp_en,yes_pg_yhp_en
count,1125.0,1125.0,1125.0,1125.0
mean,243092800.0,0.720807,0.437868,0.624225
std,13998820.0,0.080685,0.107921,0.19939
min,120283700.0,0.60153,0.005313,0.036684
25%,245033400.0,0.654873,0.362802,0.484362
50%,245115100.0,0.709376,0.437357,0.650567
75%,245176400.0,0.77548,0.507535,0.788869
max,245274100.0,0.999527,0.999839,0.969007


In [70]:
# Run the search on the inverted bottom-half data; the enclosed nodes are the
# bottom-half candidates to include (used later to set Hire = True).
shape_bottom = (8, 4, 8)
search_bottom = BinarySpaceSearch(bot_hp_xform, shape_bottom, 18)
boundary_bottom = search_bottom.search()
add_df = nodes2df(search_bottom.enclosed, ['ID', 'hp', 'ret', 'pgp'])

# Append this run's shape and boundary to the bottom-half results log,
# writing the header row only when the log file does not exist yet.
exp_info_file = "bot_results.csv"
df = pd.DataFrame(
    {
        'shape_x': shape_bottom[0],
        'shape_y': shape_bottom[1],
        'shape_z': shape_bottom[2],
        'bound_x': boundary_bottom[0],
        'bound_y': boundary_bottom[1],
        'bound_z': boundary_bottom[2],
    },
    index=[1],
)

df.to_csv(
    exp_info_file,
    mode='a',
    index=False,
    header=not os.path.exists(exp_info_file),
)

print(f'Shape Bottom: {shape_bottom}')
print(f'Boundary: {boundary_bottom}')

Shape Bottom: (8, 4, 8)
Boundary: [0.65297103 0.32648551 0.65297103]


In [81]:
# Independent copy of the top group with every row initially flagged as hired.
top_copy = top_hp.assign(Hire=True)
# Clear the flag for rows whose UNIQUE_ID appears among the IDs in drop_df.
top_copy.loc[top_copy['UNIQUE_ID'].isin(drop_df['ID']), 'Hire'] = False

In [82]:
# Independent copy of the bottom group with every row initially flagged as
# not hired.
bot_copy = bot_hp.assign(Hire=False)
# Set the flag for rows whose UNIQUE_ID appears among the IDs in add_df.
bot_copy.loc[bot_copy['UNIQUE_ID'].isin(add_df['ID']), 'Hire'] = True

In [84]:
# Stack the top and bottom groups (each carrying its 'Hire' flag) into a
# single table, top group first; the existing row index labels are kept.
final_df = pd.concat([top_copy, bot_copy])

In [85]:
# Write the combined table without the index column. The extension is
# corrected from the misspelled '.cvs' so the file is recognised as CSV.
final_df.to_csv('final.csv', index=False)