In [12]:
import pandas as pd
import numpy as np
import os

# --- Configuration ---
BASE_DATA_PATH = 'initial_data'
NUM_FUNCTIONS = 8
X_COLUMNS = ['X1', 'X2', 'X3', 'X4', 'X5', 'X6', 'X7', 'X8']

In [13]:
# --- 1. Load the initial data from the NumPy files (10 points per function) ---
all_initial_data = []
for fn_id in range(1, NUM_FUNCTIONS + 1):
    func_folder = os.path.join(BASE_DATA_PATH, f'function_{fn_id}')
    inputs_path = os.path.join(func_folder, 'initial_inputs.npy')
    outputs_path = os.path.join(func_folder, 'initial_outputs.npy')
    
    try:
        X_inputs = np.load(inputs_path)
        Y_outputs = np.load(outputs_path).reshape(-1, 1)
        num_dims = X_inputs.shape[1]
        data_combined = np.hstack([X_inputs, Y_outputs])
        cols_to_use = X_COLUMNS[:num_dims] + ['Y']
        df_func = pd.DataFrame(data_combined, columns=cols_to_use)
        df_func['Function ID'] = fn_id
        for col in X_COLUMNS:
            if col not in df_func.columns:
                df_func[col] = np.nan
        df_func = df_func[['Function ID'] + X_COLUMNS + ['Y']]
        all_initial_data.append(df_func)
    except FileNotFoundError:
        print(f"FATAL ERROR: Files not found for Function {fn_id}. Check path.")
        exit()
df_initial_master = pd.concat(all_initial_data, ignore_index = True)


In [14]:
# --- 2. Define and Combine Week 1 Query Results (1 point per function) ---
# 8 queries and the 8 received Y-values.
week_1_data_list = [
    # F1 (2D) | Y: 2.3088e-248
    [1, 0.000000, 0.000000, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, 2.308810742126141e-248],
    # F2 (2D) | Y: 0.135474
    [2, 1.000000, 0.000000, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, 0.1354744006583224],      
    # F3 (3D) | Y: -0.083290
    [3, 1.000000, 1.000000, 0.837688, np.nan, np.nan, np.nan, np.nan, np.nan, -0.08329024344179033],   
    # F4 (4D) | Y: -10.306691
    [4, 0.531039, 0.401922, 0.000000, 0.276120, np.nan, np.nan, np.nan, np.nan, -10.306691761238401],  
    # F5 (4D) | Y: 1091.315 (Exploitation Target!)
    [5, 0.232921, 0.841389, 0.883358, 0.879468, np.nan, np.nan, np.nan, np.nan, 1091.315259681319],      
    # F6 (5D) | Y: -1.021071
    [6, 0.312024, 0.000000, 0.393987, 1.000000, 0.000000, np.nan, np.nan, np.nan, -1.021071953531923],   
    # F7 (6D) | Y: 0.540823
    [7, 0.000000, 0.207680, 1.000000, 0.000000, 0.323235, 1.000000, np.nan, np.nan, 0.5408239117650226], 
    # F8 (8D) | Y: 9.895121
    [8, 0.140670, 0.101576, 0.175983, 0.000000, 1.000000, 0.408319, 0.145334, 0.000000, 9.895121560684]
]

query_columns = ['Function ID'] + X_COLUMNS + ['Y']
df_week_1_queries = pd.DataFrame(week_1_data_list, columns = query_columns)

In [15]:
# --- 3. Concatenate and Save the New Master File ---
df_master_w2 = pd.concat([df_initial_master, df_week_1_queries], ignore_index=True)

# Drop any rows where 'Function ID' is NaN (the noisy trailing rows)
df_master_w2.dropna(subset=['Function ID'], inplace=True)

# Ensure 'Function ID' is an integer for clean grouping
df_master_w2['Function ID'] = df_master_w2['Function ID'].astype(int)

# Save the new master file.
df_master_w2.to_csv('bbo_master_w02.csv', index=False)

print("---------------------------------------------------------")
print(f"SUCCESS: New master data file 'bbo_master_w02.csv' created.")
print(f"Total rows in new file: {len(df_master_w2)} (80 initial + 8 queries = 88 total rows, clean).")
print(f"All noisy rows (Function ID is NaN) have been dropped.")
print("---------------------------------------------------------")

---------------------------------------------------------
SUCCESS: New master data file 'bbo_master_w02.csv' created.
Total rows in new file: 183 (80 initial + 8 queries = 88 total rows, clean).
All noisy rows (Function ID is NaN) have been dropped.
---------------------------------------------------------
