In [1]:
import pandas as pd
import numpy as np
import json
import os
import logging
import sys

# Route log records to stdout, prefixed with their level name.
logging.basicConfig(
    stream=sys.stdout,
    format="%(levelname)s: %(message)s",
    level=logging.INFO,
)

# --- Configuration ---
# Source master CSV (holds data up to Round 8).
MASTER_FILE_PATH_OLD = "bbo_master_w09.csv"
# Destination master CSV for Week 10 (will hold data up to Round 9).
MASTER_FILE_PATH_NEW = "bbo_master_w10.csv"
# Directory the JSON files below are looked up in.
ADD_DATA_DIR = "add_data"

# JSON files holding the Round 9 inputs and outputs.
INPUTS_FILE = "week09_clean_inputs.json"
OUTPUTS_FILE = "week09_clean_outputs.json"

NUM_FUNCTIONS = 8
CURRENT_ROUND = 9  # the round number stamped on the appended rows

# Function ID -> number of input dimensions.
FUNCTION_DIMS = {
    1: 2,
    2: 2,
    3: 3,
    4: 4,
    5: 4,
    6: 5,
    7: 6,
    8: 8,
}

def load_json_file(file_path, base_dir=None):
    """Load and decode a JSON file from the additional-data directory.

    Args:
        file_path: Name (or relative path) of the JSON file.
        base_dir: Directory ``file_path`` is resolved against. When omitted,
            the module-level ``ADD_DATA_DIR`` is used.

    Returns:
        The decoded JSON content, or None when the file does not exist or
        cannot be decoded as UTF-8 JSON. An error is logged in both cases.
    """
    if base_dir is None:
        base_dir = ADD_DATA_DIR
    full_path = os.path.join(base_dir, file_path)

    # Try the open directly instead of checking existence first: one less
    # filesystem round-trip and no window between the check and the open.
    try:
        # Explicit UTF-8 so decoding does not depend on the platform default.
        with open(full_path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except FileNotFoundError:
        # No literal "ERROR:" prefix here; the logging format adds the level.
        logging.error(f"{full_path} not found.")
    except (json.JSONDecodeError, UnicodeDecodeError) as exc:
        logging.error(f"{full_path} is not valid JSON: {exc}")
    return None

def create_master_data():
    """Append the current round's results to the old master CSV and save a new one.

    Reads ``MASTER_FILE_PATH_OLD``, loads the per-function inputs and outputs
    from ``INPUTS_FILE`` / ``OUTPUTS_FILE`` via ``load_json_file``, builds one
    row per function stamped with ``CURRENT_ROUND``, and writes the old rows
    followed by the new rows to ``MASTER_FILE_PATH_NEW``.

    Returns:
        None. Every failure (missing files, unexpected master columns, too
        few entries in the JSON data) is logged and aborts the run before
        anything is written.
    """
    logging.info(f"--- Creating Master File for Round {CURRENT_ROUND} ---")

    # 1. Load Old Master
    if not os.path.exists(MASTER_FILE_PATH_OLD):
        logging.error(f"Old master file {MASTER_FILE_PATH_OLD} not found.")
        return

    df_old = pd.read_csv(MASTER_FILE_PATH_OLD)
    cols = df_old.columns.tolist()

    # The widest function decides how many X columns a stored row carries.
    max_dim = max(FUNCTION_DIMS.values())
    x_cols = [f'X{d + 1}' for d in range(max_dim)]

    # New rows are projected onto the old master's columns below, so a name
    # mismatch would silently discard the new values. Fail loudly instead.
    missing_cols = [c for c in ['Function ID', 'Round', 'Y', *x_cols] if c not in cols]
    if missing_cols:
        logging.error(
            f"Old master file {MASTER_FILE_PATH_OLD} is missing expected columns: {missing_cols}"
        )
        return

    if (df_old['Round'] == CURRENT_ROUND).any():
        logging.warning(
            f"Old master file {MASTER_FILE_PATH_OLD} already contains rows for "
            f"Round {CURRENT_ROUND}; the new rows will be added alongside them."
        )

    # 2. Load New Data
    inputs = load_json_file(INPUTS_FILE)
    outputs = load_json_file(OUTPUTS_FILE)

    if inputs is None or outputs is None:
        # load_json_file has already logged the reason.
        return

    if len(inputs) < NUM_FUNCTIONS or len(outputs) < NUM_FUNCTIONS:
        logging.error(
            f"Expected at least {NUM_FUNCTIONS} entries in {INPUTS_FILE} and {OUTPUTS_FILE}, "
            f"got {len(inputs)} inputs and {len(outputs)} outputs."
        )
        return

    # 3. Construct New Rows
    new_rows = []
    for f_id in range(1, NUM_FUNCTIONS + 1):
        dim = FUNCTION_DIMS[f_id]
        coords = list(inputs[f_id - 1])
        score = outputs[f_id - 1]

        # Force the point to the function's dimensionality, and say so when
        # the data had to be altered.
        if len(coords) > dim:
            logging.warning(
                f"Function {f_id}: got {len(coords)} coordinates, expected {dim}; truncating."
            )
            coords = coords[:dim]
        elif len(coords) < dim:
            logging.warning(
                f"Function {f_id}: got {len(coords)} coordinates, expected {dim}; padding with 0.5."
            )
            coords = coords + [0.5] * (dim - len(coords))

        # Unused trailing X columns are stored as NaN.
        coords_padded = coords + [np.nan] * (max_dim - dim)

        row = {'Function ID': f_id, 'Round': CURRENT_ROUND, 'Y': score}
        row.update(zip(x_cols, coords_padded))
        new_rows.append(row)

    df_new = pd.DataFrame(new_rows, columns=cols)

    # 4. Concatenate and Save
    df_final = pd.concat([df_old, df_new], ignore_index=True)
    df_final.to_csv(MASTER_FILE_PATH_NEW, index=False)

    logging.info(f"SUCCESS: {MASTER_FILE_PATH_NEW} created with {len(df_final)} rows.")
    logging.info(f"Tail of new data:\n{df_final[df_final['Round'] == CURRENT_ROUND][['Function ID', 'Y']]}")

# Build the new master file only when this module is the entry point.
if __name__ == "__main__":
    create_master_data()


INFO: --- Creating Master File for Round 9 ---
INFO: SUCCESS: bbo_master_w10.csv created with 152 rows.
INFO: Tail of new data:
     Function ID          Y
144            1  -0.003021
145            2  -0.009292
146            3  -0.058280
147            4 -27.260392
148            5  44.729803
149            6  -1.546083
150            7   0.754990
151            8   7.841793
