In [3]:
import pandas as pd
import numpy as np
import json
import os
import logging
import sys

# Send log records to stdout so they appear inline with the console/notebook output.
logging.basicConfig(
    stream=sys.stdout,
    level=logging.INFO,
    format='%(levelname)s: %(message)s',
)

# --- Week 11 configuration: fold the Round 10 observations into the master file ---

# Round number stamped on the rows appended by this run.
CURRENT_ROUND = 10

# Number of functions; the inputs/outputs files must each hold this many entries.
NUM_FUNCTIONS = 8

# Source master CSV (history through Round 9) and the CSV written by this run
# (history through Round 10).
MASTER_FILE_PATH_OLD = 'bbo_master_w10.csv'
MASTER_FILE_PATH_NEW = 'bbo_master_w11.csv'

# Folder holding the per-round JSON files, and the two files carrying Round 10.
ADD_DATA_DIR = 'add_data'
INPUTS_FILE = 'week10_clean_inputs.json'
OUTPUTS_FILE = 'week10_clean_outputs.json'

# --- Function dimensionality mapping ---
# Function ID -> number of input dimensions, listed in ID order starting at 1.
# Per project sources: F1-F2 (2D), F3 (3D), F4-F5 (4D), F6 (5D), F7 (6D), F8 (8D).
FUNCTION_DIMS = dict(enumerate((2, 2, 3, 4, 4, 5, 6, 8), start=1))

def load_json_file(file_path):
    """Load and parse a JSON file located inside ``ADD_DATA_DIR``.

    Args:
        file_path: File name (or relative path) that is joined onto
            ``ADD_DATA_DIR`` to locate the file.

    Returns:
        The parsed JSON content, or ``None`` when the file is missing, is not
        valid JSON, or cannot be read. Every failure is logged at ERROR level.
    """
    full_path = os.path.join(ADD_DATA_DIR, file_path)

    # The log format already prepends the level name, so messages carry no
    # literal "ERROR:" prefix of their own.
    if not os.path.exists(full_path):
        logging.error(f"Required file '{full_path}' not found. Please ensure it is in the '{ADD_DATA_DIR}' folder.")
        return None

    try:
        # Pin UTF-8 so decoding does not depend on the platform's locale encoding.
        with open(full_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except json.JSONDecodeError:
        logging.error(f"Could not decode JSON from '{file_path}'. Check file format (must be valid JSON).")
        return None
    except Exception as e:
        # Best-effort loader: any other read/decode problem is reported, not raised.
        logging.error(f"An unexpected error occurred loading {file_path}: {e}")
        return None

    logging.info(f"Successfully loaded {file_path}.")
    return data

# Number of leading rows in the master file that belong to Round 0.
_INITIAL_ROW_COUNT = 80


def _expected_round_labels(total_rows, rows_per_round):
    """Build the Round label for every master row under the expected layout.

    The layout is ``_INITIAL_ROW_COUNT`` Round-0 rows followed by
    ``rows_per_round`` rows for each of Rounds 1, 2, ...  The returned list is
    as long as ``total_rows`` only when the row count fits that layout exactly,
    so callers must compare lengths before using it.
    """
    completed_rounds = (total_rows - _INITIAL_ROW_COUNT) // rows_per_round
    labels = [0] * _INITIAL_ROW_COUNT
    for round_no in range(1, completed_rounds + 1):
        labels.extend([round_no] * rows_per_round)
    return labels


def _fit_to_dimension(values, dim, label):
    """Return ``values`` truncated or padded (with 0.5) to exactly ``dim`` entries.

    A warning naming ``label`` is logged whenever an adjustment is made.
    """
    found = len(values)
    if found > dim:
        logging.warning(f"{label}: Input dimension mismatch. Expected {dim}, found {found}. TRUNCATING.")
        return values[:dim]
    if found < dim:
        logging.warning(f"{label}: Input dimension mismatch. Expected {dim}, found {found}. PADDING with 0.5.")
        return values + [0.5] * (dim - found)
    return values


def create_master_data():
    """Append the current round's observations to the master CSV.

    Steps:
      1. Read ``MASTER_FILE_PATH_OLD`` and rebuild its ``Round`` column when
         the row count matches the expected layout (80 Round-0 rows plus
         ``NUM_FUNCTIONS`` rows per later round).
      2. Load ``INPUTS_FILE`` and ``OUTPUTS_FILE`` and check that each holds
         exactly ``NUM_FUNCTIONS`` entries.
      3. Build one row per function, fitting each input vector to the
         function's dimensionality and leaving unused X columns as NaN.
      4. Concatenate with the old data and write ``MASTER_FILE_PATH_NEW``.

    Returns:
        The combined DataFrame, or ``None`` if any step fails (the reason is
        logged).
    """
    logging.info("*"*50)
    logging.info(f"--- Starting BBO Master File Creation for Round {CURRENT_ROUND} ---")

    # 1. Load the previous master file
    if not os.path.exists(MASTER_FILE_PATH_OLD):
        logging.error(f"Master file '{MASTER_FILE_PATH_OLD}' not found. Cannot proceed with aggregation.")
        return None

    try:
        df_master_old = pd.read_csv(MASTER_FILE_PATH_OLD)
        logging.info(f"Loaded {len(df_master_old)} rows from {MASTER_FILE_PATH_OLD}.")

        # Rebuild the 'Round' column from row positions. With 8 functions a
        # file covering Rounds 0-9 has 80 + 9 * 8 = 152 rows.
        round_list = _expected_round_labels(len(df_master_old), NUM_FUNCTIONS)

        if len(round_list) == len(df_master_old):
            if 'Round' in df_master_old.columns:
                df_master_old['Round'] = round_list
            else:
                df_master_old.insert(1, 'Round', round_list)
            # The last label is the highest round present in the old file.
            logging.info(f"FIX APPLIED: 'Round' column verified (R0 to R{round_list[-1]}).")
        else:
            logging.warning(f"Row count mismatch ({len(round_list)} vs {len(df_master_old)}). Skipping round correction.")

    except Exception as e:
        logging.error(f"Failed to read or process master CSV: {e}")
        return None

    # 2. Load the new inputs and outputs for the current round
    inputs_list = load_json_file(INPUTS_FILE)
    outputs_array = load_json_file(OUTPUTS_FILE)

    if inputs_list is None or outputs_array is None:
        logging.error("Aggregation aborted due to missing or invalid JSON files.")
        return None

    # --- INPUT VALIDATION STEP ---
    if not isinstance(inputs_list, list) or len(inputs_list) != NUM_FUNCTIONS:
        logging.critical(f"FATAL ERROR: Input file '{INPUTS_FILE}' must contain exactly {NUM_FUNCTIONS} entries.")
        return None

    if not isinstance(outputs_array, list) or len(outputs_array) != NUM_FUNCTIONS:
        logging.critical(f"FATAL ERROR: Output file '{OUTPUTS_FILE}' must contain exactly {NUM_FUNCTIONS} entries.")
        return None

    # The new rows must use the same columns as the old master file.
    cols_to_use = df_master_old.columns.tolist()

    # 3. Construct one new row per function. Both lists were validated to hold
    # exactly NUM_FUNCTIONS entries, so they can be walked in lockstep.
    max_dim = max(FUNCTION_DIMS.values())
    new_data = []

    for f_id, (raw_inputs, score) in enumerate(zip(inputs_list, outputs_array), start=1):
        dim = FUNCTION_DIMS[f_id]

        # A non-list entry is treated as "no inputs" and is therefore fully padded.
        x_values_raw = raw_inputs if isinstance(raw_inputs, list) else []
        x_values = _fit_to_dimension(x_values_raw, dim, f'f{f_id}')

        # X columns beyond this function's dimensionality stay NaN.
        x_values = x_values + [np.nan] * (max_dim - dim)

        row_data = {
            'Function ID': f_id,
            'Y': score,
            'Round': CURRENT_ROUND,
        }
        row_data.update({f'X{d}': value for d, value in enumerate(x_values, start=1)})

        new_data.append(row_data)

    # Fields absent from the master columns are discarded by the DataFrame
    # constructor below; report them instead of losing them silently.
    dropped_fields = [key for key in new_data[0] if key not in cols_to_use]
    if dropped_fields:
        logging.warning(f"New-row fields not present in the master columns and therefore not saved: {dropped_fields}")

    df_new_rows = pd.DataFrame(new_data, columns=cols_to_use)
    logging.info(f"Successfully constructed {len(df_new_rows)} new rows (Round {CURRENT_ROUND}).")

    # 4. Concatenate and Save
    df_master_new = pd.concat([df_master_old, df_new_rows], ignore_index=True)
    try:
        df_master_new.to_csv(MASTER_FILE_PATH_NEW, index=False)
    except OSError as e:
        # Keep the function's contract: report the failure and return None.
        logging.error(f"Failed to write '{MASTER_FILE_PATH_NEW}': {e}")
        return None

    # 5. Verification
    total_rows = len(df_master_new)
    # One row per function is added to the old file, e.g. 152 + 8 = 160 rows.
    expected_rows = len(df_master_old) + NUM_FUNCTIONS

    logging.info("\n" + "*"*50)
    logging.info(f"SUCCESS: New master data file '{MASTER_FILE_PATH_NEW}' created.")
    logging.info(f"Total rows in the new file: {total_rows} (Expected {expected_rows}).")
    logging.info(f"Verification: There are now {total_rows / NUM_FUNCTIONS:.1f} data points per function.")
    logging.info("*"*50)

    return df_master_new

if __name__ == '__main__':
    # The JSON loader looks inside this folder, so create it up front if absent.
    data_dir_missing = not os.path.exists(ADD_DATA_DIR)
    if data_dir_missing:
        os.makedirs(ADD_DATA_DIR)
        logging.warning(f"Directory '{ADD_DATA_DIR}' created.")

    df_final = create_master_data()

    if df_final is not None:
        print(f"\n--- Tail of {MASTER_FILE_PATH_NEW} (Showing the 8 newly appended rows) ---")
        # Show a few key columns of the current round's rows to confirm
        # their structure and padding.
        is_current_round = df_final['Round'] == CURRENT_ROUND
        preview_columns = ['Function ID', 'Round', 'X1', 'X2', 'X3', 'Y']
        print(df_final.loc[is_current_round, preview_columns])

INFO: **************************************************
INFO: --- Starting BBO Master File Creation for Round 10 ---
INFO: Loaded 152 rows from bbo_master_w10.csv.
INFO: FIX APPLIED: 'Round' column verified (R0 to R9).
INFO: Successfully loaded week10_clean_inputs.json.
INFO: Successfully loaded week10_clean_outputs.json.
INFO: Successfully constructed 8 new rows (Round 10).
INFO: 
**************************************************
INFO: SUCCESS: New master data file 'bbo_master_w11.csv' created.
INFO: Total rows in the new file: 160 (Expected 160).
INFO: Verification: There are now 20.0 data points per function.
INFO: **************************************************

--- Tail of bbo_master_w11.csv (Showing the 8 newly appended rows) ---
     Function ID  Round        X1        X2        X3              Y
152            1     10  0.374540  0.950714       NaN -1.560647e-117
153            2     10  0.657063  0.991510       NaN   5.072010e-01
154            3     10  1.000000  0.51776