In [10]:
import pandas as pd
import numpy as np
import json
import logging
import os

In [11]:
# Set up logging for cleaner output during execution.
# The format already prepends the level name, so messages need no manual prefix.
logging.basicConfig(level=logging.INFO,
                    format='%(levelname)s: %(message)s')

# --- Configuration ---
OLD_MASTER_FILE = 'bbo_master_w04.csv'
NEW_MASTER_FILE = 'bbo_master_w05.csv'
ADD_DATA_DIR = 'add_data'
INPUT_FILE = os.path.join(ADD_DATA_DIR, 'week04_clean_inputs.json')
OUTPUT_FILE = os.path.join(ADD_DATA_DIR, 'week04_clean_outputs.json')

# --- Row bookkeeping ---
# Old master: 80 initial rows + 3 earlier query rounds * 8 functions = 104.
# New master: the above + this round's 8 rows (one per function)   = 112.
NUM_FUNCTIONS = 8
INITIAL_ROWS = 80
PRIOR_QUERY_ROUNDS = 3  # query rounds already contained in OLD_MASTER_FILE
EXPECTED_OLD_ROWS = INITIAL_ROWS + PRIOR_QUERY_ROUNDS * NUM_FUNCTIONS
NEW_ROWS_COUNT = NUM_FUNCTIONS
EXPECTED_NEW_ROWS = EXPECTED_OLD_ROWS + NEW_ROWS_COUNT

In [12]:
def create_bbo_master_w05():
    """Build the new master CSV by appending the Week 4 query results.

    Reads ``OLD_MASTER_FILE``, then the JSON files ``INPUT_FILE`` (one
    coordinate list per function) and ``OUTPUT_FILE`` (one observed value per
    function), appends one row per function and writes ``NEW_MASTER_FILE``.

    Problems are reported through ``logging`` instead of being raised:

    * a missing master/JSON file, unparsable JSON, a wrong number of entries,
      or an input with more than eight coordinates logs an error and aborts
      before anything is written;
    * an unexpected row count (old or new master) only logs a warning.

    Returns:
        None.
    """
    # Coordinate columns of the master file; shorter inputs are NaN-padded.
    x_columns = [f'X{k}' for k in range(1, 9)]

    # 1. Load existing master file
    try:
        df_old = pd.read_csv(OLD_MASTER_FILE)
    except FileNotFoundError:
        logging.error(f"Master file {OLD_MASTER_FILE} not found. Exiting.")
        return

    if len(df_old) != EXPECTED_OLD_ROWS:
        logging.warning(f"Loaded {OLD_MASTER_FILE} with {len(df_old)} rows, "
                        f"expected {EXPECTED_OLD_ROWS}.")
    else:
        logging.info(f"--- Loading initial data from {OLD_MASTER_FILE} "
                     f"(Expected {EXPECTED_OLD_ROWS} rows) ---")
    # Replace empty strings with NaN for consistency with the padded new rows.
    df_old = df_old.replace('', np.nan)

    # 2. Load new input and output data
    try:
        with open(INPUT_FILE, 'r', encoding='utf-8') as f:
            inputs = json.load(f)
        with open(OUTPUT_FILE, 'r', encoding='utf-8') as f:
            outputs = json.load(f)
    except FileNotFoundError:
        logging.error(f"One or both new data files ({INPUT_FILE} "
                      f"or {OUTPUT_FILE}) not found. Check the '{ADD_DATA_DIR}' folder. Exiting.")
        return
    except json.JSONDecodeError:
        logging.error("Could not parse JSON data from the new files. Exiting.")
        return

    if len(inputs) != NEW_ROWS_COUNT or len(outputs) != NEW_ROWS_COUNT:
        logging.error(f"New data files must contain {NEW_ROWS_COUNT} rows "
                      f"(one per function). Found {len(inputs)} inputs and "
                      f"{len(outputs)} outputs. Exiting.")
        return

    # 3. Create new DataFrame for the Week 4 query
    new_data = []
    for i in range(NEW_ROWS_COUNT):  # Function IDs are 1-based: F1..F8
        coords = inputs[i]
        if len(coords) > len(x_columns):
            # Refuse to drop coordinates silently; the master only has X1..X8.
            logging.error(f"Input for function {i + 1} has {len(coords)} values, "
                          f"but at most {len(x_columns)} are supported. Exiting.")
            return

        row = {'Function ID': i + 1, 'Y': outputs[i]}
        # Lower-dimensional functions leave their trailing X columns empty.
        padding = [np.nan] * (len(x_columns) - len(coords))
        row.update(zip(x_columns, list(coords) + padding))
        new_data.append(row)

    # Column order matches the master file structure.
    df_new = pd.DataFrame(new_data, columns=['Function ID', *x_columns, 'Y'])

    logging.info(f"Successfully loaded {len(df_new)} new Week 4 query points "
                 f"from '{ADD_DATA_DIR}' (F1-F8).")

    # 4. Append and save
    df_master = pd.concat([df_old, df_new], ignore_index=True)

    # Sanity check
    if len(df_master) != EXPECTED_NEW_ROWS:
        logging.warning(f"Total row count is {len(df_master)}, "
                        f"expected {EXPECTED_NEW_ROWS}.")

    # 17 significant digits round-trip any float64. The fixed-point '%.17f'
    # would write every magnitude below ~1e-17 as zero.
    df_master.to_csv(NEW_MASTER_FILE, index=False, float_format='%.17g')

    separator = "----------------------------------------------"
    logging.info(separator)
    logging.info(f"SUCCESS: New master data file '{NEW_MASTER_FILE}' created.")
    logging.info(f"Total rows in the new file: {len(df_master)} (Expected {EXPECTED_NEW_ROWS}).")
    # ':g' prints whole numbers without a trailing '.0' (14, not 14.0).
    logging.info(f"Verification: There're now {len(df_master) / NEW_ROWS_COUNT:g} "
                 f"data points for each function.")
    logging.info(separator)

In [13]:
# Entry point: build the new master file when this cell/script is run directly.
if __name__ == "__main__":
    create_bbo_master_w05()

INFO: --- Loading initial data from bbo_master_w04.csv (Expected 104 rows) ---
INFO: Successfully loaded 8 new Week 4 query points from 'add_data' (F1-F8).
INFO: INFO: ----------------------------------------------
INFO: SUCCESS: New master data file 'bbo_master_w05.csv' created.
INFO: Total rows in the new file: 112 (Expected 112).
INFO: Verification: There're now 14.0 data points for each function.
INFO: INFO: ----------------------------------------------
