In [2]:
import pandas as pd
import numpy as np
import json
import logging
import os

# Set up logging for cleaner output
logging.basicConfig(level=logging.INFO,
                    format='%(levelname)s: %(message)s')

# --- Configuration for Week 6 ---
# The previous master file (Week 5 data)
OLD_MASTER_FILE = 'bbo_master_w05.csv'
# The new master file to be created (Week 6 data)
NEW_MASTER_FILE = 'bbo_master_w06.csv' 
ADD_DATA_DIR = 'add_data'
# These are the files containing the data from the round you just completed
INPUTS_FILE = os.path.join(ADD_DATA_DIR, 'week05_clean_inputs.json') 
OUTPUTS_FILE = os.path.join(ADD_DATA_DIR, 'week05_clean_outputs.json') 

# Confirmed data state: 112 old + 8 new = 120 total
EXPECTED_OLD_ROWS = 112 
EXPECTED_NEW_ROWS = 120

FUNCTION_IDS = list(range(1, 9))
X_COLS = [f'X{i}' for i in range(1, 9)]
ALL_COLS_ORDER = ['Function ID'] + X_COLS + ['Y']


def create_bbo_master_w06():
    """
    Loads the old master file and combines it with new data from JSON inputs 
    and outputs, explicitly handling the list-of-lists structure of the input JSON.
    """
    logging.info(f"--- Starting BBO Master File Creation for Week 6 ---")
    
    # 1. Load the existing master file (Week 5)
    if not os.path.exists(OLD_MASTER_FILE):
        logging.error(f"FATAL: Old master file not found at '{OLD_MASTER_FILE}'.")
        return

    # Load old data
    df_old = pd.read_csv(OLD_MASTER_FILE, 
                         dtype={col: str for col in X_COLS})
    print(f"Loaded {len(df_old)} rows from {OLD_MASTER_FILE}.")

    # 2. Load the new inputs (X-values) and outputs (Y-values)
    try:
        with open(INPUTS_FILE, 'r') as f:
            # Expecting a list of 8 lists (list of lists)
            new_inputs_list = json.load(f) 
        with open(OUTPUTS_FILE, 'r') as f:
            new_outputs_y = json.load(f)
    except Exception as e:
        logging.error(f"FATAL: Error loading JSON files: {e}")
        return
    
    if len(new_inputs_list) != len(new_outputs_y) or len(new_inputs_list) != 8:
        logging.error(f"FATAL: Expected 8 inputs/outputs, found {len(new_inputs_list)} inputs and {len(new_outputs_y)} outputs.")
        return

    # 3. Assemble the 8 new rows (df_new) manually for guaranteed structure
    new_rows_list = []
    for i, func_id in enumerate(FUNCTION_IDS):
        
        # input_values_list is the inner list, e.g., [0.598193, 0.70091] for F1
        # This line retrieves the list of X values for the current function ID
        input_values_list = new_inputs_list[i] 
        output_data = new_outputs_y[i]
        
        # Start the new row dictionary
        new_row = {'Function ID': func_id, 'Y': output_data}
        
        # Initialize all 8 X columns to 0.0
        for col in X_COLS:
            new_row[col] = 0.0

        # Overwrite the relevant X columns with the actual values from the input list
        # We use the index (j) to correctly map the value to the column name (X1, X2, etc.)
        for j, value in enumerate(input_values_list):
            col_name = X_COLS[j] 
            
            # Ensure value is float
            if isinstance(value, str) and not value:
                value = 0.0
                
            new_row[col_name] = float(value)
            
        new_rows_list.append(new_row)

    # Create the new DataFrame with numeric X/Y values
    df_new_numeric = pd.DataFrame(new_rows_list, columns=ALL_COLS_ORDER)
    print(f"Successfully constructed {len(df_new_numeric)} new rows with Function IDs and X-values.")

    # 4. Prepare old data for concatenation: Convert old X-columns to numeric (0.0 for empty string)
    df_old_numeric = df_old.copy()
    for col in X_COLS:
         # Convert X columns to numeric, treating empty strings as NaN, then filling NaN with 0.0
         df_old_numeric[col] = pd.to_numeric(df_old_numeric[col], errors='coerce').fillna(0.0)
    
    df_old_numeric['Function ID'] = pd.to_numeric(df_old_numeric['Function ID'], errors='coerce').astype('Int64')
    df_old_numeric['Y'] = pd.to_numeric(df_old_numeric['Y'], errors='coerce')


    # 5. Concatenate the two numeric data frames
    df_master_w06_numeric = pd.concat([df_old_numeric, df_new_numeric], ignore_index=True)
    
    # 6. Final Formatting (Converting back to string/empty string format for CSV storage)
    df_master_w06 = df_master_w06_numeric.copy()
    
    for col in X_COLS:
          # Format X columns: keep precision for non-zero values, use empty string for 0.0
          df_master_w06[col] = df_master_w06[col].apply(
              lambda x: f"{x:.17f}" if (pd.notna(x) and x != 0.0) else ""
          )
          
    df_master_w06['Function ID'] = df_master_w06['Function ID'].astype(int)

    # Save to CSV
    df_master_w06.to_csv(NEW_MASTER_FILE, index=False)
    
    # 7. Verification
    total_rows = len(df_master_w06)
    
    logging.info("\n" + "*"*50)
    if total_rows == EXPECTED_NEW_ROWS:
        logging.info(f"SUCCESS: New master data file '{NEW_MASTER_FILE}' created.")
        logging.info(f"Total rows in the new file: {total_rows} (Expected {EXPECTED_NEW_ROWS}).")
        logging.info(f"Verification: There are now {total_rows / 8:.0f} data points for each of the 8 functions.")
    else:
        logging.warning(f"Master file created, but verification failed.")
        logging.warning(f"Expected {EXPECTED_NEW_ROWS} rows, found {total_rows}.")
    logging.info("*"*50)
    
    return df_master_w06

if __name__ == '__main__':
    if not os.path.exists(ADD_DATA_DIR):
        logging.warning(f"Directory '{ADD_DATA_DIR}' not found. Please create it and place your JSON files inside.")
        
    df_final = create_bbo_master_w06()
    if df_final is not None:
        print("\n--- Tail of bbo_master_w06.csv (Showing the 8 newly appended rows) ---")
        print(df_final[['Function ID', 'X1', 'X2', 'X3', 'X4', 'X8', 'Y']].tail(8))

INFO: --- Starting BBO Master File Creation for Week 6 ---
INFO: 
**************************************************
INFO: SUCCESS: New master data file 'bbo_master_w06.csv' created.
INFO: Total rows in the new file: 120 (Expected 120).
INFO: Verification: There are now 15 data points for each of the 8 functions.
INFO: **************************************************


Loaded 112 rows from bbo_master_w05.csv.
Successfully constructed 8 new rows with Function IDs and X-values.

--- Tail of bbo_master_w06.csv (Showing the 8 newly appended rows) ---
     Function ID                   X1                   X2  \
112            1  0.59819299999999997  0.70091000000000003   
113            2  0.20862200000000000  0.31549100000000002   
114            3  0.57332899999999998                        
115            4  0.40657399999999999  0.22015799999999999   
116            5                       0.99999899999999997   
117            6  0.78326899999999999  0.22778799999999999   
118            7  0.55417499999999997  0.60464899999999999   
119            8                       0.80457400000000001   

                      X3                   X4                   X8  \
112                                                                  
113                                                                  
114  0.32047999999999999                          