In [1]:
import pandas as pd
import numpy as np
import os
import json
import logging

In [2]:
# Emit INFO-and-above records as "<LEVEL>: <message>" so cell output stays compact.
logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.INFO)

# --- Configuration ---
# Input: the previous master file (initial points followed by the Week 1 queries).
BASE_DATA_PATH = 'bbo_master_w02.csv'
# Directory, relative to the working directory, that holds the weekly JSON files.
ADD_DATA_PATH = 'add_data'
# Output: the new master file written by this notebook.
OUTPUT_FILE_NAME = 'bbo_master_w03.csv'

# Number of unique Function IDs (1 through 8).
N_FUNCTIONS = 8
# Each function contributes a block of 10 pre-existing points to the initial rows.
POINTS_PER_FUNCTION_BLOCK = 10
# Input-coordinate column names X1..X8; shorter input vectors are NaN-padded to this width.
X_COLUMNS = [f'X{dim}' for dim in range(1, 9)]

In [3]:
# --- Helper Functions ---

def load_new_queries_from_json_files(add_data_path, n_functions, x_columns):
    """
    Load the new Week 2 query point for each function from two JSON files.

    Reads 'week02_clean_inputs.json' and 'week02_clean_outputs.json' from
    ``add_data_path`` and keeps only the first ``n_functions`` entries of each
    array. Entry ``i`` of both arrays is treated as the query for
    Function ID ``i + 1``.

    Parameters
    ----------
    add_data_path : str
        Directory that contains the two JSON files.
    n_functions : int
        Number of leading entries to keep from each file; both files must
        provide at least this many.
    x_columns : list of str
        Names of the input columns. Input vectors shorter than this list are
        right-padded with NaN; longer vectors are truncated (with a warning).

    Returns
    -------
    pandas.DataFrame
        One row per function with the columns 'Function ID', 'Y' and
        ``x_columns``. An empty DataFrame is returned, after logging an error,
        when a file is missing, cannot be read/parsed, or the number of
        entries does not match ``n_functions``.
    """
    INPUTS_FILE = 'week02_clean_inputs.json'
    OUTPUTS_FILE = 'week02_clean_outputs.json'

    try:
        inputs_filepath = os.path.join(add_data_path, INPUTS_FILE)
        with open(inputs_filepath, 'r', encoding='utf-8') as f:
            X_data = json.load(f)[:n_functions]

        outputs_filepath = os.path.join(add_data_path, OUTPUTS_FILE)
        with open(outputs_filepath, 'r', encoding='utf-8') as f:
            Y_data = json.load(f)[:n_functions]

    except FileNotFoundError as e:
        # Report the directory that was actually searched (the parameter), not a
        # module-level constant that may differ or may not exist for other callers.
        logging.error(f"Required file not found: {e}. Please ensure the '{add_data_path}' directory and JSON files exist.")
        return pd.DataFrame()
    except Exception as e:
        # Deliberately broad: any read/parse problem is reported and turned into
        # the "nothing loaded" result that callers check via .empty.
        logging.error(f"Error loading JSON files: {e}")
        return pd.DataFrame()

    total_points = len(X_data)
    if total_points != n_functions or total_points != len(Y_data):
        logging.error(f"Expected {n_functions} new points after slicing, but found {total_points} or lengths do not match. Cannot proceed.")
        return pd.DataFrame()

    logging.info(f"Successfully loaded {total_points} new Week 2 query points (F1-F8).")

    n_dims = len(x_columns)
    new_rows = []
    for func_id, (X_values, Y_value) in enumerate(zip(X_data, Y_data), start=1):
        if len(X_values) > n_dims:
            # zip() below drops the surplus coordinates; make that visible.
            logging.warning(
                f"Function {func_id}: input has {len(X_values)} values but only "
                f"{n_dims} X columns are defined; extra values are dropped."
            )

        # Pad the input vector with NaN up to the number of X columns.
        X_padded = list(X_values) + [np.nan] * (n_dims - len(X_values))

        new_rows.append({
            'Function ID': func_id,
            'Y': Y_value,
            **dict(zip(x_columns, X_padded)),
        })

    return pd.DataFrame(new_rows)


In [4]:
def create_bbo_master_w03():
    """
    Build and save the W03 master CSV in chronological-stack order.

    Reads BASE_DATA_PATH, splits it into the initial block
    (N_FUNCTIONS * POINTS_PER_FUNCTION_BLOCK rows) and the remaining Week 1
    query rows, appends the Week 2 query rows loaded from ADD_DATA_PATH, and
    writes the result to OUTPUT_FILE_NAME without the index.

    Returns None in all cases. The function stops early (after logging) when
    the base CSV is missing or when no Week 2 rows could be loaded.
    """
    logging.info(f"--- Loading initial data from {BASE_DATA_PATH} (Expected 88 rows) ---")
    try:
        w02_frame = pd.read_csv(BASE_DATA_PATH)
    except FileNotFoundError:
        logging.error(f"Base data file not found: {BASE_DATA_PATH}. Please ensure 'bbo_master_w02.csv' exists.")
        return

    w02_frame['Function ID'] = w02_frame['Function ID'].astype(int)

    n_loaded = len(w02_frame)
    if n_loaded != 88:
        # Not fatal: the split below is positional, so processing continues.
        logging.warning(f"Expected 88 rows in {BASE_DATA_PATH}, but found {n_loaded}. Proceeding based on indices.")
    logging.info(f"Initial data points loaded: {n_loaded}.")

    # Block C: the new Week 2 query rows.
    w2_rows = load_new_queries_from_json_files(ADD_DATA_PATH, N_FUNCTIONS, X_COLUMNS)
    if w2_rows.empty:
        logging.warning("No new query results were loaded. Stopping.")
        return

    # Block A is everything before the split position; Block B (Week 1
    # queries) is everything from the split position onward.
    split_at = N_FUNCTIONS * POINTS_PER_FUNCTION_BLOCK
    initial_rows = w02_frame.iloc[:split_at].copy()
    w1_rows = w02_frame.iloc[split_at:].copy()
    logging.info(f"W02 Data Split: {len(initial_rows)} Initial rows, {len(w1_rows)} W1 Query rows.")

    # Stack A -> B -> C with a fresh 0..n-1 index.
    stacked = pd.concat(
        [initial_rows, w1_rows, w2_rows],
        ignore_index=True,
        verify_integrity=True,
    )
    stacked['Function ID'] = stacked['Function ID'].astype(int)

    # Persist the result and report a summary.
    divider = "---------------------------------------------"
    logging.info(divider)
    stacked.to_csv(OUTPUT_FILE_NAME, index=False)
    logging.info(f"SUCCESS: New master data file '{OUTPUT_FILE_NAME}' created.")
    logging.info(f"Total rows in the new file: {len(stacked)} (Expected 96).")
    logging.info("Structure: 80 initial pts, then 8 W1 pts, then 8 W2 pts (Chronological Stacks).")
    logging.info(divider)


In [5]:
# Entry point: build the W03 master file when this cell/script is run directly.
if __name__ == '__main__':
    create_bbo_master_w03()

INFO: --- Loading initial data from bbo_master_w02.csv (Expected 88 rows) ---
INFO: Initial data points loaded: 88.
INFO: Successfully loaded 8 new Week 2 query points (F1-F8).
INFO: W02 Data Split: 80 Initial rows, 8 W1 Query rows.
INFO: ---------------------------------------------
INFO: SUCCESS: New master data file 'bbo_master_w03.csv' created.
INFO: Total rows in the new file: 96 (Expected 96).
INFO: Structure: 80 initial pts, then 8 W1 pts, then 8 W2 pts (Chronological Stacks).
INFO: ---------------------------------------------
