In [1]:
import numpy as np
import os
import re
import ast
import json
import datetime
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
import matplotlib.pyplot as plt

# =========================
# CONFIGURATION
# =========================
# Where plots and the explanation JSON are written; created up front so that
# later writes into it cannot fail on a missing directory.
log_folder = "week10_logs"
os.makedirs(log_folder, exist_ok=True)

# Number of uniformly sampled candidate points scored per function.
n_candidates = 4_000

# Root directory holding one "function_<i>" sub-folder per function.
base_path = "data/"

# Text dumps received by email (results of the previous week).
inputs_txt, outputs_txt = "week9/inputs.txt", "week9/outputs.txt"

# =========================
# LOAD INPUTS (take the latest batch only)
# =========================
def load_inputs(path):
    """Parse the input vectors of the most recent batch from a text dump.

    The file is split into batches wherever a closing bracket is followed,
    on the next line, by an opening bracket; only the last batch is parsed.
    Inside that batch every innermost ``[...]`` group becomes one vector.
    ``array(...)`` wrappers and scalar wrappers such as ``np.float64(...)``
    are stripped first, so both a printed list of NumPy arrays and a printed
    list of lists of NumPy scalars are accepted.

    Args:
        path: Path of the text file to read (decoded as UTF-8).

    Returns:
        A list of 1-D ``numpy.ndarray`` objects, one per bracketed group, in
        file order. Vectors may differ in length.

    Raises:
        ValueError: If a comma-separated token inside a group is not a float.
    """
    with open(path, "r", encoding="utf-8") as f:
        text = f.read().strip()

    # Keep only the text after the last "]\n[" batch boundary.
    last_batch = re.split(r"\]\s*\n\s*\[", text)[-1].strip()
    # Normalise the outer brackets: the split consumes the opening bracket of
    # every batch but the first, and the closing bracket of every batch but
    # the last.
    last_batch = "[" + last_batch.lstrip("[").rstrip("]") + "]"

    # Remove "array(", "np.array(", "np.float64(", "float32(", ... together
    # with every closing parenthesis, leaving only brackets and numbers.
    last_batch = re.sub(
        r"(?:\b(?:np|numpy)\.)?\b(?:array|float\d*)\(", "", last_batch
    )
    last_batch = last_batch.replace(")", "")

    chunks = re.findall(r"\[([^\[\]]+)\]", last_batch)
    vectors = [
        np.array([float(token) for token in chunk.split(",") if token.strip()])
        for chunk in chunks
    ]
    print(f"✅ Parsed {len(vectors)} input vectors (latest batch only).")
    return vectors

# =========================
# LOAD OUTPUTS (take the latest batch only)
# =========================
def load_outputs(path):
    """Parse the output values of the most recent batch from a text dump.

    The file is split into batches wherever a closing bracket is followed,
    on the next line, by an opening bracket; only the last batch is parsed.
    Every numeric literal in that batch is returned, in file order.

    Digits that belong to an identifier (for example the ``64`` in
    ``np.float64(...)``, ``float64(...)`` or ``dtype=float32``) are never
    treated as data: a number must not be directly preceded by a letter,
    digit, underscore or dot. Literals with a trailing dot and an exponent
    (``2.e-05``) are read as a single value.

    Args:
        path: Path of the text file to read (decoded as UTF-8).

    Returns:
        A 1-D ``numpy.ndarray`` of dtype float with one entry per literal.
    """
    with open(path, "r", encoding="utf-8") as f:
        text = f.read().strip()

    # Keep only the text after the last "]\n[" batch boundary.
    last_batch = re.split(r"\]\s*\n\s*\[", text)[-1]

    # The look-behind rejects digits embedded in names such as "float64";
    # deleting the surrounding characters instead would fuse those digits
    # with the wrapped value ("float64(1.5)" -> "641.5").
    number = r"(?<![\w.])[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?"
    outputs = np.array(
        [float(tok) for tok in re.findall(number, last_batch)], dtype=float
    )
    print(f"✅ Parsed {len(outputs)} outputs (latest batch only).")
    return outputs


# =========================
# LOAD DATA FROM EMAIL
# =========================
new_inputs = load_inputs(inputs_txt)
new_outputs = load_outputs(outputs_txt)

# Validate the parsed batch before any file is written. Without this check a
# short batch raises IndexError part-way through the loop, after the
# week10_*.npy files of the earlier functions have already been overwritten.
n_functions = 8
if len(new_inputs) < n_functions or len(new_outputs) < n_functions:
    raise ValueError(
        f"Expected at least {n_functions} input vectors and outputs in the "
        f"latest batch, got {len(new_inputs)} inputs and "
        f"{len(new_outputs)} outputs."
    )

# =========================
# MODEL TRAINING & EXPLANATION LOGGING
# =========================
queries_out = []    # one "Function i: x1-x2-..." line per function
explanations = {}   # per-function record, later dumped as JSON

for i in range(1, n_functions + 1):
    folder = os.path.join(base_path, f"function_{i}")
    input_file = os.path.join(folder, "week9_inputs.npy")
    output_file = os.path.join(folder, "week9_outputs.npy")

    # Combine the existing data with the new batch entry for this function.
    X_prev = np.load(input_file)
    y_prev = np.load(output_file)
    x_new = new_inputs[i - 1].reshape(1, -1)
    if x_new.shape[1] != X_prev.shape[-1]:
        raise ValueError(
            f"Function {i}: new input has {x_new.shape[1]} dimensions but "
            f"{input_file} stores {X_prev.shape[-1]}-dimensional points."
        )
    X_combined = np.vstack([X_prev, x_new])
    y_combined = np.append(y_prev, new_outputs[i - 1])

    np.save(os.path.join(folder, "week10_inputs.npy"), X_combined)
    np.save(os.path.join(folder, "week10_outputs.npy"), y_combined)

    dim = X_combined.shape[1]
    print(f"\n=== Function {i} ({dim}D) ===")
    print(f"  Data size: {len(X_combined)}, Output range: [{y_combined.min():.4f}, {y_combined.max():.4f}]")

    # MLP surrogate (same as Week 9 for consistency)
    model = make_pipeline(
        StandardScaler(),
        MLPRegressor(
            hidden_layer_sizes=(512, 256, 128),
            activation='relu',
            solver='adam',
            alpha=3e-4,
            learning_rate_init=0.001,
            max_iter=3000,
            random_state=42
        )
    )
    model.fit(X_combined, y_combined)

    # --- Plot performance for transparency ---
    # The predictions are for the training points themselves, so this plot
    # shows how closely the surrogate fits the data it was trained on.
    y_pred_train = model.predict(X_combined)
    plt.figure(figsize=(5, 5))
    plt.scatter(y_combined, y_pred_train, c='blue', edgecolor='k')
    plt.plot([y_combined.min(), y_combined.max()],
             [y_combined.min(), y_combined.max()],
             'r--', lw=2)
    plt.title(f'Function {i} - Actual vs Predicted (Week 10)')
    plt.xlabel('Actual y')
    plt.ylabel('Predicted y')
    plt.grid(True)
    plt.savefig(os.path.join(log_folder, f"function_{i}_fit.png"))
    plt.close()

    # --- Candidate search ---
    # Candidates are drawn from NumPy's global random state; no seed is set
    # in this cell, so the candidate pool differs from run to run.
    candidates = np.random.uniform(0, 1, (n_candidates, dim))
    preds = model.predict(candidates)
    top_indices = np.argsort(preds)[-5:][::-1]  # take the 5 highest, best first
    best_idx = top_indices[0]
    best_query = np.clip(candidates[best_idx], 0.0, 1.0)
    query_str = "-".join([f"{x:.6f}" for x in best_query])

    print(f"  Best predicted output: {preds[best_idx]:.4f}")
    print(f"  Query to submit: {query_str}")

    # --- Explanation log ---
    explanations[f"Function_{i}"] = {
        "timestamp": str(datetime.datetime.now()),
        "dimensionality": dim,
        "data_points_used": len(X_combined),
        "output_range": [float(y_combined.min()), float(y_combined.max())],
        "model_architecture": [512, 256, 128],
        "top_5_predictions": preds[top_indices].tolist(),
        "top_5_candidates": candidates[top_indices].tolist(),
        "selected_query": best_query.tolist(),
        "reasoning": (
            "Selected based on highest predicted output from MLP surrogate. "
            "Query chosen from top-5 candidate set to prioritise stable, high-value regions "
            "identified by model confidence and historical performance trends."
        ),
    }

    queries_out.append(f"Function {i}: {query_str}")

# =========================
# SAVE WEEK10 QUERIES + LOGS
# =========================
# Write one "Function i: <query>" line per function.
with open("week10_queries.txt", "w", encoding="utf-8") as f:
    f.write("\n".join(queries_out))

# Dump the per-function explanation records next to the plots.
explanations_path = os.path.join(log_folder, "week10_explanations.json")
with open(explanations_path, "w", encoding="utf-8") as f:
    json.dump(explanations, f, indent=4)

# Report the locations actually written, derived from log_folder instead of
# a hard-coded copy of its value.
print("\n💾 Saved all week10 queries to week10_queries.txt")
print(f"🧠 Detailed explanations saved to {explanations_path}")
print(f"📊 Plots saved under {log_folder}/")

✅ Parsed 8 input vectors (latest batch only).
✅ Parsed 8 outputs (latest batch only).

=== Function 1 (2D) ===
  Data size: 19, Output range: [-0.0036, 64.0000]
  Best predicted output: 27.4823
  Query to submit: 0.134004-0.569748

=== Function 2 (2D) ===
  Data size: 19, Output range: [-0.0656, 3.1124]
  Best predicted output: 0.7968
  Query to submit: 0.739313-0.722208

=== Function 3 (3D) ===
  Data size: 24, Output range: [-0.3989, 71.0000]
  Best predicted output: 19.3789
  Query to submit: 0.189178-0.625108-0.708724

=== Function 4 (4D) ===
  Data size: 39, Output range: [-32.6257, 64.0000]
  Best predicted output: -0.9950
  Query to submit: 0.363422-0.431794-0.364787-0.419088

=== Function 5 (4D) ===
  Data size: 29, Output range: [0.1129, 4440.5227]
  Best predicted output: 3094.4953
  Query to submit: 0.109443-0.976541-0.999893-0.922674

=== Function 6 (5D) ===
  Data size: 29, Output range: [-2.5712, 64.0000]
  Best predicted output: 25.7313
  Query to submit: 0.433696-0.