In [1]:
import numpy as np
import os, re, ast, datetime, json
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

# =========================
# CONFIGURATION
# =========================
# Root directory that holds one "function_<i>" sub-folder per objective function.
base_path = "data/"
# Text dumps from the previous week; only the last batch in each file is parsed.
inputs_txt = "week11/inputs.txt"       # results file from the previous week
outputs_txt = "week11/outputs.txt"
# Number of uniform random candidates AND of points sampled along the first
# principal axis, per function.
n_candidates = 4000
# Destination for the per-function fit plots and the PCA summary JSON.
log_folder = "week12_logs"
os.makedirs(log_folder, exist_ok=True)  # created up front; no-op if it already exists

# =========================
# LOAD INPUTS (ambil batch terakhir)
# =========================
def load_inputs(path):
    """Parse the last batch of input vectors from a text dump.

    The file is expected to hold one bracketed list per batch, batches being
    separated by a ``]`` / newline / ``[`` boundary. Only the final batch is
    read. ``array(`` wrappers and closing parentheses are removed, then every
    innermost ``[...]`` group is converted to a 1-D float array.

    Args:
        path: Path of the text file to read.

    Returns:
        A list of ``numpy`` arrays, one per bracketed group in the last batch
        (empty list when no group is found).

    Raises:
        ValueError: If a comma-separated token is not a valid float.
    """
    with open(path, "r") as handle:
        raw = handle.read().strip()

    # Keep only the text after the last batch boundary.
    tail = re.split(r"\]\s*\n\s*\[", raw)[-1]

    # Normalise the outer brackets: splitting may have eaten the leading "[",
    # so strip whatever is left on both ends and wrap the body exactly once.
    body = tail.strip().lstrip("[").rstrip("]")
    wrapped = "[" + body + "]"

    # Drop the numpy repr decoration so only nested bracket groups remain.
    flattened = re.sub(r'array\(', '', wrapped)
    flattened = flattened.replace(')', '')

    vectors = []
    for group in re.findall(r'\[([^\[\]]+)\]', flattened):
        values = []
        for token in group.split(","):
            if token.strip():  # skip empty pieces such as a trailing comma
                values.append(float(token))
        vectors.append(np.array(values))
    return vectors

def load_outputs(path):
    """Parse the last batch of scalar outputs from a text dump.

    Batches are separated by a ``]`` / newline / ``[`` boundary and only the
    final one is read. ``np.float64(`` wrappers, parentheses and brackets are
    removed, every character that cannot belong to a decimal/scientific number
    is discarded, and the remaining numeric literals are collected.

    Args:
        path: Path of the text file to read.

    Returns:
        A 1-D float ``numpy`` array with the numbers found in the last batch.
    """
    with open(path, "r") as handle:
        raw = handle.read().strip()

    cleaned = re.split(r"\]\s*\n\s*\[", raw)[-1].strip()

    # Strip the repr decoration, in the same order as listed here.
    for decoration in ("np.float64(", ")", "[", "]"):
        cleaned = cleaned.replace(decoration, "")

    # Keep only digits, exponent markers, minus signs, dots, commas and whitespace.
    cleaned = re.sub(r"[^\deE\-\.\,\s]", "", cleaned)

    literals = re.findall(r"[-+]?\d*\.?\d+(?:[eE][-+]?\d+)?", cleaned)
    return np.array(list(map(float, literals)), dtype=float)

# =========================
# LOAD NEW DATA
# =========================
# Parse the most recent batch of query points and the outputs observed for them.
new_inputs = load_inputs(inputs_txt)
new_outputs = load_outputs(outputs_txt)

# Inputs and outputs are paired by position further down, so differing counts
# mean one of the files was parsed incorrectly and every pairing is suspect.
if len(new_inputs) != len(new_outputs):
    raise ValueError(
        f"Parsed {len(new_inputs)} input vectors but {len(new_outputs)} outputs; "
        "the last batches of the two files do not line up."
    )

# The marker is a check-mark emoji; it had been stored as mojibake
# (its UTF-8 bytes decoded as cp1252).
print(f"✅ Parsed {len(new_inputs)} input vectors, {len(new_outputs)} outputs.")

# =========================
# PCA + SURROGATE STRATEGY
# =========================
# For each function: append the newest observation to the stored history, fit
# an MLP surrogate, and pick the candidate with the highest predicted output.
queries_out = []
pca_info = {}

n_functions = 8

# Validate before the loop so a short batch cannot leave week12 files written
# for only some of the functions.
if len(new_inputs) < n_functions or len(new_outputs) < n_functions:
    raise ValueError(
        f"Expected at least {n_functions} input vectors and outputs, got "
        f"{len(new_inputs)} inputs and {len(new_outputs)} outputs."
    )

# Seeded generator: candidate sampling is now as reproducible as the MLP,
# which already uses random_state=42.
rng = np.random.default_rng(42)

for i in range(1, n_functions + 1):
    folder = os.path.join(base_path, f"function_{i}")
    X_prev = np.load(os.path.join(folder, "week11_inputs.npy"))
    y_prev = np.load(os.path.join(folder, "week11_outputs.npy"))

    # Combine the stored data with the new batch (one extra row / one extra value).
    X_combined = np.vstack([X_prev, new_inputs[i-1].reshape(1, -1)])
    y_combined = np.append(y_prev, new_outputs[i-1])

    np.save(os.path.join(folder, "week12_inputs.npy"), X_combined)
    np.save(os.path.join(folder, "week12_outputs.npy"), y_combined)

    dim = X_combined.shape[1]
    print(f"\n=== Function {i} ({dim}D) ===")
    print(f"  Data size: {len(X_combined)}, Output range: [{y_combined.min():.4f}, {y_combined.max():.4f}]")

    # Build surrogate model: inputs are standardised, then regressed by an MLP.
    model = make_pipeline(
        StandardScaler(),
        MLPRegressor(hidden_layer_sizes=(512, 256, 128),
                     activation='relu',
                     solver='adam',
                     alpha=2e-4,
                     learning_rate_init=0.001,
                     max_iter=3000,
                     random_state=42)
    )
    model.fit(X_combined, y_combined)

    # PCA analysis of the sampled inputs (at most 3 components are kept).
    pca = PCA(n_components=min(dim, 3))
    pca.fit(X_combined)
    explained = pca.explained_variance_ratio_
    top_pc = pca.components_[0]
    print(f"  PCA variance explained: {explained[:3]}")
    print(f"  Principal direction (first PC): {top_pc}")

    # Candidate pool = uniform samples in the unit cube + points on the line
    # through the data mean along the first principal direction (offsets in
    # [-0.5, 0.5]), clipped back into [0, 1] per coordinate.
    base_point = np.mean(X_combined, axis=0)
    candidates = rng.uniform(0, 1, (n_candidates, dim))
    pc_variation = base_point + np.outer(np.linspace(-0.5, 0.5, n_candidates), top_pc)
    pc_variation = np.clip(pc_variation, 0, 1)

    all_candidates = np.vstack([candidates, pc_variation])
    preds = model.predict(all_candidates)

    # Pure exploitation: take the candidate with the largest predicted value.
    best_idx = np.argmax(preds)
    # Both candidate sets already lie in [0, 1]; the clip is only a safety net.
    best_query = np.clip(all_candidates[best_idx], 0.0, 1.0)
    query_str = "-".join([f"{x:.6f}" for x in best_query])

    print(f"  Best predicted output: {preds[best_idx]:.4f}")
    print(f"  Query to submit: {query_str}")

    # Visualization: surrogate prediction vs. observed value on the training set.
    y_pred_train = model.predict(X_combined)
    plt.figure(figsize=(5, 5))
    try:
        plt.scatter(y_combined, y_pred_train, c='blue', edgecolor='k')
        plt.plot([y_combined.min(), y_combined.max()],
                 [y_combined.min(), y_combined.max()], 'r--', lw=2)
        plt.title(f'Function {i} - Actual vs Predicted (Week 12)')
        plt.xlabel('Actual y'); plt.ylabel('Predicted y')
        plt.grid(True)
        plt.savefig(os.path.join(log_folder, f"function_{i}_fit.png"))
    finally:
        # Always release the figure, even if plotting or saving failed.
        plt.close()

    pca_info[f"Function_{i}"] = {
        "data_points": len(X_combined),
        "dim": dim,
        "variance_explained": explained.tolist(),
        "principal_direction": top_pc.tolist(),
        "selected_query": best_query.tolist()
    }

    queries_out.append(f"Function {i}: {query_str}")

# =========================
# SAVE RESULTS
# =========================
# One "Function <i>: x1-x2-..." line per function.
with open("week12_queries.txt", "w", encoding="utf-8") as f:
    f.write("\n".join(queries_out))

# Per-function PCA summary and the selected query, as JSON.
with open(os.path.join(log_folder, "week12_pca_info.json"), "w", encoding="utf-8") as f:
    json.dump(pca_info, f, indent=4)

# The emoji had been stored as mojibake (UTF-8 bytes decoded as cp1252).
print("\n💾 Saved all week12 queries to week12_queries.txt")
# Report the configured folder rather than a hard-coded copy of its name.
print(f"📊 PCA logs saved under {log_folder}/")


✅ Parsed 8 input vectors, 8 outputs.

=== Function 1 (2D) ===
  Data size: 21, Output range: [-0.0036, 64.0000]
  PCA variance explained: [0.65776081 0.34223919]
  Principal direction (first PC): [0.90600879 0.42325887]
  Best predicted output: 22.4281
  Query to submit: 0.137741-0.557042

=== Function 2 (2D) ===
  Data size: 21, Output range: [-0.0656, 3.1124]
  PCA variance explained: [0.73912458 0.26087542]
  Principal direction (first PC): [0.45459117 0.89070021]
  Best predicted output: 0.7224
  Query to submit: 0.733554-0.692283

=== Function 3 (3D) ===
  Data size: 26, Output range: [-0.3989, 71.0000]
  PCA variance explained: [0.60591885 0.23649734 0.15758381]
  Principal direction (first PC): [-0.55222802  0.07002363  0.8307472 ]
  Best predicted output: 19.6139
  Query to submit: 0.232268-0.650747-0.800166

=== Function 4 (4D) ===
  Data size: 41, Output range: [-32.6257, 64.0000]
  PCA variance explained: [0.4503197  0.25506756 0.1815725 ]
  Principal direction (first PC):