In [None]:
# @title 1. Environment setup and data loading
import os
import sys
import importlib.util
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Project path configuration.
# DRIVE_FOLDER lives under the Drive mount point used below ('/content/drive');
# LOCAL_FOLDER is the working copy on the Colab VM that the rest of the
# notebook operates in.
DRIVE_FOLDER = "/content/drive/My Drive/projects/TensorMorph"
LOCAL_FOLDER = "/content/tensormorph_local"

# Mount Drive for data and schema access.
# force_remount=True requests a fresh mount even if Drive is already mounted.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

# Workspace initialization: create the local target directories up front so
# the copy commands below have somewhere to write (exist_ok makes re-runs safe).
os.makedirs(f"{LOCAL_FOLDER}/experimental", exist_ok=True)
os.makedirs(f"{LOCAL_FOLDER}/data", exist_ok=True)

# Syncing local experimental scripts and the generated data.
# The paths are quoted because DRIVE_FOLDER contains a space ("My Drive");
# the trailing * is left outside the quotes so the shell can expand it.
print("Syncing files from Drive...")
!rsync -av --progress "{DRIVE_FOLDER}/experimental/" "{LOCAL_FOLDER}/experimental/"
!cp -r "{DRIVE_FOLDER}/data/"* "{LOCAL_FOLDER}/data/"

# From here on, relative paths (e.g. "data/...") resolve against LOCAL_FOLDER.
os.chdir(LOCAL_FOLDER)

# Direct path import for schema.py to ensure alignment with DataGen.
# The module is loaded from its file location rather than via sys.path so the
# copy that was just synced into LOCAL_FOLDER is the one that gets executed.
schema_path = os.path.join(LOCAL_FOLDER, "experimental/schema.py")
spec = importlib.util.spec_from_file_location("schema", schema_path)
schema = importlib.util.module_from_spec(spec)
try:
    spec.loader.exec_module(schema)
    # Cell code already runs at module scope, so plain assignment publishes
    # these names to the rest of the notebook (no `global` statement needed).
    FEATURES = schema.FEATURES
    TARGET = schema.TARGET
except Exception as e:
    print(f"Error: Failed to load schema.py: {e}")
    # Every later cell needs FEATURES/TARGET. Stop here instead of continuing
    # and failing downstream with a NameError (or, worse, silently reusing
    # stale values left in the kernel by an earlier run).
    raise
print(f"Schema loaded: {len(FEATURES)} features identified.")

# Read one CSV per hardware profile; the relative paths resolve against the
# current working directory.
df_mem, df_comp = map(
    pd.read_csv,
    ("data/dataset_memory_bound.csv", "data/dataset_compute_bound.csv"),
)

print(f"Loaded {len(df_mem)} Memory-Bound and {len(df_comp)} Compute-Bound samples.")
print("Environment ready.")

In [None]:
# @title 2. Data preprocessing and feature engineering
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Train / validation / test proportions (70/15/15).
TRAIN_RATIO, VAL_RATIO, TEST_RATIO = 0.7, 0.15, 0.15
# Random seed passed to every split (and to the model) for reproducibility.
SEED = 42
# When True, features are standardized using statistics from the training split.
NORMALIZE = True

def prepare_dataset(df, feature_cols, target_col):
    """
    Partition a dataframe into train/validation/test arrays and optionally
    standardize the features.

    The test partition (TEST_RATIO of all rows) is carved off first; the
    remaining rows are then divided so that validation ends up with VAL_RATIO
    of the full dataset. Both splits use SEED.

    Args:
        df: Dataframe holding the feature columns and the target column.
        feature_cols: Column names used as model inputs.
        target_col: Column name of the regression target.

    Returns:
        Tuple (X_train, X_val, X_test, y_train, y_val, y_test, scaler).
        scaler is the StandardScaler fitted on the training features, or
        None when NORMALIZE is False (features are then returned unscaled).
    """
    features = df[feature_cols].values
    targets = df[target_col].values

    # Hold out the test rows before anything else touches the data.
    X_rest, X_test, y_rest, y_test = train_test_split(
        features, targets, test_size=TEST_RATIO, random_state=SEED
    )

    # Validation share expressed relative to what is left after the hold-out.
    val_share = VAL_RATIO / (TRAIN_RATIO + VAL_RATIO)
    X_train, X_val, y_train, y_val = train_test_split(
        X_rest, y_rest, test_size=val_share, random_state=SEED
    )

    if not NORMALIZE:
        return X_train, X_val, X_test, y_train, y_val, y_test, None

    # Fit on the training split only, then apply the same transform everywhere
    # so no validation/test statistics leak into the scaler.
    scaler = StandardScaler().fit(X_train)
    X_train, X_val, X_test = (
        scaler.transform(part) for part in (X_train, X_val, X_test)
    )
    return X_train, X_val, X_test, y_train, y_val, y_test, scaler

# Build the splits (plus scaler) for each hardware profile.
res_mem = prepare_dataset(df_mem, FEATURES, TARGET)
res_comp = prepare_dataset(df_comp, FEATURES, TARGET)

# Unpack the 7-tuples into per-profile names used by the later cells.
(
    X_train_mem, X_val_mem, X_test_mem,
    y_train_mem, y_val_mem, y_test_mem,
    scaler_mem,
) = res_mem
(
    X_train_comp, X_val_comp, X_test_comp,
    y_train_comp, y_val_comp, y_test_comp,
    scaler_comp,
) = res_comp

# Report the size of every partition.
print(f"Memory split: {len(X_train_mem)} train, {len(X_val_mem)} val, {len(X_test_mem)} test.")
print(f"Compute split: {len(X_train_comp)} train, {len(X_val_comp)} val, {len(X_test_comp)} test.")

In [None]:
# @title 3. Model training and evaluation
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Gradient boosting settings: number of trees, learning rate, max tree depth.
N_TREES, LR, DEPTH = 100, 0.1, 4

def run_training(X_train, X_val, X_test, y_train, y_val, y_test, label):
    """
    Fit a GradientBoostingRegressor on the training split and report metrics.

    The model is configured from N_TREES, LR, DEPTH and SEED. R2 is printed
    for the validation split, and R2 plus MSE for the test split, under a
    heading built from `label`.

    Args:
        X_train, y_train: Training features and targets used for fitting.
        X_val, y_val: Validation features and targets (reporting only).
        X_test, y_test: Test features and targets (reporting only).
        label: Name shown in the printed results header.

    Returns:
        The fitted GradientBoostingRegressor.
    """
    regressor = GradientBoostingRegressor(
        n_estimators=N_TREES,
        learning_rate=LR,
        max_depth=DEPTH,
        random_state=SEED,
    ).fit(X_train, y_train)

    # Validation score.
    val_r2 = r2_score(y_val, regressor.predict(X_val))

    # Held-out test scores; predictions are computed once and reused.
    held_out_preds = regressor.predict(X_test)
    test_r2 = r2_score(y_test, held_out_preds)
    test_mse = mean_squared_error(y_test, held_out_preds)

    print(f"Results for {label}:")
    print(f"  Val R2:  {val_r2:.4f}")
    print(f"  Test R2: {test_r2:.4f}")
    print(f"  Test MSE: {test_mse:.4f}\n")

    return regressor

# Fit one advisor model per hardware profile.
model_mem = run_training(
    X_train_mem, X_val_mem, X_test_mem,
    y_train_mem, y_val_mem, y_test_mem,
    label="Memory-Bound",
)

model_comp = run_training(
    X_train_comp, X_val_comp, X_test_comp,
    y_train_comp, y_val_comp, y_test_comp,
    label="Compute-Bound",
)

In [None]:
# @title 4. Feature importance and analysis
import numpy as np
import matplotlib.pyplot as plt

def plot_importance(model, feature_names, title, ax):
    """
    Draw a horizontal bar chart of a model's feature importances on `ax`.

    Bars are ordered by ascending importance, so the most important feature
    is placed at the highest y position.

    Args:
        model: Fitted estimator exposing `feature_importances_`.
        feature_names: Names indexed in the same order as the importances.
        title: Title for the axes.
        ax: Axes object to draw on.
    """
    scores = model.feature_importances_
    order = np.argsort(scores)  # ascending importance
    positions = range(len(order))

    # One bar per feature, in sorted order.
    ax.barh(positions, scores[order], color='steelblue', align='center')
    ax.set_yticks(positions)
    ax.set_yticklabels([feature_names[idx] for idx in order])
    ax.set_title(title)
    ax.set_xlabel("Importance score")
    ax.grid(axis='x', alpha=0.3)

# Side-by-side importance charts, one panel per hardware profile.
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(15, 6))

plot_importance(model=model_mem, feature_names=FEATURES,
                title="Memory-Bound features", ax=ax1)
plot_importance(model=model_comp, feature_names=FEATURES,
                title="Compute-Bound features", ax=ax2)

plt.tight_layout()
plt.show()

In [None]:
# @title 5. Model persistence
import joblib
import os

# Create local directory.
# "models" is a relative path, so it is created under the current working
# directory.
os.makedirs("models", exist_ok=True)

# Save Memory-Bound artifacts.
# The scaler is saved next to the model so inference can reproduce the same
# feature standardization. NOTE(review): prepare_dataset returns scaler=None
# when NORMALIZE is False, in which case None is what gets serialized here.
joblib.dump(model_mem, "models/model_mem.joblib")
joblib.dump(scaler_mem, "models/scaler_mem.joblib")

# Save Compute-Bound artifacts.
joblib.dump(model_comp, "models/model_comp.joblib")
joblib.dump(scaler_comp, "models/scaler_comp.joblib")

# Ensure Drive directory exists.
drive_models_path = f"{DRIVE_FOLDER}/models"
os.makedirs(drive_models_path, exist_ok=True)

# Sync to Drive.
# Copies everything under the local models/ directory, not only the four
# files written above.
!cp -r models/* "{drive_models_path}/"

# NOTE: this message is printed unconditionally; the exit status of the cp
# command above is not checked.
print(f"Models and scalers persisted to {drive_models_path}.")

In [None]:
# @title 6. Model transpilation and C++ export
import datetime
import os

def _cpp_float_literal(value):
    """
    Format a number as a C++ ``float`` literal (e.g. ``0.1f``, ``2.0f``).

    Nine significant digits are emitted, which is enough to round-trip any
    float32 value and, unlike a fixed number of decimals, does not collapse
    small magnitudes to ``0.00000000f``.

    Raises:
        ValueError: If the value is NaN or infinite (no valid literal exists).
    """
    number = float(value)
    if number != number or number in (float("inf"), float("-inf")):
        raise ValueError(f"Cannot emit non-finite value as a C++ literal: {number}")
    text = f"{number:.9g}"
    # The "g" format drops the fraction of whole numbers ("2"), but "2f" is
    # not a valid C++ literal, so restore a decimal point when it is missing.
    if "." not in text and "e" not in text:
        text += ".0"
    return text + "f"


def export_to_cpp(model, scaler, feature_names, class_name, filename, profile_id):
    """
    Converts a Gradient Boosting model and its scaler to a C++ header
    defining a class that derives from Advisor and overrides Predict and
    GetProfileName.

    The generated predictor standardizes the input, starts from the model's
    initial constant and adds ``learning_rate * tree_i(x)`` for every stage.
    scikit-learn stores unshrunk leaf values and applies the learning rate at
    prediction time, so the factor must be emitted explicitly for the C++
    output to match ``model.predict``.

    Args:
        model: Fitted gradient boosting regressor; must expose ``init_``,
            ``estimators_`` and ``learning_rate``.
        scaler: Fitted scaler exposing ``mean_`` and ``scale_``, or None to
            feed the raw inputs to the trees (the NORMALIZE=False case).
        feature_names: Feature names; only their count is used, to size the
            input buffer.
        class_name: Name of the generated C++ class.
        filename: Path of the header file to write (overwritten if present).
        profile_id: String returned by the generated GetProfileName().

    Raises:
        ValueError: If any exported constant is NaN or infinite.
    """
    init_val = model.init_.constant_[0][0]
    shrinkage = _cpp_float_literal(model.learning_rate)
    n_features = len(feature_names)
    n_trees = len(model.estimators_)

    with open(filename, 'w') as f:
        f.write(f"// Autogenerated - DO NOT EDIT. Created: {datetime.date.today()}\n")
        f.write("#pragma once\n")
        f.write("#include <vector>\n")
        f.write("#include <string>\n")
        f.write('#include "experimental/Advisor.h"\n\n')

        f.write(f"class {class_name} : public Advisor {{\n")
        f.write("public:\n")

        # Implementation of the Advisor interface.
        f.write("    float Predict(const std::vector<float>& features) const override {\n")
        f.write("        return predict(features);\n")
        f.write("    }\n\n")

        f.write("    std::string GetProfileName() const override {\n")
        f.write(f'        return "{profile_id}";\n')
        f.write("    }\n\n")

        f.write("    static float predict(const std::vector<float>& input) {\n")
        f.write(f"        float x[{n_features}];\n")
        for i in range(n_features):
            if scaler is None:
                # No scaler was fitted: the trees were trained on raw features.
                f.write(f"        x[{i}] = input[{i}];\n")
            else:
                mean = _cpp_float_literal(scaler.mean_[i])
                scale = _cpp_float_literal(scaler.scale_[i])
                f.write(f"        x[{i}] = (input[{i}] - {mean}) / {scale};\n")

        f.write(f"\n        float score = {_cpp_float_literal(init_val)};\n")
        for i in range(n_trees):
            # Each stage's contribution is shrunk by the learning rate.
            f.write(f"        score += {shrinkage} * tree_{i}(x);\n")
        f.write("        return score;\n")
        f.write("    }\n\n")

        f.write("private:\n")

        for i, estimator in enumerate(model.estimators_):
            # Each stage is a one-element row holding the fitted regression tree.
            tree = estimator[0].tree_
            f.write(f"    static float tree_{i}(const float* x) {{\n")

            def recurse(node, depth):
                """Emit nested if/else statements for the subtree rooted at `node`."""
                indent = "        " + "    " * depth
                # scikit-learn marks leaf nodes with feature index -2.
                if tree.feature[node] != -2:
                    feat_idx = tree.feature[node]
                    threshold = _cpp_float_literal(tree.threshold[node])
                    f.write(f"{indent}if (x[{feat_idx}] <= {threshold}) {{\n")
                    recurse(tree.children_left[node], depth + 1)
                    f.write(f"{indent}}} else {{\n")
                    recurse(tree.children_right[node], depth + 1)
                    f.write(f"{indent}}}\n")
                else:
                    val = _cpp_float_literal(tree.value[node][0][0])
                    f.write(f"{indent}return {val};\n")

            recurse(0, 0)
            f.write("    }\n\n")
        f.write("};\n")

# Create the codegen directory.
# Relative path: resolved against the current working directory.
os.makedirs("experimental/codegen", exist_ok=True)

# Export both hardware targets.
# Each call writes one header; the last argument is the profile string the
# generated class returns from GetProfileName().
export_to_cpp(model_mem, scaler_mem, FEATURES, "MemoryAdvisor",
              "experimental/codegen/MemoryAdvisor.h", "memory_bound")
export_to_cpp(model_comp, scaler_comp, FEATURES, "ComputeAdvisor",
              "experimental/codegen/ComputeAdvisor.h", "compute_bound")

# Sync headers to Drive.
drive_codegen_path = f"{DRIVE_FOLDER}/experimental/codegen"
os.makedirs(drive_codegen_path, exist_ok=True)
!cp experimental/codegen/*.h "{drive_codegen_path}/"

# NOTE: printed unconditionally; the exit status of the cp command above is
# not checked.
print(f"Inference headers exported to {drive_codegen_path}.")

In [None]:
# @title 7. C++ inference verification
import subprocess
import os

# C++ source for a small test harness, written to test_runner.cpp below.
# It obtains an advisor via CreateAdvisor(profile) for "memory_bound" and
# "compute_bound" and prints each one's prediction for one fixed 9-value
# sample. NOTE(review): CreateAdvisor is presumably declared in
# experimental/Advisor.h and defined in Advisor.cpp - confirm. The sample
# length (9) is hard-coded and is not checked against len(FEATURES).
# The doubled backslashes (\\n) are intentional: this is a regular Python
# string, so they produce the two-character sequence \n in the C++ source.
cpp_test_code = """
#include <iostream>
#include <vector>
#include <iomanip>
#include "experimental/Advisor.h"

void run_test(const std::string& profile, const std::vector<float>& sample) {
    auto advisor = CreateAdvisor(profile);
    float prediction = advisor->Predict(sample);
    std::cout << std::left << std::setw(16) << profile << ": "
              << std::fixed << std::setprecision(6) << prediction << std::endl;
}

int main() {
    std::vector<float> sample = {112.0f, 112.0f, 32.0f, 32.0f, 3.0f, 1.0f, 1.0f, 8.0f, 1.0f};
    std::cout << "Comparing Profit Ratios for same shape:\\n";
    std::cout << "----------------------------------------\\n";
    try {
        run_test("memory_bound", sample);
        run_test("compute_bound", sample);
    } catch (const std::exception& e) {
        std::cerr << "Error: " << e.what() << std::endl;
        return 1;
    }
    return 0;
}
"""

# Stage the hand-written Advisor sources and the generated headers from Drive
# into the local working tree. Note that the codegen headers are taken from
# Drive, so they overwrite any same-named local copies.
!mkdir -p experimental/codegen
!cp "{DRIVE_FOLDER}/experimental/Advisor.h" experimental/
!cp "{DRIVE_FOLDER}/experimental/Advisor.cpp" experimental/
!cp "{DRIVE_FOLDER}/experimental/codegen/"*.h experimental/codegen/

# Write the harness next to the experimental/ directory (current working dir).
with open("test_runner.cpp", "w") as f:
    f.write(cpp_test_code)

# Compile from the project root with "-I." as the only include path, so every
# "experimental/..." include is resolved relative to the working directory.
compile_cmd = "g++ -std=c++17 test_runner.cpp experimental/Advisor.cpp -o advisor_test -I."
result = subprocess.run(compile_cmd.split(), capture_output=True, text=True)

if result.returncode == 0:
    print("Compilation successful.\n")
    run_result = subprocess.run(["./advisor_test"], capture_output=True, text=True)
    print(run_result.stdout)
    # The harness reports caught exceptions on stderr and exits non-zero;
    # surface that instead of silently showing partial (or no) output.
    if run_result.returncode != 0:
        print(f"Test binary exited with code {run_result.returncode}:\n")
        print(run_result.stderr)
else:
    print("Compilation failed:\n")
    print(result.stderr)