In [1]:
#%pip install git+https://github.com/cair/tmu.git
#%pip install numpy==1.26.4

In [2]:
# Import all required libraries

import numpy as np
import pickle
import optuna
import os

from tmu.models.classification.vanilla_classifier import TMClassifier

  from .autonotebook import tqdm as notebook_tqdm


2025-05-08 16:23:48,823 - tmu.clause_bank.clause_bank_cuda - ERROR - No module named 'pycuda'
Traceback (most recent call last):
  File "/Users/kjellhaaland/Documents/GitHub/uia-master-thesis/.venv/lib/python3.12/site-packages/tmu/clause_bank/clause_bank_cuda.py", line 41, in <module>
    from pycuda._driver import Device, Context
ModuleNotFoundError: No module named 'pycuda'


In [3]:
# Event ids whose recordings are concatenated into the training split
train_datasets = np.asarray((55, 81, 47))
# Event id(s) whose recordings are concatenated into the test split
test_datasets = np.asarray((33,))

In [4]:
# Ensure a "models" folder exists in the current working directory.
# exist_ok=True makes the call a no-op when the folder is already present,
# so re-running this cell is safe.
# NOTE(review): presumably the target folder for save_model() — no call in
# this notebook confirms that.
os.makedirs("models", exist_ok=True)

In [5]:
# Helper function to save the model
def save_model(model, filename):
    """Pickle ``model`` into the file at ``filename``.

    The file is opened in binary write mode, so an existing file at that
    path is replaced.
    """
    with open(filename, mode="wb") as sink:
        pickle.dump(model, sink)


# Helper function to save the accuracy
def save_accuracy(epoch, accuracy, tp, tn, fp, fn):
    """Append one comma-separated row to ``accuracy.txt``.

    The row holds, in order: epoch, accuracy, tp, tn, fp, fn, followed by a
    newline. The file is opened in append mode relative to the current
    working directory, so earlier rows are kept.
    """
    row = f"{epoch},{accuracy},{tp},{tn},{fp},{fn}\n"
    with open("accuracy.txt", "a") as log:
        log.write(row)


# Helper function to load dataset
def load_dataset(farm, event_id):
    """Load the training feature matrix for one event.

    Reads ``./data_train/X_{farm}_{event_id}.txt`` with ``np.loadtxt``.

    Args:
        farm: Farm identifier inserted into the file name.
        event_id: Event identifier inserted into the file name.

    Returns:
        A ``np.uint32`` array with at least two dimensions, one row per
        line of the file.
    """
    # loadtxt already returns a uint32 ndarray, so the former
    # np.array(...).astype(...) round trip only made two extra copies.
    # ndmin=2 keeps a one-line file as shape (1, n) instead of squeezing it
    # to (n,), which would break row-wise concatenation with other events.
    return np.loadtxt(f"./data_train/X_{farm}_{event_id}.txt", dtype=np.uint32, ndmin=2)


def load_dataset_labels(farm, event_id):
    """Load the training labels for one event.

    Reads ``./data_train/y_{farm}_{event_id}.txt`` with ``np.loadtxt``.

    Args:
        farm: Farm identifier inserted into the file name.
        event_id: Event identifier inserted into the file name.

    Returns:
        A ``np.uint32`` array with at least one dimension.
    """
    # loadtxt already returns a uint32 ndarray, so the former
    # np.array(...).astype(...) round trip only made two extra copies.
    # ndmin=1 keeps a single-value file as shape (1,) instead of a 0-d
    # array, which np.concatenate refuses to join.
    return np.loadtxt(f"./data_train/y_{farm}_{event_id}.txt", dtype=np.uint32, ndmin=1)


def load_test_dataset(farm, event_id):
    """Load the test feature matrix for one event.

    Reads ``./data_test/X_{farm}_{event_id}.txt`` with ``np.loadtxt``.

    Args:
        farm: Farm identifier inserted into the file name.
        event_id: Event identifier inserted into the file name.

    Returns:
        A ``np.uint32`` array with at least two dimensions, one row per
        line of the file.
    """
    # loadtxt already returns a uint32 ndarray, so the former
    # np.array(...).astype(...) round trip only made two extra copies.
    # ndmin=2 keeps a one-line file as shape (1, n) instead of squeezing it
    # to (n,), which would break row-wise concatenation with other events.
    return np.loadtxt(f"./data_test/X_{farm}_{event_id}.txt", dtype=np.uint32, ndmin=2)


def load_test_dataset_labels(farm, event_id):
    """Load the test labels for one event.

    Reads ``./data_test/y_{farm}_{event_id}.txt`` with ``np.loadtxt``.

    Args:
        farm: Farm identifier inserted into the file name.
        event_id: Event identifier inserted into the file name.

    Returns:
        A ``np.uint32`` array with at least one dimension.
    """
    # loadtxt already returns a uint32 ndarray, so the former
    # np.array(...).astype(...) round trip only made two extra copies.
    # ndmin=1 keeps a single-value file as shape (1,) instead of a 0-d
    # array, which np.concatenate refuses to join.
    return np.loadtxt(f"./data_test/y_{farm}_{event_id}.txt", dtype=np.uint32, ndmin=1)


def get_train_dataset():
    """Assemble the training split from every event id in ``train_datasets``.

    Returns:
        Tuple ``(dataset, labels)``: the feature arrays and the label arrays
        of farm "C", each joined with ``np.concatenate`` in the order of
        ``train_datasets``.
    """
    feature_parts = []
    for event_id in train_datasets:
        feature_parts.append(load_dataset("C", event_id))
    features = np.concatenate(feature_parts)

    label_parts = []
    for event_id in train_datasets:
        label_parts.append(load_dataset_labels("C", event_id))
    targets = np.concatenate(label_parts)

    return features, targets


def get_test_dataset():
    """Assemble the test split from every event id in ``test_datasets``.

    Returns:
        Tuple ``(dataset, labels)``: the feature arrays and the label arrays
        of farm "C", each joined with ``np.concatenate`` in the order of
        ``test_datasets``.
    """
    feature_parts = []
    for event_id in test_datasets:
        feature_parts.append(load_test_dataset("C", event_id))
    features = np.concatenate(feature_parts)

    label_parts = []
    for event_id in test_datasets:
        label_parts.append(load_test_dataset_labels("C", event_id))
    targets = np.concatenate(label_parts)

    return features, targets

In [6]:
X_train, y_train = get_train_dataset()
X_test, y_test = get_test_dataset()

# Drop trailing samples so the training set length is a multiple of 250
X_train = X_train[: (len(X_train) // 250) * 250]
y_train = y_train[: (len(y_train) // 250) * 250]

# Report the resulting array shapes
print(f"X_train shape: {X_train.shape}")
print(f"y_train shape: {y_train.shape}")

print(f"X_test shape: {X_test.shape}")
print(f"y_test shape: {y_test.shape}")

X_train shape: (6500, 4760)
y_train shape: (6500,)
X_test shape: (3313, 4760)
y_test shape: (3313,)


In [7]:
def run_evaluation(tm: TMClassifier) -> float:
    """Score a classifier on the module-level ``X_test`` / ``y_test`` split.

    Args:
        tm: Classifier whose ``predict`` method is called on ``X_test``.

    Returns:
        The number of predictions equal to the matching entry of ``y_test``,
        divided by ``len(y_test)``.
    """
    predictions = tm.predict(X_test)
    n_correct = np.sum(predictions == y_test)
    return n_correct / len(y_test)


def objective(trial: optuna.Trial) -> float:
    """Optuna objective: train a ``TMClassifier`` with sampled settings and score it.

    Samples ``number_of_clauses``, ``T``, ``s`` and ``max_included_literals``
    from ``trial``, calls ``fit`` on ``X_train`` / ``y_train`` five times,
    writes the trial number to ``temp_params.txt`` and returns the value of
    ``run_evaluation`` for the fitted classifier.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        The accuracy reported by ``run_evaluation``.
    """
    number_of_features = X_train.shape[1]

    # `step` is passed by keyword: Optuna deprecates giving it positionally
    # and emits a FutureWarning for every such call.
    number_of_clauses = trial.suggest_int("number_of_clauses", 20, 15000, step=2)
    T = trial.suggest_int("T", 20, 10000, step=2)
    s = trial.suggest_float("s", 1, 50)
    max_included_literals = trial.suggest_int("max_included_literals", 20, 3 * number_of_features, step=2)

    tm = TMClassifier(
        number_of_clauses=number_of_clauses,
        T=T,
        s=s,
        max_included_literals=max_included_literals,
        weighted_clauses=True,
        platform="CPU",  # TODO: Change to CUDA
        batch_size=250,
    )

    print(f"Starting training for trial {trial.number}")

    # Five successive fit calls on the full training set
    for _ in range(5):
        tm.fit(X_train, y_train)

    # Record the number of the trial that just finished training in
    # "temp_params.txt"; the file is overwritten on every trial.
    with open("temp_params.txt", "w") as f:
        f.write(f"Trial: {trial.number}\n")

    # NOTE(review): run_evaluation scores on X_test / y_test, so the search is
    # tuned against the same split it reports on — confirm this is intended.
    return run_evaluation(tm)


In [8]:
# Run the hyperparameter search: 50 trials, maximizing the objective's return value
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=50)

# Fetch the winning hyperparameters, print them, and persist them to
# "best_params.txt" as one "name: value" line each
best_params = study.best_params

print(f"Best params: {best_params}")

with open("best_params.txt", "w") as out:
    out.writelines(f"{key}: {value}\n" for key, value in best_params.items())

Starting training for trial 0


  number_of_clauses = trial.suggest_int("number_of_clauses", 20, 15000, 2)
  T = trial.suggest_int("T", 20, 10000, 2)
  max_included_literals = trial.suggest_int("max_included_literals", 20, 3 * number_of_features, 2)


Starting training for trial 1
Starting training for trial 2
Starting training for trial 3
Starting training for trial 4
Starting training for trial 5
Starting training for trial 6
Starting training for trial 7
Starting training for trial 8
Starting training for trial 9
Starting training for trial 10
Starting training for trial 11
Starting training for trial 12
Starting training for trial 13
Starting training for trial 14
Starting training for trial 15
Starting training for trial 16
Starting training for trial 17
Starting training for trial 18
Starting training for trial 19
Starting training for trial 20
Starting training for trial 21
Starting training for trial 22
Starting training for trial 23
Starting training for trial 24
Starting training for trial 25
Starting training for trial 26
Starting training for trial 27
Starting training for trial 28
Starting training for trial 29
Starting training for trial 30
Starting training for trial 31
Starting training for trial 32
Starting training

In [9]:
# Read the best hyperparameters found so far from the study and print them
best_params = study.best_params
print(best_params)

{'number_of_clauses': 2140, 'T': 4622, 's': 23.706791496433414, 'max_included_literals': 8658}


In [10]:
import plotly.io as pio

# Use the white Plotly template for every figure created below
pio.templates.default = "plotly_white"

# Study-wide plots, each rendered and written to PDF in this order:
# 1. optimization history, 2. parameter importances,
# 3. parallel coordinates, 4. slice plot
viz = optuna.visualization
for plot_fn, pdf_name in (
    (viz.plot_optimization_history, 'optimization_history.pdf'),
    (viz.plot_param_importances, 'param_importances.pdf'),
    (viz.plot_parallel_coordinate, 'parallel_coordinate.pdf'),
    (viz.plot_slice, 'slice_plot.pdf'),
):
    fig = plot_fn(study)
    fig.write_image(pdf_name)

# 5. Contour plot of the first two tuned parameters (needs at least two)
if len(study.best_params) >= 2:
    params = list(study.best_params)[:2]
    fig = viz.plot_contour(study, params=params)
    fig.write_image('contour_plot.pdf')

# 6. EDF plot
fig = viz.plot_edf(study)
fig.write_image('edf_plot.pdf')

