In [1]:
#%pip install git+https://github.com/cair/tmu.git
#%pip install numpy==1.26.4

In [2]:
# Import all required libraries

import numpy as np
import pickle
import optuna
import os

from tmu.models.classification.vanilla_classifier import TMClassifier

  from .autonotebook import tqdm as notebook_tqdm


2025-04-11 17:28:30,380 - tmu.clause_bank.clause_bank_cuda - ERROR - No module named 'pycuda'
Traceback (most recent call last):
  File "/Users/kjellhaaland/Documents/GitHub/uia-master-thesis/.venv/lib/python3.12/site-packages/tmu/clause_bank/clause_bank_cuda.py", line 41, in <module>
    from pycuda._driver import Device, Context
ModuleNotFoundError: No module named 'pycuda'


In [3]:
# Event ids that get_train_dataset() loads (for farm "B") and concatenates.
train_datasets = np.asarray([34, 7, 53])
# Event ids that get_test_dataset() loads (for farm "B") and concatenates.
test_datasets = np.asarray([27])

In [4]:
# Ensure the "models" directory exists; exist_ok=True makes re-running this cell a no-op.
os.makedirs(name="models", exist_ok=True)

In [5]:
## Helper functions for saving the model and accuracy

# Helper function to save the model
def save_model(model, filename):
    """Serialize ``model`` with pickle and write it to ``filename``.

    Args:
        model: Any picklable object.
        filename: Destination path; the file is opened in binary write mode,
            so an existing file is overwritten.
    """
    with open(filename, "wb") as model_file:
        pickle.dump(model, file=model_file)


# Helper function to save the accuracy
def save_accuracy(epoch, accuracy, tp, tn, fp, fn):
    """Append one comma-separated record to ``accuracy.txt``.

    The record has the fields ``epoch,accuracy,tp,tn,fp,fn`` in that order and
    is terminated by a newline. The file is opened in append mode, so earlier
    records are kept.
    """
    with open("accuracy.txt", "a") as log_file:
        record = f"{epoch},{accuracy},{tp},{tn},{fp},{fn}\n"
        log_file.write(record)


# Helper function to load dataset
def load_dataset(farm, event_id):
    """Load the training feature matrix for one farm/event pair.

    Reads ``./data_train/X_{farm}_{event_id}.txt`` with ``np.loadtxt``.

    Args:
        farm: Farm identifier used in the file name.
        event_id: Event identifier used in the file name.

    Returns:
        A 2-D ``np.uint32`` array.
    """
    # loadtxt already produces uint32, so no extra conversion/copy is needed.
    # ndmin=2 keeps the result two-dimensional even when the file holds a single
    # row or a single column, so it can always be concatenated with other events.
    return np.loadtxt(f"./data_train/X_{farm}_{event_id}.txt", dtype=np.uint32, ndmin=2)


def load_dataset_labels(farm, event_id):
    """Load the training labels for one farm/event pair.

    Reads ``./data_train/y_{farm}_{event_id}.txt`` with ``np.loadtxt``.

    Args:
        farm: Farm identifier used in the file name.
        event_id: Event identifier used in the file name.

    Returns:
        An ``np.uint32`` array with at least one dimension.
    """
    # loadtxt already produces uint32, so no extra conversion/copy is needed.
    # ndmin=1 prevents a file with a single value from collapsing to a 0-d
    # array, which np.concatenate cannot handle.
    return np.loadtxt(f"./data_train/y_{farm}_{event_id}.txt", dtype=np.uint32, ndmin=1)


def load_test_dataset(farm, event_id):
    """Load the test feature matrix for one farm/event pair.

    Reads ``./data_test/X_{farm}_{event_id}.txt`` with ``np.loadtxt``.

    Args:
        farm: Farm identifier used in the file name.
        event_id: Event identifier used in the file name.

    Returns:
        A 2-D ``np.uint32`` array.
    """
    # loadtxt already produces uint32, so no extra conversion/copy is needed.
    # ndmin=2 keeps the result two-dimensional even when the file holds a single
    # row or a single column, so it can always be concatenated with other events.
    return np.loadtxt(f"./data_test/X_{farm}_{event_id}.txt", dtype=np.uint32, ndmin=2)


def load_test_dataset_labels(farm, event_id):
    """Load the test labels for one farm/event pair.

    Reads ``./data_test/y_{farm}_{event_id}.txt`` with ``np.loadtxt``.

    Args:
        farm: Farm identifier used in the file name.
        event_id: Event identifier used in the file name.

    Returns:
        An ``np.uint32`` array with at least one dimension.
    """
    # loadtxt already produces uint32, so no extra conversion/copy is needed.
    # ndmin=1 prevents a file with a single value from collapsing to a 0-d
    # array, which np.concatenate cannot handle.
    return np.loadtxt(f"./data_test/y_{farm}_{event_id}.txt", dtype=np.uint32, ndmin=1)


def get_train_dataset():
    """Build the training set from every event id in ``train_datasets``.

    Returns:
        A ``(features, labels)`` tuple; each element is the concatenation, in
        ``train_datasets`` order, of the per-event arrays loaded for farm "B".
    """
    feature_chunks = []
    for event_id in train_datasets:
        feature_chunks.append(load_dataset("B", event_id))
    features = np.concatenate(feature_chunks)

    label_chunks = []
    for event_id in train_datasets:
        label_chunks.append(load_dataset_labels("B", event_id))
    targets = np.concatenate(label_chunks)

    return features, targets


def get_test_dataset():
    """Build the test set from every event id in ``test_datasets``.

    Returns:
        A ``(features, labels)`` tuple; each element is the concatenation, in
        ``test_datasets`` order, of the per-event test arrays loaded for farm "B".
    """
    feature_chunks = []
    for event_id in test_datasets:
        feature_chunks.append(load_test_dataset("B", event_id))
    features = np.concatenate(feature_chunks)

    label_chunks = []
    for event_id in test_datasets:
        label_chunks.append(load_test_dataset_labels("B", event_id))
    targets = np.concatenate(label_chunks)

    return features, targets

In [6]:
# Load the concatenated train and test splits.
X_train, y_train = get_train_dataset()
X_test, y_test = get_test_dataset()

# Features and labels are sliced with the same bound below and compared
# element-wise during evaluation, so each pair must have matching lengths.
assert len(X_train) == len(y_train), "X_train and y_train have different lengths"
assert len(X_test) == len(y_test), "X_test and y_test have different lengths"

# Ensure that the training set size is a multiple of 250 by dropping the
# trailing remainder (250 is also the batch_size passed to TMClassifier in
# objective()).
BATCH_SIZE = 250
usable_samples = len(X_train) - len(X_train) % BATCH_SIZE
X_train = X_train[:usable_samples]
y_train = y_train[:usable_samples]

print(f"X_train shape: {X_train.shape}")
print(f"y_train shape: {y_train.shape}")

print(f"X_test shape: {X_test.shape}")
print(f"y_test shape: {y_test.shape}")

FileNotFoundError: ./data_train/X_B_53.txt not found.

In [23]:
def run_evaluation(tm: TMClassifier) -> float:
    """Score ``tm`` on the module-level test split.

    Args:
        tm: Classifier whose ``predict`` method is called with ``X_test``.

    Returns:
        Fraction of predictions that equal the corresponding entry of ``y_test``.
    """
    predictions = tm.predict(X_test)
    n_correct = np.sum(predictions == y_test)
    return n_correct / len(y_test)


def objective(trial: optuna.Trial) -> float:
    """Optuna objective: train a TMClassifier with sampled hyperparameters and score it.

    Samples ``number_of_clauses``, ``T``, ``s`` and ``max_included_literals``
    from ``trial``, fits the classifier on the module-level ``X_train`` /
    ``y_train``, and evaluates it with ``run_evaluation``. The trial number,
    its sampled parameters and the resulting accuracy are written to
    ``temp_params.txt`` (overwritten on every trial) as a progress record.

    Args:
        trial: The Optuna trial providing the hyperparameter suggestions.

    Returns:
        The accuracy returned by ``run_evaluation`` (higher is better).
    """
    number_of_features = X_train.shape[1]

    number_of_clauses = trial.suggest_int("number_of_clauses", 20, 15000)
    T = trial.suggest_int("T", 10, 10000)
    s = trial.suggest_int("s", 1, 100)
    max_included_literals = trial.suggest_int("max_included_literals", 1, 3 * number_of_features)

    tm = TMClassifier(
        number_of_clauses=number_of_clauses,
        T=T,
        s=s,
        max_included_literals=max_included_literals,
        weighted_clauses=True,
        platform="CPU",  # TODO: Change to CUDA
        batch_size=250,
    )

    print(f"Starting training for trial {trial.number}")

    # Call fit five times on the full training set; the loop index is not needed.
    for _ in range(5):
        tm.fit(X_train, y_train)

    accuracy = run_evaluation(tm)

    # Record the most recently finished trial (number, sampled parameters and
    # accuracy) in "temp_params.txt"; the file is overwritten on every trial.
    with open("temp_params.txt", "w") as f:
        f.write(f"Trial: {trial.number}\n")
        for key, value in trial.params.items():
            f.write(f"{key}: {value}\n")
        f.write(f"accuracy: {accuracy}\n")

    return accuracy


In [24]:
# objective() returns the test accuracy, where higher is better, so the study
# must maximize it (minimizing would select the worst-performing trial).
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=3)

# Save the best params to file
best_params = study.best_params

print(f"Best params: {best_params}")

# One "name: value" line per hyperparameter; the file is overwritten on each run.
with open("best_params.txt", "w") as f:
    for key, value in best_params.items():
        f.write(f"{key}: {value}\n")

Starting training for trial 0
Best params: {'number_of_clauses': 1762, 'T': 4365, 's': 63, 'max_included_literals': 666}


In [25]:
# Re-read the best trial's hyperparameters from the study and print them.
best_params = study.best_params
print(best_params)

{'number_of_clauses': 1762, 'T': 4365, 's': 63, 'max_included_literals': 666}
