In [1]:
%pip install optuna

Collecting optuna
  Downloading optuna-4.2.1-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.14.1-py3-none-any.whl.metadata (7.4 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Collecting sqlalchemy>=1.4.2 (from optuna)
  Downloading SQLAlchemy-2.0.38-cp312-cp312-macosx_11_0_arm64.whl.metadata (9.6 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.9-py3-none-any.whl.metadata (2.9 kB)
Downloading optuna-4.2.1-py3-none-any.whl (383 kB)
Downloading alembic-1.14.1-py3-none-any.whl (233 kB)
Downloading SQLAlchemy-2.0.38-cp312-cp312-macosx_11_0_arm64.whl (2.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Downloading Mako-1.3.9-py3-none-any.whl (78 kB)
Installing collected packages: sqlalchemy, Mako, color

In [3]:
import numpy as np
import optuna

from tmu.models.autoencoder.autoencoder import TMAutoEncoder

In [4]:
def load_train_dataset(farm, event_id):
    """Load one training array from ``./data_train/X_{farm}_{event_id}.txt``.

    Args:
        farm: Farm identifier interpolated into the file name (e.g. ``"B"``).
        event_id: Event identifier interpolated into the file name.

    Returns:
        A 2-D ``np.uint32`` array with one row per line of the text file.

    Raises:
        FileNotFoundError: If the file does not exist (raised by ``np.loadtxt``).
    """
    # np.loadtxt already returns a fresh uint32 ndarray, so no additional
    # np.array(...)/astype(...) copies are needed. ndmin=2 keeps a single-line
    # file as shape (1, n_columns) instead of collapsing it to 1-D, so the
    # result can always be concatenated row-wise with the other datasets.
    return np.loadtxt(
        f"./data_train/X_{farm}_{event_id}.txt", dtype=np.uint32, ndmin=2
    )

In [5]:
def calculate_accuracy(X, pred):
    """Return the fraction of elements in ``pred`` that equal ``X``.

    Args:
        X: Reference values (array-like of any dimensionality).
        pred: Predicted values, broadcast-compatible with ``X``.

    Returns:
        The share of element-wise matches, in ``[0, 1]``.
    """
    matches = np.asarray(X) == np.asarray(pred)
    # Average over every compared element. Dividing the match count by
    # len(X) (the number of rows) would inflate the score for 2-D inputs by
    # the number of columns and could exceed 1.
    return np.mean(matches)

In [6]:
# Event ids whose files are combined into the training set.
train_datasets = [83, 52, 21, 2, 23, 87, 74, 86, 82]

# Read the farm "B" array for each event, then join them along the first
# axis into a single training array.
per_event_arrays = []
for event_id in train_datasets:
    per_event_arrays.append(load_train_dataset("B", event_id))
X_train = np.concatenate(per_event_arrays)

print(f"X_train shape: {X_train.shape}")

FileNotFoundError: ./data_train/X_B_83.txt not found.

In [26]:
def objective(trial: optuna.Trial) -> float:
    """Optuna objective: train a ``TMAutoEncoder`` with sampled hyperparameters.

    Hyperparameters are drawn from ``trial``, the model is trained on the
    notebook-level ``X_train`` for ``args["epochs"]`` epochs, and the fraction
    of entries where the model's prediction for ``X_train`` differs from
    ``X_train`` is returned.

    Relies on notebook globals that must exist when the function is *called*:
    ``X_train``, ``args`` (keys ``"epochs"`` and ``"number_of_examples"``) and
    ``save_model``.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        Mismatch rate in ``[0, 1]`` (lower is better), which fits Optuna's
        default ``direction="minimize"``.

    Raises:
        ValueError: If the prediction cannot be aligned with ``X_train``.
    """
    number_of_features = X_train.shape[1]
    # Every feature index is passed as an active output.
    output_active = np.arange(number_of_features, dtype=np.uint32)

    # Search space.
    number_of_clauses = trial.suggest_int("number_of_clauses", 50, 2000)
    T = trial.suggest_int("T", 50, 50000)
    s = trial.suggest_int("s", 1, 100)
    max_included_literals = trial.suggest_int("max_included_literals", 1, 3 * number_of_features)
    accumulation = trial.suggest_int("accumulation", 1, 10)
    feature_negation = trial.suggest_categorical("feature_negation", [True, False])
    output_balancing = trial.suggest_float("output_balancing", 0, 10)

    tm = TMAutoEncoder(
        number_of_clauses=number_of_clauses,
        T=T,
        s=s,
        output_active=output_active,
        max_included_literals=max_included_literals,
        accumulation=accumulation,
        feature_negation=feature_negation,
        platform="CPU",  # TODO: Change to CUDA
        output_balancing=output_balancing,
    )

    print(f"Starting training for {args['epochs']} epochs")

    for e in range(args["epochs"]):
        tm.fit(X_train, number_of_examples=args["number_of_examples"])

        # NOTE(review): save_model is not defined in the visible part of this
        # notebook, and every trial writes the same latest_{e}.pkl names, so
        # later trials overwrite earlier checkpoints — confirm this is wanted.
        save_model(tm, f"latest_{e}.pkl")
        print(f"Epoch: {e + 1}")

    # An objective must return a number; without a return value Optuna cannot
    # record a result for the trial.
    # NOTE(review): assumes TMAutoEncoder.predict(X) yields one value per
    # (sample, output) pair, possibly laid out as (outputs, samples) — confirm
    # against the tmu API. This is an in-sample score and predicts over all of
    # X_train; swap in a held-out or smaller set if one is available.
    pred = np.asarray(tm.predict(X_train))
    if pred.shape != X_train.shape and pred.T.shape == X_train.shape:
        pred = pred.T
    if pred.shape != X_train.shape:
        raise ValueError(
            f"prediction shape {pred.shape} does not match X_train shape {X_train.shape}"
        )

    return float(np.mean(pred != X_train))


In [27]:
# The feature count comes from the second axis of the training array; all
# feature indices are listed in output_active.
number_of_features = X_train.shape[1]
output_active = np.arange(number_of_features, dtype=np.uint32)

number_of_clauses = 500

# Fixed configuration handed to train(). T evaluates to 75 * number_of_clauses.
args: dict = dict(
    clause_weight_threshold=1,
    number_of_examples=40000,
    output_active=output_active,
    accumulation=1,
    num_clauses=number_of_clauses,
    T=int(number_of_clauses * 0.75 * 100),
    s=25,
    epochs=25,
    platform="CPU",
    output_balancing=0,
    max_included_literals=number_of_features,
    feature_negation=True,
)

result = train(args)

Starting training for 25 epochs
Epoch: 1
Epoch: 2
Epoch: 3
Epoch: 4
Epoch: 5
Epoch: 6
Epoch: 7
Epoch: 8
Epoch: 9
Epoch: 10
Epoch: 11
Epoch: 12
Epoch: 13
Epoch: 14
Epoch: 15
Epoch: 16
Epoch: 17
Epoch: 18
Epoch: 19
Epoch: 20
Epoch: 21
Epoch: 22
Epoch: 23
Epoch: 24
Epoch: 25
