In [1]:
#%pip install git+https://github.com/cair/tmu.git
#%pip install numpy==1.26.4

In [2]:
import numpy as np
import pickle
import os

from tmu.models.autoencoder.autoencoder import TMAutoEncoder

2025-03-25 20:34:42,953 - tmu.clause_bank.clause_bank_cuda - ERROR - No module named 'pycuda'
Traceback (most recent call last):
  File "/Users/kjellhaaland/Documents/GitHub/uia-master-thesis/.venv/lib/python3.12/site-packages/tmu/clause_bank/clause_bank_cuda.py", line 41, in <module>
    from pycuda._driver import Device, Context
ModuleNotFoundError: No module named 'pycuda'


In [3]:
# Ensure the "models" output directory is present (no-op when it already exists)
os.makedirs(name="models", exist_ok=True)

In [4]:
def load_train_dataset(farm, event_id):
    """
    Load one dataset from ``./data_train/X_{farm}_{event_id}.txt``.

    Parameters:
    - farm: Farm identifier inserted into the file name (e.g. "B").
    - event_id: Event identifier inserted into the file name.

    Returns:
    - numpy array: File contents parsed as ``np.uint32``. The result is always
      2-D (rows x columns), even when the file contains a single row.
    """
    # loadtxt already returns a fresh uint32 array, so the extra
    # np.array(...).astype(...) copies are unnecessary. ndmin=2 keeps a one-row
    # file 2-D so datasets can be concatenated and ``shape[1]`` can be read
    # without special-casing.
    return np.loadtxt(
        f"./data_train/X_{farm}_{event_id}.txt",
        dtype=np.uint32,
        ndmin=2,
    )


In [5]:
def save_model(tm: TMAutoEncoder, filename: str):
    """
    Pickle ``tm`` to ``./models/{filename}`` without its ``X_train`` and
    ``encoded_X_train`` attributes.

    Both attributes are set to ``None`` while the object is pickled, so they
    are not written to the file, and are put back on ``tm`` afterwards. They
    are also put back when opening the file or pickling raises.

    Parameters:
    - tm: Model to persist; must expose ``X_train`` and ``encoded_X_train``.
    - filename (str): File name inside ``./models`` (the directory must exist).
    """
    kept_X_train, kept_encoded_X_train = tm.X_train, tm.encoded_X_train

    tm.X_train = None
    tm.encoded_X_train = None
    try:
        with open(f"./models/{filename}", "wb") as f:
            pickle.dump(tm, f)
    finally:
        # Always restore: a failed save must not leave the live model stripped
        # of the attributes it needs for further training.
        tm.X_train = kept_X_train
        tm.encoded_X_train = kept_encoded_X_train

In [6]:
# Event ids (farm "B") used for fitting and for evaluation.
# Further candidate training ids, currently disabled:
#   53, 27, 19, 77, 83, 52, 21, 2, 23, 87, 74, 86, 82
train_datasets = [34, 7]
test_dataset = [52]

# Stack the per-event arrays along the sample axis.
X_train = np.concatenate(
    [load_train_dataset("B", event_id) for event_id in train_datasets],
    axis=0,
)
X_test = np.concatenate(
    [load_train_dataset("B", event_id) for event_id in test_dataset],
    axis=0,
)

print(f"X_train shape: {X_train.shape}")
print(f"X_test shape: {X_test.shape}")

X_train shape: (93206, 315)
X_test shape: (43994, 315)


In [7]:
def hamming_loss(pred, X_test):
    """
    Fraction of positions at which two equally shaped arrays disagree.

    Parameters:
    - pred (numpy array): Predicted values.
    - X_test (numpy array): Reference values; must have the same shape as pred.

    Returns:
    - float: Number of differing elements divided by the total element count.
    """
    assert pred.shape == X_test.shape, "Shapes of pred and X_test must match"

    # Denominator: how many elements are compared in total
    n_elements = np.prod(X_test.shape)

    # Numerator: how many of them differ
    differs = pred != X_test
    n_wrong = np.sum(differs)

    return n_wrong / n_elements

In [8]:
def test(tm, X):
    pred = tm.predict(X)

    loss = [hamming_loss(X[i], pred[i]) for i in range(len(X))]

    return np.mean(loss), np.median(loss), np.max(loss), np.min(loss)

In [9]:
def train(args):
    """
    Build a TMAutoEncoder from ``args``, fit it epoch by epoch, and return it.

    After every epoch the model is evaluated with ``test`` on the notebook-level
    ``X_test``, the loss statistics are printed, and the model is saved through
    ``save_model`` as ``latest_{e}.pkl`` (``e`` is the zero-based epoch index).
    Training data is read from the notebook-level ``X_train``.

    Parameters:
    - args (dict): Must contain the keys "num_clauses", "T", "s",
      "output_active", "max_included_literals", "accumulation",
      "feature_negation", "platform", "output_balancing", "epochs" and
      "number_of_examples".

    Returns:
    - TMAutoEncoder: The trained model (previously nothing was returned, so
      ``result = train(args)`` always bound ``None``).
    """
    tm = TMAutoEncoder(
        number_of_clauses=args["num_clauses"],
        T=args["T"],
        s=args["s"],
        output_active=args["output_active"],
        max_included_literals=args["max_included_literals"],
        accumulation=args["accumulation"],
        feature_negation=args["feature_negation"],
        platform=args["platform"],
        output_balancing=args["output_balancing"],
    )

    print(f"Starting training for {args['epochs']} epochs")

    for e in range(args["epochs"]):
        tm.fit(X_train, number_of_examples=args["number_of_examples"])

        lmean, lmed, lmax, lmin = test(tm, X_test)
        # NOTE(review): ":4f" means minimum width 4 with the default 6 decimals;
        # ".4f" may have been intended. Left unchanged to keep the log format.
        print(f"Epoch: {e + 1} Mean loss: {lmean:4f}, Median loss: {lmed:4f}, Max loss: {lmax:4f}, Min loss: {lmin:4f}")

        # Checkpoint after every epoch; file names use the zero-based index
        save_model(tm, f"latest_{e}.pkl")

    return tm


In [10]:
# Number of columns per sample, and the index array 0..n-1 that is later
# passed to the autoencoder as its ``output_active`` argument.
number_of_features = X_train.shape[1]
output_active = np.arange(0, number_of_features, dtype=np.uint32)

print(f"Number of features {number_of_features}")

Number of features 315


In [11]:
number_of_clauses = 1024

# Hyper-parameters read by train(); T is set to 75% of the clause count and
# max_included_literals to twice the feature count.
args: dict = dict(
    number_of_examples=100,
    output_active=output_active,
    accumulation=1,
    num_clauses=number_of_clauses,
    T=int(number_of_clauses * 0.75),
    s=25.0,
    epochs=15,
    platform="CPU",
    output_balancing=0,
    max_included_literals=2 * number_of_features,
    feature_negation=True,
)

result = train(args)

Starting training for 25 epochs
Epoch: 1 Mean loss: 0.195910, Median loss: 0.190476, Max loss: 0.533333, Min loss: 0.073016
Epoch: 2 Mean loss: 0.160870, Median loss: 0.155556, Max loss: 0.663492, Min loss: 0.050794
Epoch: 3 Mean loss: 0.150542, Median loss: 0.142857, Max loss: 0.682540, Min loss: 0.041270
Epoch: 4 Mean loss: 0.146906, Median loss: 0.136508, Max loss: 0.638095, Min loss: 0.044444
Epoch: 5 Mean loss: 0.143412, Median loss: 0.133333, Max loss: 0.638095, Min loss: 0.025397
Epoch: 6 Mean loss: 0.140243, Median loss: 0.126984, Max loss: 0.638095, Min loss: 0.034921
Epoch: 7 Mean loss: 0.137050, Median loss: 0.120635, Max loss: 0.638095, Min loss: 0.038095
Epoch: 8 Mean loss: 0.133851, Median loss: 0.117460, Max loss: 0.638095, Min loss: 0.041270
Epoch: 9 Mean loss: 0.131331, Median loss: 0.114286, Max loss: 0.638095, Min loss: 0.038095
Epoch: 10 Mean loss: 0.128827, Median loss: 0.114286, Max loss: 0.638095, Min loss: 0.038095
Epoch: 11 Mean loss: 0.126634, Median loss: 0.1