In [47]:
#%pip install git+https://github.com/cair/tmu.git
#%pip install numpy==1.26.4

In [48]:
import numpy as np
import pickle
import os

from tmu.models.autoencoder.autoencoder import TMAutoEncoder

In [49]:
# Create folder models if it does not exist
os.makedirs("models", exist_ok=True)

In [50]:
def load_train_dataset(farm, event_id):
    X = np.loadtxt(f"./data_train/X_{farm}_{event_id}.txt", dtype=np.uint32)
    X = np.array(X).astype(np.uint32)
    return X

In [51]:
def save_model(tm: TMAutoEncoder, filename: str):
    a, d = tm.X_train, tm.encoded_X_train

    tm.X_train = None
    tm.encoded_X_train = None

    with open(f"./models/{filename}", "wb") as f:
        pickle.dump(tm, f)

    tm.X_train = a
    tm.encoded_X_train = d

In [52]:
train_datasets = [68, 22]  #, 72, 73, 0, 26, 40, 42, 10, 45, 84, 25, 69, 13, 24, 3, 17, 38, 71, 14, 92, 51]
test_dataset = [25]

# Load all datasets into one array
X_train = np.concatenate([load_train_dataset("A", i) for i in train_datasets])

X_test = np.concatenate([load_train_dataset("A", i) for i in test_dataset])

print(f"X_train shape: {X_train.shape}")
print(f"X_test shape: {X_test.shape}")

X_train shape: (81241, 405)
X_test shape: (40838, 405)


In [53]:
def hamming_loss(pred, X_test):
    """
    Computes the Hamming loss between predicted and ground truth binary arrays.

    Parameters:
    - pred (numpy array): Binary predictions of shape (n_samples, n_bits).
    - X_test (numpy array): Ground truth binary values of shape (n_samples, n_bits).

    Returns:
    - float: Hamming loss (fraction of incorrect bits).
    """
    assert pred.shape == X_test.shape, "Shapes of pred and X_test must match"

    # Compute the number of differing bits
    incorrect_bits = np.sum(pred != X_test)

    # Total number of bits
    total_bits = np.prod(X_test.shape)

    # Hamming loss is the fraction of incorrect bits
    return incorrect_bits / total_bits

In [54]:
def test(tm, X):
    pred = tm.predict(X)

    loss = [hamming_loss(X[i], pred[i]) for i in range(len(X))]

    return np.mean(loss), np.median(loss), np.max(loss), np.min(loss)

In [55]:
def train(args):
    tm = TMAutoEncoder(
        number_of_clauses=args["num_clauses"],
        T=args["T"],
        s=args["s"],
        output_active=args["output_active"],
        max_included_literals=args["max_included_literals"],
        accumulation=args["accumulation"],
        feature_negation=args["feature_negation"],
        platform=args["platform"],
        output_balancing=args["output_balancing"],
    )

    print(f"Starting training for {args['epochs']} epochs")

    for e in range(args["epochs"]):
        tm.fit(X_train, number_of_examples=args["number_of_examples"])

        lmean, lmed, lmax, lmin = test(tm, X_test)
        print(f"Epoch: {e + 1} Mean loss: {lmean:4f}, Median loss: {lmed:4f}, Max loss: {lmax:4f}, Min loss: {lmin:4f}")

        save_model(tm, f"latest_{e}.pkl")

In [56]:
number_of_features = X_train.shape[1]
output_active = np.arange(number_of_features, dtype=np.uint32)

number_of_clauses = int(1024 * 0.25)

print(f"Number of features {number_of_features}")
print(f"Number of clauses {number_of_clauses}")

Number of features 405
Number of clauses 225


In [57]:
args: dict = {
    "number_of_examples": 1000,
    "output_active": output_active,
    "accumulation": 1,
    "num_clauses": number_of_clauses,
    "T": int(number_of_clauses * 0.75),
    "s": 25.0,
    "epochs": 25,
    "platform": "CPU",
    "output_balancing": 0,
    "max_included_literals": 2 * number_of_features,
    "feature_negation": True,
}

result = train(args)

Starting training for 25 epochs
Epoch: 1 Mean loss: 0.194275, Median loss: 0.170370, Max loss: 0.681481, Min loss: 0.069136
Epoch: 2 Mean loss: 0.182428, Median loss: 0.160494, Max loss: 0.676543, Min loss: 0.059259
Epoch: 3 Mean loss: 0.174614, Median loss: 0.158025, Max loss: 0.681481, Min loss: 0.061728
Epoch: 4 Mean loss: 0.165739, Median loss: 0.153086, Max loss: 0.669136, Min loss: 0.039506
Epoch: 5 Mean loss: 0.159833, Median loss: 0.150617, Max loss: 0.656790, Min loss: 0.046914
Epoch: 6 Mean loss: 0.157057, Median loss: 0.150617, Max loss: 0.629630, Min loss: 0.051852
Epoch: 7 Mean loss: 0.155513, Median loss: 0.150617, Max loss: 0.644444, Min loss: 0.046914
Epoch: 8 Mean loss: 0.155658, Median loss: 0.150617, Max loss: 0.444444, Min loss: 0.046914
Epoch: 9 Mean loss: 0.154360, Median loss: 0.150617, Max loss: 0.577778, Min loss: 0.044444
Epoch: 10 Mean loss: 0.154989, Median loss: 0.150617, Max loss: 0.644444, Min loss: 0.039506
Epoch: 11 Mean loss: 0.154430, Median loss: 0.1