In [21]:
import torch
import pandas as pd
from sklearn.metrics import accuracy_score
import numpy as np
import ltn
from ltn_imp.automation.knowledge_base import KnowledgeBase
from ltn_imp.automation.data_loaders import LoaderWrapper

## Data Preparation

In [22]:
!poetry run poe download-datasets

[37mPoe =>[0m [94mmkdir -p examples/datasets[0m
[37mPoe =>[0m [94mcurl -L -o examples/datasets/iris_training.csv https://raw.githubusercontent.com/tommasocarraro/LTNtorch/main/examples/datasets/iris_training.csv[0m
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  2218  100  2218    0     0  38201      0 --:--:-- --:--:-- --:--:-- 38241
[37mPoe =>[0m [94mcurl -L -o examples/datasets/iris_test.csv https://raw.githubusercontent.com/tommasocarraro/LTNtorch/main/examples/datasets/iris_test.csv[0m
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100   598  100   598    0     0   2985      0 --:--:-- --:--:-- --:--:--  2990


In [23]:
train_data = pd.read_csv("datasets/iris_training.csv")
test_data = pd.read_csv("datasets/iris_test.csv")

train_labels = train_data.pop("species")
test_labels = test_data.pop("species")

train_data = torch.tensor(train_data.to_numpy()).float()
test_data = torch.tensor(test_data.to_numpy()).float()
train_labels = torch.tensor(train_labels.to_numpy()).long()
test_labels = torch.tensor(test_labels.to_numpy()).long()

In [24]:
# we define predicate P
class MLP(torch.nn.Module):
    """
    This model returns the logits for the classes given an input example. It does not compute the softmax, so the output
    are not normalized.
    This is done to separate the accuracy computation from the satisfaction level computation. Go through the example
    to understand it.
    """
    def __init__(self, layer_sizes=(4, 16, 16, 8, 3)):
        super(MLP, self).__init__()
        self.elu = torch.nn.ELU()
        self.dropout = torch.nn.Dropout(0.2)
        self.linear_layers = torch.nn.ModuleList([torch.nn.Linear(layer_sizes[i - 1], layer_sizes[i])
                                                  for i in range(1, len(layer_sizes))])

    def forward(self, x, training=False):
        """
        Method which defines the forward phase of the neural network for our multi class classification task.
        In particular, it returns the logits for the classes given an input example.

        :param x: the features of the example
        :param training: whether the network is in training mode (dropout applied) or validation mode (dropout not applied)
        :return: logits for example x
        """
        for layer in self.linear_layers[:-1]:
            x = self.elu(layer(x))
            if training:
                x = self.dropout(x)
        logits = self.linear_layers[-1](x)
        return logits


class LogitsToPredicate(torch.nn.Module):
    """
    This model has inside a logits model, that is a model which compute logits for the classes given an input example x.
    The idea of this model is to keep logits and probabilities separated. The logits model returns the logits for an example,
    while this model returns the probabilities given the logits model.

    In particular, it takes as input an example x and a class label l. It applies the logits model to x to get the logits.
    Then, it applies a softmax function to get the probabilities per classes. Finally, it returns only the probability related
    to the given class l.
    """
    def __init__(self, logits_model):
        super(LogitsToPredicate, self).__init__()
        self.logits_model = logits_model
        self.softmax = torch.nn.Softmax(dim=1)

    def forward(self, x, l, training=True):
        logits = self.logits_model(x, training=training)
        probs = self.softmax(logits)
        out = torch.sum(probs * l, dim=1)
        return out

In [25]:
class DataLoader(object):
    def __init__(self,
                 data,
                 labels,
                 batch_size=1,
                 shuffle=True):
        self.data = data
        self.labels = labels
        self.batch_size = batch_size
        self.shuffle = shuffle

    def __len__(self):
        return int(np.ceil(self.data.shape[0] / self.batch_size))

    def __iter__(self):
        n = self.data.shape[0]
        idxlist = list(range(n))
        if self.shuffle:
            np.random.shuffle(idxlist)

        for _, start_idx in enumerate(range(0, n, self.batch_size)):
            end_idx = min(start_idx + self.batch_size, n)
            data = self.data[idxlist[start_idx:end_idx]]
            labels = self.labels[idxlist[start_idx:end_idx]]

            yield data, labels

# create train and test loader
train_loader = DataLoader(train_data, train_labels, 64, shuffle=True)
test_loader = DataLoader(test_data, test_labels, 64, shuffle=False)

In [26]:
def compute_accuracy(loader, model):
    mean_accuracy = 0.0
    for data, labels in loader:
        predictions = model(data).detach().numpy()
        predictions = np.argmax(predictions, axis=1)
        mean_accuracy += accuracy_score(labels, predictions)
    return mean_accuracy / len(loader)

## My Implementation

In [27]:
mlp = MLP()
model = LogitsToPredicate(mlp)
predicates = {"Classifier": model }
expression_1 = "all x. (Classifier(x,y))"
rules = [expression_1]

In [28]:
loader = LoaderWrapper(loader=train_loader, variables=["x"],target="y", num_classes=3)

In [29]:
rule_to_data_loader_mapping = {expression_1: [loader]}

In [30]:
kb = KnowledgeBase(rules=rules, 
                   predicates=predicates,
                   rule_to_data_loader_mapping=rule_to_data_loader_mapping,
                   quantifier_impls={"forall" : "pmean_error"})

In [31]:
compute_accuracy(train_loader, model = mlp)

0.34933035714285715

In [37]:
compute_accuracy(test_loader, model = mlp)

0.9666666666666667

In [33]:
kb.optimize(501, log_steps=100)

Epoch 1/501, Loss: 0.6688594818115234

Epoch 101/501, Loss: 0.3226736783981323

Epoch 201/501, Loss: 0.22726833820343018

Epoch 301/501, Loss: 0.23317193984985352

Epoch 401/501, Loss: 0.20174187421798706

Epoch 501/501, Loss: 0.16337835788726807



In [14]:
compute_accuracy(train_loader, model = mlp)

0.9832589285714286

In [15]:
compute_accuracy(test_loader,model = mlp)

0.9666666666666667

## LTN

In [16]:
l_A = ltn.Constant(torch.tensor([1, 0, 0]))
l_B = ltn.Constant(torch.tensor([0, 1, 0]))
l_C = ltn.Constant(torch.tensor([0, 0, 1]))

In [17]:
mlp = MLP()
P = ltn.Predicate(LogitsToPredicate(mlp))
Forall = ltn.Quantifier(ltn.fuzzy_ops.AggregPMeanError(p=2), quantifier="f")
SatAgg = ltn.fuzzy_ops.SatAgg()

In [18]:
# it computes the overall satisfaction level on the knowledge base using the given data loader (train or test)
def compute_sat_level(loader):
    mean_sat = 0
    for data, labels in loader:
        x_A = ltn.Variable("x_A", data[labels == 0])
        x_B = ltn.Variable("x_B", data[labels == 1])
        x_C = ltn.Variable("x_C", data[labels == 2])
        mean_sat += SatAgg(
            Forall(x_A, P(x_A, l_A)),
            Forall(x_B, P(x_B, l_B)),
            Forall(x_C, P(x_C, l_C))
        )
    mean_sat /= len(loader)
    return mean_sat

# it computes the overall accuracy of the predictions of the trained model using the given data loader
# (train or test)
def compute_accuracy(loader):
    mean_accuracy = 0.0
    for data, labels in loader:
        predictions = mlp(data).detach().numpy()
        predictions = np.argmax(predictions, axis=1)
        mean_accuracy += accuracy_score(labels, predictions)

    return mean_accuracy / len(loader)

In [19]:
optimizer = torch.optim.Adam(P.parameters(), lr=0.001)

for epoch in range(501):
    train_loss = 0.0
    for batch_idx, (data, labels) in enumerate(train_loader):
        optimizer.zero_grad()

        x_A = ltn.Variable("x_A", data[labels == 0]) # class A examples
        x_B = ltn.Variable("x_B", data[labels == 1]) # class B examples
        x_C = ltn.Variable("x_C", data[labels == 2]) # class C examples

        sat_agg = SatAgg(
            Forall(x_A, P(x_A, l_A, training=True)),
            Forall(x_B, P(x_B, l_B, training=True)),
            Forall(x_C, P(x_C, l_C, training=True))
        )

        loss = 1. - sat_agg
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
    train_loss = train_loss / len(train_loader)

    if epoch % 100 == 0:
        print(" epoch %d | loss %.4f | Train Sat %.3f | Test Sat %.3f | Train Acc %.3f | Test Acc %.3f"
              %(epoch, train_loss, compute_sat_level(train_loader), compute_sat_level(test_loader),
                    compute_accuracy(train_loader), compute_accuracy(test_loader)))

 epoch 0 | loss 0.6731 | Train Sat 0.332 | Test Sat 0.337 | Train Acc 0.305 | Test Acc 0.467
 epoch 100 | loss 0.2814 | Train Sat 0.689 | Test Sat 0.716 | Train Acc 0.975 | Test Acc 0.967
 epoch 200 | loss 0.1787 | Train Sat 0.814 | Test Sat 0.816 | Train Acc 0.983 | Test Acc 0.967
 epoch 300 | loss 0.1548 | Train Sat 0.850 | Test Sat 0.766 | Train Acc 0.992 | Test Acc 0.933
 epoch 400 | loss 0.0972 | Train Sat 0.851 | Test Sat 0.890 | Train Acc 0.992 | Test Acc 0.967
 epoch 500 | loss 0.1367 | Train Sat 0.861 | Test Sat 0.876 | Train Acc 0.991 | Test Acc 0.967
