In [1]:
import torch
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score
from ltn_imp.automation.knowledge_base import KnowledgeBase
from ltn_imp.automation.data_loaders import LoaderWrapper

## Data Prep

In [2]:
!poetry run poe download-datasets

[37mPoe =>[0m [94mmkdir -p examples/datasets[0m
[37mPoe =>[0m [94mcurl -L -o examples/datasets/iris_training.csv https://raw.githubusercontent.com/tommasocarraro/LTNtorch/main/examples/datasets/iris_training.csv[0m
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  2218  100  2218    0     0  10952      0 --:--:-- --:--:-- --:--:-- 10980
[37mPoe =>[0m [94mcurl -L -o examples/datasets/iris_test.csv https://raw.githubusercontent.com/tommasocarraro/LTNtorch/main/examples/datasets/iris_test.csv[0m
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100   598  100   598    0     0  12274      0 --:--:-- --:--:-- --:--:-- 12458


In [3]:
nr_samples = 100
dataset = torch.rand((nr_samples, 2))
labels_dataset = torch.sum(torch.square(dataset - torch.tensor([.5, .5])), dim=1) < .09

In [4]:
train_data = pd.read_csv("datasets/iris_training.csv")
test_data = pd.read_csv("datasets/iris_test.csv")

train_labels = train_data.pop("species")
test_labels = test_data.pop("species")

train_data = torch.tensor(train_data.to_numpy()).float()
test_data = torch.tensor(test_data.to_numpy()).float()
train_labels = torch.tensor(train_labels.to_numpy()).long()
test_labels = torch.tensor(test_labels.to_numpy()).long()

In [5]:
class BinaryDataLoader(object):
    def __init__(self,
                 data,
                 labels,
                 batch_size=1,
                 shuffle=True):
        
        self.data = data
        self.labels = labels
        self.batch_size = batch_size
        self.shuffle = shuffle

    def __len__(self):
        return int(np.ceil(self.data.shape[0] / self.batch_size))

    def __iter__(self):
        n = self.data.shape[0]
        idxlist = list(range(n))
        if self.shuffle:
            np.random.shuffle(idxlist)

        for _, start_idx in enumerate(range(0, n, self.batch_size)):
            end_idx = min(start_idx + self.batch_size, n)
            data = self.data[idxlist[start_idx:end_idx]]
            labels = self.labels[idxlist[start_idx:end_idx]]
            yield data, labels
            
binary_train_loader = BinaryDataLoader(dataset[:50], labels_dataset[:50], batch_size=64, shuffle=True)
binary_test_loader = BinaryDataLoader(dataset[50:], labels_dataset[50:], batch_size=64, shuffle=False)

In [6]:
class MultiLabelDataLoader(object):
    def __init__(self,
                 data,
                 labels,
                 batch_size=1,
                 shuffle=True):
        self.data = data
        self.labels = labels
        self.batch_size = batch_size
        self.shuffle = shuffle

    def __len__(self):
        return int(np.ceil(self.data.shape[0] / self.batch_size))

    def __iter__(self):
        n = self.data.shape[0]
        idxlist = list(range(n))
        if self.shuffle:
            np.random.shuffle(idxlist)

        for _, start_idx in enumerate(range(0, n, self.batch_size)):
            end_idx = min(start_idx + self.batch_size, n)
            data = self.data[idxlist[start_idx:end_idx]]
            labels = self.labels[idxlist[start_idx:end_idx]]

            yield data, labels

multi_train_loader = MultiLabelDataLoader(train_data, train_labels, 64, shuffle=True)
multi_test_loader = MultiLabelDataLoader(test_data, test_labels, 64, shuffle=False)

## Classes and Methods

In [7]:
import torch

class BinaryClassifier(torch.nn.Module):
    def __init__(self):
        super(BinaryClassifier, self).__init__()
        self.elu = torch.nn.ELU()
        self.layer1 = torch.nn.Linear(2, 16)
        self.layer2 = torch.nn.Linear(16, 16)
        self.layer3 = torch.nn.Linear(16, 2) 

    def forward(self, x):
        x = self.elu(self.layer1(x))
        x = self.elu(self.layer2(x))
        logits = self.layer3(x)
        return logits

# we define predicate P
class MultiLabelClassifier(torch.nn.Module):
    def __init__(self, layer_sizes=(4, 16, 16, 8, 3)):
        super(MultiLabelClassifier, self).__init__()
        self.elu = torch.nn.ELU()
        self.dropout = torch.nn.Dropout(0.2)
        self.linear_layers = torch.nn.ModuleList([torch.nn.Linear(layer_sizes[i - 1], layer_sizes[i])
                                                  for i in range(1, len(layer_sizes))])

    def forward(self, x, training=False):
        for layer in self.linear_layers[:-1]:
            x = self.elu(layer(x))
            if training:
                x = self.dropout(x)
        logits = self.linear_layers[-1](x)
        return logits

class LogitsToPredicate(torch.nn.Module):
    def __init__(self, logits_model):
        super(LogitsToPredicate, self).__init__()
        self.logits_model = logits_model
        self.softmax = torch.nn.Softmax(dim=1)

    def forward(self, x, y, training=True):
        logits = self.logits_model(x)
        probs = self.softmax(logits)
        # y is expected to be a one-hot encoded vector
        out = torch.sum(probs * y, dim=1, keepdim=True)
        return out

In [8]:
def compute_accuracy(loader, model):
    mean_accuracy = 0.0
    for data, labels in loader:
        predictions = model.logits_model(data).detach().numpy()
        predictions = np.argmax(predictions, axis=1)
        mean_accuracy += accuracy_score(labels, predictions)
    return mean_accuracy / len(loader)

## Optimization and Evaluation

In [9]:
binary = LogitsToPredicate( BinaryClassifier() )  # type: ignore
multi = LogitsToPredicate( MultiLabelClassifier() ) # type: ignore

predicates = {"Binary": binary, "Multi": multi}
expression_1 = "( all x. (Binary(x,y)) and (all a. ( Multi(a, b))) )"

rules = [expression_1]

In [10]:
binary_loader = LoaderWrapper(variables=["x"], num_classes=2, target="y", loader=binary_train_loader)
multi_loader = LoaderWrapper(variables=["a"], num_classes=3, target="b", loader=multi_train_loader)

In [11]:
rule_to_data_loader_mapping = {expression_1: [ binary_loader, multi_loader ]} # type: ignore

In [12]:
kb = KnowledgeBase(expressions=rules, 
                   predicates=predicates,
                   rule_to_data_loader_mapping=rule_to_data_loader_mapping,
                   quantifier_impls={"forall" : "pmean_error"})

In [13]:
compute_accuracy(binary_train_loader, binary)

0.46

In [14]:
compute_accuracy(binary_test_loader, binary)

0.58

In [15]:
compute_accuracy(multi_train_loader, multi)

0.3515625

In [16]:
compute_accuracy(multi_test_loader, multi)

0.26666666666666666

In [17]:
kb.optimize(num_epochs=1000, log_steps=100)

Epoch 1/1000, Loss: 0.681427001953125

Epoch 101/1000, Loss: 0.485770583152771

Epoch 201/1000, Loss: 0.4660830497741699

Epoch 301/1000, Loss: 0.44517719745635986

Epoch 401/1000, Loss: 0.3358440399169922

Epoch 501/1000, Loss: 0.3024158477783203

Epoch 601/1000, Loss: 0.28943735361099243

Epoch 701/1000, Loss: 0.2582472562789917

Epoch 801/1000, Loss: 0.211930513381958

Epoch 901/1000, Loss: 0.1780761480331421



In [18]:
compute_accuracy(binary_train_loader, binary)

0.98

In [19]:
compute_accuracy(binary_test_loader, binary) # type: ignore

0.94

In [20]:
compute_accuracy(multi_train_loader, multi)

0.9821428571428572

In [21]:
compute_accuracy(multi_test_loader, multi)

0.9666666666666667