In [1]:
from ltn_imp.automation.knowledge_base import KnowledgeBase
import torch
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score
from ltn_imp.parsing.parser import convert_to_ltn
from ltn_imp.parsing.expression_transformations import transform_expression
from nltk.sem.logic import Expression

## Data Prep

In [2]:
# Read the train/test splits from CSV.
train_frame = pd.read_csv("datasets/iris_training.csv")
test_frame = pd.read_csv("datasets/iris_test.csv")

# Detach the target column in place; the frames keep only the remaining columns.
train_species = train_frame.pop("species")
test_species = test_frame.pop("species")

# Remaining columns become floating-point tensors.
train_data = torch.tensor(train_frame.to_numpy()).float()
test_data = torch.tensor(test_frame.to_numpy()).float()

# Targets become integer (long) tensors.
# NOTE(review): presumably "species" is already numerically encoded in the CSV — confirm.
train_labels = torch.tensor(train_species.to_numpy()).long()
test_labels = torch.tensor(test_species.to_numpy()).long()

In [3]:
class DataLoader(object):
    """Minimal mini-batch iterator over two aligned, indexable arrays.

    Every pass yields ``(data_batch, labels_batch)`` tuples holding at most
    ``batch_size`` rows. The final batch is shorter when the number of rows is
    not a multiple of ``batch_size``.

    Args:
        data: Array-like exposing ``.shape[0]`` and supporting indexing with a
            list of integer positions.
        labels: Array-like indexed with the same positions as ``data``.
        batch_size: Maximum number of rows per batch; must be at least 1.
        shuffle: When true, rows are visited in a new random order on every
            pass, drawn from NumPy's global random state.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """

    def __init__(self,
                 data,
                 labels,
                 batch_size=1,
                 shuffle=True):
        # Fail early: a non-positive batch size would otherwise surface later
        # as a ZeroDivisionError in __len__ or an opaque range() error in
        # __iter__.
        if batch_size < 1:
            raise ValueError(
                "batch_size must be at least 1, got {!r}".format(batch_size))
        self.data = data
        self.labels = labels
        self.batch_size = batch_size
        self.shuffle = shuffle

    def __len__(self):
        """Return the number of batches produced by one full pass."""
        return int(np.ceil(self.data.shape[0] / self.batch_size))

    def __iter__(self):
        """Yield ``(data, labels)`` batches covering every row exactly once."""
        n = self.data.shape[0]
        idxlist = list(range(n))
        if self.shuffle:
            np.random.shuffle(idxlist)

        for start_idx in range(0, n, self.batch_size):
            # Slicing clips at the end of the list, so the last batch simply
            # comes out shorter.
            batch_idx = idxlist[start_idx:start_idx + self.batch_size]
            yield self.data[batch_idx], self.labels[batch_idx]

# Wrap the tensors in batch iterators of 64 rows each. Only the training
# loader shuffles its rows; the test loader keeps the file order.
train_loader = DataLoader(data=train_data, labels=train_labels, batch_size=64, shuffle=True)
test_loader = DataLoader(data=test_data, labels=test_labels, batch_size=64, shuffle=False)

## Model and Evaluation Methods

In [4]:
class PetalLength():
    """Callable object that picks one entry out of a sample.

    NOTE(review): the name suggests the petal-length feature, but the code
    reads position 1 of ``x``. Whether that position holds petal length
    depends on the CSV column order, which is not visible here — confirm.
    """

    def __init__(self):
        # `value` starts unset; nothing inside this class assigns it later.
        self.value = None
        self.name = "PetalLength"

    def __call__(self, x):
        """Make instances directly callable by delegating to ``forward``."""
        return self.forward(x)

    def __str__(self):
        """Return the object's name."""
        return self.name

    def forward(self, x):
        """Return the entry stored at index 1 of ``x``."""
        return x[1]

In [5]:
class Classifier():
    """Placeholder two-argument callable that always answers with ``tensor([1])``.

    Both arguments are accepted but ignored by the current implementation.
    """

    def __init__(self):
        # `value` starts unset; nothing inside this class assigns it later.
        self.value = None
        self.name = "Classifier"

    def __call__(self, x, y):
        """Make instances directly callable by delegating to ``forward``."""
        return self.forward(x, y)

    def forward(self, x, y):
        """Return a constant one-element tensor regardless of ``x`` and ``y``."""
        constant = torch.tensor([1])
        return constant

In [6]:
def compute_accuracy(loader, model):
    """Return the fraction of samples in ``loader`` that ``model`` gets right.

    The predicted class of a sample is the argmax over axis 1 of the model
    output. Correct predictions are counted over all samples, so every sample
    carries the same weight even when the last batch is shorter than the
    others (averaging per-batch accuracies would over-weight a short batch).

    Args:
        loader: Iterable yielding ``(data, labels)`` batches.
        model: Callable mapping a data batch to a 2-D tensor of per-class
            scores (one row per sample).

    Returns:
        float: Accuracy in ``[0, 1]``; ``0.0`` when the loader yields no samples.

    Raises:
        ValueError: If a batch has a different number of predictions and labels.
    """
    correct = 0
    total = 0
    # Evaluation only: no autograd graph is needed.
    with torch.no_grad():
        for data, labels in loader:
            scores = model(data).detach().cpu().numpy()
            predictions = np.argmax(scores, axis=1)

            # Tensors must be moved to host memory before NumPy can read them.
            if isinstance(labels, torch.Tensor):
                labels = labels.detach().cpu().numpy()
            targets = np.asarray(labels).reshape(-1)

            if predictions.shape != targets.shape:
                raise ValueError(
                    "predictions and labels differ in length: {} vs {}".format(
                        predictions.shape[0], targets.shape[0]))

            correct += int(np.sum(predictions == targets))
            total += targets.shape[0]

    if total == 0:
        return 0.0
    return correct / total

## Rules and Optimization

In [7]:
# Logic rules written as strings; they are handed to KnowledgeBase below.
# NOTE(review): rule 1 presumably reads "z is the PetalLength of x and z < 3,
# if and only if Classifier holds for (z, 0)" — confirm against the ltn_imp
# expression grammar, including the precedence of `and` versus `<->`.
expression_1 = "PetalLength(x,z) and (z < 3) <-> Classifier(z,0)"
# NOTE(review): `z` appears here with no predicate binding it inside this
# rule — verify how the parser resolves it.
expression_2 = "(z >= 3)"
expressions = [expression_1, expression_2]

In [8]:
# Map each predicate name used in the rule strings to the object implementing it.
predicates = {"PetalLength": PetalLength(), "Classifier": Classifier()}

In [9]:
# First training sample; used as the single input for the rule checks below.
x = train_data[0]

In [10]:
# Build the knowledge base from the rule strings and the predicate objects.
# No rule-to-loader mapping is supplied (empty list).
knowledge_base = KnowledgeBase(
    expressions=expressions,
    rule_to_data_loader_mapping=[],
    predicates=predicates,
)

No parameters to optimize


In [13]:
# Evaluate every rule on the single sample, binding the variable name "x" to it.
# The bare `outputs` expression makes the notebook display the results.
outputs = [rule( {"x" : x}) for rule in knowledge_base.rules] 
outputs

PetalLength(x,z)
Classifier(z,0)


[tensor([1.]), tensor([0.])]

In [12]:
# Evaluate every rule on the sample again and pass the results to the
# knowledge base's loss.
# NOTE(review): this repeats the rule evaluation of the previous cell instead
# of reusing `outputs`. The execution count of this cell (12) is also lower than
# the preceding cell's (13) — re-run with Restart & Run All before sharing.
loss = knowledge_base.loss( [rule( {"x" : x}) for rule in knowledge_base.rules] ) 

PetalLength(x,z)
Classifier(z,0)
