In [1]:
from ltn_imp.automation.knowledge_base import KnowledgeBase
import torch
import pandas as pd

In [2]:
# Fetch the Pima Indians diabetes CSV through the project's `download-medical-datasets` poe task
# (per the captured log it curls the file into examples/medical/datasets/).
!poetry run poe download-medical-datasets

[37mPoe =>[0m [94mmkdir -p examples/medical/datasets[0m
[37mPoe =>[0m [94mcurl -L -o examples/medical/datasets/pima_indians_imputed.csv https://raw.githubusercontent.com/ChristelSirocchi/hybrid-ML/main/pima_indians_imputed.csv[0m
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 33428  100 33428    0     0  99103      0 --:--:-- --:--:-- --:--:--     0- --:--:-- --:--:-- 99192


In [3]:
# Rewrite the dataset in place with every column cast to float. The first CSV
# column is read as the index and written back as the index, so re-running this
# cell leaves the file layout unchanged.
# NOTE(review): this overwrites the downloaded file; presumably the float cast is
# what the KnowledgeBase loaders expect — confirm against medical_config.yaml.
pd.read_csv("datasets/pima_indians_imputed.csv", index_col=0).astype(float).to_csv("datasets/pima_indians_imputed.csv")

In [4]:
def predict(model, x):
    """Return hard 0/1 predictions of a binary classifier for ``x``.

    Args:
        model: Callable module exposing ``eval()``. It is switched to
            evaluation mode and left in that mode when the function returns.
        x: Input batch. A tensor is cast to ``float32`` when its dtype differs;
            any other value is converted with
            ``torch.tensor(x, dtype=torch.float32)``.

    Returns:
        A ``float32`` tensor with the shape of the model output, holding
        ``1.0`` where the output is strictly greater than 0.5 and ``0.0``
        everywhere else.
    """
    model.eval()  # evaluation mode for the forward pass below
    with torch.no_grad():  # inference only, no autograd graph is needed
        # Normalise the input to a float32 tensor.
        if isinstance(x, torch.Tensor):
            inputs = x if x.dtype == torch.float32 else x.float()
        else:
            inputs = torch.tensor(x, dtype=torch.float32)

        # Forward pass, then cut the scores at the 0.5 decision boundary.
        scores = model(inputs)
        above_cutoff = scores > 0.5
        hard_labels = above_cutoff.float()
    return hard_labels

def compute_accuracy(model, data_loader):
    """Compute the binary-classification accuracy of ``model`` over a loader.

    Every batch is pushed through ``model``, the outputs are thresholded at
    0.5 (the same cut-off ``predict`` uses) and compared element-wise with the
    labels. Predictions and labels are flattened before the comparison, so
    ``(N,)`` and ``(N, 1)`` layouts are interchangeable and a batch holding a
    single sample is counted correctly.

    Args:
        model: Callable module exposing ``eval()``. It is switched to
            evaluation mode and left in that mode when the function returns.
        data_loader: Iterable yielding ``(data, labels)`` pairs. Each element
            may be a tensor of any dtype or anything ``torch.tensor`` accepts;
            both are converted to ``float32``.

    Returns:
        The fraction of correctly classified samples as a float, or ``0.0``
        when the loader yields no samples. A one-line summary is also printed.

    Raises:
        RuntimeError: If a batch yields a different number of predictions and
            labels.
    """

    def _as_float_tensor(value):
        # Accept tensors of any dtype as well as lists / arrays.
        if isinstance(value, torch.Tensor):
            return value if value.dtype == torch.float32 else value.float()
        return torch.tensor(value, dtype=torch.float32)

    correct = 0
    total = 0

    model.eval()  # Set the model to evaluation mode
    with torch.no_grad():  # Disable gradient computation
        for data, labels in data_loader:
            data = _as_float_tensor(data)
            labels = _as_float_tensor(labels)

            # Flatten instead of squeeze(): squeeze() turns a one-sample batch
            # into a 0-dim tensor, on which size(0) raises IndexError.
            predicted_labels = (model(data) > 0.5).float().reshape(-1)
            true_labels = labels.reshape(-1)

            if predicted_labels.numel() != true_labels.numel():
                raise RuntimeError(
                    f"Got {predicted_labels.numel()} predictions for "
                    f"{true_labels.numel()} labels in one batch"
                )

            # Count correct predictions
            correct += (predicted_labels == true_labels).sum().item()
            total += true_labels.numel()

    # Keep the return type a float even when nothing was evaluated.
    accuracy = correct / total if total > 0 else 0.0
    print(f"Correct: {correct}, Total: {total}, Accuracy: {accuracy:.4f}")
    return accuracy


In [5]:
# Build the knowledge base from the YAML configuration. Later cells use its
# `loaders`, `predicates` and `optimize` members.
# NOTE(review): the path is relative, so the kernel's working directory must be
# the folder containing medical_config.yaml.
kb = KnowledgeBase("medical_config.yaml")

In [6]:
# Take the first batch from the first loader and wrap its features in a DataFrame
# (columns are positional integers). `y` is not used afterwards.
# NOTE(review): only one batch is inspected — presumably it covers the whole
# dataset; confirm the loader's batch size in medical_config.yaml.
x, y = next(iter(kb.loaders[0]))
test_data = pd.DataFrame(x)
# Subgroups for probing the classifier. The variable names suggest column 5 is BMI
# and column 1 is Glucose — TODO confirm against the feature order of the loader.
# The cut-offs match the rules printed by kb.optimize (BMI 25/35, Glucose 100/127),
# except that the "low" groups include the boundary (<=) while the rules use <.
high_BMI = test_data[test_data[5] > 35]
low_BMI = test_data[test_data[5] <= 25]
high_Glucose = test_data[test_data[1] > 127]
low_Glucose = test_data[test_data[1] <= 100]

In [7]:
# Mean Classifier output on the high-BMI subgroup, before kb.optimize is run.
# Called outside torch.no_grad(), hence the grad_fn on the displayed tensor.
kb.predicates["Classifier"](torch.tensor(high_BMI.values, dtype=torch.float32)).mean()

tensor(0.3475, grad_fn=<MeanBackward0>)

In [8]:
# Mean Classifier output on the low-BMI subgroup, before kb.optimize is run.
kb.predicates["Classifier"](torch.tensor(low_BMI.values, dtype=torch.float32)).mean()

tensor(0.3185, grad_fn=<MeanBackward0>)

In [9]:
# Mean Classifier output on the high-Glucose subgroup, before kb.optimize is run.
kb.predicates["Classifier"](torch.tensor(high_Glucose.values, dtype=torch.float32)).mean()

tensor(0.3785, grad_fn=<MeanBackward0>)

In [10]:
# Mean Classifier output on the low-Glucose subgroup, before kb.optimize is run.
kb.predicates["Classifier"](torch.tensor(low_Glucose.values, dtype=torch.float32)).mean()

tensor(0.2877, grad_fn=<MeanBackward0>)

In [11]:
# Baseline accuracy of the Classifier on the first loader, before kb.optimize is run.
# compute_accuracy calls model.eval(), so the classifier is in evaluation mode afterwards.
compute_accuracy(kb.predicates["Classifier"], kb.loaders[0])

Correct: 491, Total: 768, Accuracy: 0.6393


0.6393229166666666

In [12]:
# Optimise the knowledge base for 3001 epochs with learning rate 1e-4.
# The captured output lists the six rules and their outputs alongside the loss;
# presumably log_steps=500 sets how often that report is printed — confirm in ltn_imp.
kb.optimize(num_epochs=3001, log_steps=500, lr=0.0001)

['∀ person.(((y == diabetes) -> Classifier(person)))', '∀ person.(((y == healthy) -> ~(Classifier(person))))', '∀ person.(((person[BMI] < 25) -> ~(Classifier(person))))', '∀ person.(((person[BMI] > 35) -> Classifier(person)))', '∀ person.(((person[Glucose] < 100) -> ~(Classifier(person))))', '∀ person.(((person[Glucose] > 127) -> Classifier(person)))']
Rule Outputs:  [tensor(0.5465, grad_fn=<RsubBackward1>), tensor(0.5343, grad_fn=<RsubBackward1>), tensor(0.8165, grad_fn=<RsubBackward1>), tensor(0.6790, grad_fn=<RsubBackward1>), tensor(0.7955, grad_fn=<RsubBackward1>), tensor(0.6960, grad_fn=<RsubBackward1>)]
Epoch 1/3001, Loss: 0.33997392654418945

['∀ person.(((y == diabetes) -> Classifier(person)))', '∀ person.(((y == healthy) -> ~(Classifier(person))))', '∀ person.(((person[BMI] < 25) -> ~(Classifier(person))))', '∀ person.(((person[BMI] > 35) -> Classifier(person)))', '∀ person.(((person[Glucose] < 100) -> ~(Classifier(person))))', '∀ person.(((person[Glucose] > 127) -> Classifier

In [13]:
# Accuracy of the Classifier on the same loader after kb.optimize, for comparison
# with the earlier baseline. This also puts the classifier back into evaluation
# mode (model.eval()) before the subgroup checks that follow.
compute_accuracy(kb.predicates["Classifier"], kb.loaders[0])

Correct: 552, Total: 768, Accuracy: 0.7188


0.71875

In [14]:
# Mean Classifier output on the high-BMI subgroup, after kb.optimize.
kb.predicates["Classifier"](torch.tensor(high_BMI.values, dtype=torch.float32)).mean()

tensor(0.4901, grad_fn=<MeanBackward0>)

In [15]:
# Mean Classifier output on the low-BMI subgroup, after kb.optimize.
kb.predicates["Classifier"](torch.tensor(low_BMI.values, dtype=torch.float32)).mean()

tensor(0.0109, grad_fn=<MeanBackward0>)

In [16]:
# Mean Classifier output on the high-Glucose subgroup, after kb.optimize.
kb.predicates["Classifier"](torch.tensor(high_Glucose.values, dtype=torch.float32)).mean()

tensor(0.5827, grad_fn=<MeanBackward0>)

In [17]:
# Mean Classifier output on the low-Glucose subgroup, after kb.optimize.
kb.predicates["Classifier"](torch.tensor(low_Glucose.values, dtype=torch.float32)).mean()

tensor(0.1174, grad_fn=<MeanBackward0>)