In [5]:
# %%
import sys

import sklearn
from sklearn.datasets import load_digits
from torch.utils.data import DataLoader

sys.path.append('..')
import torch
import numpy as np
import matplotlib.pyplot as plt

from deep_logic.utils.relunn import get_reduced_model, prune_features
from deep_logic import fol
import deep_logic as dl
from data import ConceptToTaskDataset
torch.manual_seed(0)
np.random.seed(0)



In [None]:
dataset = ConceptToTaskDataset("../data/CUB_200_2011")

X = dataset.attributes
y = np.asarray(dataset.targets)

concept_names = dataset.attribute_names.tolist()
concept_names

In [4]:
y = sklearn.preprocessing.OneHotEncoder(sparse=False).fit_transform(y.reshape(-1, 1))
X = sklearn.preprocessing.MinMaxScaler((0, 1)).fit_transform(X)
print(f'X shape: {X.shape}\nClasses: {np.unique(y)}')
print(f'X max: {X.max()} X min {X.min()}')

X shape: (11788, 312)
Classes: [0. 1.]
X max: 1.0 X min 0.0


In [5]:
from sklearn.model_selection import train_test_split
X, X_test, y, y_test = train_test_split(X, y, test_size=0.1)
print(f'X shape: {X.shape}\nY shape: {y.shape}')
print(f'X_test shape: {X_test.shape}\nY_test shape: {y_test.shape}')

X shape: (10609, 312)
Y shape: (10609, 200)
X_test shape: (1179, 312)
Y_test shape: (1179, 200)


In [6]:
x_train = torch.tensor(X, dtype=torch.float)
print(x_train.shape)

[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
torch.Size([10609, 312])


In [None]:
y_train = torch.zeros((y.shape[0], y.shape[1]), dtype=torch.float)
y_train = torch.tensor(y, dtype=torch.float)
x_test = x_train
n_classes = y_train.size(1)
print(n_classes)
print(y_train)
y_train.sum(dim=0)

In [47]:
torch.cuda.set_device(0)
device = torch.device("cuda")
x_train = x_train.to(device)
y_train = y_train.argmax(dim=1).to(torch.long).to(device)[:, 0]
n_classes = 1
loss_form = torch.nn.BCELoss() # CrossEntropyLoss()
y_train

IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)

In [46]:
torch.manual_seed(0)
np.random.seed(0)
dimensions = [10, 50, 20]
layers = [
    torch.nn.Linear(x_train.size(1), dimensions[0] * n_classes),
    torch.nn.LeakyReLU(),
    dl.nn.XLinear(dimensions[0], dimensions[1], n_classes),
    torch.nn.LeakyReLU(),
    dl.nn.XLinear(dimensions[1], dimensions[2], n_classes),
    torch.nn.LeakyReLU(),
    dl.nn.XLinear(dimensions[2], 1, n_classes),
    torch.nn.Softmax(),
]
model = torch.nn.Sequential(*layers)
model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.003)
model.train()
need_pruning = True
for epoch in range(3000):
    # forward pass
    optimizer.zero_grad()
    y_pred = model(x_train)
    # Compute Loss
    loss = loss_form(y_pred, y_train)

    for module in model.children():
        if isinstance(module, torch.nn.Linear):
            loss += 1e-8 * torch.norm(module.weight, 1)
            break

    # backward pass
    loss.backward()
    optimizer.step()

    if epoch > 1500 and need_pruning:
        prune_features(model, n_classes, device=device)
        need_pruning = False

    # compute accuracy
    if (epoch + 1) % 10 == 0:
        y_pred_d = torch.argmax(y_pred, dim=1)
        y_train_d = y_train # torch.argmax(y_train, dim=1)
        accuracy = y_pred_d.eq(y_train_d).sum().item() / y_train.size(0) * 100.
        print(f'Epoch: {epoch + 1} train accuracy: {accuracy:.2f} loss: {loss:.4f}')

Epoch: 10 train accuracy: 4.00 loss: 5.2969
Epoch: 20 train accuracy: 15.80 loss: 5.1510
Epoch: 30 train accuracy: 23.56 loss: 5.0729
Epoch: 40 train accuracy: 34.47 loss: 4.9709
Epoch: 50 train accuracy: 39.97 loss: 4.9101
Epoch: 60 train accuracy: 45.51 loss: 4.8551
Epoch: 70 train accuracy: 48.36 loss: 4.8271
Epoch: 80 train accuracy: 49.82 loss: 4.8123
Epoch: 90 train accuracy: 51.13 loss: 4.7995
Epoch: 100 train accuracy: 51.30 loss: 4.7978
Epoch: 110 train accuracy: 52.18 loss: 4.7891
Epoch: 120 train accuracy: 52.85 loss: 4.7824
Epoch: 130 train accuracy: 53.24 loss: 4.7782
Epoch: 140 train accuracy: 53.74 loss: 4.7732
Epoch: 150 train accuracy: 54.02 loss: 4.7705
Epoch: 160 train accuracy: 54.14 loss: 4.7695
Epoch: 170 train accuracy: 54.43 loss: 4.7667
Epoch: 180 train accuracy: 54.99 loss: 4.7610
Epoch: 190 train accuracy: 55.42 loss: 4.7566
Epoch: 200 train accuracy: 55.52 loss: 4.7554
Epoch: 210 train accuracy: 55.75 loss: 4.7534
Epoch: 220 train accuracy: 55.90 loss: 4.751

KeyboardInterrupt: 

# Local explanations

In [None]:

np.set_printoptions(precision=2, suppress=True)
outputs = []
for i, (xin, yin) in enumerate(zip(x_train, y_train)):
    model_reduced = get_reduced_model(model, xin)
    for module in model_reduced.children():
        if isinstance(module, torch.nn.Linear):
            wa = module.weight.detach().numpy()
            ba = module.bias.detach().numpy()
            break
    output = model_reduced(xin)

    pred_class = torch.argmax(output)
    true_class = torch.argmax(y_train[i])

    # generate local explanation only if the prediction is correct
    if pred_class.eq(true_class):
        local_explanation = fol.relunn.explain_local(model, x_train, y_train, xin)
        print(f'Input {(i + 1)}')
        print(f'\tx={xin.detach().numpy()}')
        print(f'\ty={output.detach().numpy()}, y_label={yin}')
        print(f'\tw={wa}')
        print(f'\tb={ba}')
        print(f'\tExplanation: {local_explanation}')
        print()
        xin = xin.reshape(8, 8)
        plt.figure(1, figsize=(3, 3))
        plt.imshow(xin, cmap=plt.cm.gray_r, interpolation='nearest')
        plt.show()
#         wa = wa.reshape(8, 8)
#         plt.figure(1, figsize=(3, 3))
#         plt.imshow(wa * xin.numpy(), cmap=plt.cm.gray_r, interpolation='nearest')
#         plt.show()

    outputs.append(output)
    if i > 10:
        break

In [None]:
# %% md

# Combine local explanations

# %%
counters = []
from sklearn.metrics import f1_score
y_train_d = torch.argmax(y_train, dim=1)
for target_class in range(n_classes):
    global_explanation, predictions, counter = fol.combine_local_explanations(model, x_train, y_train,
                                                                              topk_explanations=10,
                                                                              target_class=target_class,
                                                                              concept_names=concept_names)

    y2 = torch.argmax(y_train, dim=1) == target_class
    accuracy = sum(predictions == y2.detach().numpy().squeeze()) / len(predictions)
    f1 = f1_score(y_train[:, target_class], predictions)
    print(f'Class {target_class} - Global explanation: "{global_explanation}" - Accuracy: {accuracy:.4f} - F1: {f1:.4f}')
    counters.append(counter)


In [None]:
for i, counter in enumerate(counters):
    for j, values in enumerate(counter.items()):
        print(i, j, values)

In [None]:
from sklearn.metrics import f1_score, accuracy_score
y_pred = model(torch.Tensor(X_test)).argmax(dim=1).detach().numpy()
y_test = np.argmax(y_test, axis=1)


In [None]:
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='macro')
print(f"Accuracy: {accuracy:.2f}.\nF1: {f1}")

In [43]:
from sklearn.metrics import f1_score, accuracy_score
from sklearn.tree import DecisionTreeClassifier
tree_model = DecisionTreeClassifier(max_depth=30)
X_bool = X > 0.5
tree_model.fit(X, y_train_d.cpu().numpy())
X_test_bool = X_test > 0.5

y_pred = tree_model.predict(X)
accuracy = accuracy_score(y_train.cpu().numpy(), y_pred)
f1 = f1_score(y_train.cpu().numpy(), y_pred, average='macro')
print(f"Accuracy: {accuracy:.2f}.\nF1: {f1:.2f}")

y_pred = tree_model.predict(X_test)
accuracy = accuracy_score(np.argmax(y_test, axis=1), y_pred)
f1 = f1_score(np.argmax(y_test, axis=1), y_pred, average='macro')
print(f"Accuracy: {accuracy:.2f}.\nF1: {f1:.2f}")

Accuracy: 0.88.
F1: 0.89
Accuracy: 0.27.
F1: 0.27


In [None]:
sklearn.tree.plot_tree(tree_model)
plt.show()