## MLP Feature Attribution

In [38]:
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from feature_extractor import FeatureExtractor
from sklearn.metrics import f1_score, accuracy_score

from utils import load_dataset_split
from mlp_torch import MLP, CitationDataset

def idx_to_label(y):
    mapping = ["background", "method", "result"]
    out = []
    for i in y:
        out.append(mapping[i])
    return out

In [39]:
train_x, train_y, test_x, test_y = load_dataset_split()

feature_list = {"word_vector"}
vect = "count"
vect_pca = True

feat_ext = FeatureExtractor(feature_list=feature_list, word_vectorizer=vect, vector_pca=False, vector_filter=True)
train_feat = feat_ext.extract_features(train_x, train=True).values
test_feat = feat_ext.extract_features(test_x).values

train_data = CitationDataset(train_feat, train_y.values)
test_data = CitationDataset(test_feat, test_y.values)

train_dataloader = DataLoader(train_data, batch_size=64, shuffle=True)
test_dataloader = DataLoader(test_data)


In [40]:
print(len(train_feat[0]))

27424


In [41]:
model = MLP(len(train_feat[0]))
model.cuda()

model.train(train_dataloader)
test_pred = idx_to_label(model.predict(test_dataloader))

print(f"Model: MLP_torch", f", Features: {feature_list}")
if "word_vector" in feature_list:
    print(f"Vectorizer: {vect}") 
    print(f"Vector PCA: {vect_pca}")
print("Accuracy: ", accuracy_score(test_y, test_pred))
print("F1 Score: ", f1_score(test_y, test_pred, average="macro"))

Epoch: 0 / 30 Loss: 0.737544059753418
Epoch: 1 / 30 Loss: 0.6256468892097473
Epoch: 2 / 30 Loss: 0.11711911112070084
Epoch: 3 / 30 Loss: 0.048118870705366135
Epoch: 4 / 30 Loss: 0.005014120135456324
Epoch: 5 / 30 Loss: 0.0019885061774402857
Epoch: 6 / 30 Loss: 0.00027434463845565915
Epoch: 7 / 30 Loss: 0.00011717282905010507
Epoch: 8 / 30 Loss: 0.0004549185687210411
Epoch: 9 / 30 Loss: 0.00017286477668676525
Epoch: 10 / 30 Loss: 4.721865479950793e-05
Epoch: 11 / 30 Loss: 0.0004317023849580437
Epoch: 12 / 30 Loss: 0.17438974976539612
Epoch: 13 / 30 Loss: 0.00013715215027332306
Epoch: 14 / 30 Loss: 0.00010179907258134335
Epoch: 15 / 30 Loss: 1.0813814697030466e-05
Epoch: 16 / 30 Loss: 0.0012265030527487397
Epoch: 17 / 30 Loss: 6.302192196017131e-05
Epoch: 18 / 30 Loss: 5.658420195686631e-05
Epoch: 19 / 30 Loss: 3.116462266916642e-06
Epoch: 20 / 30 Loss: 3.547002052073367e-05
Epoch: 21 / 30 Loss: 8.208321560232434e-06
Epoch: 22 / 30 Loss: 9.29822363104904e-06
Epoch: 23 / 30 Loss: 0.000542

In [45]:
from captum.attr import IntegratedGradients

input_sentence = "Our findings agree with recent work [69,70] where continual organic enrichment from farming processes resulted in increased macrofaunal abundances despite expectations of negative impacts from this contamination."
input_feat = feat_ext.extract_features([input_sentence]).values

ig = IntegratedGradients(model)
input = torch.tensor(input_feat, requires_grad=True, dtype=torch.float32)

all_attributions = []

for class_idx in range(3):
    attributions, approx_error = ig.attribute(input, target=class_idx, return_convergence_delta=True, internal_batch_size=16)

    all_attributions.append(attributions)



In [43]:
print(all_attributions)

[tensor([[0., 0., -0.,  ..., 0., 0., 0.]], dtype=torch.float64,
       grad_fn=<AddBackward0>), tensor([[-0., 0., 0.,  ..., -0., -0., -0.]], dtype=torch.float64,
       grad_fn=<AddBackward0>), tensor([[0., -0., 0.,  ..., -0., 0., -0.]], dtype=torch.float64,
       grad_fn=<AddBackward0>)]


In [46]:
vocab = feat_ext.vectorizer.vocabulary_

word_list = [w for w, _ in sorted(vocab.items(), key=lambda x: x[1])]

mean_val = torch.mean(torch.stack([torch.mean(i, 0) for i in all_attributions]), dim=0)

labels = ["background", "method", "results"]

for label, attributions in zip(labels, all_attributions):
    sorted_words = [(word, weight) for word, weight in sorted(zip(word_list, attributions[0]), key=lambda x: x[1], reverse=True)]

    print(label)
    for word, weight in sorted_words[:20]:
        print(f"{word}: {weight}")
    print("")

# for label, attributions in zip(labels, all_attributions):
#     mean_att = torch.mean(attributions, 0) - mean_val
#     weights = [i.item() for i in mean_att]

#     sorted_words = [(word, weight) for word, weight in sorted(zip(word_list, weights), key=lambda x: x[1], reverse=True)]

#     print(label)
#     for word, weight in sorted_words[:20]:
#         print(f"{word}: {weight}")
#     print("")

#     with open(f"../results/mlp_stopword_{label}.txt", "a") as f:
#         for word, weight in sorted_words:
#             f.write(f"{word}: {weight}\n")



background
impacts: 1.510992095572858
organic: 0.6065488973995976
processes: 0.5074216984608848
contamination: 0.49124341475879724
work: 0.4700559090309714
farming: 0.4235120323462682
increased: 0.30183356369221287
negative: 0.23394303795982874
abundances: 0.1537517331778544
enrichment: 0.11386483945215153
aa: 0.0
aachen: 0.0
aactg: -0.0
aadahl: 0.0
aaednv: 0.0
aag: 0.0
aagaard: -0.0
aal: -0.0
aalto: 0.0
aamm: -0.0

method
expectations: 0.9323014375137118
negative: 0.16624281165705737
farming: 0.09265758557394345
enrichment: 0.06247529718311356
aa: -0.0
aachen: 0.0
aactg: 0.0
aadahl: -0.0
aaednv: -0.0
aag: -0.0
aagaard: 0.0
aal: 0.0
aalto: -0.0
aamm: 0.0
aamodt: -0.0
aanonsen: -0.0
aao: -0.0
aaos: -0.0
aap: -0.0
aaps: -0.0

results
findings: 2.4839210235223477
agree: 1.5080942692521186
despite: 1.447583054018094
resulted: 1.2505068467132894
recent: 0.3690873031705178
abundances: 0.21612012696152066
work: 0.07467093306865327
contamination: 0.07435091744049478
processes: 0.06579236976907