In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from captum.attr import IntegratedGradients

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from transformers import BertTokenizer, BertForSequenceClassification, BertConfig

from captum.attr import visualization as viz
from captum.attr import LayerConductance, LayerIntegratedGradients
from sklearn.model_selection import train_test_split
from spacy.lang.en import English
from splitbert.textsplit import text_segmentation
from splitbert.SplitBertEncoderAttentionModel import SplitBertEncoderAttentionModel
from splitbert.utils import conduct_input_ids_and_attention_masks
from splitbert.utils import make_masks
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
import tqdm

# Data Preparation

In [4]:
post_df = pd.read_csv('../predicting-satisfaction-using-graphs/csv/dataset/liwc_post.csv', encoding='UTF-8')
comment_df = pd.read_csv('../predicting-satisfaction-using-graphs/csv/dataset/liwc_comment.csv', encoding='UTF-8')
reply_df = pd.read_csv('../predicting-satisfaction-using-graphs/csv/dataset/avg_satisfaction_raw_0-999.csv', encoding='ISO-8859-1')

modes = [['seg', 'seg', 'snt']]

nlp = English()
nlp.add_pipe("sentencizer")

# satisfaction score (y)
satisfactions_float = list(reply_df['satisfy_composite'])
satisfactions = []

for s in satisfactions_float:
    if s < 3.5:
        satisfactions.append(0)
    elif s < 5:
        satisfactions.append(1)
    else:
        satisfactions.append(2)

reply_contents = list(reply_df['replyContent'])
post_contents = list(post_df['content'])
comment_bodies = list(comment_df['content'])


def get_sequences(contents, mode):
    sequences = []

    if mode == 'all':
        for content in contents:
            sequences.append([content])
    elif mode == 'seg':
        for content in contents:
            sentences = list(map(lambda x: str(x), list(nlp(content).sents)))
            sequences.append(text_segmentation(sentences))
    else:  # sentences
        for content in contents:
            sequences.append(list(map(lambda x: str(x), list(nlp(content).sents))))

    return sequences


for mode in modes:
    print(mode)
    post_sequences = get_sequences(post_contents, mode[0])
    comment_sequences = get_sequences(comment_bodies, mode[1])
    reply_sequences = get_sequences(reply_contents, mode[2])

    data = []
    max_post, max_comment, max_reply = 0, 0, 0
    i = 0
    for post, comment, reply, satisfaction, satisfaction_float in zip(post_sequences, comment_sequences,
                                                                          reply_sequences, satisfactions,
                                                                          satisfactions_float):
        if len(post) > max_post:
            max_post = len(post)
        if len(comment) > max_comment:
            max_comment = len(comment)
        if len(reply) > max_reply:
            max_reply = len(reply)

        data.append([i, post, comment, reply, satisfaction, satisfaction_float])
        i += 1

    print(max_post, max_comment, max_reply)
    max_count = max(max_post, max_comment, max_reply)
    print(max_count)

    columns = ['index', 'post_contents', 'comment_contents', 'reply_contents', 'label', 'score']
    df = pd.DataFrame(data, columns=columns)

    # data split (train & test sets)
    idx_train, idx_remain = train_test_split(df.index.values, test_size=0.20, random_state=42)
    idx_val, idx_test = train_test_split(idx_remain, test_size=0.50, random_state=42)

    train_df = df.iloc[idx_train]
    val_df = df.iloc[idx_val]
    test_df = df.iloc[idx_test]

    count_min_label = min(train_df['label'].value_counts())

    labels = [0, 1, 2]

    train_sample_df = pd.DataFrame([], columns=columns)

    for label in labels:
        tmp = train_df[train_df['label'] == label]
        tmp_sampled = tmp.sample(frac=1).iloc[:count_min_label]
        train_sample_df = pd.concat([train_sample_df, tmp_sampled])

    train_sample_df = train_sample_df.sample(frac=1)

['seg', 'seg', 'snt']
10 4 10
10


# Model Preparation

In [6]:
def normalize_tensor(data):
    return (data - torch.min(data)) / (torch.max(data) - torch.min(data))

In [125]:
def forward_func_ig(inputs, p_count, c_count, model):
    embeddings = inputs
    encoder_outputs = torch.empty(size=(1, model.embedding_size * 2)).to(model.device)
    outputs_list = []

    non_zero_rows = embeddings[0][embeddings[0].sum(dim=1) != 0]
    zero_rows = torch.zeros((embeddings[0].shape[0] - non_zero_rows.shape[0], model.embedding_size),
                            dtype=torch.int, device=model.device)
    embeddings = torch.cat([non_zero_rows, zero_rows])
    embeddings = embeddings.unsqueeze(0)
    embeddings = embeddings.swapaxes(0, 1)
    outputs_list.append(embeddings)
    src_mask, src_key_padding_mask = make_masks(model.max_post_len, [p_count, c_count], model.device, model.max_post_len, 
                                                model.max_comment_len, "concat_all")
    if model.encoder_mask_mode:
        encoder_output = model.encoder(embeddings, mask=src_mask, src_key_padding_mask=src_key_padding_mask)
    else:
        encoder_output = model.encoder(embeddings, src_key_padding_mask=src_key_padding_mask)
    encoder_outputs[0][:model.embedding_size] = torch.mean(encoder_output[:p_count+c_count], dim=0).squeeze(0)

    if model.attention_mask_mode:
        attention = model.mhead_attention(encoder_output, encoder_output, encoder_output, attn_mask=src_mask,
                                          key_padding_mask=src_key_padding_mask)[0]
    else:
        attention = model.mhead_attention(encoder_output, encoder_output, encoder_output, key_padding_mask=src_key_padding_mask)[0]

    # mul mask - diagonal masking
    attention = attention.swapaxes(0, 2)
    mask = torch.tensor([1] * (p_count + c_count) + [0] * (model.max_post_len + model.max_comment_len - (p_count + c_count))).to(
            model.device)
    attention = attention.mul(mask).swapaxes(0, 2)

    attention = torch.flatten(attention)
    attention = model.attn_classifier1(attention)
    attention = model.attn_classifier2(attention)
    encoder_outputs[0][model.embedding_size:] = attention
    
    encoder_outputs = model.mean_attn_layer(encoder_outputs)
    logits = model.classifier2(encoder_outputs)
    if model.softmax:
        logits = F.softmax(logits, dim=1)
    return logits
    

ig = IntegratedGradients(forward_func_ig)

In [119]:
def prepare_model(target, path, epoch, encoder_mask_mode, attention_mask_mode, softmax):
    # device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    device = torch.device('cpu')

    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)
    
    model_path = f'{path}/epoch_{epoch}.model'
    
    model = SplitBertEncoderAttentionModel(num_labels=len(labels), embedding_size=384, max_len=max_count,
                                               max_post_len=max_post, max_comment_len=max_comment, device=device,
                                               target="post_comment", encoder_mask_mode=encoder_mask_mode,
                                               attention_mask_mode=attention_mask_mode, softmax=softmax)

    model.load_state_dict(torch.load(model_path, map_location=device))
    model.to('cpu')
    model.eval()

    for param in model.sbert.parameters():
        param.requires_grad = False
    
    for param in model.bert.parameters():
        param.requires_grad = False
        
    return device, model, tokenizer

In [8]:
def construct_input_ref_pair(targets):
    input_ids_list, ref_input_ids_list, attention_masks_list, sentence_count_list = [], [], [], []
    
    for contents in targets:
        result = tokenizer(contents, pad_to_max_length=True, truncation=True, max_length=256, return_tensors='pt')
        
        input_ids = result['input_ids']
        sentence_count_list.append(torch.tensor(len(input_ids)).unsqueeze(0))
        attention_masks = result['attention_mask']
        
        pad = (0, 0, 0, max_count-len(input_ids))
        input_ids = nn.functional.pad(input_ids, pad, "constant", 0)
        attention_masks = nn.functional.pad(attention_masks, pad, "constant", 0)
        ref_input_ids = torch.zeros_like(input_ids)

        input_ids_list.append(input_ids.unsqueeze(0))
        ref_input_ids_list.append(ref_input_ids.unsqueeze(0))
        attention_masks_list.append(attention_masks.unsqueeze(0))
    
    return input_ids_list, ref_input_ids_list, attention_masks_list, sentence_count_list

In [149]:
def summarize_attributions(attribution):
    attributions = attribution.sum(dim=-1).squeeze(0)
    print(torch.norm(attributions))
    attributions = attributions / torch.norm(attributions)
    return attributions

In [10]:
def get_indexes(filename):
    index_df = pd.read_csv(filename, encoding='UTF-8')
    index_df.columns = ['Unnamed: 0', 'prediction', 'label', 'score', 'idx']
    val_index = sorted(list(index_df.idx.values))
    
    return val_index

In [11]:
index_list = get_indexes(f'../predicting-satisfaction-using-graphs/csv/splitbert_classifier/post_comment/seg_seg/epoch_4_result.csv')

In [151]:
def splitbert_integrated_gradient_post_comment(index, post, comment, p_sentences, c_sentences, label, score, visualize=False, softmax=False):
    
    def post_or_comment_or_reply(index):
        for i, sentences in enumerate(all_sentences):
            if all_tokens[index] in sentences:
                if i == 0:
                    return 'post'
                elif i == 1:
                    return 'comment'
                else:
                    return 'reply'
    
    input_ids, ref_input_ids, attention_masks, sentence_counts = construct_input_ref_pair([post, comment])
    
    one_hot_labels = torch.nn.functional.one_hot(torch.tensor(label), num_classes=len(labels))
    inputs = {'labels': one_hot_labels.type(torch.float).to(device),
          'input_ids1': input_ids[0].to(device),
          'input_ids2': input_ids[1].to(device),
          'attention_mask1': attention_masks[0].to(device),
          'attention_mask2': attention_masks[1].to(device),
          'sentence_count1': sentence_counts[0].to(device),
          'sentence_count2': sentence_counts[1].to(device)
         }
    
    with torch.no_grad():
        inputs = pc_model(**inputs).hidden_states
    inputs = inputs[0]

    # inputs = torch.stack(embeddings, dim=0)
    baselines = torch.zeros((1, 14, 384))    
    pred = forward_func_ig(inputs, sentence_counts[0], sentence_counts[1], pc_model)
    base_pred = forward_func_ig(baselines, sentence_counts[0], sentence_counts[1], pc_model)
    
#     pred = forward_func_ig(inputs, sentence_counts[0], sentence_counts[1], pc_model)
#     base_pred = forward_func_ig(baselines, sentence_counts[0], sentence_counts[1], pc_model)
#     print(f'pred: {pred}, base_pred: {base_pred}')
    
    result = []
    
    
    i_attribution, i_delta = ig.attribute(inputs=inputs, target=torch.argmax(pred), additional_forward_args=(sentence_counts[0], sentence_counts[1], pc_model), n_steps=50, internal_batch_size=1, return_convergence_delta=True)
    b_attribution, b_delta = ig.attribute(inputs=baselines, target=torch.argmax(pred), additional_forward_args=(sentence_counts[0], sentence_counts[1], pc_model), n_steps=50, internal_batch_size=1, return_convergence_delta=True)
    print(torch.sum(summarize_attributions(i_attribution)))
    print(torch.sum(summarize_attributions(b_attribution)))
    attribution, delta = ig.attribute(inputs=inputs, target=torch.argmax(pred), additional_forward_args=(sentence_counts[0], sentence_counts[1], pc_model), n_steps=50, internal_batch_size=1, return_convergence_delta=True)
    attributions = summarize_attributions(attribution)
    f_attributions = torch.flatten(attributions)
    f_attributions = f_attributions[f_attributions.nonzero()].squeeze(1)
    abs_attributions = list(map(abs, map(float, f_attributions)))
    idx_attributions = []
    for j in range(len(abs_attributions)):
        idx_attributions.append((j, abs_attributions[j], f_attributions[j].item()))
    idx_attributions.sort(key=lambda x:x[1], reverse=True)
        
    top3 = idx_attributions[:3]
    
    if visualize:
        all_sentences = [['[[post]]'], post, ['[[comment]]'], comment]
        all_tokens = [item for all_sentences in all_sentences for item in all_sentences]
        
        vis_attributions = []
        
        j = 0
        for i in range(len(all_tokens)):
            if all_tokens[i] in ['[[post]]', '[[comment]]']:
                vis_attributions.append(0)
            else:
                vis_attributions.append(f_attributions[j].item())
                j += 1
                
        vis_attributions = torch.tensor(vis_attributions)
        
        score_vis = viz.VisualizationDataRecord(vis_attributions,
                                                torch.max(torch.softmax(pred, dim=0)),
                                                torch.argmax(pred),  # predicted label
                                                f'{label}, {score}',  # true label
                                                p_sentences + ' ' + c_sentences,
                                                vis_attributions.sum(),
                                                all_tokens,
                                                delta)
        raw_text = ' '.join(post) + ' '.join(comment)
        
        print('\033[1m', 'Visualization For Score', '\033[0m')
        viz.visualize_text([score_vis])
        print(f'pred: {pred}, base_pred: {base_pred}')
        print(f'sub: {pred - base_pred}')
        print(f'sum sub: {torch.sum(pred-base_pred)}')
        print(vis_attributions)
        print('delta: ', delta)
        
    else:
        where = []
        
        all_sentences = [post, comment]
        all_tokens = [item for all_sentences in all_sentences for item in all_sentences]

        for j in range(len(top3)):
            where.append(post_or_comment_or_reply(top3[j][0]))
            result.append([index, post, comment, score, all_tokens[top3[j][0]], top3[j][2], post_or_comment_or_reply(top3[j][0])])

        # result.append([index, post, comment, score, all_tokens[top3[0][0]], top3[0][2], where[0]])
        # result.append([index, post, comment, score, all_tokens[top3[1][0]], top3[1][2], where[1]])
        # result.append([index, post, comment, score, all_tokens[top3[2][0]], top3[2][2], where[2]])

        return result, label, torch.argmax(pred).item()

In [147]:
def splitbert_integrated_gradient_post_comment(index, post, comment, p_sentences, c_sentences, label, score, visualize=False, softmax=False):
    
    def post_or_comment_or_reply(index):
        for i, sentences in enumerate(all_sentences):
            if all_tokens[index] in sentences:
                if i == 0:
                    return 'post'
                elif i == 1:
                    return 'comment'
                else:
                    return 'reply'
    
    input_ids, ref_input_ids, attention_masks, sentence_counts = construct_input_ref_pair([post, comment])
    
    one_hot_labels = torch.nn.functional.one_hot(torch.tensor(label), num_classes=len(labels))
    inputs = {'labels': one_hot_labels.type(torch.float).to(device),
          'input_ids1': input_ids[0].to(device),
          'input_ids2': input_ids[1].to(device),
          'attention_mask1': attention_masks[0].to(device),
          'attention_mask2': attention_masks[1].to(device),
          'sentence_count1': sentence_counts[0].to(device),
          'sentence_count2': sentence_counts[1].to(device)
         }
    
    with torch.no_grad():
        inputs = pc_model(**inputs).hidden_states
    inputs = inputs[0]

    # inputs = torch.stack(embeddings, dim=0)
    baselines = torch.zeros((1, 14, 384))    
    pred = forward_func_ig(inputs, sentence_counts[0], sentence_counts[1], pc_model)
    base_pred = forward_func_ig(baselines, sentence_counts[0], sentence_counts[1], pc_model)
    
#     pred = forward_func_ig(inputs, sentence_counts[0], sentence_counts[1], pc_model)
#     base_pred = forward_func_ig(baselines, sentence_counts[0], sentence_counts[1], pc_model)
#     print(f'pred: {pred}, base_pred: {base_pred}')
    
    result = []
    
    for target in [0, 1, 2]:
        i_attribution, i_delta = ig.attribute(inputs=inputs, target=target, additional_forward_args=(sentence_counts[0], sentence_counts[1], pc_model), n_steps=50, internal_batch_size=1, return_convergence_delta=True)
        b_attribution, b_delta = ig.attribute(inputs=baselines, target=target, additional_forward_args=(sentence_counts[0], sentence_counts[1], pc_model), n_steps=50, internal_batch_size=1, return_convergence_delta=True)
        attribution, delta = ig.attribute(inputs=inputs, target=target, additional_forward_args=(sentence_counts[0], sentence_counts[1], pc_model), n_steps=50, internal_batch_size=1, return_convergence_delta=True)
        attributions = summarize_attributions(attribution)
        f_attributions = torch.flatten(attributions)
        f_attributions = f_attributions[f_attributions.nonzero()].squeeze(1)
        abs_attributions = list(map(abs, map(float, f_attributions)))
        idx_attributions = []
        for j in range(len(abs_attributions)):
            idx_attributions.append((j, abs_attributions[j], f_attributions[j].item()))
        idx_attributions.sort(key=lambda x:x[1], reverse=True)

        top3 = idx_attributions[:3]

        if visualize:
            all_sentences = [['[[post]]'], post, ['[[comment]]'], comment]
            all_tokens = [item for all_sentences in all_sentences for item in all_sentences]

            vis_attributions = []

            j = 0
            for i in range(len(all_tokens)):
                if all_tokens[i] in ['[[post]]', '[[comment]]']:
                    vis_attributions.append(0)
                else:
                    vis_attributions.append(f_attributions[j].item())
                    j += 1

            vis_attributions = torch.tensor(vis_attributions)

            score_vis = viz.VisualizationDataRecord(vis_attributions,
                                                    torch.max(torch.softmax(pred, dim=0)),
                                                    torch.argmax(pred),  # predicted label
                                                    f'{label}, {score}',  # true label
                                                    p_sentences + ' ' + c_sentences,
                                                    vis_attributions.sum(),
                                                    all_tokens,
                                                    delta)
            raw_text = ' '.join(post) + ' '.join(comment)

            print('\033[1m', 'Visualization For Score', '\033[0m')
            viz.visualize_text([score_vis])
            print(f'target: {target}')
            print(f'pred: {pred}, base_pred: {base_pred}')
            print(f'sub: {pred - base_pred}')
            print(f'sum sub: {torch.sum(pred-base_pred)}')
            print(f'sum of input attributions: {torch.sum(summarize_attributions(i_attribution))}')
            print(f'baseline attributions: {summarize_attributions(b_attribution)}')
            print(vis_attributions)
            print('delta: ', delta)

        else:
            where = []

            all_sentences = [post, comment]
            all_tokens = [item for all_sentences in all_sentences for item in all_sentences]

            for j in range(len(top3)):
                where.append(post_or_comment_or_reply(top3[j][0]))
                result.append([index, post, comment, score, all_tokens[top3[j][0]], top3[j][2], post_or_comment_or_reply(top3[j][0])])

            # result.append([index, post, comment, score, all_tokens[top3[0][0]], top3[0][2], where[0]])
            # result.append([index, post, comment, score, all_tokens[top3[1][0]], top3[1][2], where[1]])
            # result.append([index, post, comment, score, all_tokens[top3[2][0]], top3[2][2], where[2]])

            return result, label, torch.argmax(pred).item()

In [120]:
device, pc_model, tokenizer = prepare_model('post_comment', '../predicting-satisfaction-using-graphs/splitbert/model/seg_seg/attention', 5, False, 'diagonal', True)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [152]:
for i in index_list[:5]:
    splitbert_integrated_gradient_post_comment(i, post_sequences[i], comment_sequences[i], post_contents[i], comment_bodies[i], satisfactions[i], satisfactions_float[i], True, True)

tensor(1.0514, dtype=torch.float64)
tensor(0.9504, dtype=torch.float64)
tensor(0., dtype=torch.float64)
tensor(nan, dtype=torch.float64)
tensor(1.0514, dtype=torch.float64)
[1m Visualization For Score [0m


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
"0, 2.45",2 (1.00),"Im the guy no one suspects is as messed up as I am....Am wealthy, have nice wife, kids, everyone envious of our home (8000 sf so not small in wealthiest county in the USA).....good looking and well you name it ..........But Im now unemployed, wife has and I think still sleeps with other men, am now drinking a lot, no sex in years, kids make jokes about my drinking to wife......I embarass my wife out in piblic, I dont work out anymore and have 50 lbs more than I did a yr and half ago, I am going broke and well Im a loser I think........But when i drink at least the pain and how much of a loser I am doesnt keep me awake all night and makes me feel like I can do stuff when drunk...... As bad as I feel about the depression, and I am pretty sure you are just taking out your pent up anger on him, you came out swinging first by calling him a moron over a little mistsle first",0.95,"[[post]] Im the guy no one suspects is as messed up as I am....Am wealthy, have nice wife, kids, everyone envious of our home (8000 sf so not small in wealthiest county in the USA).....good looking and well you name it ..........But Im now unemployed, wife has and I think still sleeps with other men, am now drinking a lot, no sex in years, kids make jokes about my drinking to wife......I embarass my wife out in piblic, I dont work out anymore and have 50 lbs more than I did a yr and half ago, I am going broke and well Im a loser I think........But when i drink at least the pain and how much of a loser I am doesnt keep me awake all night and makes me feel like I can do stuff when drunk...... [[comment]] As bad as I feel about the depression, and I am pretty sure you are just taking out your pent up anger on him, you came out swinging first by calling him a moron over a little mistsle first"
,,,,


pred: tensor([[1.4614e-03, 1.4889e-05, 9.9852e-01]], grad_fn=<SoftmaxBackward0>), base_pred: tensor([[4.9324e-09, 9.9995e-01, 4.6728e-05]], grad_fn=<SoftmaxBackward0>)
sub: tensor([[ 0.0015, -0.9999,  0.9985]], grad_fn=<SubBackward0>)
sum sub: 0.0
tensor([ 0.0000,  0.9988,  0.0000, -0.0484])
delta:  tensor([0.0007], dtype=torch.float64)
tensor(1.0516, dtype=torch.float64)
tensor(0.9447, dtype=torch.float64)
tensor(0., dtype=torch.float64)
tensor(nan, dtype=torch.float64)
tensor(1.0516, dtype=torch.float64)
[1m Visualization For Score [0m


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
"0, 2.45",0 (1.00),"I'm wondering if anyone here has thought about suicide methods that come close to killing you but can't. I'm not ready to die but I want to hurt. I want people to see physically how much I'm hurting emotionally.I've thought about jumping in front of cars or falling from non-lethal heights. Those aren't the nicest methods but I'll go with them if I can't find a better one. Donât do these things please. If other people donât believe how hurt you are emotionally, try your best to let it not affect you.",0.94,"[[post]] I'm wondering if anyone here has thought about suicide methods that come close to killing you but can't. I'm not ready to die but I want to hurt. I want people to see physically how much I'm hurting emotionally. I've thought about jumping in front of cars or falling from non-lethal heights. Those aren't the nicest methods but I'll go with them if I can't find a better one. [[comment]] Donât do these things please. If other people donât believe how hurt you are emotionally, try your best to let it not affect you."
,,,,


pred: tensor([[0.9921, 0.0060, 0.0018]], grad_fn=<SoftmaxBackward0>), base_pred: tensor([[4.9324e-09, 9.9995e-01, 4.6728e-05]], grad_fn=<SoftmaxBackward0>)
sub: tensor([[ 0.9921, -0.9939,  0.0018]], grad_fn=<SubBackward0>)
sum sub: -5.51808625459671e-08
tensor([ 0.0000, -0.0539,  0.0000,  0.9985])
delta:  tensor([0.0012], dtype=torch.float64)
tensor(0.0029, dtype=torch.float64)
tensor(1.5048, dtype=torch.float64)
tensor(0., dtype=torch.float64)
tensor(nan, dtype=torch.float64)
tensor(0.0029, dtype=torch.float64)
[1m Visualization For Score [0m


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
"1, 4.0",1 (1.00),"I remember being pretty excited for the new sun and moon version and had quite a bit of fun when it came out but now almost a week later I just dont have the energy to finish it, its just not enjoyable anymore. I feel like depression steals all of the fun in things i liked doing. And the fact that i just failed an exam at uni doesnt help things. Sometimes I dont feel like living but dont want to die either and it sucks, sorry for rambling, just needed to get it out somewhere :c I just started playing a HeartGold ROM on my android device and after 2:37 of playtime I don't see the point anymore. I know how the game plays out already, no point in carrying on for another 70 hours.",1.5,"[[post]] I remember being pretty excited for the new sun and moon version and had quite a bit of fun when it came out but now almost a week later I just dont have the energy to finish it, its just not enjoyable anymore. I feel like depression steals all of the fun in things i liked doing. And the fact that i just failed an exam at uni doesnt help things. Sometimes I dont feel like living but dont want to die either and it sucks, sorry for rambling, just needed to get it out somewhere :c [[comment]] I just started playing a HeartGold ROM on my android device and after 2:37 of playtime I don't see the point anymore. I know how the game plays out already, no point in carrying on for another 70 hours."
,,,,


pred: tensor([[1.7266e-05, 9.9998e-01, 7.3158e-08]], grad_fn=<SoftmaxBackward0>), base_pred: tensor([[7.2887e-08, 9.9556e-01, 4.4361e-03]], grad_fn=<SoftmaxBackward0>)
sub: tensor([[ 1.7193e-05,  4.4187e-03, -4.4360e-03]], grad_fn=<SubBackward0>)
sum sub: -5.681067705154419e-08
tensor([0.0000, 0.5042, 0.8504, 0.0000, 0.1501])
delta:  tensor([1.3400e-05], dtype=torch.float64)
tensor(0.9140, dtype=torch.float64)
tensor(1.0115, dtype=torch.float64)
tensor(0., dtype=torch.float64)
tensor(nan, dtype=torch.float64)
tensor(0.9140, dtype=torch.float64)
[1m Visualization For Score [0m


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
"1, 3.6",2 (1.00),"Probably sounds stupid but the only thing I look forward to everyday is laying in bed after doing nothing all day and just sleeping for 15 hours. I wish I could sleep longer Sertraline (Zoloft) changed my life for real, but everyone is different.",1.01,"[[post]] Probably sounds stupid but the only thing I look forward to everyday is laying in bed after doing nothing all day and just sleeping for 15 hours. I wish I could sleep longer [[comment]] Sertraline (Zoloft) changed my life for real, but everyone is different."
,,,,


pred: tensor([[0.0650, 0.0040, 0.9310]], grad_fn=<SoftmaxBackward0>), base_pred: tensor([[4.9324e-09, 9.9995e-01, 4.6728e-05]], grad_fn=<SoftmaxBackward0>)
sub: tensor([[ 0.0650, -0.9960,  0.9309]], grad_fn=<SubBackward0>)
sum sub: 5.960464477539063e-08
tensor([0.0000, 0.9999, 0.0000, 0.0116])
delta:  tensor([-0.0064], dtype=torch.float64)
tensor(1.5240, dtype=torch.float64)
tensor(0.2112, dtype=torch.float64)
tensor(0., dtype=torch.float64)
tensor(nan, dtype=torch.float64)
tensor(1.5240, dtype=torch.float64)
[1m Visualization For Score [0m


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
"2, 6.4",1 (1.00),"My girlfriend has been severely depressed for the better part of 2 months and fighting her symptoms in addition to medication adherance and weekly talk therapy just wasn't cutting it. She made the decision to go inpatient today after a week of intermittent SI and thoughts of SIB. I work in acute mental health treatment and have chronic mental illness myself and while she's told me that she'll alert me when I've hit a raw spot or need to take off my mental health thinking cap, I've never been on the other side of this, at least not in this deep. She had a rough patch when I first started dating her, but we weren't living together. Any recommendations for being supportive of her while she's being hospitalized, with thoughts to how my experience as both a mental health provider and consumer may hinder/help my behavior and thoughts? I won't be visiting everyday, partially because the hospital is over an hour away and also because she's got support just a phone call away and I know relatively what the environment is going to be like. I will be visiting a couple of times this week but I'm going to be as unintrusive as I can. Try and be there for her as partner instead of as someone who works in the field. I think it's actually a positive that you are so familiar with how all that jazz works. She's inpatient so she'll have access to doctors, therapists, meds, whatever. What she won't have access is a caring partner, and that's the role you need to fill. Also, be sure and take stock of your own wellness. It can't be easy having an SO struggling as you also have your own afflictions.",0.21,"[[post]] My girlfriend has been severely depressed for the better part of 2 months and fighting her symptoms in addition to medication adherance and weekly talk therapy just wasn't cutting it. She made the decision to go inpatient today after a week of intermittent SI and thoughts of SIB. I work in acute mental health treatment and have chronic mental illness myself and while she's told me that she'll alert me when I've hit a raw spot or need to take off my mental health thinking cap, I've never been on the other side of this, at least not in this deep. She had a rough patch when I first started dating her, but we weren't living together. Any recommendations for being supportive of her while she's being hospitalized, with thoughts to how my experience as both a mental health provider and consumer may hinder/help my behavior and thoughts? I won't be visiting everyday, partially because the hospital is over an hour away and also because she's got support just a phone call away and I know relatively what the environment is going to be like. I will be visiting a couple of times this week but I'm going to be as unintrusive as I can. [[comment]] Try and be there for her as partner instead of as someone who works in the field. I think it's actually a positive that you are so familiar with how all that jazz works. She's inpatient so she'll have access to doctors, therapists, meds, whatever. What she won't have access is a caring partner, and that's the role you need to fill. Also, be sure and take stock of your own wellness. It can't be easy having an SO struggling as you also have your own afflictions."
,,,,


pred: tensor([[4.3075e-05, 9.1854e-01, 8.1412e-02]], grad_fn=<SoftmaxBackward0>), base_pred: tensor([[1.0209e-06, 5.9206e-01, 4.0794e-01]], grad_fn=<SoftmaxBackward0>)
sub: tensor([[ 4.2054e-05,  3.2649e-01, -3.2653e-01]], grad_fn=<SubBackward0>)
sum sub: 2.9802322387695312e-08
tensor([ 0.0000, -0.4122, -0.2816,  0.0000,  0.8656,  0.0393])
delta:  tensor([-0.0047], dtype=torch.float64)


In [14]:
device, pc_model, tokenizer = prepare_model('post_comment', '../predicting-satisfaction-using-graphs/splitbert/model/seg_seg/wo_softmax', 4, False)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [61]:
for i in index_list[:1]:
    splitbert_integrated_gradient_post_comment(i, post_sequences[i], comment_sequences[i], post_contents[i], comment_bodies[i], satisfactions[i], satisfactions_float[i], True, False)

torch.Size([768])
torch.Size([384])


RuntimeError: a view of a leaf Variable that requires grad is being used in an in-place operation.