In [0]:
!pip install fever-scorer

Collecting fever-scorer
  Downloading https://files.pythonhosted.org/packages/61/d1/95f1133ded0d74a9d24fe5e15c43f2b3c31f018d0227fa34376f93cf0f08/fever-scorer-2.0.39.tar.gz
Building wheels for collected packages: fever-scorer
  Building wheel for fever-scorer (setup.py) ... [?25l[?25hdone
  Created wheel for fever-scorer: filename=fever_scorer-2.0.39-cp36-none-any.whl size=3585 sha256=3e11437505765264adfe4340013bb177bacc508dcd558f0e87d6fd824f8f7223
  Stored in directory: /root/.cache/pip/wheels/9d/f1/2f/bdeac68eff673e4c1cfaab09d14438cd4e4c8a585aeba7ff40
Successfully built fever-scorer
Installing collected packages: fever-scorer
Successfully installed fever-scorer-2.0.39


In [0]:
# Mount Google Drive (Colab only); the data, vocabulary and model paths used
# below all live under /content/gdrive.
from google.colab import drive
drive.mount('/content/gdrive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/gdrive


In [0]:
import torch
import spacy
from tqdm import tqdm
from torchtext import data
from torchtext import datasets
import torch.nn.functional as tnf
from fever.scorer import fever_score
import pandas as pd
import pdb
import dill

In [0]:
SEED = 1234

# Seed Python's `random` as well as torch: torchtext's iterators take their
# shuffling state from the `random` module, so seeding torch alone does not
# make the batch order reproducible.
import random
random.seed(SEED)
torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

# TEXT: spaCy-tokenised text. include_lengths=True makes every batch attribute a
# (token_ids, lengths) pair, which LSTM.forward unpacks as claims[0] / claims[1].
TEXT = data.Field(include_lengths = True, tokenize='spacy')
# LABEL: the per-sentence "sent_label" column.
LABEL = data.LabelField()
# OTHER: raw pass-through field for metadata columns that must not be
# tokenised or numericalised (original sentence text, id strings).
OTHER = data.RawField()
OTHER.is_target = False


In [0]:
# CSV column name -> (example attribute name, Field) for the training split.
trainset_fields = {
    "sent_label": ("sent_label", LABEL),
    "sentence": ("sentence", TEXT),
    "claim": ("claim", TEXT),
}
# The dev split has the same three columns plus two raw metadata columns that
# are carried through untouched for scoring.
devset_fields = {
    **trainset_fields,
    "org_sentence": ("org_sentence", OTHER),
    "docid_claimid_sentno": ("docid_claimid_sentno", OTHER),
}

In [0]:
# set paths
# Processed (claim, candidate sentence, label) pairs used for training.
train_path = "/content/gdrive/My Drive/NLPWikiData/processed_train_data2.csv"
# Processed dev pairs, including the raw sentence and the id string used for scoring.
processed_dev_path = "/content/gdrive/My Drive/NLPWikiData/processed_golddev_data2.csv" 
# Original dev claims (JSON lines); get_score reads id/label/claim/evidence from it.
dev_path = "/content/gdrive/My Drive/NLPWikiData/dev.jsonl"
# sen_preds_output_path = "/content/gdrive/My Drive/NLPWikiData/sen_pred_train.jsonl"
# Directory prefix (note the trailing slash) the pickled fields are written to.
vocabulary_path = "/content/gdrive/My Drive/"

In [0]:
# Parse the processed CSVs into torchtext datasets. The *_fields dicts select
# columns by header name, which is presumably why the header row is kept
# (skip_header=False) — confirm against the torchtext TabularDataset docs.
trainset = data.TabularDataset(train_path, format="CSV", fields=trainset_fields, skip_header=False)
devset = data.TabularDataset(processed_dev_path, format="CSV", fields=devset_fields, skip_header=False)

In [0]:
# Sanity check: size of each split and its first parsed example.
print(len(trainset))
print(vars(trainset.examples[0]))
print(len(devset))
print(vars(devset.examples[0]))

3346905
{'sent_label': 'False', 'sentence': ['Nikolaj', 'Coster', '-', 'Waldau', '-LRB-', '-LSB-', 'neɡ̊olaɪ̯', 'kʰʌsd̥ɐ', 'ˈʋald̥ɑʊ̯', '-RSB-', ';', 'born', '27', 'July', '1970', '-RRB-', 'is', 'a', 'Danish', 'actor', ',', 'producer', 'and', 'screenwriter', '.'], 'claim': ['Nikolaj', 'Coster', '-', 'Waldau', 'worked', 'with', 'the', 'Fox', 'Broadcasting', 'Company', '.']}
166897
{'sent_label': 'True', 'sentence': ['Soul', 'Food', 'is', 'a', '1997', 'American', 'comedy', '-', 'drama', 'film', 'produced', 'by', 'Kenneth', '`', '`', 'Babyface', "''", 'Edmonds', ',', 'Tracey', 'Edmonds', 'and', 'Robert', 'Teitel', 'and', 'released', 'by', 'Fox', '2000', 'Pictures', '.'], 'claim': ['Fox', '2000', 'Pictures', 'released', 'the', 'film', 'Soul', 'Food', '.'], 'org_sentence': "Soul Food is a 1997 American comedy-drama film produced by Kenneth `` Babyface '' Edmonds , Tracey Edmonds and Robert Teitel and released by Fox 2000 Pictures .", 'docid_claimid_sentno': 'Soul_Food_-LRB-film-RRB-{#--#}13

In [0]:
# Build the token vocabulary from the training split only and attach the 100-d
# GloVe vectors. unk_init is presumably the initialiser for tokens that have no
# pretrained vector — confirm against the torchtext Vocab docs.
TEXT.build_vocab(trainset,vectors="glove.6B.100d",unk_init=torch.Tensor.normal_)

.vector_cache/glove.6B.zip: 862MB [06:31, 2.20MB/s]                          
100%|█████████▉| 398218/400000 [00:20<00:00, 19792.43it/s]

In [0]:
# Label vocabulary, also derived from the training split only.
LABEL.build_vocab(trainset)

In [0]:
# vocabulary of training data (same to be used for dev and test)
# The TEXT size is later used as the embedding layer's vocab_size (INPUT_DIM).
print(f"Unique tokens in TEXT vocabulary: {len(TEXT.vocab)}")
print(f"Unique tokens in LABEL vocabulary: {len(LABEL.vocab)}")

Unique tokens in TEXT vocabulary: 90622
Unique tokens in LABEL vocabulary: 2


In [0]:
# Inspect the 20 most frequent tokens, the first ten index-to-string entries
# and the internals of the label vocabulary (including the class counts).
print(TEXT.vocab.freqs.most_common(20))
print(TEXT.vocab.itos[:10])
print(vars(LABEL.vocab))

[('.', 6679593), (',', 5993860), ('the', 5457627), ('in', 3084025), ('and', 3053898), ('of', 2935330), ('a', 2786757), ('is', 2062525), ('was', 1467657), ('to', 1264993), ('The', 1254807), ('-LRB-', 1104248), ('-RRB-', 1104213), ('-', 1028424), ('for', 960542), ('as', 863462), ("'s", 799846), ('by', 771416), ('`', 761885), ('an', 748843)]
['<unk>', '<pad>', '.', ',', 'the', 'in', 'and', 'of', 'a', 'is']
{'freqs': Counter({'False': 3082707, 'True': 264198}), 'itos': ['False', 'True'], 'stoi': defaultdict(<function _default_unk_index at 0x7f8238dc3a60>, {'False': 0, 'True': 1}), 'vectors': None}


In [0]:
# Persist the fitted fields (vocabularies included) so that the test section
# can numericalise text with exactly the same token ids.
for _file_name, _field, _message in (
    ("TEXT_VOCAB_5EPOCH", TEXT, "Text Dumping Successfull"),
    ("LABEL_VOCAB_5EPOCH", LABEL, "Label Dumping Successfull"),
):
    with open(vocabulary_path + _file_name, mode="wb") as f:
        dill.dump(_field, f)
        print(_message)



Text Dumping Successfull
Label Dumping Successfull


In [0]:
# Round-trip check that the pickled fields can be read back. X and Y are not
# referenced anywhere else in the visible notebook.
# NOTE: dill.load can execute arbitrary code — only load files you wrote yourself.
with open(vocabulary_path+"TEXT_VOCAB_5EPOCH", "rb") as f:
    X = dill.load(f)
    print("Loaded Successfully")
with open(vocabulary_path+"LABEL_VOCAB_5EPOCH", "rb") as f:
    Y = dill.load(f)
    print("Loaded Successfully")


Loaded Successfully
Loaded Successfully


In [0]:
# Batch size shared by the iterators; use the GPU when one is available.
BATCH_SIZE=128
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("We are working with ", device)

We are working with  cuda


In [0]:
# Both iterators share one configuration: batches are bucketed and sorted by
# claim length (the sentence side is packed with enforce_sorted=False).
_iterator_options = dict(
    batch_size=BATCH_SIZE,
    sort_within_batch=True,
    sort_key=lambda example: len(example.claim),
    device=device,
)
train_iterator = data.BucketIterator(trainset, **_iterator_options)
dev_iterator = data.BucketIterator(devset, **_iterator_options)

In [0]:
import torch.nn as nn

class LSTM(nn.Module):
    """Sentence-selection classifier over a (claim, candidate sentence) pair.

    The same embedding and LSTM weights encode the claim and the sentence
    independently; the two final hidden representations are concatenated and
    projected to ``output_dim`` logits by a linear layer.

    Args:
        vocab_size: Number of rows in the embedding table.
        embedding_dim: Size of each token embedding.
        hidden_dim: LSTM hidden size per direction.
        output_dim: Number of output logits.
        n_layers: Number of stacked LSTM layers.
        bidirectional: Whether the LSTM runs in both directions.
        pad_idx: Index of the padding token; its embedding row is excluded
            from gradient updates (may be ``None``).
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, n_layers, 
                 bidirectional, pad_idx):
        super().__init__()

        num_directions = 2 if bidirectional else 1

        # pad_idx used to be accepted but ignored; passing it through keeps the
        # padding row out of the gradient updates.
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=pad_idx)
        self.lstm = nn.LSTM(input_size=embedding_dim, hidden_size=hidden_dim,
                            num_layers=n_layers, bidirectional=bidirectional)
        # Two encoded inputs (claim + sentence), each hidden_dim * num_directions
        # wide. For bidirectional=True this equals the former hidden_dim*2*2, so
        # existing checkpoints still load.
        self.fc = nn.Linear(hidden_dim * num_directions * 2, output_dim)

    def forward_again(self, text, text_lengths):
        """Encode one padded batch of token ids into a fixed-size vector.

        Args:
            text: Token ids; indexed as ``[sent_len, batch_size]`` (the LSTM is
                built without ``batch_first``).
            text_lengths: True length of every sequence in the batch.

        Returns:
            Tensor of shape ``[batch_size, hidden_dim * num_directions]`` taken
            from the top layer's final hidden state(s).
        """
        embedded = self.embedding(text)

        # pack_padded_sequence expects the lengths on the CPU in recent PyTorch
        # releases, even when the batch itself lives on the GPU.
        if torch.is_tensor(text_lengths):
            text_lengths = text_lengths.cpu()
        packed = nn.utils.rnn.pack_padded_sequence(embedded, text_lengths, enforce_sorted=False)

        _, (hidden_state, _) = self.lstm(packed)

        if self.lstm.bidirectional:
            # The last two entries are the top layer's forward and backward states.
            return torch.cat((hidden_state[-2, :, :], hidden_state[-1, :, :]), 1)
        # Unidirectional: only the top layer's final state exists.
        return hidden_state[-1, :, :]

    def forward(self, claims, sentences):
        """Score a batch of (claim, sentence) pairs.

        Args:
            claims: ``(token_ids, lengths)`` pair for the claims.
            sentences: ``(token_ids, lengths)`` pair for the candidate sentences.

        Returns:
            Logits of shape ``[batch_size, output_dim]``.
        """
        claim_text, claim_text_length = claims[0], claims[1]
        sentence_text, sentence_text_length = sentences[0], sentences[1]

        claim_hidden = self.forward_again(claim_text, claim_text_length)
        sentence_hidden = self.forward_again(sentence_text, sentence_text_length)

        concatenated_hidden = torch.cat((claim_hidden, sentence_hidden), 1)

        return self.fc(concatenated_hidden)

In [0]:
# Model hyper-parameters. EMBEDDING_DIM matches the 100-d GloVe vectors that
# are copied into the embedding layer below.
INPUT_DIM = len(TEXT.vocab)
EMBEDDING_DIM = 100
HIDDEN_DIM = 256
OUTPUT_DIM = 2
N_LAYERS = 1
BIDIRECTIONAL = True
PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token]

model = LSTM(
    vocab_size=INPUT_DIM,
    embedding_dim=EMBEDDING_DIM,
    hidden_dim=HIDDEN_DIM,
    output_dim=OUTPUT_DIM,
    n_layers=N_LAYERS,
    bidirectional=BIDIRECTIONAL,
    pad_idx=PAD_IDX,
)

In [0]:
def count_parameters(model):
    """Return the number of scalar elements in the model's trainable parameters.

    Only parameters with ``requires_grad`` set are counted.
    """
    trainable_elements = 0
    for parameter in model.parameters():
        if parameter.requires_grad:
            trainable_elements += parameter.numel()
    return trainable_elements

# Report the size of the freshly constructed model.
print(f'The model has {count_parameters(model):,} trainable parameters')

The model has 9,797,434 trainable parameters


In [0]:
# Vectors attached to the vocabulary by TEXT.build_vocab(..., vectors=...).
pretrained_embeddings = TEXT.vocab.vectors

# The shape should be (len(TEXT.vocab), EMBEDDING_DIM) for the copy below to succeed.
print(pretrained_embeddings.shape)

torch.Size([90622, 100])


In [0]:
# Overwrite the randomly initialised embedding table in place with the pretrained vectors.
model.embedding.weight.data.copy_(pretrained_embeddings)

tensor([[-0.1117, -0.4966,  0.1631,  ...,  1.2647, -0.2753, -0.1325],
        [-0.8555, -0.7208,  1.3755,  ...,  0.0825, -1.1314,  0.3997],
        [-0.3398,  0.2094,  0.4635,  ..., -0.2339,  0.4730, -0.0288],
        ...,
        [-0.7135,  0.4351, -1.9781,  ..., -0.3908,  2.2369, -0.8613],
        [ 0.1130,  0.3988,  0.0366,  ...,  0.5790, -1.6348,  0.1398],
        [-0.7313,  1.9365, -0.3453,  ...,  0.1143,  0.0194,  0.5561]])

In [0]:
UNK_IDX = TEXT.vocab.stoi[TEXT.unk_token]

# Start the <unk> and <pad> rows at zero instead of the values copied in above.
model.embedding.weight.data[UNK_IDX] = torch.zeros(EMBEDDING_DIM)
model.embedding.weight.data[PAD_IDX] = torch.zeros(EMBEDDING_DIM)

print(model.embedding.weight.data)

tensor([[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [-0.3398,  0.2094,  0.4635,  ..., -0.2339,  0.4730, -0.0288],
        ...,
        [-0.7135,  0.4351, -1.9781,  ..., -0.3908,  2.2369, -0.8613],
        [ 0.1130,  0.3988,  0.0366,  ...,  0.5790, -1.6348,  0.1398],
        [-0.7313,  1.9365, -0.3453,  ...,  0.1143,  0.0194,  0.5561]])


In [0]:
import torch.optim as optim

# Adam over all model parameters, using the optimiser's default hyper-parameters.
optimizer = optim.Adam(model.parameters())

In [0]:
# Cross-entropy over the OUTPUT_DIM logits produced by the model.
criterion = nn.CrossEntropyLoss()

# Move both the model and the loss to the selected device.
model = model.to(device)
criterion = criterion.to(device)

In [0]:
def get_score(probabilities, docid_claimid_sentno, org_sentence, dev_path, top_k=5):
    """Rank the candidate sentences of every claim and build the scoring records.

    Args:
        probabilities: One ranking score per candidate (higher is better),
            aligned index-for-index with the next two arguments.
        docid_claimid_sentno: Strings of the form
            ``"<doc_id>{#--#}<claim_id>{#--#}<sentence_no>"``.
        org_sentence: Original (untokenised) text of every candidate.
        dev_path: JSON-lines file providing ``id``, ``label``, ``claim`` and
            ``evidence`` for each claim.
        top_k: Number of best-ranked candidates kept per claim. Defaults to 5,
            the value that used to be hard-coded; ``None`` keeps all of them.

    Returns:
        ``(file_data, fever_data)``. ``file_data`` holds one dict per claim with
        the keys ``id``, ``claim``, ``sentences``, ``page_ids`` and ``indices``
        (input for the downstream RTE step). ``fever_data`` holds one dict per
        claim in the shape passed to ``fever_score``. Claims from ``dev_path``
        that have no candidate at all are omitted from both lists.
    """
    org_dev_data = pd.read_json(dev_path, lines=True)

    # Group the candidates by claim id. Each candidate is stored as
    # [probability, sentence_no, original_sentence, doc_id].
    claim_dict = dict()
    for key, probability, sentence in zip(docid_claimid_sentno, probabilities, org_sentence):
        doc_id, claim_id, sentno = key.split("{#--#}")
        claim_dict.setdefault(int(claim_id), []).append([probability, sentno, sentence, doc_id])

    file_data = []
    fever_data = []
    prob_count = 0
    for claim_id, claim_label, claim, evidence_list in zip(
            org_dev_data['id'], org_dev_data['label'],
            org_dev_data['claim'], org_dev_data['evidence']):
        claim_id = int(claim_id)
        candidates = claim_dict.get(claim_id)
        if candidates is None:
            # No candidate sentence was scored for this claim: nothing to rank.
            continue

        prob_count += len(candidates)
        # sorted() is stable, so candidates with equal scores keep their input order.
        ranked = sorted(candidates, key=lambda candidate: candidate[0], reverse=True)[:top_k]

        # for RTE .jsonl file
        file_data.append({
            'id': claim_id,
            'claim': claim,
            'sentences': [candidate[2] for candidate in ranked],
            'page_ids': [candidate[3] for candidate in ranked],
            'indices': [candidate[1] for candidate in ranked],
        })

        # for fever score
        # The gold label is reused as the predicted label, so the label accuracy
        # reported by the scorer is trivially 1.0 and only the evidence is judged.
        fever_data.append({
            'label': claim_label,
            'predicted_label': claim_label,
            'predicted_evidence': [[candidate[3], int(candidate[1])] for candidate in ranked],
            'evidence': evidence_list,
        })

    print('prob_count', prob_count)
    return file_data, fever_data

In [0]:
def evaluate(model, iterator, file, score_fn=None):
    """Run the model over ``iterator`` and turn its scores into ranking records.

    Args:
        model: The sentence-selection model; called as ``model(claims, sentences)``.
        iterator: Iterable of batches exposing ``claim``, ``sentence``,
            ``docid_claimid_sentno`` and ``org_sentence``. When a batch also has
            ``sent_label``, loss and accuracy are accumulated with the
            module-level ``criterion``.
        file: Path handed to the scoring function as the file of original
            claims. It used to be ignored in favour of the global ``dev_path``.
        score_fn: Callable ``(probabilities, ids, sentences, path)`` returning
            ``(file_data, fever_data)``. Defaults to ``get_score``; pass another
            scorer for data without gold labels.

    Returns:
        The ``(file_data, fever_data)`` pair produced by the scoring function.
    """
    if score_fn is None:
        score_fn = get_score

    epoch_loss = 0
    docid_claimid_sentno = []
    org_sentences = []
    probabilities = []
    correct_predictions = 0
    total_claims = 0

    # Switch to inference mode once instead of on every batch.
    model.eval()

    with torch.no_grad():
        for batch in iterator:
            claims, sentences = batch.claim, batch.sentence

            eval_predictions = model(claims, sentences)
            probability = tnf.softmax(eval_predictions, 1)

            # Unlabeled batches (e.g. the test split) have no sent_label; the
            # metrics are skipped for them while probabilities are still collected.
            labels = getattr(batch, 'sent_label', None)
            if labels is not None:
                predicted = torch.max(eval_predictions, 1)[1].view(labels.size())
                correct_predictions += (predicted == labels).sum().item()
                epoch_loss += criterion(eval_predictions, labels).item()
                total_claims += labels.size(0)

            # Column 1 is the score of the class with index 1.
            probabilities.extend(probability[:, 1].tolist())
            docid_claimid_sentno.extend(batch.docid_claimid_sentno)
            org_sentences.extend(batch.org_sentence)

    file_data, fever_data = score_fn(probabilities, docid_claimid_sentno, org_sentences, file)

    if total_claims:
        average_accuracy = 100. * correct_predictions / total_claims
        print(f'Correct Predictions: {correct_predictions}')
        print(f'Total Claims: {total_claims}')
        print(f'Validation Loss: {epoch_loss/len(iterator)}')
        print(f'Average Accuracy: {average_accuracy}%')
    print(f'-----------------------------')

    return file_data, fever_data

In [0]:
import pdb


def train(model, optimizer, criterion, path, best_f1, epoch_num):
    """Train for one pass over ``train_iterator``, then evaluate and checkpoint.

    After the pass, the model is evaluated on the module-level ``dev_iterator``
    and ``dev_path``, FEVER metrics are printed, and the weights together with
    the dev predictions are written to disk.

    Args:
        model: Model to optimise.
        optimizer: Optimiser stepping ``model``'s parameters.
        criterion: Loss applied to the logits and ``batch.sent_label``.
        path: Filename prefix of the checkpoint; ``"<epoch_num>_<f1>.pt"`` is
            appended. It used to be ignored in favour of the global ``model_path``.
        best_f1: Best dev F1 seen before this epoch.
        epoch_num: Epoch index used in the output file names.

    Returns:
        ``(mean training loss per batch, best F1 including this epoch)``.
    """
    # evaluate() leaves the model in eval mode, so re-enable training once here.
    model.train()

    epoch_loss = 0
    n_batches = 0  # stays 0 when the iterator is empty, instead of an unbound index

    for n_batches, batch in enumerate(train_iterator, start=1):
        optimizer.zero_grad()
        claims, sentences = batch.claim, batch.sentence
        predictions = model(claims, sentences)

        loss = criterion(predictions, batch.sent_label)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

        if n_batches % 5000 == 0:
            print(f'BATCH:  {n_batches}')

    mean_loss = epoch_loss / max(n_batches, 1)

    print("--------------------------------")
    print(f'BATCH: {n_batches}')
    print("loss", mean_loss)
    file_data, fever_data = evaluate(model, dev_iterator, dev_path)
    fever_val, accuracy, precision, recall, f1score = fever_score(fever_data)
    print(f'Fever Score: {fever_val} | Accuracy: {accuracy}')
    print(f'Precision: {precision} | Recall: {recall} | F1Score: {f1score}')

    # A checkpoint is still written every epoch; best_f1 is only tracked so the
    # caller gets a meaningful value back (it was previously returned unchanged).
    best_f1 = max(best_f1, f1score)

    print(f'Saving Model. . . ')
    torch.save(model.state_dict(), path+f'{epoch_num}_{f1score:0.3f}.pt')
    print(f'Model Saved Successfully!')
    pd.DataFrame(fever_data).to_csv("/content/gdrive/My Drive/NLPWikiData/fever_data_output_E"+f'{epoch_num}.csv')
    pd.DataFrame(file_data).to_json("/content/gdrive/My Drive/NLPWikiData/sen_pred_train_E"+f'{epoch_num}.jsonl', orient='records', lines=True)
    print(f'jsonl file saved for RTE')
    print("--------------------------------")
    print("--------------------------------")

    return mean_loss, best_f1

In [0]:
import time

def epoch_time(start_time, end_time):
    """Split the span between two timestamps (in seconds) into minutes and seconds.

    Returns:
        ``(minutes, seconds)`` as integers, both truncated.
    """
    total_seconds = end_time - start_time
    whole_minutes = int(total_seconds / 60)
    return whole_minutes, int(total_seconds - (whole_minutes * 60))

In [0]:
N_EPOCHS = 5
# Checkpoint filename prefix; train() appends "<epoch>_<f1>.pt".
model_path = "/content/gdrive/My Drive/sent_selec_E"
best_valid_loss = float('inf')
best_f1 = 0

for epoch in range(N_EPOCHS):
    epoch_started = time.time()
    # train() also evaluates on the dev split and writes the checkpoint.
    train_loss, best_f1 = train(model, optimizer, criterion, model_path, best_f1, epoch)
    epoch_mins, epoch_secs = epoch_time(epoch_started, time.time())

    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')

BATCH:  5000
BATCH:  10000
BATCH:  15000
BATCH:  20000
BATCH:  25000
--------------------------------
BATCH: 26148
loss 0.20737157987893148
prob_count 166897
Correct Predictions: 144729
Total Claims: 166897
Validation Loss: 0.3848355031612278
Average Accuracy: 86.71755633714207%
-----------------------------
Fever Score: 0.7739273927392739 | Accuracy: 1.0
Precision: 0.29384188418839774 | Recall: 0.7739273927392739 | F1Score: 0.4259577199333986
Saving Model. . . 
Model Saved Successfully!
jsonl file saved for RTE
--------------------------------
--------------------------------
Epoch: 01 | Epoch Time: 28m 13s
BATCH:  5000
BATCH:  10000
BATCH:  15000
BATCH:  20000
BATCH:  25000
--------------------------------
BATCH: 26148
loss 0.19112894987436438
prob_count 166897
Correct Predictions: 144249
Total Claims: 166897
Validation Loss: 0.3781695513546101
Average Accuracy: 86.42995380384309%
-----------------------------
Fever Score: 0.7716771677167716 | Accuracy: 1.0
Precision: 0.2922667266726

# **Model Testing** 

In [0]:
# Processed (claim, candidate sentence) pairs of the test split.
test_path = "/content/gdrive/My Drive/NLPWikiData/NEWprocessed_test_data.csv"

# NOTE(review): this checkpoint name does not follow the
# "sent_selec_E<epoch>_<f1>.pt" pattern written by train() above — presumably it
# was saved by an earlier version of the notebook; confirm the file exists.
model_path = "/content/gdrive/My Drive/sent_selec_0.42889917068716255.pt"
# Where get_score_test writes the top-ranked sentences of every test claim.
sen_pred_test_path = "/content/gdrive/My Drive/NLPWikiData/sen_pred_test.jsonl"

In [0]:
# Fresh fields for the test split. The vocabulary of this new TEXT is replaced
# further down by the one pickled during training, so token ids match the model.
TEXT = data.Field(include_lengths = True, tokenize='spacy')
# LABEL = data.LabelField()
OTHER = data.RawField()
OTHER.is_target = False

In [0]:
# CSV column name -> (example attribute name, Field) for the test split, which
# has no label column.
testset_fields = {
    column: (column, field)
    for column, field in (
        ("sentence", TEXT),
        ("claim", TEXT),
        ("org_sentence", OTHER),
        ("docid_claimid_sentno", OTHER),
    )
}

In [0]:
# Load the fields pickled in the training section; their vocabularies are
# copied onto the test fields below.
# NOTE: dill.load can execute arbitrary code — only load files you wrote yourself.
with open("/content/gdrive/My Drive/TEXT_VOCAB_5EPOCH", "rb") as f:
    TEST_TEXT = dill.load(f)
    print("Text Load Successfull")
with open("/content/gdrive/My Drive/LABEL_VOCAB_5EPOCH", "rb") as f:
    TEST_LABEL = dill.load(f)
    print("Label Load Successfull")

Text Load Successfull
Label Load Successfull


In [0]:
# Parse the processed test CSV; columns are selected by header name via testset_fields.
testset = data.TabularDataset(test_path, format="CSV", fields=testset_fields, skip_header=False)

In [0]:
# Sanity check: number of test pairs and the first parsed example.
print(len(testset))
print(vars(testset.examples[0]))

341042
{'sentence': ['Henry', 'Spencer', 'is', 'a', 'Canadian', 'computer', 'programmer', 'and', 'space', 'enthusiast', '.'], 'claim': ['Henry', 'Spencer', 'is', 'played', 'by', 'a', 'Greek', 'actor', '.'], 'org_sentence': 'Henry Spencer is a Canadian computer programmer and space enthusiast .', 'docid_claimid_sentno': 'Henry_Spencer_-LRB-disambiguation-RRB-{#--#}89296{#--#}0'}


In [0]:
# NOTE(review): the vocabulary built here from the test split is discarded two
# cells below, where TEXT.vocab is replaced by the training vocabulary.
TEXT.build_vocab(testset)
# ,vectors="glove.6B.100d",unk_init=torch.Tensor.normal_)

In [0]:
# NOTE(review): testset_fields binds no column to LABEL, and LABEL.vocab is
# replaced by the training vocabulary just below — this call looks redundant; confirm.
LABEL.build_vocab(testset)

In [0]:
# Reuse the training vocabulary so test tokens map to the ids the model was
# trained with. itos and stoi come along with the vocab object, so assigning
# them again afterwards was a no-op.
TEXT.vocab = TEST_TEXT.vocab

In [0]:
# Reuse the training label vocabulary. itos and stoi are attributes of that same
# object, so assigning them again afterwards was a no-op.
LABEL.vocab = TEST_LABEL.vocab

In [0]:
# vocabulary of training data (same to be used for dev and test)
# The sizes must match the training run, because INPUT_DIM is derived from them below.
print(f"Unique tokens in TEXT vocabulary: {len(TEXT.vocab)}")
print(f"Unique tokens in LABEL vocabulary: {len(LABEL.vocab)}")

Unique tokens in TEXT vocabulary: 90622
Unique tokens in LABEL vocabulary: 2


In [0]:
# These frequencies belong to the vocabulary loaded from the training run
# (TEXT.vocab was replaced above), not to the test split.
print(TEXT.vocab.freqs.most_common(20))
print(TEXT.vocab.itos[:10])
print(vars(LABEL.vocab))

[('.', 6679593), (',', 5993860), ('the', 5457627), ('in', 3084025), ('and', 3053898), ('of', 2935330), ('a', 2786757), ('is', 2062525), ('was', 1467657), ('to', 1264993), ('The', 1254807), ('-LRB-', 1104248), ('-RRB-', 1104213), ('-', 1028424), ('for', 960542), ('as', 863462), ("'s", 799846), ('by', 771416), ('`', 761885), ('an', 748843)]
['<unk>', '<pad>', '.', ',', 'the', 'in', 'and', 'of', 'a', 'is']
{'freqs': Counter({'False': 3082707, 'True': 264198}), 'itos': ['False', 'True'], 'stoi': defaultdict(<function _default_unk_index at 0x7fde871c97b8>, {'False': 0, 'True': 1}), 'vectors': None}


In [0]:
# Same settings as in the training section, repeated so that the test section
# can be run without the training cells.
BATCH_SIZE=128
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("We are working with ", device)

We are working with  cuda


In [0]:
# Same bucketing configuration as the train/dev iterators: batches are sorted
# by claim length.
_test_iterator_options = dict(
    batch_size=BATCH_SIZE,
    sort_within_batch=True,
    sort_key=lambda example: len(example.claim),
    device=device,
)
test_iterator = data.BucketIterator(testset, **_test_iterator_options)

In [0]:
# The architecture must match the training run exactly, otherwise the saved
# state dict cannot be loaded into it.
INPUT_DIM = len(TEXT.vocab)
EMBEDDING_DIM = 100
HIDDEN_DIM = 256
OUTPUT_DIM = 2
N_LAYERS = 1
BIDIRECTIONAL = True
PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token]

model = LSTM(
    vocab_size=INPUT_DIM,
    embedding_dim=EMBEDDING_DIM,
    hidden_dim=HIDDEN_DIM,
    output_dim=OUTPUT_DIM,
    n_layers=N_LAYERS,
    bidirectional=BIDIRECTIONAL,
    pad_idx=PAD_IDX,
)

In [0]:
# NOTE(review): the recorded run of this cell ended in an AttributeError whose
# cause is not visible here — verify that model_path points at a state dict
# saved by train() before relying on this cell.
criterion = nn.CrossEntropyLoss()
checkpoint_state = torch.load(model_path, map_location=device)
model.load_state_dict(checkpoint_state)
model, criterion = model.to(device), criterion.to(device)

AttributeError: ignored

In [0]:
def get_score_test(probabilities, docid_claimid_sentno, org_sentence, test_path,
                   output_path=None, top_k=5):
    """Rank the candidate sentences of every test claim and write them as JSON lines.

    Args:
        probabilities: One ranking score per candidate (higher is better),
            aligned index-for-index with the next two arguments.
        docid_claimid_sentno: Strings of the form
            ``"<doc_id>{#--#}<claim_id>{#--#}<sentence_no>"``.
        org_sentence: Original (untokenised) text of every candidate.
        test_path: JSON-lines file of the original test claims, providing
            ``id`` and ``claim``. It used to be ignored in favour of the global
            ``dev_path``. Note that this is the claims file, not the processed CSV.
        output_path: Destination of the JSON-lines output; defaults to the
            module-level ``sen_pred_test_path``.
        top_k: Number of best-ranked candidates kept per claim. Defaults to 5,
            the value that used to be hard-coded; ``None`` keeps all of them.

    Returns:
        ``(file_data, fever_data)``. ``file_data`` holds one dict per claim with
        the keys ``id``, ``claim``, ``sentences``, ``page_ids`` and ``indices``.
        ``fever_data`` is always an empty list, because the test split has no
        gold labels; it is returned only to mirror ``get_score``. Claims without
        any candidate are omitted.
    """
    if output_path is None:
        output_path = sen_pred_test_path

    org_test_data = pd.read_json(test_path, lines=True)

    # Group the candidates by claim id. Each candidate is stored as
    # [probability, sentence_no, original_sentence, doc_id].
    claim_dict = dict()
    for key, probability, sentence in zip(docid_claimid_sentno, probabilities, org_sentence):
        doc_id, claim_id, sentno = key.split("{#--#}")
        claim_dict.setdefault(int(claim_id), []).append([probability, sentno, sentence, doc_id])

    file_data = []
    fever_data = []
    prob_count = 0
    for claim_id, claim in zip(org_test_data['id'], org_test_data['claim']):
        claim_id = int(claim_id)
        candidates = claim_dict.get(claim_id)
        if candidates is None:
            # No candidate sentence was scored for this claim: nothing to rank.
            continue

        prob_count += len(candidates)
        # sorted() is stable, so candidates with equal scores keep their input order.
        ranked = sorted(candidates, key=lambda candidate: candidate[0], reverse=True)[:top_k]

        # for RTE .jsonl file
        file_data.append({
            'id': claim_id,
            'claim': claim,
            'sentences': [candidate[2] for candidate in ranked],
            'page_ids': [candidate[3] for candidate in ranked],
            'indices': [candidate[1] for candidate in ranked],
        })

    pd.DataFrame(file_data).to_json(output_path, orient='records', lines=True)
    print('prob_count', prob_count)
    return file_data, fever_data

In [0]:
# NOTE(review): the test split has no `sent_label` column, `test_path` is the
# processed CSV rather than the JSON-lines claims file, and `get_score_test` is
# never invoked by this call — confirm that `evaluate` handles unlabeled batches
# and is routed to the test scorer before relying on this cell.
test_file_data, test_fever_data = evaluate(model, test_iterator, test_path)
# test_fever_val, test_accuracy, test_precision, test_recall, f1score = test_fever_score(fever_data)
# print(f'Fever Score: {fever_val} | Accuracy: {accuracy}')
# print(f'Precision: {precision} | Recall: {recall} | F1Score: {f1score}')