In [1]:
import os
import sys

In [3]:
# Make the local ``model_interface`` directory importable. The membership
# check keeps the cell idempotent: re-running it no longer appends the same
# entry to sys.path again.
if "./model_interface" not in sys.path:
    sys.path.append("./model_interface")

In [5]:
from model_interface.model import FakeNewsDetector

In [53]:
import torch
import torch.nn as nn
import re
import numpy as np
from transformers import BertTokenizer,BertModel


class BERTModel(nn.Module):
    """BERT encoder followed by dropout and a linear classification head.

    The encoder is loaded from the pretrained ``bert-base-uncased``
    checkpoint. Its second output (the pooled sentence representation, per
    the Transformers ``BertModel`` documentation) is passed through dropout
    and projected to ``num_outputs`` logits.

    Args:
        num_outputs: Number of logits produced by the final linear layer.
            Defaults to 6, the value that was previously hard-coded.
        dropout: Dropout probability applied before the linear layer.
            Defaults to 0.2, the value that was previously hard-coded.
    """

    def __init__(self, num_outputs=6, dropout=0.2):
        super().__init__()
        self.bert = BertModel.from_pretrained("bert-base-uncased")
        self.dropout = nn.Dropout(dropout)
        # Take the input width from the encoder config instead of repeating
        # the literal 768 (the hidden size of bert-base-uncased).
        self.out = nn.Linear(self.bert.config.hidden_size, num_outputs)

    def forward(self, ids, mask, token_type_ids):
        """Return raw (un-normalised) logits for a batch of encoded inputs.

        Args:
            ids: Token id tensor passed to the encoder as its input ids.
            mask: Attention mask tensor passed as ``attention_mask``.
            token_type_ids: Segment id tensor passed as ``token_type_ids``.

        Returns:
            Tensor of logits with ``num_outputs`` values per input row.
        """
        encoded = self.bert(ids, attention_mask=mask, token_type_ids=token_type_ids)
        # Index instead of tuple-unpacking: newer Transformers releases return
        # a dict-like ModelOutput, and unpacking that yields its *keys*
        # (strings) rather than tensors. Integer indexing works for both the
        # legacy tuple and ModelOutput.
        pooled = encoded[1]
        return self.out(self.dropout(pooled))

class FakeNewsDetector():
    """CPU inference wrapper around a fine-tuned ``BERTModel`` checkpoint.

    Loads the weights stored at ``model_path``, builds the matching
    ``bert-base-uncased`` tokenizer, and exposes :meth:`verifyClaim` to score
    claim/reference pairs.

    Note: this notebook-local class shadows the ``FakeNewsDetector`` imported
    from ``model_interface.model`` earlier in the notebook.

    NOTE(review): ``self.labels`` names 4 classes, while ``BERTModel`` emits
    6 logits, so predicted indices 4 and 5 have no label here. Confirm the
    label set against the training code before mapping indices to names.
    """

    def __init__(self, model_path):
        """Load the model weights and tokenizer.

        Args:
            model_path: Path to a state-dict file accepted by ``torch.load``.
        """
        self.model_path = model_path
        self.model = BERTModel()
        print("loading model")
        # map_location forces the tensors onto the CPU regardless of where
        # they were saved. torch.load unpickles the file, so only load
        # checkpoints from a trusted source.
        state_dict = torch.load(model_path, map_location=torch.device('cpu'))
        self.model.load_state_dict(state_dict)
        # This wrapper is inference-only: disable dropout so that repeated
        # calls on the same input give the same scores.
        self.model.eval()
        print("model loaded")
        self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased',
                                                       do_lower_case=True)

        self.labels = {0:'disagree', 1:'agree', 2:'discuss', 3:'unrelated'}
        self.num_classes = len(self.labels)
        self.max_len = 512
        print("init done")

    def verifyClaim(self, claim, reference):
        """Score one or more claim/reference pairs.

        Args:
            claim: Claim text(s); passed to the tokenizer as the second
                sequence of each pair.
            reference: Reference text(s); passed to the tokenizer as the
                first sequence of each pair.

        Returns:
            Tuple ``(predicted_classes, confidence_scores)`` of NumPy arrays:
            the argmax index over the softmax of the logits, and the
            corresponding maximum probability, one entry per pair.
        """
        # Encode the pairs as a single batch, padded to the longest pair and
        # truncated to self.max_len tokens.
        encoded_data = self.tokenizer(
            reference,
            claim,
            add_special_tokens=True,
            max_length=self.max_len,
            truncation=True,
            padding="longest",
            return_tensors="pt",
        )

        ids = encoded_data['input_ids']
        token_type_ids = encoded_data['token_type_ids']
        mask = encoded_data['attention_mask']

        # Guard against the model having been switched back to train mode,
        # and skip autograd bookkeeping: no gradients are needed here.
        self.model.eval()
        with torch.no_grad():
            outputs = self.model(ids, mask, token_type_ids)

        # Shape trace kept from the original notebook output.
        print(outputs.shape)

        # Convert logits to probabilities over the last dimension.
        probabilities = torch.softmax(outputs, dim=-1).cpu().numpy()

        predicted_classes = np.argmax(probabilities, axis=-1)
        confidence_scores = np.max(probabilities, axis=-1)

        return predicted_classes, confidence_scores




In [54]:
# Location of the fine-tuned checkpoint, relative to the working directory.
path = os.path.join(
    "./Resources",
    "model",
    "finetuned_BERT_epoch_5.pt",
)

In [55]:
# Build the detector from the checkpoint; the constructor prints progress
# messages while it loads the weights and the tokenizer.
F = FakeNewsDetector(model_path=path)

loading model
model loaded
init done


In [56]:
# Smoke test on a single claim/reference pair; the cell displays the returned
# (predicted class indices, confidence scores) tuple.
F.verifyClaim(
    claim=["Hello I am good"],
    reference=["I am not good"],
)

torch.Size([1, 6])


(array([2], dtype=int64), array([0.9051874], dtype=float32))

In [27]:
# Standalone lower-casing tokenizer for trying out batch encoding by hand.
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased", do_lower_case=True)

In [36]:
# Encode two sentence pairs as one batch and inspect the input-id shape;
# padding="longest" pads every pair to the longest pair in the batch.
tokenizer(
    ["Hello I am good", "make good"],
    ["I am not good", "not so much"],
    return_tensors="pt",
    padding="longest",
)["input_ids"].shape

torch.Size([2, 11])