In [21]:
import tensorflow as tf
import numpy as np
from transformers import AutoTokenizer
from modeling.multi_output_distilbert import TFDistilBertForFakeNewsClassification

In [5]:
# Load the tokenizer from the local ./model directory — the same path the
# classifier checkpoint is loaded from later in this notebook.
tokenizer = AutoTokenizer.from_pretrained('./model')

In [12]:
# Tokenize a single sample article as a batch of one. With truncation enabled
# and padding='max_length', the encoded sequence is cut or padded to
# max_length=512 tokens and returned as TensorFlow tensors.
tokens = tokenizer(
    ['Last year a mysterious shipment was caught smuggling Coronavirus from Canada. It was traced to Chinese agents working at a Canadian lab. Subsequent investigation by GreatGameIndia linked the agents to Chinese Biological Warfare Program from where the virus is suspected to have leaked causing the Wuhan Coronavirus outbreak.'],
    truncation=True,
    max_length=512,
    padding='max_length',
    return_tensors="tf",
)

In [8]:
# Load the classifier checkpoint from ./model. The two label counts match the
# lengths of the `aggregates` (3) and `categories` (8) label tables used by
# `run` further down.
model = TFDistilBertForFakeNewsClassification.from_pretrained(
    './model',
    num_labels_aggregate=3,
    num_labels_category=8,
)

num aggregate labels:  3
num category labels:  8
All model checkpoint layers were used when initializing TFDistilBertForFakeNewsClassification.

All the layers of TFDistilBertForFakeNewsClassification were initialized from the model checkpoint at ./model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFDistilBertForFakeNewsClassification for predictions without further training.


In [13]:
# Forward pass on the tokenized sample. `output.logits` is indexed with [0]
# and [1] elsewhere in this notebook (one entry per classification head).
output = model(tokens)

In [18]:
# Logits of the first head — the one `run` maps onto the `aggregates` labels.
# For the single sample above the displayed tensor has shape (1, 3).
output.logits[0]

<tf.Tensor: shape=(1, 3), dtype=float32, numpy=array([[-1.3524209, -1.2631007,  1.9145561]], dtype=float32)>

In [19]:
# Label tables for the two classification heads. List position is used as the
# class index when decoding predictions in `run`.
# NOTE(review): the ordering is assumed to match the label encoding used at
# training time — confirm against the training code.
aggregates = ['reliable', 'mixed', 'unreliable']
categories = ['conspiracy_pseudoscience', 'least_biased', 'left_bias', 'left_center_bias', 'questionable_source', 'right_bias', 'right_center_bias', 'NA']
def run(txt):
    """Classify a piece of text with the notebook-level `tokenizer` and `model`.

    Args:
        txt: Text to classify, passed straight to the tokenizer. If a list of
            texts is given, the whole batch is run through the model but only
            the prediction for the FIRST element is returned.

    Returns:
        A 2-tuple ``(aggregate_label, category_label)`` of strings taken from
        the module-level `aggregates` and `categories` lists.
    """
    # Padding to max_length is kept as in the original cell.
    # NOTE(review): dynamic padding would be cheaper for a single input, but
    # whether the custom model head is padding-invariant is not visible here.
    encoded = tokenizer(txt, truncation=True, max_length=512, padding='max_length', return_tensors="tf")
    prediction = model(encoded)

    # logits[0] feeds the aggregate head, logits[1] the category head.
    aggregate_logits = prediction.logits[0]
    category_logits = prediction.logits[1]

    # Softmax is monotonic, so argmax over the raw logits selects the same
    # class without the extra op (and without any float saturation ties).
    aggregate_idx = int(tf.argmax(aggregate_logits, axis=-1)[0].numpy())
    category_idx = int(tf.argmax(category_logits, axis=-1)[0].numpy())

    return aggregates[aggregate_idx], categories[category_idx]

In [55]:
# Probe the classifier with only the `@ @ @ @ @ @ @` filler run, which also
# appears inside the NELA article content displayed further down.
run('''@ @ @ @ @ @ @''')

('reliable', 'left_center_bias')

In [40]:
from datasets import load_from_disk

In [41]:
# Load the NELA dataset from its on-disk copy under ./datasets/NELA.
# It is indexed by split name below (e.g. data['train']).
data = load_from_disk('./datasets/NELA')

In [54]:
# Inspect a single training record (index 8). The displayed fields include
# id, date, source, title and content.
data['train'][8]

{'id': 'zerohedge--2019-12-30--The European Auto Industry Is Racing To Ditch Diesel',
 'date': '2019-12-30',
 'source': 'zerohedge',
 'title': 'The European Auto Industry Is Racing To Ditch Diesel',
 'content': 'As if the downturn due to a trade-war-induced slowdown in China were not enough , the European automotive industry is facing the challenge of a rapid switch from diesel to petrol engines that has been gathering pace for the last two years .\nAt the same time , the industry has also had to deal with the implementation of new legislation designed to reduce car makers ’ overall fleet emission levels .\nAn article in the Financial Times explains the impact of the new legislation on Europe ’ s automakers , an industry that supports some 14 million workers across @ @ @ @ @ @ @ an auto analyst at Bernstein , the article says each carmaker faces its own CO2 target based on the weight of its vehicles .\nA business selling smaller cars , such as PSA , therefore has a lower CO2 target tha