In [1]:
import tensorflow as tf
import numpy as np
from transformers import AutoTokenizer
from modeling.multi_output_distilbert import TFDistilBertForFakeNewsClassification

In [2]:
# Load the tokenizer from the relative './model' path (the same path the
# classifier is loaded from later in this notebook).
tokenizer = AutoTokenizer.from_pretrained('./model')

In [3]:
# Tokenize one sample claim as a batch of size one. The text is truncated and
# padded to exactly 512 tokens and returned as TensorFlow tensors.
tokens = tokenizer(
    [
        'Last year a mysterious shipment was caught smuggling Coronavirus from Canada. It was traced to Chinese agents working at a Canadian lab. Subsequent investigation by GreatGameIndia linked the agents to Chinese Biological Warfare Program from where the virus is suspected to have leaked causing the Wuhan Coronavirus outbreak.'
    ],
    truncation=True,
    max_length=512,
    padding='max_length',
    return_tensors="tf",
)

In [7]:
# Load the classifier from './model', configured with 3 aggregate labels and
# 8 category labels.
# NOTE(review): the recorded output of this cell ends in
# "InternalError: Blas GEMM launch failed"; presumably a GPU/runtime problem in
# that session rather than a code error -- confirm by re-running on a fresh kernel.
model = TFDistilBertForFakeNewsClassification.from_pretrained(
    './model',
    num_labels_aggregate=3,
    num_labels_category=8,
)

num aggregate labels:  3
num category labels:  8


InternalError: Blas GEMM launch failed : a.shape=(15, 768), b.shape=(768, 768), m=15, n=768, k=768 [Op:MatMul]

In [None]:
# Run the tokenized sample through the model and keep the result for inspection.
output = model(tokens)

In [None]:
# Display the raw model output.
output

In [7]:
# Show the logits stored under the 'aggregate' key; the recorded output is a
# float32 tensor of shape (1, 3).
output.logits['aggregate']

<tf.Tensor: shape=(1, 3), dtype=float32, numpy=array([[-1.3524224, -1.2631007,  1.9145572]], dtype=float32)>

In [65]:
# Label names used to translate predicted indices into readable strings.
aggregates = ['reliable', 'mixed', 'unreliable']
categories = [
    'conspiracy_pseudoscience',
    'least_biased',
    'left_bias',
    'left_center_bias',
    'questionable_source',
    'right_bias',
    'right_center_bias',
    'NA',
]


def run(txt):
    """Classify ``txt`` and return the predicted labels with their probabilities.

    The text is tokenized (truncated and padded to 512 tokens), passed through
    the notebook-level ``model``, and the two logits entries are turned into
    probabilities with a softmax over the last axis.

    Args:
        txt: Input passed straight to the notebook-level ``tokenizer``.

    Returns:
        A 4-tuple ``(aggregate_label, category_label, aggregate_probs,
        category_probs)``. Labels are looked up in ``aggregates`` and
        ``categories``. Only the first row of the batch is reported.

    NOTE(review): this function indexes ``output.logits`` positionally
    (``[0]`` and ``[1]``), while another cell in this notebook reads
    ``output.logits['aggregate']``. Confirm that position 0 / 1 correspond to
    the aggregate / category outputs for the model class currently in use.
    """
    encoded = tokenizer(
        txt,
        truncation=True,
        max_length=512,
        padding='max_length',
        return_tensors="tf",
    )
    logits = model(encoded).logits

    aggregate_probs = tf.nn.softmax(logits[0], axis=-1)
    category_probs = tf.nn.softmax(logits[1], axis=-1)

    # Index of the most probable class for the first example in the batch.
    aggregate_idx = tf.argmax(aggregate_probs, axis=1)[0].numpy()
    category_idx = tf.argmax(category_probs, axis=1)[0].numpy()

    return (
        aggregates[aggregate_idx],
        categories[category_idx],
        aggregate_probs[0],
        category_probs[0],
    )

In [70]:
# Classify an excerpt of a published conversation on globalization.
# NOTE(review): the recorded output labels this text 'unreliable' /
# 'conspiracy_pseudoscience'; worth sanity-checking whether that is expected.
run('''
Editor's Note: Thomas L. Friedman, author, political commentator and weekly columnist for The New York Times, and Wang Huiyao, president of the Center for China and Globalization, discussed online the future of globalization and China-US relations on March 29. Following are excerpts from their conversation:

Wang: How do we view the new trend of globalization?

Friedman: So, the world today, actually, is flatter than ever. We have never connected more different nodes than we have today.

The world isn't just flat now. It's fragile. It's fragile because when you connect so many nodes, and then you speed up the connection between those nodes and you take the buffers out, you get fragility.

Wang: I think globalization is accelerating to some extent, thanks to technology. But the flow of capital and goods, and the flow of talents all have actually become faster and more voluminous than before. What do you think about the future trend?

Friedman: The world is fast, fused, deep and open.

When I say the world is fast now, what I mean is that there's been a change in the pace of change.

Second, the world isn't just flat now, it's fused. We're not just interconnected, we're now interdependent. We're fused by technology and by climate.

Third, the world's gotten deep. Deep is the most important word of this era. Because what we've done now is that we (have) put sensors everywhere. Now our knowledge of that is deep. It's very deep. That's why this word deep. We had to coin a new adjective－deep state, deep mind, deep medicine, deep research, deep fake－to describe the fact that this is going deep inside of me. I can sit here right now in Washington and look at publicly available satellite pictures of different parts of China from Google Earth, from the European space satellite.
''')

('unreliable',
 'conspiracy_pseudoscience',
 <tf.Tensor: shape=(3,), dtype=float32, numpy=array([0.00553261, 0.3343599 , 0.6601075 ], dtype=float32)>,
 <tf.Tensor: shape=(8,), dtype=float32, numpy=
 array([6.6673946e-01, 3.5311859e-05, 4.3421324e-02, 6.3523836e-03,
        2.2519405e-03, 9.6690387e-04, 2.8023264e-01, 9.8148383e-22],
       dtype=float32)>)

In [40]:
from datasets import load_from_disk

In [41]:
# Load the dataset previously saved on disk under './datasets/NELA'.
data = load_from_disk('./datasets/NELA')

In [54]:
# Inspect a single record (index 8) of the 'train' entry; the recorded output
# shows the fields id, date, source, title and content.
data['train'][8]

{'id': 'zerohedge--2019-12-30--The European Auto Industry Is Racing To Ditch Diesel',
 'date': '2019-12-30',
 'source': 'zerohedge',
 'title': 'The European Auto Industry Is Racing To Ditch Diesel',
 'content': 'As if the downturn due to a trade-war-induced slowdown in China were not enough , the European automotive industry is facing the challenge of a rapid switch from diesel to petrol engines that has been gathering pace for the last two years .\nAt the same time , the industry has also had to deal with the implementation of new legislation designed to reduce car makers ’ overall fleet emission levels .\nAn article in the Financial Times explains the impact of the new legislation on Europe ’ s automakers , an industry that supports some 14 million workers across @ @ @ @ @ @ @ an auto analyst at Bernstein , the article says each carmaker faces its own CO2 target based on the weight of its vehicles .\nA business selling smaller cars , such as PSA , therefore has a lower CO2 target tha