In [4]:
# from google.colab import files
# uploaded = files.upload()

In [5]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
import torch.utils.data as data_utils
import torch.optim as optim
import gc #garbage collector for gpu memory 
from tqdm import tqdm
from torchsummary import summary

In [None]:
# Preprocessing
#libraries
import matplotlib.pyplot as plt
import re

train = pd.read_csv('/content/train.csv').fillna(' ')
valid = pd.read_csv('/content/val.csv').fillna(' ')

In [7]:
# Create a list of punctuation marks
puncts = [',', '.', '"', ':', ')', '(', '-', '!', '?', '|', ';', "'", '$', '&', '/', '[', ']', '>', '%', '=', '#', '*', '+', '\\', '•',  '~', '@', '£', 
 '·', '_', '{', '}', '©', '^', '®', '`',  '<', '→', '°', '€', '™', '›',  '♥', '←', '×', '§', '″', '′', 'Â', '█', '½', 'à', '…', 
 '“', '★', '”', '–', '●', 'â', '►', '−', '¢', '²', '¬', '░', '¶', '↑', '±', '¿', '▾', '═', '¦', '║', '―', '¥', '▓', '—', '‹', '─', 
 '▒', '：', '¼', '⊕', '▼', '▪', '†', '■', '’', '▀', '¨', '▄', '♫', '☆', 'é', '¯', '♦', '¤', '▲', 'è', '¸', '¾', 'Ã', '⋅', '‘', '∞', 
 '∙', '）', '↓', '、', '│', '（', '»', '，', '♪', '╩', '╚', '³', '・', '╦', '╣', '╔', '╗', '▬', '❤', 'ï', 'Ø', '¹', '≤', '‡', '√','#']

# Code to replace punctuations with whitespaces
def clean_text(x):
    x = str(x)
    for punct in puncts:
        if punct in x:
            x = x.replace(punct, ' ')
    return x

# Cleaning URLs, twitter user_handles, punctuations, whitespaces and converting to lowercase
# Training  Data
train['tweet'] = train['tweet'].apply(lambda x: re.sub(r'http\S+', '', x))
train['tweet'] = train['tweet'].apply(lambda x: re.sub("([^0-9A-Za-z \t])|(\w+:\/\/\S+)", '', x))
train['tweet'] = train['tweet'].apply(lambda x: clean_text(x))
# train['tweet'] = train['tweet'].str.lower()
# train['tweet'] = train['tweet'].apply(lambda x: x.split())

print(train.head())

# Validation  Data
valid['tweet'] = valid['tweet'].apply(lambda x: re.sub(r'http\S+', '', x))
valid['tweet'] = valid['tweet'].apply(lambda x: re.sub("([^0-9A-Za-z \t])|(\w+:\/\/\S+)*", '', x))
valid['tweet'] = valid['tweet'].apply(lambda x: clean_text(x))
# valid['tweet'] = valid['tweet'].str.lower()
# valid['tweet'] = valid['tweet'].apply(lambda x: x.split())


print(valid.head())

   id                                              tweet label
0   1  The CDC currently reports 99031 deaths In gene...  real
1   2  States reported 1121 deaths a small rise from ...  real
2   3  Politically Correct Woman Almost Uses Pandemic...  fake
3   4  IndiaFightsCorona We have 1524 COVID testing l...  real
4   5  Populous states can generate large case counts...  real
   id                                              tweet label
0   1  Chinese converting to Islam after realising th...  fake
1   2  11 out of 13 people from the Diamond Princess ...  fake
2   3  COVID19 Is Caused By A Bacterium Not Virus And...  fake
3   4  Mike Pence in RNC speech praises Donald Trumps...  fake
4   5  610 Skys EdConwaySky explains the latest COVID...  real


#### BERT package (transformers)

In [8]:
%%capture
!pip install transformers~=2.11.0
# !pip install transformers

#### BERT PyTorch

In [9]:
%%capture
from transformers import BertForSequenceClassification, BertTokenizer
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [10]:
train.columns = ['id','tweet','label']
del train['id']

#### This is a preview of the data once the irrelevant columns have been removed. 

In [11]:
train.head(10)

Unnamed: 0,tweet,label
0,The CDC currently reports 99031 deaths In gene...,real
1,States reported 1121 deaths a small rise from ...,real
2,Politically Correct Woman Almost Uses Pandemic...,fake
3,IndiaFightsCorona We have 1524 COVID testing l...,real
4,Populous states can generate large case counts...,real
5,Covid Act Now found on average each person in ...,real
6,If you tested positive for COVID19 and have no...,real
7,Obama Calls Trumps Coronavirus Response A Chao...,fake
8,Clearly the Obama administration did not leave...,fake
9,RetractionHydroxychloroquine or chloroquine wi...,fake


#### The transformers package comes with a tokenizer for each model. We'll use the BERT tokenizer here and a BERT base model where the text isn't modified for case.

In [12]:
tokenizer = BertTokenizer.from_pretrained('bert-base-cased')

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=213450.0, style=ProgressStyle(descripti…




#### Tokenizing the data so that each sentence is split into words and symbols. Also '[CLS]' and '[SEP]' to the beginning and end of every article.

In [13]:
tokenized_df = list(map(lambda t: ['[CLS]'] + tokenizer.tokenize(t)[:510] + ['[SEP]'], train['tweet']))
print(tokenized_df[4])

['[CLS]', 'Pop', '##ulous', 'states', 'can', 'generate', 'large', 'case', 'counts', 'but', 'if', 'you', 'look', 'at', 'the', 'new', 'cases', 'per', 'million', 'today', '9', 'smaller', 'states', 'are', 'showing', 'more', 'cases', 'per', 'million', 'than', 'California', 'or', 'Texas', 'AL', 'AR', 'ID', 'K', '##S', 'K', '##Y', 'LA', 'MS', 'N', '##V', 'and', 'SC', '[SEP]']


#### The max input length for a BERT algorithm is 512, so we'll have to pad each article to this length or cut it short.

In [14]:
totalpadlength = 512

#### We need to get the index for each token so that we can map them to be put in a matrix embedding.

In [15]:
indexed_tokens = list(map(tokenizer.convert_tokens_to_ids, tokenized_df))

In [16]:
index_padded = np.array([xi+[0]*(totalpadlength-len(xi)) for xi in indexed_tokens])
print(index_padded)

[[  101  1109  2891 ...     0     0     0]
 [  101  1311  2103 ...     0     0     0]
 [  101  6679  1193 ...     0     0     0]
 ...
 [  101   138  2112 ...     0     0     0]
 [  101   138 11787 ...     0     0     0]
 [  101  1135  1144 ...     0     0     0]]


#### Setting up an array with the binary target variable values
* 0 = FAKE
* 1 = REAL

In [17]:
target_variable = train['label'].values
target_variable = (target_variable=='real').astype(int)

#### The BERT algorithm relies on masking to help it learn and to prevent overfitting, so we'll add this to the model.

In [18]:
mask_variable = [[float(i>0) for i in ii] for ii in index_padded]

#### This loads the data into train and test dataloaders

In [19]:
BATCH_SIZE = 14
def format_tensors(text_data, mask, labels, batch_size):
    X = torch.from_numpy(text_data)
    X = X.long()
    mask = torch.tensor(mask)
    y = torch.from_numpy(labels)
    y = y.long()
    tensordata = data_utils.TensorDataset(X, mask, y)
    loader = data_utils.DataLoader(tensordata, batch_size=batch_size, shuffle=False)
    return loader
 
X_train, X_test, y_train, y_test = train_test_split(index_padded, target_variable, 
                                                    test_size=2, random_state=42)
 
train_masks, test_masks, _, _ = train_test_split(mask_variable, index_padded, 
                                                       test_size=2, random_state=42)
print(y_train)
trainloader = format_tensors(X_train, train_masks, y_train,BATCH_SIZE)
testloader = format_tensors(X_test, test_masks, y_test, BATCH_SIZE)

[1 0 1 ... 1 0 1]


#### This is a sample batch from the trainloader. The first tensor contains the embeddings for the articles, the second tensor contains the masking information, and the third tensor contains the target variables for each article.

In [20]:
next(iter(trainloader))

[tensor([[  101, 23599,  1164,  ...,     0,     0,     0],
         [  101,  1726,  3002,  ...,     0,     0,     0],
         [  101, 26982,  6926,  ...,     0,     0,     0],
         ...,
         [  101, 18732, 23314,  ...,     0,     0,     0],
         [  101,  3497,   140,  ...,     0,     0,     0],
         [  101,  1109,  1993,  ...,     0,     0,     0]]),
 tensor([[1., 1., 1.,  ..., 0., 0., 0.],
         [1., 1., 1.,  ..., 0., 0., 0.],
         [1., 1., 1.,  ..., 0., 0., 0.],
         ...,
         [1., 1., 1.,  ..., 0., 0., 0.],
         [1., 1., 1.,  ..., 0., 0., 0.],
         [1., 1., 1.,  ..., 0., 0., 0.]]),
 tensor([1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1])]

#### This is a BERT base-cased model with 12 BERT transformer layers, 768 hidden layers, 12 heads, 110M parameters, and is pre-trained on cased English text.


In [21]:
model = BertForSequenceClassification.from_pretrained('bert-base-cased')
model

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=433.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=435779157.0, style=ProgressStyle(descri…




BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(28996, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, element

#### Function to compute the accuracy after each epoch

In [22]:
def compute_accuracy(model, dataloader, device):
    tqdm()
    model.eval()
    correct_preds, num_samples = 0,0
    with torch.no_grad():
        for i, batch in enumerate(tqdm(dataloader)):
            token_ids, masks, labels = tuple(t.to(device) for t in batch)
            _, yhat = model(input_ids=token_ids, attention_mask=masks, labels=labels)
            prediction = (torch.sigmoid(yhat[:,1]) > 0.5).long()
            num_samples += labels.size(0)
            correct_preds += (prediction==labels.long()).sum()
            del token_ids, masks, labels #memory
        torch.cuda.empty_cache() #memory
        gc.collect() # memory
        return correct_preds.float()/num_samples*100

#### Since BERT is pre-trained, we keep the learning rate low and only perform a few epochs. This prevents it from overfitting.

In [23]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
torch.cuda.empty_cache() #memory
gc.collect() #memory
NUM_EPOCHS = 5
loss_function = nn.BCEWithLogitsLoss()
losses = []
model.to(device)
optimizer = optim.Adam(model.parameters(), lr=3e-6)
for epoch in range(NUM_EPOCHS):
    model.train()
    running_loss = 0.0
    iteration = 0
    for i, batch in enumerate(trainloader):
        iteration += 1
        token_ids, masks, labels = tuple(t.to(device) for t in batch)
        optimizer.zero_grad()
        loss, yhat = model(input_ids=token_ids, attention_mask=masks, labels=labels)
        loss.backward()
        optimizer.step()
        running_loss += float(loss.item())
        del token_ids, masks, labels #memory
    
        if not i%25:
            print(f'Epoch: {epoch+1:03d}/{NUM_EPOCHS:03d} | '
                  f'Batch {i+1:03d}/{len(trainloader):03d} | '
                  f'Average Loss in last {iteration} iteration(s): {(running_loss/iteration):.4f}')
            running_loss = 0.0
            iteration = 0
        torch.cuda.empty_cache() #memory
        gc.collect() #memory
        losses.append(float(loss.item()))
    with torch.set_grad_enabled(False):
        print(f'\nTraining Accuracy: '
              f'{compute_accuracy(model, trainloader, device):.2f}%')
        


Epoch: 001/005 | Batch 001/459 | Average Loss in last 1 iteration(s): 0.6478
Epoch: 001/005 | Batch 026/459 | Average Loss in last 25 iteration(s): 0.6785
Epoch: 001/005 | Batch 051/459 | Average Loss in last 25 iteration(s): 0.6554
Epoch: 001/005 | Batch 076/459 | Average Loss in last 25 iteration(s): 0.6037
Epoch: 001/005 | Batch 101/459 | Average Loss in last 25 iteration(s): 0.5727
Epoch: 001/005 | Batch 126/459 | Average Loss in last 25 iteration(s): 0.5143
Epoch: 001/005 | Batch 151/459 | Average Loss in last 25 iteration(s): 0.4821
Epoch: 001/005 | Batch 176/459 | Average Loss in last 25 iteration(s): 0.4538
Epoch: 001/005 | Batch 201/459 | Average Loss in last 25 iteration(s): 0.4244
Epoch: 001/005 | Batch 226/459 | Average Loss in last 25 iteration(s): 0.3600
Epoch: 001/005 | Batch 251/459 | Average Loss in last 25 iteration(s): 0.3359
Epoch: 001/005 | Batch 276/459 | Average Loss in last 25 iteration(s): 0.3406
Epoch: 001/005 | Batch 301/459 | Average Loss in last 25 iteratio

0it [00:00, ?it/s]
100%|██████████| 459/459 [01:50<00:00,  4.16it/s]



Training Accuracy: 92.35%
Epoch: 002/005 | Batch 001/459 | Average Loss in last 1 iteration(s): 0.4803
Epoch: 002/005 | Batch 026/459 | Average Loss in last 25 iteration(s): 0.2693
Epoch: 002/005 | Batch 051/459 | Average Loss in last 25 iteration(s): 0.2487
Epoch: 002/005 | Batch 076/459 | Average Loss in last 25 iteration(s): 0.2555
Epoch: 002/005 | Batch 101/459 | Average Loss in last 25 iteration(s): 0.2030
Epoch: 002/005 | Batch 126/459 | Average Loss in last 25 iteration(s): 0.1871
Epoch: 002/005 | Batch 151/459 | Average Loss in last 25 iteration(s): 0.1795
Epoch: 002/005 | Batch 176/459 | Average Loss in last 25 iteration(s): 0.2314
Epoch: 002/005 | Batch 201/459 | Average Loss in last 25 iteration(s): 0.2151
Epoch: 002/005 | Batch 226/459 | Average Loss in last 25 iteration(s): 0.1575
Epoch: 002/005 | Batch 251/459 | Average Loss in last 25 iteration(s): 0.1687
Epoch: 002/005 | Batch 276/459 | Average Loss in last 25 iteration(s): 0.1846
Epoch: 002/005 | Batch 301/459 | Avera

0it [00:00, ?it/s]
100%|██████████| 459/459 [01:50<00:00,  4.16it/s]



Training Accuracy: 96.29%
Epoch: 003/005 | Batch 001/459 | Average Loss in last 1 iteration(s): 0.2610
Epoch: 003/005 | Batch 026/459 | Average Loss in last 25 iteration(s): 0.1629
Epoch: 003/005 | Batch 051/459 | Average Loss in last 25 iteration(s): 0.1577
Epoch: 003/005 | Batch 076/459 | Average Loss in last 25 iteration(s): 0.1583
Epoch: 003/005 | Batch 101/459 | Average Loss in last 25 iteration(s): 0.1610
Epoch: 003/005 | Batch 126/459 | Average Loss in last 25 iteration(s): 0.1048
Epoch: 003/005 | Batch 151/459 | Average Loss in last 25 iteration(s): 0.1053
Epoch: 003/005 | Batch 176/459 | Average Loss in last 25 iteration(s): 0.1391
Epoch: 003/005 | Batch 201/459 | Average Loss in last 25 iteration(s): 0.1587
Epoch: 003/005 | Batch 226/459 | Average Loss in last 25 iteration(s): 0.0931
Epoch: 003/005 | Batch 251/459 | Average Loss in last 25 iteration(s): 0.1254
Epoch: 003/005 | Batch 276/459 | Average Loss in last 25 iteration(s): 0.1272
Epoch: 003/005 | Batch 301/459 | Avera

0it [00:00, ?it/s]
100%|██████████| 459/459 [01:50<00:00,  4.16it/s]



Training Accuracy: 98.40%
Epoch: 004/005 | Batch 001/459 | Average Loss in last 1 iteration(s): 0.3167
Epoch: 004/005 | Batch 026/459 | Average Loss in last 25 iteration(s): 0.1009
Epoch: 004/005 | Batch 051/459 | Average Loss in last 25 iteration(s): 0.0914
Epoch: 004/005 | Batch 076/459 | Average Loss in last 25 iteration(s): 0.1037
Epoch: 004/005 | Batch 101/459 | Average Loss in last 25 iteration(s): 0.0999
Epoch: 004/005 | Batch 126/459 | Average Loss in last 25 iteration(s): 0.0686
Epoch: 004/005 | Batch 151/459 | Average Loss in last 25 iteration(s): 0.0580
Epoch: 004/005 | Batch 176/459 | Average Loss in last 25 iteration(s): 0.0672
Epoch: 004/005 | Batch 201/459 | Average Loss in last 25 iteration(s): 0.1113
Epoch: 004/005 | Batch 226/459 | Average Loss in last 25 iteration(s): 0.0559
Epoch: 004/005 | Batch 251/459 | Average Loss in last 25 iteration(s): 0.0604
Epoch: 004/005 | Batch 276/459 | Average Loss in last 25 iteration(s): 0.0941
Epoch: 004/005 | Batch 301/459 | Avera

0it [00:00, ?it/s]
100%|██████████| 459/459 [01:50<00:00,  4.16it/s]



Training Accuracy: 99.25%
Epoch: 005/005 | Batch 001/459 | Average Loss in last 1 iteration(s): 0.2976
Epoch: 005/005 | Batch 026/459 | Average Loss in last 25 iteration(s): 0.0621
Epoch: 005/005 | Batch 051/459 | Average Loss in last 25 iteration(s): 0.0447
Epoch: 005/005 | Batch 076/459 | Average Loss in last 25 iteration(s): 0.0706
Epoch: 005/005 | Batch 101/459 | Average Loss in last 25 iteration(s): 0.0626
Epoch: 005/005 | Batch 126/459 | Average Loss in last 25 iteration(s): 0.0397
Epoch: 005/005 | Batch 151/459 | Average Loss in last 25 iteration(s): 0.0419
Epoch: 005/005 | Batch 176/459 | Average Loss in last 25 iteration(s): 0.0639
Epoch: 005/005 | Batch 201/459 | Average Loss in last 25 iteration(s): 0.0702
Epoch: 005/005 | Batch 226/459 | Average Loss in last 25 iteration(s): 0.0388
Epoch: 005/005 | Batch 251/459 | Average Loss in last 25 iteration(s): 0.0323
Epoch: 005/005 | Batch 276/459 | Average Loss in last 25 iteration(s): 0.0606
Epoch: 005/005 | Batch 301/459 | Avera

0it [00:00, ?it/s]
100%|██████████| 459/459 [01:50<00:00,  4.16it/s]



Training Accuracy: 99.67%


#### Testing the final model on the test set

#### Validation set preprocessing

In [24]:
valid.columns = ['id', 'tweet', 'label']
del valid['id']
tokenized_df_val = list(map(lambda t: ['[CLS]'] + tokenizer.tokenize(t)[:510] + ['[SEP]'], valid['tweet']))
indexed_tokens_val = list(map(tokenizer.convert_tokens_to_ids, tokenized_df_val))
index_padded_val = np.array([xi+[0]*(totalpadlength-len(xi)) for xi in indexed_tokens_val])
target_variable_val = valid['label'].values
target_variable_val = (target_variable_val=='real').astype(int)
mask_variable_val = [[float(i>0) for i in ii] for ii in index_padded_val]
 
BATCH_SIZE = 16
def format_tensors(text_data, mask, labels, batch_size):
    X = torch.from_numpy(text_data)
    X = X.long()
    mask = torch.tensor(mask)
    y = torch.from_numpy(labels)
    y = y.long()
    tensordata = data_utils.TensorDataset(X, mask, y)
    loader = data_utils.DataLoader(tensordata, batch_size=batch_size, shuffle=False)
    return loader
 
X_train, X_test, y_train, y_test = train_test_split(index_padded_val, target_variable_val, 
                                                    test_size=0.99, random_state=42)
 
train_masks, test_masks, _, _ = train_test_split(mask_variable_val, index_padded_val, 
                                                       test_size=0.99, random_state=42)
print(y_train)
trainloader = format_tensors(X_train, train_masks, y_train,BATCH_SIZE)
testloader = format_tensors(X_test, test_masks, y_test, BATCH_SIZE)
 
# print (testloader)
#print(valid)
# print (X_test.shape)

[0 1 1 1 1 0 1 1 0 1 1 0 0 0 0 0 0 0 0 1 1]


In [25]:
with torch.set_grad_enabled(False):
  print(f'\n\nTest Accuracy:'
  f'{compute_accuracy(model, testloader, device):.2f}%')

0it [00:00, ?it/s]
100%|██████████| 133/133 [00:37<00:00,  3.52it/s]




Test Accuracy:95.52%


#### We then do some error analysis by gathering the articles that were incorrectly predicted and analyzing the text of the articles.

In [26]:
test_predictions = torch.zeros((len(y_test),1))
test_predictions_percent = torch.zeros((len(y_test),1))
with torch.no_grad():
  for i, batch in enumerate(tqdm(testloader)):
    token_ids, masks, labels = tuple(t.to(device) for t in batch)
    _, yhat = model(input_ids=token_ids, attention_mask=masks, labels=labels)
    prediction = (torch.sigmoid(yhat[:,1]) > 0.5).long().view(-1,1)
    test_predictions[i*BATCH_SIZE:(i+1)*BATCH_SIZE] = prediction
    test_predictions_percent[i*BATCH_SIZE:(i+1)*BATCH_SIZE] = torch.sigmoid(yhat[:,1]).view(-1,1)

100%|██████████| 133/133 [00:38<00:00,  3.48it/s]


In [27]:
X_train_words, X_test_words, y_train_words, y_test_words = train_test_split(valid['tweet'], target_variable_val, 
                                                    test_size=0.99, random_state=42)

In [28]:
final_results = X_test_words.to_frame().reset_index(drop=True)
final_results['predicted'] = np.array(test_predictions.reshape(-1), dtype=int).tolist()
final_results['percent'] = np.array(test_predictions_percent.reshape(-1), dtype=float).tolist()
final_results['actual'] = y_test_words
wrong_results = final_results.loc[final_results['predicted']!=final_results['actual']].copy()


In [29]:
print("No. of test examples : ", X_test.shape[0])
print('Number of incorrectly classified articles:', len(wrong_results))

No. of test examples :  2119
Number of incorrectly classified articles: 95


#### This displays the incorrectly predicted articles along with the percent confidence the algorithm had in each instance. The threshold for classification is 50%. Instances closer to 100% are more confident it's real news and instances closer to 0% are more confident it's fake news.

In [30]:
wrong_results.loc[:,'text_short'] = wrong_results.loc[:,'tweet'].apply(lambda x: x[:500])
wrong_results.loc[:,('text_short', 'percent','predicted','actual')].style.set_properties(subset=['text_short'], **{'width': '1000px', 'white-space':'pre-wrap'})

Unnamed: 0,text_short,percent,predicted,actual
24,Coronavirus testing in the UK has been cobbled together by a government that doesnt really understand the science behind it all says profkarolsikora,0.3434,0,1
27,33 at the start was reasonably necessary But its much more difficult now the huge numbers of coronavirus cases have reduced significantly and theres little community transmission to argue you still need those borders shut Quote source,0.958168,1,0
40,Noel Gallagher says he refuses to wear a face mask while shopping as he goes against laws brought in during the pandemic to curb the spread of coronavirus,0.076028,0,1
142,WHO reports record daily increase in global coronavirus cases up over 292000,0.784781,1,0
147,BREAKING Families of NHS workers who die of covid19 to be offered 60k compensation or a couple of thousand sets of PPE that well have by then,0.914779,1,0
166,Whether countries with high rates of childhood vaccines were hit hardest by coronavirus,0.628757,1,0
183,ThePollDude ktwopines I always adhere to science The FDA stated that it was no longer reasonable to believe that hydroxychloroquine and chloroquine were effective in treating COVID19 and revoked their Emergency Use Authorization for these medications 1,0.821028,1,0
197,ZuckOff TakeItFromADoctor and a video with false claims on mask wearing and hydroxychloroquine,0.046024,0,1
209,If we stopped testing right now wed have very few cases if any,0.5778,1,0
231,Q How do I clean fruits and vegetables during COVID19 A Do NOT wash produce with soap disinfectant or any other chemical Rinse fruits and vegetables under cold running tap water Scrub uncut firm produce with a clean brush,0.296824,0,1
