In [1]:
!pip install transformers datasets -q

In [23]:
import pandas as pd
import re
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from sklearn.model_selection import train_test_split
from datasets import Dataset
from sklearn.metrics import precision_recall_fscore_support, accuracy_score
import numpy as np

In [3]:
# Load data
df = pd.read_csv('/content/CL-II-MisinformationData - Sheet1.csv')
print("Original dataset shape:", df.shape)
df.head()

Original dataset shape: (10600, 2)


Unnamed: 0,tweet,label
0,The CDC currently reports 99031 deaths. In gen...,real
1,States reported 1121 deaths a small rise from ...,real
2,Politically Correct Woman (Almost) Uses Pandem...,fake
3,#IndiaFightsCorona: We have 1524 #COVID testin...,real
4,Populous states can generate large case counts...,real


In [4]:
# Train/Val/Test Split
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)
train_df, val_df = train_test_split(train_df, test_size=0.1, random_state=42)

print("Train size:", len(train_df))
print("Validation size:", len(val_df))
print("Test size:", len(test_df))

Train size: 7632
Validation size: 848
Test size: 2120


In [5]:
def preprocess_text(text):
    def remove_emojis(text):
        return re.sub(r'[^\x00-\x7F]+', '', text)
    def replace_urls(text):
        return re.sub(r'http\S+|www\S+', '<url>', text)
    def replace_mentions(text):
        return re.sub(r'@\w+', '<user>', text)
    def replace_hashtags(text):
        return re.sub(r'#\w+', lambda match: match.group(0)[1:], text)
    def remove_special_characters(text):
        text = re.sub(r'[^A-Za-z0-9\s<>#@url]', '', text)
        return text
    text = text.lower()

    text = remove_emojis(text)
    text = replace_urls(text)
    text = replace_mentions(text)
    text = replace_hashtags(text)
    text = remove_special_characters(text)

    return text.strip()
  # Apply preprocessing
train_df['tweet'] = train_df['tweet'].apply(preprocess_text)
val_df['tweet']   = val_df['tweet'].apply(preprocess_text)
test_df['tweet']  = test_df['tweet'].apply(preprocess_text)

# Check preprocessing results
print(train_df.head())

                                                  tweet label
821   olympics postponement has messed up drug cheat...  fake
7870  new autopsy reports suggest jeffrey epstein mo...  fake
9094  <user> kia ora the auckland region is at alert...  real
1911  coronavirusupdate indiafightscorona india cont...  real
3813      president of sri lanka has contracted covid19  fake


In [6]:
# Label encoding: Map "fake" to 0 and "real" to 1
label2id = {"fake": 0, "real": 1}
id2label = {0: "fake", 1: "real"}
train_df['label'] = train_df['label'].map(label2id)
val_df['label']   = val_df['label'].map(label2id)
test_df['label']  = test_df['label'].map(label2id)

In [7]:
# Convert pandas DataFrames to Hugging Face Datasets
train_dataset = Dataset.from_pandas(train_df)
val_dataset   = Dataset.from_pandas(val_df)
test_dataset  = Dataset.from_pandas(test_df)
print(train_dataset)

Dataset({
    features: ['tweet', 'label', '__index_level_0__'],
    num_rows: 7632
})


In [16]:
def compute_metrics(p):
    preds = p.predictions.argmax(-1)  # Convert logits to class predictions
    labels = p.label_ids
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='weighted')
    acc = accuracy_score(labels, preds)
    return {
        'accuracy': acc,
        'precision': precision,
        'recall': recall,
        'f1': f1,
    }

In [10]:
# 1. Model: BERT-base-uncased
bert_model_name = "bert-base-uncased"
bert_tokenizer = AutoTokenizer.from_pretrained(bert_model_name)
bert_model = AutoModelForSequenceClassification.from_pretrained(
    bert_model_name,
    num_labels=2,
    label2id=label2id,
    id2label=id2label
)

def tokenize_function_bert(examples):
    return bert_tokenizer(examples["tweet"], padding="max_length", truncation=True, max_length=128)

tokenized_train_bert = train_dataset.map(tokenize_function_bert, batched=True)
tokenized_val_bert   = val_dataset.map(tokenize_function_bert, batched=True)
tokenized_test_bert  = test_dataset.map(tokenize_function_bert, batched=True)

tokenized_train_bert.set_format("torch", columns=["input_ids", "attention_mask", "label"])
tokenized_val_bert.set_format("torch", columns=["input_ids", "attention_mask", "label"])
tokenized_test_bert.set_format("torch", columns=["input_ids", "attention_mask", "label"])

training_args_bert = TrainingArguments(
    output_dir="./results-bert-base-uncased",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_dir="./logs_bert",
    logging_steps=50,
    run_name="bert_fake_news_run",
    fp16=True,
    disable_tqdm=False
)

trainer_bert = Trainer(
    model=bert_model,
    args=training_args_bert,
    train_dataset=tokenized_train_bert,
    eval_dataset=tokenized_val_bert,
    compute_metrics=compute_metrics,
)


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/7632 [00:00<?, ? examples/s]

Map:   0%|          | 0/848 [00:00<?, ? examples/s]

Map:   0%|          | 0/2120 [00:00<?, ? examples/s]



In [11]:
# 2. Model: SocBERT-base
socbert_model_id = "sarkerlab/SocBERT-base"
socbert_tokenizer = AutoTokenizer.from_pretrained(socbert_model_id)
socbert_model = AutoModelForSequenceClassification.from_pretrained(
    socbert_model_id,
    num_labels=2,
    label2id=label2id,
    id2label=id2label
)

def tokenize_function_socbert(examples):
    return socbert_tokenizer(examples["tweet"], padding="max_length", truncation=True, max_length=128)

tokenized_train_socbert = train_dataset.map(tokenize_function_socbert, batched=True)
tokenized_val_socbert   = val_dataset.map(tokenize_function_socbert, batched=True)
tokenized_test_socbert  = test_dataset.map(tokenize_function_socbert, batched=True)

tokenized_train_socbert.set_format("torch", columns=["input_ids", "attention_mask", "label"])
tokenized_val_socbert.set_format("torch", columns=["input_ids", "attention_mask", "label"])
tokenized_test_socbert.set_format("torch", columns=["input_ids", "attention_mask", "label"])

training_args_socbert = TrainingArguments(
    output_dir="./results-socbert-base",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_dir="./logs_socbert",
    logging_steps=50,
    run_name="socbert_fake_news_run",
    fp16=True,
    disable_tqdm=False
)

trainer_socbert = Trainer(
    model=socbert_model,
    args=training_args_socbert,
    train_dataset=tokenized_train_socbert,
    eval_dataset=tokenized_val_socbert,
    compute_metrics=compute_metrics,
)


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at sarkerlab/SocBERT-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/7632 [00:00<?, ? examples/s]

Map:   0%|          | 0/848 [00:00<?, ? examples/s]

Map:   0%|          | 0/2120 [00:00<?, ? examples/s]



In [12]:
# 3. Model: Twitter/TWHIN-BERT-Base
model3_name = "twitter/twhin-bert-base"  # Confirm exact model name on Hugging Face
model3_tokenizer = AutoTokenizer.from_pretrained(model3_name)
model3_model = AutoModelForSequenceClassification.from_pretrained(
    model3_name,
    num_labels=2,
    label2id=label2id,
    id2label=id2label
)

def tokenize_function_model3(examples):
    return model3_tokenizer(examples["tweet"], padding="max_length", truncation=True, max_length=128)

tokenized_train_model3 = train_dataset.map(tokenize_function_model3, batched=True)
tokenized_val_model3   = val_dataset.map(tokenize_function_model3, batched=True)
tokenized_test_model3  = test_dataset.map(tokenize_function_model3, batched=True)

tokenized_train_model3.set_format("torch", columns=["input_ids", "attention_mask", "label"])
tokenized_val_model3.set_format("torch", columns=["input_ids", "attention_mask", "label"])
tokenized_test_model3.set_format("torch", columns=["input_ids", "attention_mask", "label"])

training_args_model3 = TrainingArguments(
    output_dir="./results-twitter-twhin-bert-base",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_dir="./logs_model3",
    logging_steps=50,
    run_name="twitter_twhin_bert_run",
    fp16=True,
    disable_tqdm=False
)

trainer_model3 = Trainer(
    model=model3_model,
    args=training_args_model3,
    train_dataset=tokenized_train_model3,
    eval_dataset=tokenized_val_model3,
    compute_metrics=compute_metrics,
)


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at twitter/twhin-bert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/7632 [00:00<?, ? examples/s]

Map:   0%|          | 0/848 [00:00<?, ? examples/s]

Map:   0%|          | 0/2120 [00:00<?, ? examples/s]



In [13]:
# 4. Model: COVID-Twitter-BERT
covid_twitter_model_id = "digitalepidemiologylab/covid-twitter-bert"  # Example model ID
covid_twitter_tokenizer = AutoTokenizer.from_pretrained(covid_twitter_model_id)
covid_twitter_model = AutoModelForSequenceClassification.from_pretrained(
    covid_twitter_model_id,
    num_labels=2,
    label2id=label2id,
    id2label=id2label
)

def tokenize_function_covid_twitter(examples):
    return covid_twitter_tokenizer(examples["tweet"], padding="max_length", truncation=True, max_length=128)

tokenized_train_covid_twitter = train_dataset.map(tokenize_function_covid_twitter, batched=True)
tokenized_val_covid_twitter   = val_dataset.map(tokenize_function_covid_twitter, batched=True)
tokenized_test_covid_twitter  = test_dataset.map(tokenize_function_covid_twitter, batched=True)

tokenized_train_covid_twitter.set_format("torch", columns=["input_ids", "attention_mask", "label"])
tokenized_val_covid_twitter.set_format("torch", columns=["input_ids", "attention_mask", "label"])
tokenized_test_covid_twitter.set_format("torch", columns=["input_ids", "attention_mask", "label"])

training_args_covid_twitter = TrainingArguments(
    output_dir="./results-covid-twitter-bert",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_dir="./logs_covid_twitter",
    logging_steps=50,
    run_name="covid_twitter_bert_run",
    fp16=True,
    disable_tqdm=False
)

trainer_covid_twitter = Trainer(
    model=covid_twitter_model,
    args=training_args_covid_twitter,
    train_dataset=tokenized_train_covid_twitter,
    eval_dataset=tokenized_val_covid_twitter,
    compute_metrics=compute_metrics,
)


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at digitalepidemiologylab/covid-twitter-bert and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/7632 [00:00<?, ? examples/s]

Map:   0%|          | 0/848 [00:00<?, ? examples/s]

Map:   0%|          | 0/2120 [00:00<?, ? examples/s]



In [17]:
# Train each model (uncomment if you want to train; be aware of runtime)
trainer_bert.train()
trainer_socbert.train()
trainer_model3.train()
trainer_covid_twitter.train()

Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.0898,0.204051,0.957547,0.957818,0.957547,0.95755
2,0.028,0.195484,0.964623,0.964707,0.964623,0.964617
3,0.0159,0.214988,0.962264,0.962348,0.962264,0.962258


Epoch,Training Loss,Validation Loss


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.1567,0.21523,0.942217,0.94453,0.942217,0.942105
2,0.0517,0.259209,0.949292,0.950358,0.949292,0.949241
3,0.0377,0.249176,0.956368,0.956776,0.956368,0.956347




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.255,0.169328,0.955189,0.955269,0.955189,0.955181
2,0.0723,0.157055,0.966981,0.966981,0.966981,0.966981
3,0.0817,0.186571,0.966981,0.966989,0.966981,0.96698


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.2013,0.134387,0.962264,0.962618,0.962264,0.962248
2,0.0239,0.152616,0.975236,0.975297,0.975236,0.975233
3,0.0098,0.198191,0.975236,0.975239,0.975236,0.975236


TrainOutput(global_step=2862, training_loss=0.0868607830597253, metrics={'train_runtime': 939.7949, 'train_samples_per_second': 24.363, 'train_steps_per_second': 3.045, 'total_flos': 5334375123689472.0, 'train_loss': 0.0868607830597253, 'epoch': 3.0})

In [18]:
# Evaluate on validation sets
eval_metrics_bert = trainer_bert.evaluate(eval_dataset=tokenized_val_bert)
eval_metrics_socbert = trainer_socbert.evaluate(eval_dataset=tokenized_val_socbert)
eval_metrics_model3 = trainer_model3.evaluate(eval_dataset=tokenized_val_model3)
eval_metrics_covid_twitter = trainer_covid_twitter.evaluate(eval_dataset=tokenized_val_covid_twitter)

df_metrics_bert = pd.DataFrame.from_dict(eval_metrics_bert, orient="index", columns=["BERT Metrics"])
df_metrics_socbert = pd.DataFrame.from_dict(eval_metrics_socbert, orient="index", columns=["SocBERT Metrics"])
df_metrics_model3 = pd.DataFrame.from_dict(eval_metrics_model3, orient="index", columns=["Twitter TWHIN BERT Metrics"])
df_metrics_covid_twitter = pd.DataFrame.from_dict(eval_metrics_covid_twitter, orient="index", columns=["COVID-Twitter BERT Metrics"])


In [19]:
print("BERT Evaluation Metrics:")
print(df_metrics_bert)
print("\nSocBERT Evaluation Metrics:")
print(df_metrics_socbert)
print("\nTwitter TWHIN BERT Evaluation Metrics:")
print(df_metrics_model3)
print("\nCOVID-Twitter BERT Evaluation Metrics:")
print(df_metrics_covid_twitter)

BERT Evaluation Metrics:
                         BERT Metrics
eval_loss                    0.214988
eval_accuracy                0.962264
eval_precision               0.962348
eval_recall                  0.962264
eval_f1                      0.962258
eval_runtime                 1.767400
eval_samples_per_second    479.795000
eval_steps_per_second       59.974000
epoch                        3.000000

SocBERT Evaluation Metrics:
                         SocBERT Metrics
eval_loss                       0.249176
eval_accuracy                   0.956368
eval_precision                  0.956776
eval_recall                     0.956368
eval_f1                         0.956347
eval_runtime                    1.664500
eval_samples_per_second       509.458000
eval_steps_per_second          63.682000
epoch                           3.000000

Twitter TWHIN BERT Evaluation Metrics:
                         Twitter TWHIN BERT Metrics
eval_loss                                  0.186571
eval_accurac

In [21]:
# 6. Define single-sample prediction functions for each model

def predict_label_bert(text):
    inputs = bert_tokenizer(text, return_tensors="pt", truncation=True, max_length=128)
    if torch.cuda.is_available():
        inputs = {k: v.to(torch.device("cuda")) for k, v in inputs.items()}
        bert_model.to(torch.device("cuda"))
    with torch.no_grad():
        outputs = bert_model(**inputs)
    logits = outputs.logits
    predicted_class_id = logits.argmax(dim=-1).item()
    return id2label[predicted_class_id]

def predict_label_socbert(text):
    inputs = socbert_tokenizer(text, return_tensors="pt", truncation=True, max_length=128)
    if torch.cuda.is_available():
        inputs = {k: v.to(torch.device("cuda")) for k, v in inputs.items()}
        socbert_model.to(torch.device("cuda"))
    with torch.no_grad():
        outputs = socbert_model(**inputs)
    logits = outputs.logits
    predicted_class_id = logits.argmax(dim=-1).item()
    return id2label[predicted_class_id]

def predict_label_model3(text):
    inputs = model3_tokenizer(text, return_tensors="pt", truncation=True, max_length=128)
    if torch.cuda.is_available():
        inputs = {k: v.to(torch.device("cuda")) for k, v in inputs.items()}
        model3_model.to(torch.device("cuda"))
    with torch.no_grad():
        outputs = model3_model(**inputs)
    logits = outputs.logits
    predicted_class_id = logits.argmax(dim=-1).item()
    return id2label[predicted_class_id]

def predict_label_covid_twitter(text):
    inputs = covid_twitter_tokenizer(text, return_tensors="pt", truncation=True, max_length=128)
    if torch.cuda.is_available():
        inputs = {k: v.to(torch.device("cuda")) for k, v in inputs.items()}
        covid_twitter_model.to(torch.device("cuda"))
    with torch.no_grad():
        outputs = covid_twitter_model(**inputs)
    logits = outputs.logits
    predicted_class_id = logits.argmax(dim=-1).item()
    return id2label[predicted_class_id]

sample_text = "Covid vaccines are effective against new variants!"
print("BERT Predicted label:", predict_label_bert(sample_text))
print("SocBERT Predicted label:", predict_label_socbert(sample_text))
print("Twitter TWHIN BERT Predicted label:", predict_label_model3(sample_text))
print("COVID-Twitter BERT Predicted label:", predict_label_covid_twitter(sample_text))


BERT Predicted label: fake
SocBERT Predicted label: fake
Twitter TWHIN BERT Predicted label: fake
COVID-Twitter BERT Predicted label: real


In [24]:
# 7. Run predictions on the entire test dataset for all models
# For BERT
predictions_output_bert = trainer_bert.predict(tokenized_test_bert)
predicted_class_ids_bert = np.argmax(predictions_output_bert.predictions, axis=-1)
predicted_labels_bert = [id2label[class_id] for class_id in predicted_class_ids_bert]

# For SocBERT
predictions_output_socbert = trainer_socbert.predict(tokenized_test_socbert)
predicted_class_ids_socbert = np.argmax(predictions_output_socbert.predictions, axis=-1)
predicted_labels_socbert = [id2label[class_id] for class_id in predicted_class_ids_socbert]

# For Twitter TWHIN BERT
predictions_output_model3 = trainer_model3.predict(tokenized_test_model3)
predicted_class_ids_model3 = np.argmax(predictions_output_model3.predictions, axis=-1)
predicted_labels_model3 = [id2label[class_id] for class_id in predicted_class_ids_model3]

# For COVID-Twitter BERT
predictions_output_covid_twitter = trainer_covid_twitter.predict(tokenized_test_covid_twitter)
predicted_class_ids_covid_twitter = np.argmax(predictions_output_covid_twitter.predictions, axis=-1)
predicted_labels_covid_twitter = [id2label[class_id] for class_id in predicted_class_ids_covid_twitter]


In [25]:
# 8. Add predictions to the original test DataFrame and display

# Remove any extra prediction column if exists
for col in ["predicted_label", "predicted_label_bert", "predicted_label_socbert", "predicted_label_model3", "predicted_label_covid_twitter"]:
    if col in test_df.columns:
        test_df = test_df.drop(col, axis=1)

test_df['predicted_label_bert'] = predicted_labels_bert
test_df['predicted_label_socbert'] = predicted_labels_socbert
test_df['predicted_label_model3'] = predicted_labels_model3
test_df['predicted_label_covid_twitter'] = predicted_labels_covid_twitter

print("Sample Predictions:")
print(test_df[['predicted_label_bert', 'predicted_label_socbert', 'predicted_label_model3', 'predicted_label_covid_twitter']].head())

# Save only the prediction columns to CSV
test_df[['predicted_label_bert', 'predicted_label_socbert', 'predicted_label_model3', 'predicted_label_covid_twitter']].to_csv("test_predictions.csv", index=False)
print("Saved all predictions to 'test_predictions.csv'")

Sample Predictions:
     predicted_label_bert predicted_label_socbert predicted_label_model3  \
5815                 fake                    fake                   fake   
3647                 fake                    fake                   fake   
7709                 real                    real                   real   
9661                 fake                    fake                   fake   
2483                 fake                    fake                   fake   

     predicted_label_covid_twitter  
5815                          fake  
3647                          fake  
7709                          real  
9661                          fake  
2483                          fake  
Saved all predictions to 'test_predictions.csv'


In [26]:
from transformers import AutoTokenizer, AutoModel
import torch

# Define the models to process along with their Hugging Face IDs
model_dict = {
    "BERT-base-uncased": "bert-base-uncased",
    "SocBERT-base": "sarkerlab/SocBERT-base",
    "Twitter TWHIN-BERT": "twitter/twhin-bert-base",
    "COVID-Twitter-BERT": "digitalepidemiologylab/covid-twitter-bert"
}

# Sample text for embedding extraction
text = "Sample text for embedding"

# Dictionary to store outputs for each model
model_outputs = {}

# Iterate over each model in the dictionary
for model_label, model_name in model_dict.items():
    print(f"\nProcessing model: {model_label}")

    # Load tokenizer and model (without classification head)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModel.from_pretrained(model_name)

    # Tokenize the input text
    inputs = tokenizer(text, return_tensors="pt")

    # Get hidden representations (disable gradient computation for inference)
    with torch.no_grad():
        outputs = model(**inputs)
        # Last hidden state shape: (batch_size, sequence_length, hidden_size)
        last_hidden_state = outputs.last_hidden_state

        # Pooling: Mean over the token embeddings to form a sentence-level embedding
        pooled_embedding = torch.mean(last_hidden_state, dim=1)

    # Store the outputs in the dictionary
    model_outputs[model_label] = {
        "last_hidden_state": last_hidden_state,
        "pooled_embedding": pooled_embedding
    }

    # Print the shapes of the outputs for verification
    print(f"  Last hidden state shape: {last_hidden_state.shape}")
    print(f"  Pooled embedding shape (mean pooling): {pooled_embedding.shape}")

# Now, model_outputs contains the hidden representations for each model.



Processing model: BERT-base-uncased
  Last hidden state shape: torch.Size([1, 8, 768])
  Pooled embedding shape (mean pooling): torch.Size([1, 768])

Processing model: SocBERT-base


Some weights of RobertaModel were not initialized from the model checkpoint at sarkerlab/SocBERT-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  Last hidden state shape: torch.Size([1, 8, 768])
  Pooled embedding shape (mean pooling): torch.Size([1, 768])

Processing model: Twitter TWHIN-BERT


Some weights of BertModel were not initialized from the model checkpoint at twitter/twhin-bert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  Last hidden state shape: torch.Size([1, 9, 768])
  Pooled embedding shape (mean pooling): torch.Size([1, 768])

Processing model: COVID-Twitter-BERT
  Last hidden state shape: torch.Size([1, 8, 1024])
  Pooled embedding shape (mean pooling): torch.Size([1, 1024])
