In [32]:
import pandas as pd
from transformers import BertTokenizer, BertModel, BertForSequenceClassification
import torch
import numpy as np
from google.colab import drive

# Mount Drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [33]:
file_path = '/content/drive/My Drive/data.csv'
data = pd.read_csv(file_path)

In [34]:
# Prepare text
col1 = data["Disinfo_cases_en"].astype(str)
col2 = data["Fakes"].astype(str)
text = col1 + " " + col2

In [35]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained('bert-base-uncased')

In [36]:
def get_bert_embeddings(text):
    if isinstance(text, str):
        inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128)
        with torch.no_grad():
            outputs = model(**inputs)
        cls_embedding = outputs.last_hidden_state[:, 0, :]
        return cls_embedding.squeeze().numpy()
    else:
        return np.nan

In [37]:
embeddings = [get_bert_embeddings(t) for t in text]
data['embedding'] = embeddings

In [38]:
print(data[['Disinfo_cases_en', 'Fakes', 'embedding']])

                                      Disinfo_cases_en  \
0    Kyiv forces continue to bomb the territories o...   
1    After the Maidan putsch in April 2014, Kyiv se...   
2    Because of the Maidan coup and the thousands o...   
3    A civil war has been raging there since a coup...   
4    Of course, Biden knows who initiated the 2014 ...   
..                                                 ...   
618  US intelligence analysts had already concluded...   
619    in Ukraine we have witnessed a real coup d'état   
620  In Sunday's referendum, 96.6 percent of Crimes...   
621              Ukraine, how a 'modern' coup is made    
622   "MADE IN USA" COUP STRATEGY…the US has invest...   

                                                 Fakes  \
0     Kyiv ignores fulfillment ot the Minsk Agreements   
1    Maidan led to the separation of Donetsk and Lu...   
2    Maidan led to the separation of Donetsk and Lu...   
3    Maidan led to the separation of Donetsk and Lu...   
4    Maidan l

In [39]:
model_classification = BertForSequenceClassification.from_pretrained("nlptown/bert-base-multilingual-uncased-sentiment")
inputs_classification = tokenizer(text.tolist(), return_tensors="pt", truncation=True, padding=True, max_length=128)

with torch.no_grad():
    classification_outputs = model_classification(**inputs_classification)

logits = classification_outputs.logits
probabilities = torch.nn.functional.softmax(logits, dim=-1)

print("Probabilities for each class:", probabilities.numpy())

Probabilities for each class: [[0.47929278 0.24985263 0.17323753 0.06029387 0.03732326]
 [0.2953638  0.2499476  0.22712862 0.13673626 0.0908236 ]
 [0.32371065 0.2905383  0.24154565 0.09388333 0.05032207]
 ...
 [0.3371575  0.3127644  0.2257449  0.08557009 0.03876317]
 [0.44345367 0.2712132  0.16219382 0.06942202 0.05371735]
 [0.58948857 0.22774903 0.10985631 0.04051752 0.03238861]]
