In [1]:
#importing libraries
import pandas as pd
import numpy as np
import os
#from google.colab import drive
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from sklearn.metrics import f1_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
!pip install datasets
from datasets import Dataset, DatasetDict
from transformers import RobertaTokenizer, RobertaForSequenceClassification, Trainer, TrainingArguments
from transformers import EvalPrediction

Collecting datasets
  Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.2.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m9.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m11.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.9.0-py3-none-any.whl (

In [2]:
file_path = "data/EN/subtask-2-annotations.txt"
df = pd.read_csv(file_path, sep="\t", header=None, names=["Document_ID", "High_Level_Narratives", "Sub_Narratives"])

In [3]:
df

Unnamed: 0,Document_ID,High_Level_Narratives,Sub_Narratives
0,EN_CC_100013.txt,CC: Criticism of climate movement,CC: Criticism of climate movement: Ad hominem ...
1,EN_UA_300009.txt,Other,Other
2,EN_UA_300017.txt,Other,Other
3,EN_CC_100021.txt,Other,Other
4,EN_UA_300041.txt,Other,Other
...,...,...,...
394,EN_CC_200022.txt,CC: Criticism of institutions and authorities;...,CC: Criticism of institutions and authorities:...
395,EN_CC_100028.txt,Other,Other
396,EN_CC_300010.txt,CC: Amplifying Climate Fears,CC: Amplifying Climate Fears: Other
397,EN_UA_013257.txt,URW: Russia is the Victim;URW: Blaming the war...,URW: Russia is the Victim: Russia actions in U...


In [4]:
# Group by Document_ID and create lists of narratives
df = df.groupby("Document_ID").agg({
    "High_Level_Narratives": lambda x: list(set(";".join(x).split(";"))),
    "Sub_Narratives": lambda x: list(set(";".join(x).split(";")))
}).reset_index()

df.rename(columns={
    "High_Level_Narratives": "High_Level_Narratives_List",
    "Sub_Narratives": "Sub_Narratives_List"
}, inplace=True)


In [5]:
#drive.mount('/content/drive')
documents_folder = 'data/EN/raw-documents'


def load_documents(folder_path):
    documents = {}
    for filename in os.listdir(folder_path):
        if filename.endswith(".txt"):  # Ensure only text files are read
            doc_id = filename  # Extract Document_ID from filename
            with open(os.path.join(folder_path, filename), 'r', encoding='utf-8') as file:
                documents[doc_id] = file.read().strip()
    return documents

# Load document texts into a dictionary
document_texts = load_documents(documents_folder)

# Map document texts to the DataFrame
df['Text'] = df['Document_ID'].map(document_texts)

# Check if all documents are successfully mapped
print(f"Number of missing documents: {df['Text'].isnull().sum()}")
df


Mounted at /content/drive
Number of missing documents: 0


Unnamed: 0,Document_ID,High_Level_Narratives_List,Sub_Narratives_List,Text
0,EN_CC_100000.txt,[CC: Hidden plots by secret schemes of powerfu...,[CC: Hidden plots by secret schemes of powerfu...,Pentagon plans to serve LAB-GROWN MEAT to troo...
1,EN_CC_100002.txt,[CC: Hidden plots by secret schemes of powerfu...,[CC: Hidden plots by secret schemes of powerfu...,Oxford Residents Mount Resistance Against the ...
2,EN_CC_100003.txt,[CC: Criticism of institutions and authorities...,[CC: Criticism of institutions and authorities...,"Fonda Heads To Canada For Oil Sands Protest, M..."
3,EN_CC_100004.txt,[CC: Criticism of institutions and authorities...,[CC: Controversy about green technologies: Ren...,A Tesla Owner Just Exposed A Sick Secret About...
4,EN_CC_100005.txt,[CC: Criticism of climate movement],"[CC: Criticism of climate movement: Other, CC:...",Climate Crazies Fail in Attempt to Vandalize A...
...,...,...,...,...
394,EN_UA_DEV_100028.txt,[URW: Negative Consequences for the West],[URW: Negative Consequences for the West: Othe...,European gas prices surge 20% as Russia's late...
395,EN_UA_DEV_216.txt,"[URW: Negative Consequences for the West, URW:...",[URW: Negative Consequences for the West: Sanc...,"EU 'biggest loser' in Ukraine conflicts, Hunga..."
396,EN_UA_DEV_23.txt,"[URW: Praise of Russia, URW: Distrust towards ...",[URW: Praise of Russia: Praise of Russian mili...,What is the current trajectory of the evil emp...
397,EN_UA_DEV_24.txt,"[URW: Negative Consequences for the West, URW:...",[URW: Negative Consequences for the West: Sanc...,Europe ‘Shot Itself in the Lungs’ With Sanctio...


In [6]:
urw_narratives = [
    "URW: Amplifying war-related fears",
    "URW: Blaming the war on others rather than the invader",
    "URW: Discrediting Ukraine",
    "URW: Discrediting the West, Diplomacy",
    "URW: Distrust towards Media",
    "URW: Hidden plots by secret schemes of powerful groups",
    "URW: Negative Consequences for the West",
    "URW: Overpraising the West",
    "URW: Praise of Russia",
    "URW: Russia is the Victim",
    "URW: Speculating war outcomes",
]

In [7]:
df

Unnamed: 0,Document_ID,High_Level_Narratives_List,Sub_Narratives_List,Text
0,EN_CC_100000.txt,[CC: Hidden plots by secret schemes of powerfu...,[CC: Hidden plots by secret schemes of powerfu...,Pentagon plans to serve LAB-GROWN MEAT to troo...
1,EN_CC_100002.txt,[CC: Hidden plots by secret schemes of powerfu...,[CC: Hidden plots by secret schemes of powerfu...,Oxford Residents Mount Resistance Against the ...
2,EN_CC_100003.txt,[CC: Criticism of institutions and authorities...,[CC: Criticism of institutions and authorities...,"Fonda Heads To Canada For Oil Sands Protest, M..."
3,EN_CC_100004.txt,[CC: Criticism of institutions and authorities...,[CC: Controversy about green technologies: Ren...,A Tesla Owner Just Exposed A Sick Secret About...
4,EN_CC_100005.txt,[CC: Criticism of climate movement],"[CC: Criticism of climate movement: Other, CC:...",Climate Crazies Fail in Attempt to Vandalize A...
...,...,...,...,...
394,EN_UA_DEV_100028.txt,[URW: Negative Consequences for the West],[URW: Negative Consequences for the West: Othe...,European gas prices surge 20% as Russia's late...
395,EN_UA_DEV_216.txt,"[URW: Negative Consequences for the West, URW:...",[URW: Negative Consequences for the West: Sanc...,"EU 'biggest loser' in Ukraine conflicts, Hunga..."
396,EN_UA_DEV_23.txt,"[URW: Praise of Russia, URW: Distrust towards ...",[URW: Praise of Russia: Praise of Russian mili...,What is the current trajectory of the evil emp...
397,EN_UA_DEV_24.txt,"[URW: Negative Consequences for the West, URW:...",[URW: Negative Consequences for the West: Sanc...,Europe ‘Shot Itself in the Lungs’ With Sanctio...


In [8]:
df = df[df["High_Level_Narratives_List"].apply(lambda narratives: any("URW:" in narrative for narrative in narratives))]


In [9]:
def preprocess_multi_label(data, narratives):
    label_vectors = []
    for narratives_list in data['High_Level_Narratives_List']:
        vector = [1 if narrative in narratives_list else 0 for narrative in narratives]
        label_vectors.append(vector)
    data['Labels'] = label_vectors
    return data

In [10]:
df

Unnamed: 0,Document_ID,High_Level_Narratives_List,Sub_Narratives_List,Text
179,EN_UA_000923.txt,"[URW: Speculating war outcomes, URW: Discredit...","[URW: Discrediting the West, Diplomacy: The EU...",Boris Johnson demands Putin ‘steps back from t...
180,EN_UA_001032.txt,"[URW: Discrediting the West, Diplomacy]","[URW: Discrediting the West, Diplomacy: Diplom...",Russia-Ukraine war map: Where are Russian troo...
181,EN_UA_001052.txt,[URW: Blaming the war on others rather than th...,[URW: Blaming the war on others rather than th...,NATO ‘Cautiously Optimistic’ Amid Reports of R...
184,EN_UA_002668.txt,"[URW: Speculating war outcomes, URW: Amplifyin...",[URW: Speculating war outcomes: Russian army i...,Putin may ABANDON siege of Kyiv and try to bli...
186,EN_UA_003579.txt,[URW: Negative Consequences for the West],[URW: Negative Consequences for the West: Sanc...,International agencies call for urgent aid to ...
...,...,...,...,...
394,EN_UA_DEV_100028.txt,[URW: Negative Consequences for the West],[URW: Negative Consequences for the West: Othe...,European gas prices surge 20% as Russia's late...
395,EN_UA_DEV_216.txt,"[URW: Negative Consequences for the West, URW:...",[URW: Negative Consequences for the West: Sanc...,"EU 'biggest loser' in Ukraine conflicts, Hunga..."
396,EN_UA_DEV_23.txt,"[URW: Praise of Russia, URW: Distrust towards ...",[URW: Praise of Russia: Praise of Russian mili...,What is the current trajectory of the evil emp...
397,EN_UA_DEV_24.txt,"[URW: Negative Consequences for the West, URW:...",[URW: Negative Consequences for the West: Sanc...,Europe ‘Shot Itself in the Lungs’ With Sanctio...


In [11]:
df = preprocess_multi_label(df, urw_narratives)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Labels'] = label_vectors


In [12]:
def calculate_class_weights(labels):
    label_sums = np.sum(labels, axis=0)
    total_samples = len(labels)
    class_weights = [total_samples / (len(label_sums) * count) for count in label_sums]
    return torch.tensor(class_weights, dtype=torch.float)

In [13]:
labels = np.array(df['Labels'].tolist(), dtype=np.float32)
class_weights = calculate_class_weights(labels)

In [14]:
def handle_rare_classes(data, labels, min_samples=15):
    label_sums = np.sum(labels, axis=0)
    rare_classes = np.where(label_sums < min_samples)[0]

    for rare_class in rare_classes:
        rare_indices = [i for i, lbl in enumerate(labels) if lbl[rare_class] == 1]
        if len(rare_indices) > 0:
            duplicate_data = data.iloc[rare_indices]
            data = pd.concat([data] + [duplicate_data] * (min_samples - len(rare_indices)), ignore_index=True)
            labels = np.vstack([labels] + [labels[rare_indices]] * (min_samples - len(rare_indices)))
    return data, labels


In [15]:
df, labels = handle_rare_classes(df, labels)

In [16]:
train_texts, test_texts, train_labels, test_labels = train_test_split(
    df['Text'], labels, test_size=0.2, random_state=42)


In [17]:
train_dataset = Dataset.from_dict({"text": train_texts.tolist(), "labels": train_labels.tolist()})
test_dataset = Dataset.from_dict({"text": test_texts.tolist(), "labels": test_labels.tolist()})
datasets = DatasetDict({"train": train_dataset, "test": test_dataset})

In [18]:
tokenizer = AutoTokenizer.from_pretrained("roberta-base")
base_model = AutoModelForSequenceClassification.from_pretrained(
    "roberta-base", num_labels=labels.shape[1], problem_type="multi_label_classification"
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [19]:
from torch.nn import BCEWithLogitsLoss
import torch.nn as nn

class CustomLossRobertaModel(nn.Module):
    def __init__(self, model, class_weights):
        super(CustomLossRobertaModel, self).__init__()
        self.model = model
        self.loss_fn = BCEWithLogitsLoss(pos_weight=class_weights)

    def forward(self, input_ids, attention_mask, labels=None):
        outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
        logits = outputs.logits  # Extract logits
        loss = None
        if labels is not None:
            # Compute the loss if labels are provided
            loss = self.loss_fn(logits, labels.float())
        return {"loss": loss, "logits": logits}


In [21]:
model = CustomLossRobertaModel(base_model, class_weights)

In [22]:
def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=512)

# Tokenize the dataset
tokenized_datasets = datasets.map(tokenize_function, batched=True)


Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Map:   0%|          | 0/43 [00:00<?, ? examples/s]

In [23]:
from sklearn.metrics import precision_score, recall_score, f1_score
from transformers import EvalPrediction

def compute_metrics(p: EvalPrediction):
    # Apply sigmoid activation to model predictions (logits)
    sigmoid_preds = torch.sigmoid(torch.tensor(p.predictions)).numpy()

    # Convert probabilities to binary predictions using a threshold of 0.5
    preds = (sigmoid_preds > 0.5).astype(int)

    # Ground-truth labels
    labels = p.label_ids

    # Calculate weighted precision, recall, and F1-score
    precision = precision_score(labels, preds, average="weighted", zero_division=0)
    recall = recall_score(labels, preds, average="weighted", zero_division=0)
    f1 = f1_score(labels, preds, average="weighted", zero_division=0)

    # Return the computed metrics
    return {"precision": precision, "recall": recall, "f1": f1}


In [24]:
import os
os.environ["WANDB_DISABLED"] = "true"

In [25]:

training_args = TrainingArguments(
    output_dir='./results',                    # Directory to store checkpoints and final model
    num_train_epochs=50,                       # Total number of training epochs
    learning_rate=2e-5,                        # Fine-tuning learning rate
    per_device_train_batch_size=4,             # Batch size per device during training
    per_device_eval_batch_size=4,              # Batch size for evaluation
    evaluation_strategy='epoch',               # Evaluate at the end of each epoch
    save_strategy='epoch',                     # Save model at the end of each epoch
    load_best_model_at_end=True,               # Load the best model at the end of training
    metric_for_best_model='f1',                # Use F1 score to evaluate the best model
    greater_is_better=True,                    # Higher F1 is better
    logging_dir='./logs',                      # Directory for storing logs
    logging_steps=100,                         # Log every 100 steps
    save_total_limit=3,                        # Limit the total amount of checkpoints
    seed=42,
    weight_decay=0.01,                         # Seed for reproducibility
    fp16=True
)

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


In [26]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

  trainer = Trainer(


In [27]:
trainer.train()

Epoch,Training Loss,Validation Loss,Precision,Recall,F1
1,No log,0.340683,0.0,0.0,0.0
2,No log,0.29463,0.052805,0.059406,0.055911
3,0.361700,0.249219,0.218492,0.227723,0.222332
4,0.361700,0.234777,0.339289,0.287129,0.282063
5,0.245100,0.211349,0.360946,0.287129,0.294607
6,0.245100,0.199099,0.778322,0.366337,0.412672
7,0.182400,0.190071,0.915402,0.435644,0.508459
8,0.182400,0.183673,0.827827,0.415842,0.472207
9,0.182400,0.171326,0.843234,0.485149,0.561479
10,0.135600,0.166743,0.94758,0.554455,0.647611


TrainOutput(global_step=2150, training_loss=0.07741987289384354, metrics={'train_runtime': 1177.7053, 'train_samples_per_second': 7.26, 'train_steps_per_second': 1.826, 'total_flos': 0.0, 'train_loss': 0.07741987289384354, 'epoch': 50.0})

In [28]:
def predict_with_threshold(trainer, dataset, threshold=0.7):
    predictions = trainer.predict(dataset)
    logits = predictions.predictions
    sigmoid = torch.nn.Sigmoid()
    probs = sigmoid(torch.tensor(logits))
    return (probs > threshold).int()

# Predict and return results
results = predict_with_threshold(trainer, tokenized_datasets["test"])
print("Predicted Labels:", results)

Predicted Labels: tensor([[0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0],
        [1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0],
        [1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0],
        [1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0],
        [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0],
        [0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1],
        [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
        [0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
        [1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
        [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
        [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
        [1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
        [1, 0, 0, 1, 0, 0, 0, 1, 0, 

In [46]:
# Save the tokenizer and base model
base_model.save_pretrained('./saved_model')  # Save the underlying base model
tokenizer.save_pretrained('./saved_model')   # Save the tokenizer
torch.save(class_weights, './saved_model/class_weights.pt')  # Save class weights


In [47]:
import os
print(os.listdir('./saved_model'))


['training_args.bin', 'config.json', 'tokenizer_config.json', 'model.safetensors', 'merges.txt', 'vocab.json', 'special_tokens_map.json', 'class_weights.pt', 'tokenizer.json']


In [48]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained('./saved_model')

# Load base model
base_model = AutoModelForSequenceClassification.from_pretrained('./saved_model')

# Load class weights
class_weights_tensor = torch.load('./saved_model/class_weights.pt')

# Reconstruct the custom model
model = CustomLossRobertaModel(base_model, class_weights_tensor)

# Set the model to evaluation mode
model.eval()
print("Custom model loaded successfully.")


Custom model loaded successfully.


  class_weights_tensor = torch.load('./saved_model/class_weights.pt')


In [54]:
def predict_high_level_narratives(text, model, tokenizer, narratives, threshold=0.5):
    # Tokenize the input text
    inputs = tokenizer(text, padding="max_length", truncation=True, max_length=512, return_tensors="pt")

    # Get model predictions
    with torch.no_grad():  # Disable gradient computation for inference
        outputs = model(**inputs)
        logits = outputs["logits"]  # Access logits from the dictionary
        probs = torch.sigmoid(logits).squeeze().numpy()  # Apply sigmoid to get probabilities

    # Apply threshold to convert probabilities to binary predictions
    predictions = (probs > threshold).astype(int)

    # Map binary predictions to high-level narratives
    predicted_narratives = [narratives[i] for i, pred in enumerate(predictions) if pred == 1]

    return predicted_narratives


In [55]:
test_data = pd.DataFrame({
    "text": test_dataset["text"],
    "labels": test_dataset["labels"]
})


In [56]:
test_data.head()

Unnamed: 0,text,labels
0,The Downing of Malaysian Airlines MH17: The Qu...,"[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ..."
1,"""Russia does not feel threatened"": Estonian Pr...","[1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, ..."
2,"At Last, a Credible Explanation of How Kakhovk...","[1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, ..."
3,"Zelensky Seeks To Cancel Russia At UN, Asks Wh...","[1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, ..."
4,World policy-makers agree on uncompromising co...,"[0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, ..."


In [52]:
text = """ The Downing of Malaysian Airlines MH17: The Quest for Truth and Justice. Review of the Evidence by Prof Michel Chossudovsky, Global Research: Eight years ago, on 17 July 2014, Malaysia Airlines Flight 17 (MH17) was shot down in Eastern Ukraine. In recent development, A Netherlands Court on November 17, 2022 found the Kremlin responsible for the alleged attack: two Russians and a separatist Ukrainian” were identified as “guilty of mass murder for their involvement in the downing of Malaysia Airlines Flight 17”. The media has highlighted the downing of MH17 in the context of the war in Ukraine. The Court ruling is fraudulent, based on fabricated evidence. TRUTH LIVES on at https://sgtreport.tv/ *** This background text was prepared in the context of the Kuala Lumpur MH17 Conference entitled MH17: The Quest for Justice, organized by JUST, the PGPF and the CRG in August 2019. This comprehensive report provides detailed evidence that Russia was not involved in the downing of MH17. It also confirms the insidious role of Ukraine’s intelligence agency. **** The underlying objective is to examine the evidence, reveal the truth and uphold the rule of law. Today, July 29th 2021, our thoughts are with the families of the victims of the Malaysian Airlines MH17 tragedy. The 2019 Conference was dedicated to the memory of the victims. It is also a national tragedy for the people of Malaysia. The downing of MH17 with 283 passengers and 15 crew on board, took place barely a few months following the mysterious disappearance of Malaysian Airlines flight MH370 after departing on March 8, 2014 from Kuala Lumpur for Beijing, with 227 passengers and 12 crew members on board. It is worth recalling that immediately after the MH17 plane crash on July 17 2014, prior to the conduct of a preliminary investigation, Secretary of State John Kerry and US Ambassador to the UN Samantha Power pointed their finger at Moscow without a shred of evidence. In turn, the allegations directed against Russia were used to justify the imposition of sweeping economic sanctions against the Russian Federation. According to President Obama (hours after the tragedy): “… the downing of MH17 should be “a wake-up call” to Europe to get serious about confronting Russia over Ukraine after EU leaders have proved reluctant to impose tought sanctions.” (Telegraph, July 18,2019) The Wall Street Journal reports (July 18, 2014) that “Obama is getting his wish and Brussels is now weighing new sanctions”: European governments, jolted by the downing of a passenger plane over eastern Ukraine that killed nearly 300 people, are contemplating a major expansion of sanctions on Russia as early as next week. European Union leaders decided in recent days to expand the penalties to a broad new category of people and companies. But the apparent shooting down of a plane carrying more than 200 EU citizens has intensified a desire to act quickly and forcefully, including sanctions against oligarchs with ties to the Kremlin. In Brussels, some diplomats described the incident as a game-changer. “It would have major consequences if it was certain it came from the rebels— major consequences,” said one official. (WSJ, July 18, 2014) On July 22, 2014, The European Union decided to expand its sanctions blacklist against Moscow including Vladimir Putin’s inner circle. EU foreign ministers decided to “draw up further broad measures including an arms embargo and financial restrictions on Russian businesses, … following the downing of Malaysia Airlines Flight MH17.” (Guardian, July 22, 2019) Michel Chossudovsky, August 2019, updated July 17, 2020, July 16, 2022 """

In [57]:
predicted_narratives = predict_high_level_narratives(
    text=text,
    model=model,
    tokenizer=tokenizer,
    narratives=urw_narratives,
    threshold=0.5  # Adjust as needed
)
print("Predicted High-Level Narratives:", predicted_narratives)

Predicted High-Level Narratives: ['URW: Distrust towards Media', 'URW: Russia is the Victim']
