In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from transformers import AutoTokenizer, Trainer, TrainingArguments
from transformers import AutoModelForSequenceClassification
from transformers import AutoModelForCausalLM
from transformers import pipeline
import torch
from huggingface_hub import notebook_login
from accelerate import init_empty_weights, load_checkpoint_and_dispatch
# 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
notebook_login()

print(torch.cuda.is_available())
device = torch.device("cuda")

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

True


In [2]:


# Load Dataset
def load_dataset(file_path):
    data = pd.read_csv(file_path, sep="\t")
    return data

# Stratified Split
def stratified_split(data, label_col, test_size=0.1):
    return train_test_split(
        data, test_size=test_size, stratify=data[label_col], random_state=42
    )

# Fine-tuning Setup
def fine_tune_model(model_name, train_texts, train_labels, num_labels):
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels = num_labels)
    model.to("cuda")
    train_encodings = tokenizer(list(train_texts), truncation=True, padding=True, max_length=128)

    class Dataset(torch.utils.data.Dataset):
        def __init__(self, encodings, labels):
            self.encodings = encodings
            self.labels = labels

        def __len__(self):
            return len(self.labels)

        def __getitem__(self, idx):
            if idx >= len(self.labels) or idx < 0:
                raise IndexError(f"Index {idx} is out of range for dataset of size {len(self.labels)}")
            item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
            item["labels"] = torch.tensor(self.labels[idx])
            return item

    train_dataset = Dataset(train_encodings, train_labels)
    training_args = TrainingArguments(
        output_dir="./results",
        eval_strategy="no",
        learning_rate=2e-5,
        per_device_train_batch_size=16,
        num_train_epochs=3,
        weight_decay=0.01,
        logging_dir="./logs",
        save_strategy="no",
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        processing_class=tokenizer,
    )

    trainer.train()

    trainer.save_model("./fine_tuned_model")
    return model, tokenizer

# Zero-shot Inference
# def zero_shot_inference(model_name, texts, labels, label_map):
#     classifier = pipeline("zero-shot-classification", model=model_name,
#                           device = "cuda")
#     results = []
#     for text, true_label in zip(texts, labels):
#         preds = classifier(text, candidate_labels=list(label_map.values()))
#         max_index = preds["scores"].index(max(preds["scores"]))
#         predicted_label = list(label_map.keys())[max_index]  # Map back to 0/1
#         results.append(predicted_label)
#     return classification_report(labels, results)



In [14]:

# Main Execution

# Load the dataset
file_path = "orientation-tr-train.tsv"
data = load_dataset(file_path)
data.reset_index(drop=True, inplace=True)
# y = data["label"]

# Drop rows with NaN values in the 'label' column
data = data.dropna(subset=['label'])  # This line added to remove NaN values


train_task1, test_task1 = stratified_split(data, "label")


In [None]:
# Task 1: Ideology Prediction
fine_tuned_model_task1, tokenizer_task1 = fine_tune_model(
    "xlm-roberta-base", train_task1["text_en"].to_list(), train_task1["label"].to_list(), num_labels=2
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/615 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.10M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.12G [00:00<?, ?B/s]

Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Step,Training Loss
500,0.6019
1000,0.4989
1500,0.4233
2000,0.3767
2500,0.3289


In [17]:
modelPath = "/content/drive/MyDrive/fine_tuned_modelTask1"

model = AutoModelForSequenceClassification.from_pretrained(modelPath, local_files_only=True)

trainer = Trainer(model=model)
trainer.model = model.cuda()
# Evaluate Fine-tuned Model for Task 1
# Convert the 'text_en' column to a list of strings
input_texts = test_task1["text_en"].tolist()
tokenizer = AutoTokenizer.from_pretrained(modelPath, local_files_only=True)
# Tokenize the input texts
input_encodings = tokenizer(
    test_task1["text_en"].tolist(),  # Ensure this column exists and is clean
    truncation=True,
    padding=True,
    return_tensors="pt"
)

# Move the input encodings to the same device as the model
input_encodings = {key: tensor.to("cuda") for key, tensor in input_encodings.items()}

# Perform inference
with torch.no_grad():  # Disable gradient calculation during inference
    outputs = model(**input_encodings)
    logits = outputs.logits

# Get the predicted labels
predicted_labels = logits.argmax(axis=1).cpu().tolist()  # Move predictions back to CPU

# Print the classification report
from sklearn.metrics import classification_report
print(classification_report(test_task1["label"].tolist(), predicted_labels))

OutOfMemoryError: CUDA out of memory. Tried to allocate 2.37 GiB. GPU 0 has a total capacity of 14.75 GiB of which 593.06 MiB is free. Process 5872 has 14.17 GiB memory in use. Of the allocated memory 13.14 GiB is allocated by PyTorch, and 918.53 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
def zero_shot_llama_inference(model_name, texts, labels, label_map, batch_size=8):
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    if tokenizer.pad_token is None:
        tokenizer.add_special_tokens({'pad_token': '[PAD]'})

    # Initialize empty weights for the model
    with init_empty_weights():
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float16
        )

    # Load the model using checkpointing and dispatching
    model = load_checkpoint_and_dispatch(
        model,
        model_name,
        device_map="auto",
        # offload_folder="offload",  # Optional: specify a folder for offloading
        # dtype=torch.float16  # Optional: specify the data type for offloading
    )

    predicted_labels = []

    # Process texts in batches
    for i in range(0, len(texts), batch_size):
        print(f"Processing batch {i // batch_size + 1}/{len(texts) // batch_size + 1}")
        batch_texts = texts[i:i + batch_size]
        prompts = [
            f"Text: \"{text}\"\nQuestion: \"Is this text {', '.join(label_map.values())}?\"\nChoices: {', '.join(label_map.values())}\nAnswer:"
            for text in batch_texts
        ]

        # Tokenize batch
        inputs = tokenizer(prompts, return_tensors="pt", padding=True, truncation=True, max_length=512)
        inputs = inputs.to("cuda")

        # Create a full attention mask where all tokens attend to all other tokens
        # This is often needed for causal language modeling tasks.
        inputs["attention_mask"] = torch.ones_like(inputs["input_ids"], dtype=torch.long, device="cuda")

        # Generate predictions
        outputs = model.generate(
            inputs.input_ids,
            attention_mask=inputs.attention_mask,
            max_length=inputs.input_ids.shape[1] + 10,
            pad_token_id=tokenizer.eos_token_id
        )

        # Decode and process outputs
        decoded_outputs = [tokenizer.decode(output, skip_special_tokens=True) for output in outputs]
        for output_text in decoded_outputs:
            for label in label_map.values():
                if label in output_text:
                    predicted_labels.append(label)
                    break

    # Map predictions to numeric labels
    predicted_numeric = [list(label_map.keys())[list(label_map.values()).index(label)] for label in predicted_labels]

    print("Classification Report:")
    print(classification_report(labels, predicted_numeric, target_names=list(label_map.values())))

    return predicted_numeric

In [None]:
print(zero_shot_llama_inference("meta-llama/Llama-3.1-8B-Instruct", test_task1["text_en"].to_list(), test_task1["label"], {0: "left", 1: "right"}))

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]



ValueError: At least one of the model submodule will be offloaded to disk, please pass along an `offload_folder`.

In [None]:
print(zero_shot_llama_inference("meta-llama/Llama-3.1-8B-Instruct", test_task1["text"].to_list(), test_task1["label"], {0: "left", 1: "right"}))

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]



RuntimeError: The weights trying to be saved contained shared tensors [{'model.layers.17.self_attn.q_proj.weight', 'model.layers.5.self_attn.q_proj.weight', 'model.layers.27.self_attn.o_proj.weight', 'model.layers.23.self_attn.q_proj.weight', 'model.layers.16.self_attn.q_proj.weight', 'model.layers.1.self_attn.q_proj.weight', 'model.layers.14.self_attn.q_proj.weight', 'model.layers.31.self_attn.o_proj.weight', 'model.layers.0.self_attn.o_proj.weight', 'model.layers.24.self_attn.q_proj.weight', 'model.layers.5.self_attn.o_proj.weight', 'model.layers.26.self_attn.q_proj.weight', 'model.layers.3.self_attn.q_proj.weight', 'model.layers.4.self_attn.q_proj.weight', 'model.layers.3.self_attn.o_proj.weight', 'model.layers.9.self_attn.o_proj.weight', 'model.layers.10.self_attn.q_proj.weight', 'model.layers.12.self_attn.q_proj.weight', 'model.layers.12.self_attn.o_proj.weight', 'model.layers.30.self_attn.q_proj.weight', 'model.layers.20.self_attn.q_proj.weight', 'model.layers.2.self_attn.o_proj.weight', 'model.layers.22.self_attn.o_proj.weight', 'model.layers.29.self_attn.o_proj.weight', 'model.layers.7.self_attn.o_proj.weight', 'model.layers.28.self_attn.o_proj.weight', 'model.layers.29.self_attn.q_proj.weight', 'model.layers.11.self_attn.o_proj.weight', 'model.layers.13.self_attn.q_proj.weight', 'model.layers.21.self_attn.o_proj.weight', 'model.layers.18.self_attn.q_proj.weight', 'model.layers.25.self_attn.q_proj.weight', 'model.layers.19.self_attn.o_proj.weight', 'model.layers.10.self_attn.o_proj.weight', 'model.layers.18.self_attn.o_proj.weight', 'model.layers.15.self_attn.o_proj.weight', 'model.layers.24.self_attn.o_proj.weight', 'model.layers.15.self_attn.q_proj.weight', 'model.layers.22.self_attn.q_proj.weight', 'model.layers.27.self_attn.q_proj.weight', 'model.layers.11.self_attn.q_proj.weight', 'model.layers.0.self_attn.q_proj.weight', 'model.layers.13.self_attn.o_proj.weight', 'model.layers.26.self_attn.o_proj.weight', 'model.layers.31.self_attn.q_proj.weight', 'model.layers.20.self_attn.o_proj.weight', 'model.layers.19.self_attn.q_proj.weight', 'model.layers.23.self_attn.o_proj.weight', 'model.layers.1.self_attn.o_proj.weight', 'model.layers.21.self_attn.q_proj.weight', 'model.layers.8.self_attn.q_proj.weight', 'model.layers.14.self_attn.o_proj.weight', 'model.layers.2.self_attn.q_proj.weight', 'model.layers.6.self_attn.q_proj.weight', 'model.layers.4.self_attn.o_proj.weight', 'model.layers.7.self_attn.q_proj.weight', 'model.layers.30.self_attn.o_proj.weight', 'model.layers.28.self_attn.q_proj.weight', 'model.layers.6.self_attn.o_proj.weight', 'model.layers.17.self_attn.o_proj.weight', 'model.layers.8.self_attn.o_proj.weight', 'model.layers.16.self_attn.o_proj.weight', 'model.layers.9.self_attn.q_proj.weight', 'model.layers.25.self_attn.o_proj.weight'}, {'model.layers.28.self_attn.v_proj.weight', 'model.layers.11.self_attn.v_proj.weight', 'model.layers.22.self_attn.k_proj.weight', 'model.layers.7.self_attn.k_proj.weight', 'model.layers.19.self_attn.k_proj.weight', 'model.layers.25.self_attn.k_proj.weight', 'model.layers.8.self_attn.k_proj.weight', 'model.layers.25.self_attn.v_proj.weight', 'model.layers.30.self_attn.k_proj.weight', 'model.layers.17.self_attn.v_proj.weight', 'model.layers.3.self_attn.v_proj.weight', 'model.layers.31.self_attn.v_proj.weight', 'model.layers.16.self_attn.v_proj.weight', 'model.layers.6.self_attn.v_proj.weight', 'model.layers.15.self_attn.k_proj.weight', 'model.layers.5.self_attn.v_proj.weight', 'model.layers.2.self_attn.k_proj.weight', 'model.layers.2.self_attn.v_proj.weight', 'model.layers.31.self_attn.k_proj.weight', 'model.layers.26.self_attn.v_proj.weight', 'model.layers.26.self_attn.k_proj.weight', 'model.layers.29.self_attn.k_proj.weight', 'model.layers.0.self_attn.k_proj.weight', 'model.layers.10.self_attn.v_proj.weight', 'model.layers.12.self_attn.k_proj.weight', 'model.layers.10.self_attn.k_proj.weight', 'model.layers.24.self_attn.v_proj.weight', 'model.layers.1.self_attn.v_proj.weight', 'model.layers.6.self_attn.k_proj.weight', 'model.layers.0.self_attn.v_proj.weight', 'model.layers.7.self_attn.v_proj.weight', 'model.layers.13.self_attn.v_proj.weight', 'model.layers.22.self_attn.v_proj.weight', 'model.layers.4.self_attn.v_proj.weight', 'model.layers.27.self_attn.k_proj.weight', 'model.layers.30.self_attn.v_proj.weight', 'model.layers.23.self_attn.v_proj.weight', 'model.layers.11.self_attn.k_proj.weight', 'model.layers.21.self_attn.k_proj.weight', 'model.layers.27.self_attn.v_proj.weight', 'model.layers.4.self_attn.k_proj.weight', 'model.layers.20.self_attn.k_proj.weight', 'model.layers.9.self_attn.k_proj.weight', 'model.layers.24.self_attn.k_proj.weight', 'model.layers.5.self_attn.k_proj.weight', 'model.layers.15.self_attn.v_proj.weight', 'model.layers.18.self_attn.k_proj.weight', 'model.layers.21.self_attn.v_proj.weight', 'model.layers.14.self_attn.v_proj.weight', 'model.layers.14.self_attn.k_proj.weight', 'model.layers.17.self_attn.k_proj.weight', 'model.layers.1.self_attn.k_proj.weight', 'model.layers.19.self_attn.v_proj.weight', 'model.layers.8.self_attn.v_proj.weight', 'model.layers.16.self_attn.k_proj.weight', 'model.layers.3.self_attn.k_proj.weight', 'model.layers.13.self_attn.k_proj.weight', 'model.layers.20.self_attn.v_proj.weight', 'model.layers.18.self_attn.v_proj.weight', 'model.layers.12.self_attn.v_proj.weight', 'model.layers.29.self_attn.v_proj.weight', 'model.layers.23.self_attn.k_proj.weight', 'model.layers.28.self_attn.k_proj.weight', 'model.layers.9.self_attn.v_proj.weight'}, {'model.layers.3.mlp.gate_proj.weight', 'model.layers.19.mlp.down_proj.weight', 'model.layers.29.mlp.up_proj.weight', 'model.layers.0.mlp.down_proj.weight', 'model.layers.0.mlp.up_proj.weight', 'model.layers.2.mlp.gate_proj.weight', 'model.layers.18.mlp.up_proj.weight', 'model.layers.24.mlp.up_proj.weight', 'model.layers.16.mlp.down_proj.weight', 'model.layers.19.mlp.up_proj.weight', 'model.layers.8.mlp.gate_proj.weight', 'model.layers.12.mlp.up_proj.weight', 'model.layers.11.mlp.down_proj.weight', 'model.layers.14.mlp.down_proj.weight', 'model.layers.24.mlp.gate_proj.weight', 'model.layers.20.mlp.up_proj.weight', 'model.layers.5.mlp.gate_proj.weight', 'model.layers.8.mlp.up_proj.weight', 'model.layers.23.mlp.gate_proj.weight', 'model.layers.28.mlp.up_proj.weight', 'model.layers.7.mlp.up_proj.weight', 'model.layers.28.mlp.gate_proj.weight', 'model.layers.21.mlp.down_proj.weight', 'model.layers.25.mlp.down_proj.weight', 'model.layers.6.mlp.down_proj.weight', 'model.layers.10.mlp.down_proj.weight', 'model.layers.8.mlp.down_proj.weight', 'model.layers.9.mlp.gate_proj.weight', 'model.layers.20.mlp.gate_proj.weight', 'model.layers.31.mlp.down_proj.weight', 'model.layers.30.mlp.gate_proj.weight', 'model.layers.13.mlp.down_proj.weight', 'model.layers.4.mlp.up_proj.weight', 'model.layers.31.mlp.gate_proj.weight', 'model.layers.15.mlp.down_proj.weight', 'model.layers.26.mlp.gate_proj.weight', 'model.layers.17.mlp.gate_proj.weight', 'model.layers.23.mlp.up_proj.weight', 'model.layers.17.mlp.down_proj.weight', 'model.layers.9.mlp.up_proj.weight', 'model.layers.19.mlp.gate_proj.weight', 'model.layers.24.mlp.down_proj.weight', 'model.layers.27.mlp.down_proj.weight', 'model.layers.1.mlp.up_proj.weight', 'model.layers.0.mlp.gate_proj.weight', 'model.layers.26.mlp.down_proj.weight', 'model.layers.13.mlp.up_proj.weight', 'model.layers.7.mlp.down_proj.weight', 'model.layers.29.mlp.gate_proj.weight', 'model.layers.10.mlp.up_proj.weight', 'model.layers.4.mlp.down_proj.weight', 'model.layers.27.mlp.up_proj.weight', 'model.layers.10.mlp.gate_proj.weight', 'model.layers.16.mlp.gate_proj.weight', 'model.layers.12.mlp.gate_proj.weight', 'model.layers.1.mlp.down_proj.weight', 'model.layers.22.mlp.gate_proj.weight', 'model.layers.5.mlp.up_proj.weight', 'model.layers.25.mlp.gate_proj.weight', 'model.layers.22.mlp.down_proj.weight', 'model.layers.13.mlp.gate_proj.weight', 'model.layers.9.mlp.down_proj.weight', 'model.layers.30.mlp.up_proj.weight', 'model.layers.18.mlp.gate_proj.weight', 'model.layers.28.mlp.down_proj.weight', 'model.layers.4.mlp.gate_proj.weight', 'model.layers.15.mlp.up_proj.weight', 'model.layers.21.mlp.up_proj.weight', 'model.layers.6.mlp.gate_proj.weight', 'model.layers.22.mlp.up_proj.weight', 'model.layers.23.mlp.down_proj.weight', 'model.layers.14.mlp.gate_proj.weight', 'model.layers.27.mlp.gate_proj.weight', 'model.layers.30.mlp.down_proj.weight', 'model.layers.6.mlp.up_proj.weight', 'model.layers.26.mlp.up_proj.weight', 'model.layers.15.mlp.gate_proj.weight', 'model.layers.7.mlp.gate_proj.weight', 'model.layers.1.mlp.gate_proj.weight', 'model.layers.11.mlp.up_proj.weight', 'model.layers.29.mlp.down_proj.weight', 'model.layers.2.mlp.down_proj.weight', 'model.layers.2.mlp.up_proj.weight', 'model.layers.25.mlp.up_proj.weight', 'model.layers.14.mlp.up_proj.weight', 'model.layers.20.mlp.down_proj.weight', 'model.layers.18.mlp.down_proj.weight', 'model.layers.3.mlp.up_proj.weight', 'model.layers.31.mlp.up_proj.weight', 'model.layers.17.mlp.up_proj.weight', 'model.layers.11.mlp.gate_proj.weight', 'model.layers.12.mlp.down_proj.weight', 'model.layers.5.mlp.down_proj.weight', 'model.layers.16.mlp.up_proj.weight', 'model.layers.21.mlp.gate_proj.weight', 'model.layers.3.mlp.down_proj.weight'}, {'model.layers.24.input_layernorm.weight', 'model.layers.16.input_layernorm.weight', 'model.layers.19.post_attention_layernorm.weight', 'model.layers.8.post_attention_layernorm.weight', 'model.layers.0.input_layernorm.weight', 'model.layers.10.input_layernorm.weight', 'model.layers.4.post_attention_layernorm.weight', 'model.layers.29.post_attention_layernorm.weight', 'model.layers.6.post_attention_layernorm.weight', 'model.layers.2.post_attention_layernorm.weight', 'model.norm.weight', 'model.layers.26.input_layernorm.weight', 'model.layers.27.input_layernorm.weight', 'model.layers.28.post_attention_layernorm.weight', 'model.layers.27.post_attention_layernorm.weight', 'model.layers.9.post_attention_layernorm.weight', 'model.layers.1.post_attention_layernorm.weight', 'model.layers.11.input_layernorm.weight', 'model.layers.16.post_attention_layernorm.weight', 'model.layers.14.post_attention_layernorm.weight', 'model.layers.13.post_attention_layernorm.weight', 'model.layers.21.input_layernorm.weight', 'model.layers.0.post_attention_layernorm.weight', 'model.layers.23.input_layernorm.weight', 'model.layers.5.input_layernorm.weight', 'model.layers.5.post_attention_layernorm.weight', 'model.layers.20.input_layernorm.weight', 'model.layers.31.input_layernorm.weight', 'model.layers.22.input_layernorm.weight', 'model.layers.3.post_attention_layernorm.weight', 'model.layers.18.post_attention_layernorm.weight', 'model.layers.31.post_attention_layernorm.weight', 'model.layers.26.post_attention_layernorm.weight', 'model.layers.15.post_attention_layernorm.weight', 'model.layers.25.input_layernorm.weight', 'model.layers.22.post_attention_layernorm.weight', 'model.layers.30.post_attention_layernorm.weight', 'model.layers.1.input_layernorm.weight', 'model.layers.3.input_layernorm.weight', 'model.layers.12.input_layernorm.weight', 'model.layers.14.input_layernorm.weight', 'model.layers.19.input_layernorm.weight', 'model.layers.6.input_layernorm.weight', 'model.layers.30.input_layernorm.weight', 'model.layers.8.input_layernorm.weight', 'model.layers.7.input_layernorm.weight', 'model.layers.17.post_attention_layernorm.weight', 'model.layers.2.input_layernorm.weight', 'model.layers.11.post_attention_layernorm.weight', 'model.layers.20.post_attention_layernorm.weight', 'model.layers.24.post_attention_layernorm.weight', 'model.layers.10.post_attention_layernorm.weight', 'model.layers.12.post_attention_layernorm.weight', 'model.layers.13.input_layernorm.weight', 'model.layers.23.post_attention_layernorm.weight', 'model.layers.9.input_layernorm.weight', 'model.layers.28.input_layernorm.weight', 'model.layers.4.input_layernorm.weight', 'model.layers.17.input_layernorm.weight', 'model.layers.7.post_attention_layernorm.weight', 'model.layers.25.post_attention_layernorm.weight', 'model.layers.18.input_layernorm.weight', 'model.layers.15.input_layernorm.weight', 'model.layers.29.input_layernorm.weight', 'model.layers.21.post_attention_layernorm.weight'}] that are mismatching the transformers base configuration. Try saving using `safe_serialization=False` or remove this tensor sharing.

In [None]:
# Zero-shot Inference for Task 1
print(zero_shot_inference("facebook/bart-large-mnli", test_task1["text"].to_list(), test_task1["label"], {0: "left", 1: "right"}))

Device set to use cuda


              precision    recall  f1-score   support

           0       0.42      1.00      0.59       675
           1       0.00      0.00      0.00       939

    accuracy                           0.42      1614
   macro avg       0.21      0.50      0.29      1614
weighted avg       0.17      0.42      0.25      1614



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [9]:
# Task 2: Power Prediction
file_path2 = "power-tr-train.tsv"
data2 = load_dataset(file_path2)
data2.reset_index(drop=True, inplace=True)
y = data2["label"]
print(y)

train_task2, test_task2 = stratified_split(data2, "label")
fine_tuned_model_task2, tokenizer_task2 = fine_tune_model(
  "xlm-roberta-base", train_task2["text"].to_list(), train_task2["label"].to_list(), num_labels=2
)


0        0
1        0
2        0
3        0
4        1
        ..
17379    0
17380    1
17381    0
17382    1
17383    1
Name: label, Length: 17384, dtype: int64


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/615 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.10M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.12G [00:00<?, ?B/s]

Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Step,Training Loss
500,0.6704
1000,0.5685
1500,0.4635
2000,0.4272
2500,0.3593


In [None]:
# Evaluate Fine-tuned Model for Task 2
modelPath = "/content/drive/MyDrive/fine_tuned_modelTask2"

model = AutoModelForSequenceClassification.from_pretrained(modelPath, local_files_only=True)

trainer = Trainer(model=model)
trainer.model = model.cuda()
# Evaluate Fine-tuned Model for Task 1
# Convert the 'text_en' column to a list of strings
input_texts = test_task2["text_en"].tolist()
tokenizer = AutoTokenizer.from_pretrained(modelPath, local_files_only=True)
# Tokenize the input texts
input_encodings = tokenizer(
    test_task2["text_en"].tolist(),  # Ensure this column exists and is clean
    truncation=True,
    padding=True,
    return_tensors="pt"
)

# Move the input encodings to the same device as the model
input_encodings = {key: tensor.to("cuda") for key, tensor in input_encodings.items()}

# Perform inference
with torch.no_grad():  # Disable gradient calculation during inference
    outputs = model(**input_encodings)
    logits = outputs.logits

# Get the predicted labels
predicted_labels = logits.argmax(axis=1).cpu().tolist()  # Move predictions back to CPU

# Print the classification report
from sklearn.metrics import classification_report
print(classification_report(test_task1["label"].tolist(), predicted_labels))


In [None]:
# Zero-shot Inference for Task 2
print(zero_shot_inference("facebook/bart-large-mnli", test_task2["text"], test_task2["label_task2"], {0: "coalition", 1: "opposition"}))