In [1]:
!pip install sentence_transformers



In [2]:
import pandas as pd
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
from sentence_transformers import SentenceTransformer, util

In [3]:
# Load the tab-separated file into a DataFrame; the cells below read its 'Sentence' and 'Label' columns.
data = pd.read_csv('train_en.txt', sep='\t')

In [4]:
# Name of the checkpoint handed to the zero-shot classification pipeline further down.
model_name = 't5-base'

In [5]:
# Plain Python list of the raw sentence texts.
sentences = data['Sentence'].values.tolist()
# Readable class names: a raw label equal to 1 becomes 'toxic', anything else 'non-toxic'.
labels = [
    'non-toxic' if raw_label != 1 else 'toxic'
    for raw_label in data['Label'].values.tolist()
]

In [6]:
# Peek at both ends of the label list; the recorded output shows 'toxic' for the first three rows and 'non-toxic' for the last three.
labels[:3], labels[-3:]

(['toxic', 'toxic', 'toxic'], ['non-toxic', 'non-toxic', 'non-toxic'])

# Задача 1: zero-shot

In [7]:
# Build a zero-shot classification pipeline for the checkpoint in `model_name`, placed on the GPU.
# NOTE(review): the load-time messages recorded under this cell report that the classification
# head weights are newly initialized and that no 'entailment' label id could be determined, so
# the scores of this checkpoint may not be meaningful — consider an NLI-fine-tuned checkpoint.
# NOTE(review): a later recorded warning says no maximum length is provided for truncation,
# so `max_length=20` may not take effect here — confirm.
zero_shot_classifier = pipeline("zero-shot-classification", model=model_name, device='cuda', max_length=20)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Some weights of T5ForSequenceClassification were not initialized from the model checkpoint at t5-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Failed to determine 'entailment' label id from the label2id mapping in the model config. Setting to -1. Define a descriptive label2id mapping in the model config to ensure correct outputs.


In [8]:
# Candidate labels passed to the zero-shot classifier; they are the same strings used for the mapped `labels`.
options = ['toxic', 'non-toxic']

In [9]:
import random

In [10]:
# Pair every sentence with its label so the two stay aligned while shuffling.
data_pairs = list(zip(sentences, labels))
# Shuffle with a dedicated fixed-seed generator: the order (and therefore the samples
# previewed in the next cell) is identical on every run, and the global `random`
# state is left untouched.
random.Random(42).shuffle(data_pairs)

In [11]:
# Preview the zero-shot prediction for the first three shuffled sentences.
for sentence, label in data_pairs[:3]:
  print(sentence)
  result = zero_shot_classifier(sentence, options)
  # 'scores' and 'labels' are parallel lists: take the label at the position of the highest score.
  classification_index = result['scores'].index(max(result['scores']))
  classification = result['labels'][classification_index]
  print(f'Classification: {classification}')


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


You know what part, almost everything you said was made up, now stop talking to me. Douche
Classifcation: non-toxic
The greater good, kinda the best bet these days
Classifcation: non-toxic
Useless Congress getting nothing done again.
Classifcation: non-toxic


In [12]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [13]:
# Accumulator for predicted labels.
# NOTE(review): the same name is re-created as an empty list in a later cell before anything is appended to it.
predictions = []

# Класификација само на тренинг множество бидејќи секако немаме тренирање на модел

In [14]:
# Split the shuffled (sentence, label) pairs back into two index-aligned lists.
sentences_random = [sentence for sentence, _ in data_pairs]
labels_random = [label for _, label in data_pairs]

In [23]:
# Classify every shuffled sentence in a single pipeline call; each result is later indexed
# through its parallel 'labels' and 'scores' lists.
results = zero_shot_classifier(sentences_random, candidate_labels=options)

In [24]:
# Start from an empty list; the predicted label of each sentence is collected in the next cell.
predictions = []

In [25]:
# For each pipeline result keep the label whose score is highest ('labels' and 'scores'
# are parallel lists). The list is rebuilt from scratch instead of appended to, so
# re-running this cell cannot duplicate predictions.
predictions = [
    result['labels'][result['scores'].index(max(result['scores']))]
    for result in results
]

In [26]:
# Report the zero-shot metrics; precision, recall and F1 treat 'toxic' as the positive class.
binary_toxic = {'average': "binary", 'pos_label': "toxic"}
zero_shot_report = (
    ('accuracy', accuracy_score, {}),
    ('precision', precision_score, binary_toxic),
    ('recall', recall_score, binary_toxic),
    ('f1', f1_score, binary_toxic),
)
for metric_name, scorer, extra_kwargs in zero_shot_report:
    # Each metric is computed right before it is printed, one line per metric.
    print(f'Zero-shot {metric_name}: {scorer(labels_random, predictions, **extra_kwargs)}')

Zero-shot accuracy: 0.4864267676767677
Zero-shot precision: 0.4902846814279259
Zero-shot recall: 0.6849747474747475
Zero-shot f1: 0.5715038188043192


# Задача 2: few-shot

In [15]:
# Build the classifier used for the few-shot prompts from the shared `model_name`
# setting instead of a second hard-coded checkpoint string, so both tasks always
# use the same model.
# NOTE(review): the messages recorded under this cell report a newly initialized
# classification head and an undetermined 'entailment' label id, so the scores of
# this checkpoint may not be meaningful — consider an NLI-fine-tuned checkpoint.
zero_shot_classifier = pipeline("zero-shot-classification", model=model_name, device='cuda', max_length=20)

Some weights of T5ForSequenceClassification were not initialized from the model checkpoint at t5-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Failed to determine 'entailment' label id from the label2id mapping in the model config. Setting to -1. Define a descriptive label2id mapping in the model config to ensure correct outputs.


In [16]:
# Redefines the same two candidate labels that the zero-shot section uses.
options = ["toxic", "non-toxic"]

In [17]:
def create_few_shot_prompt(num_examples, options, sentences, labels):
    """Build the few-shot preamble that is placed in front of a sentence to classify.

    Args:
        num_examples: Maximum number of labelled examples to include. Zero or a
            negative value yields a prompt without examples.
        options: Candidate label names; they are joined with " or " in the
            instruction line (e.g. "toxic or non-toxic").
        sentences: Example sentences, index-aligned with ``labels``.
        labels: Label of each example sentence.

    Returns:
        A string with one "Example i: Sentence: ... Label: ..." line per
        example, followed by a blank line and the classification instruction,
        ending in a newline.

    If fewer than ``num_examples`` (sentence, label) pairs are available, the
    prompt simply contains all available pairs instead of raising IndexError.
    """
    # zip() stops at the shortest input, so the range caps the number of
    # examples and short example pools are handled without index errors.
    example_lines = [
        f"Example {position}: Sentence: {sentence} Label: {label}\n"
        for position, sentence, label in zip(range(1, num_examples + 1), sentences, labels)
    ]
    # Name the classes from `options` rather than hard-coding them.
    instruction = f"\nClassify the following sentence into {' or '.join(options)}:\n"
    return "".join(example_lines) + instruction

In [21]:
def few_shot_classification(sentences, num_examples, example_sentences=None,
                            example_labels=None, batch_size=8):
    """Classify sentences with a few-shot prompt prepended to each of them.

    Args:
        sentences: Sentences to classify.
        num_examples: Number of labelled examples placed in the prompt.
        example_sentences: Pool the examples are taken from. Defaults to
            ``sentences`` itself, in which case the example sentences are also
            among the sentences being classified.
        example_labels: Labels aligned with ``example_sentences``. Defaults to
            the notebook-level ``labels`` list.
        batch_size: Number of prompts sent to the classifier per call.

    Returns:
        A list with the highest-scoring candidate label for every input
        sentence, in input order.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.

    Relies on the notebook-level ``options``, ``labels``,
    ``zero_shot_classifier`` and ``create_few_shot_prompt``.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")
    if len(sentences) == 0:
        # Nothing to classify, so no prompt needs to be built either.
        return []

    if example_sentences is None:
        example_sentences = sentences
    if example_labels is None:
        example_labels = labels

    # The preamble is the same for every sentence, so it is built once
    # instead of once per classified sentence.
    prompt_prefix = create_few_shot_prompt(num_examples, options, example_sentences, example_labels)

    results = []
    for start in range(0, len(sentences), batch_size):
        batch_sentences = sentences[start:start + batch_size]
        prompts = [f"{prompt_prefix}Sentence to classify: {sentence}" for sentence in batch_sentences]

        # NOTE(review): the recorded run warns that no maximum length is provided
        # for truncation, so this max_length may not take effect — confirm.
        batch_results = zero_shot_classifier(prompts, candidate_labels=options, max_length=20)

        for result in batch_results:
            # 'scores' and 'labels' are parallel lists; keep the best-scoring label.
            scores = result['scores']
            results.append(result['labels'][scores.index(max(scores))])

    return results

In [19]:
def print_metrics(labels_true, labels_pred):
    """Print accuracy, precision, recall and F1 for the given predictions.

    Precision, recall and F1 are computed as binary metrics with 'toxic' as
    the positive class. Every value is printed on its own line with four
    decimal places; nothing is returned.
    """
    toxic_binary = dict(pos_label='toxic', average='binary')
    metric_table = (
        ("Accuracy", accuracy_score, {}),
        ("Precision", precision_score, toxic_binary),
        ("Recall", recall_score, toxic_binary),
        ("F1 Score", f1_score, toxic_binary),
    )
    for title, scorer, extra_kwargs in metric_table:
        # Compute each metric right before printing it, one line per metric.
        value = scorer(labels_true, labels_pred, **extra_kwargs)
        print(f"{title}: {value:.4f}")

In [22]:
# Evaluate the few-shot classifier for several prompt sizes, printing the metrics
# against the full label list after each run.
for num_examples in (1, 2, 5, 10):
    header = f"Few-shot classification with {num_examples} examples:"
    print(header)

    predictions = few_shot_classification(sentences, num_examples)

    metrics_title = f"\nMetrics for {num_examples} examples:"
    print(metrics_title)
    print_metrics(labels, predictions)

    # Blank line plus a 50-character rule between runs.
    print("\n" + "-"*50)

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Few-shot classification with 1 examples:


You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset



Metrics for 1 examples:
Accuracy: 0.5069
Precision: 0.6019
Recall: 0.0410
F1 Score: 0.0768

--------------------------------------------------
Few-shot classification with 2 examples:

Metrics for 2 examples:
Accuracy: 0.5666
Precision: 0.6435
Recall: 0.2986
F1 Score: 0.4079

--------------------------------------------------
Few-shot classification with 5 examples:

Metrics for 5 examples:
Accuracy: 0.5000
Precision: 0.5000
Recall: 1.0000
F1 Score: 0.6667

--------------------------------------------------
Few-shot classification with 10 examples:

Metrics for 10 examples:
Accuracy: 0.5000
Precision: 0.5000
Recall: 1.0000
F1 Score: 0.6667

--------------------------------------------------
