# Humor detection (binary classification) на английском языке, используя BERT

Распознавание юмора — довольно сложная задача для LLM. В данной работе предпринята попытка улучшить качество распознавания юмора в режимах zero- и few-shot, в том числе с помощью RAG-подхода.

Ссылка на корпус: https://huggingface.co/datasets/CreativeLang/ColBERT_Humor_Detection

## Zero-shot

In [None]:
import torch
import numpy as np
import pandas as pd
from transformers import AutoTokenizer, AutoModelWithLMHead

In [None]:
!pip install datasets

In [None]:
from datasets import load_dataset

# Load the ColBERT humor-detection dataset by its Hugging Face Hub identifier.
ds = load_dataset("CreativeLang/ColBERT_Humor_Detection")

In [None]:
# Build the working frame column-by-column: 'Text' holds the raw string and
# 'Binary' the humor label. Constructing it from a dict avoids creating a
# 2 x N object-dtype frame and transposing it, which is slow for a large
# split and leaves both columns typed as `object`.
df = pd.DataFrame({'Text': ds['train']['text'], 'Binary': ds['train']['humor']})

In [None]:
# Notebook display of the frame built above (columns 'Text' and 'Binary').
df

In [None]:
# `AutoModelWithLMHead` is deprecated in transformers; for a masked-LM
# checkpoint such as RoBERTa the supported replacement is
# `AutoModelForMaskedLM`. Imported locally so this cell stays self-contained.
from transformers import AutoModelForMaskedLM

# Checkpoint used for every cloze-style (fill-the-mask) prompt below.
MODEL_NAME = "roberta-large"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForMaskedLM.from_pretrained(MODEL_NAME).to("cuda")

In [None]:
# Zero-shot cloze templates. `{0}` is filled with the text under test and
# `<mask>` is swapped for the tokenizer's mask token by format_with_pattern.
# Patterns 1-3 ask the question directly.
# NOTE(review): pattern_1 has unbalanced quotes ("'Not.' {0}'.") — left as is
# because changing a prompt changes the measured results.
pattern_1 = "Analyze the given text and determine whether it is 'Humorous' or 'Not.' {0}'. Is it humorous? Your response: <mask>."
pattern_2 = "Here is a joke: {0}. Is it funny? Your response: <mask>."
pattern_3 = 'Here is a text: {0}. Say: is it good or bad? Your opinion: <mask>.'

# Pattern 4 masks the word right before "funny".
# NOTE(review): "What's you opinion" looks like a typo for "your" — confirm.
pattern_4 = "I heard a joke: {0}. What's you opinion? Your response: it's <mask> funny."

# Patterns 5-7 embed the question in a small dialogue/story.
pattern_5 = "I'm writing a comedy show. The joke: {0}. Do you find this joke funny? My friend says it's <mask>."
pattern_6 = '- Adam, here is a joke: {0}. Is it funny? - Bill, it is <mask>.'
pattern_7 = "- Adam, here is a joke: {0}. Is it funny? - It's really <mask>."

In [None]:
def format_with_pattern(tokenizer, pattern, text):
    """Fill *pattern* with *text* and tokenize the resulting prompt.

    The ``{0}`` placeholder of *pattern* receives *text*; every literal
    ``<mask>`` is then replaced by ``tokenizer.mask_token``.

    Returns:
        A ``(input_ids, mask_position)`` pair, where ``mask_position`` is the
        index of the first mask token id inside ``input_ids``.

    Raises:
        ValueError: if the tokenized prompt contains no mask token id.
    """
    prompt = pattern.format(text)
    prompt = prompt.replace("<mask>", tokenizer.mask_token)
    token_ids = tokenizer(prompt)["input_ids"]
    return token_ids, token_ids.index(tokenizer.mask_token_id)

In [None]:
def score_with_model(tokenization, index, device=None):
    """Return log-probabilities over the vocabulary at one token position.

    Args:
        tokenization: list of input ids for a single prompt.
        index: position inside ``tokenization`` whose distribution is wanted
            (the mask position in this notebook).
        device: where to place the input tensor. ``None`` (default) follows
            the device the global ``model`` currently lives on, so the
            function no longer breaks when the model is not on "cuda".

    Returns:
        1-D tensor of log-softmax scores, one entry per vocabulary item.
    """
    if device is None:
        device = next(model.parameters()).device
    batch = torch.tensor([tokenization], dtype=torch.long, device=device)
    with torch.no_grad():  # inference only, no autograd bookkeeping
        logits = model(batch).logits[0]
    return torch.log_softmax(logits[index], dim=-1)

In [None]:
!pip install spacy

In [None]:
import spacy
# spaCy pipeline whose `.vector` attribute is used by cosine_similarity below.
# NOTE(review): per the spaCy docs the `sm` packages ship without static word
# vectors, which may explain unstable similarity scores — consider
# `en_core_web_md`/`lg`; confirm before switching, since results will change.
nlp = spacy.load('en_core_web_sm')

Сравнение косинусной близости предсказанного слова с эталонными («стандартными») словами: по нему решаем, считать ли ответ модели положительным или нет.

In [None]:
def cosine_similarity(input_phrase, add_info=None):
    """Decide whether *input_phrase* is closer to the positive or negative words.

    The phrase and every reference word are embedded with the global spaCy
    pipeline ``nlp``; the mean cosine similarity to each reference list is
    compared.

    Args:
        input_phrase: word or phrase to classify.
        add_info: optional ``[positive_words, negative_words]`` pair that
            overrides the built-in reference lists.

    Returns:
        1 if the mean similarity to the positive list is strictly greater
        than the mean similarity to the negative list, otherwise 0.
    """
    if add_info is None:
        standard = ['funny', 'good', 'hilarious', 'amusing', 'beautiful', 'amazing']
        not_standard = ['stupid', 'sad', 'bad', 'awful', 'dreary', 'depressing', 'tragic', 'sad', 'disturbing', 'frustrating']
    else:
        standard, not_standard = add_info[0], add_info[1]

    query_vector = nlp(input_phrase).vector
    query_norm = np.linalg.norm(query_vector)

    def _mean_similarity(phrases):
        # Mean cosine similarity between the query and each reference phrase.
        scores = []
        for phrase in phrases:
            reference = nlp(phrase).vector
            denominator = query_norm * np.linalg.norm(reference)
            # A zero-norm vector (e.g. empty input) would yield NaN and a
            # RuntimeWarning; treat it as "no similarity" instead.
            scores.append(float(np.dot(query_vector, reference) / denominator) if denominator else 0.0)
        return np.mean(scores)

    return 1 if _mean_similarity(standard) > _mean_similarity(not_standard) else 0

Протестируем способность модели на первых 200 примерах.

Для начала пройдемся по базовым промптам:



```
pattern_1 = "Analyze the given text and determine whether it is 'Humorous' or 'Not.' {0}'. Is it humorous? Your response: <mask>."
pattern_2 = "Here is a joke: {0}. Is it funny? Your response: <mask>."
pattern_3 = 'Here is a text: {0}. Say: is it good or bad? Your opinion: <mask>.'
```



In [None]:
corpus, labels = list(df['Text'][:200]), [int(x) for x in df['Binary'][:200]]


def basic_prompt_prediction(pattern, corpus, add_info=None):
    """Predict humor labels by voting over the top-5 mask fillers.

    Each of the five most probable fillers adds its probability to the
    positive or the negative side: a few hard-coded yes/no words are mapped
    directly, any other word is routed through ``cosine_similarity``.

    Args:
        pattern: cloze template containing ``{0}`` and ``<mask>``.
        corpus: iterable of texts to classify.
        add_info: optional ``[positive_words, negative_words]`` forwarded to
            ``cosine_similarity``.

    Returns:
        List with one 0/1 prediction per text in *corpus*.
    """
    negative_words = {'no', 'not', 'bad'}
    positive_words = {'yes', 'yeah', 'good'}

    preds = []
    # Iterate over `corpus` alone: the previous zip with the global `labels`
    # silently truncated the output whenever `labels` was shorter.
    for text in corpus:
        tokenization, mask_index = format_with_pattern(tokenizer, pattern, text)
        log_probs = score_with_model(tokenization, mask_index)
        top_log_probs, top_indexes = torch.topk(log_probs, k=5, dim=-1)
        pos_prob, neg_prob = 0, 0
        for log_prob, index in zip(top_log_probs, top_indexes):
            probability = np.exp(log_prob.item())
            # Decoded tokens may carry a leading space; strip all spaces once.
            word = tokenizer.decode([index]).replace(' ', '')
            lowered = word.lower()
            if lowered in negative_words:
                is_positive = False
            elif lowered in positive_words:
                is_positive = True
            else:
                is_positive = bool(cosine_similarity(word, add_info))
            if is_positive:
                pos_prob += probability
            else:
                neg_prob += probability
        preds.append(int(pos_prob > neg_prob))
    return preds

In [None]:
# Zero-shot predictions for pattern_1 on the current `corpus`.
preds_1 = basic_prompt_prediction(pattern_1, corpus)

In [None]:
# Zero-shot predictions for pattern_2 on the current `corpus`.
preds_2 = basic_prompt_prediction(pattern_2, corpus)

In [None]:
# Zero-shot predictions for pattern_3 on the current `corpus`.
preds_3 = basic_prompt_prediction(pattern_3, corpus)

In [None]:
from sklearn.metrics import classification_report
# Display names for class 0 and class 1, reused by every report below.
target_names = ['False', 'True']
# Per-class report for pattern_1; zero_division=0 is passed explicitly.
print(classification_report(labels, preds_1, target_names=target_names, zero_division=0))

In [None]:
# Per-class report for pattern_2.
print(classification_report(labels, preds_2, target_names=target_names, zero_division=0))

In [None]:
# Per-class report for pattern_3.
print(classification_report(labels, preds_3, target_names=target_names, zero_division=0))

Как я и предполагала, модель плохо воспринимает юмор. Она либо всё считает несмешным (как в первом промпте), либо просто некачественно работает.

Есть идея попробовать, во-первых, маскировать слово перед funny: там модель предскажет либо *not*, либо какой-нибудь усилитель вроде *really*.



```
pattern_4 = "I heard a joke: {0}. What's you opinion? Your response: it's <mask> funny."
```



In [None]:
corpus, labels = list(df['Text'][:200]), [int(x) for x in df['Binary'][0:200]]
preds_4 = basic_prompt_prediction(pattern_4, corpus, add_info=[['really', 'rather', 'still', 'even'], ['not', 'no']])

In [None]:
# Per-class report for pattern_4.
print(classification_report(labels, preds_4, target_names=target_names, zero_division=0))

Еще одна идея заключается в более "живых" промптах. То есть использовать креативную сторону модели, как бы давая ей диалог, где нужно угадать маскированное слово.

```
pattern_5 = "I'm writing a comedy show. The joke: {0}. Do you find this joke funny? My friend says it's <mask>."
pattern_6 = '- Adam, here is a joke: {0}. Is it funny? - Bill, it is <mask>.'
pattern_7 = "- Adam, here is a joke: {0}. Is it funny? - It's really <mask>."
```



In [None]:
corpus, labels = list(df['Text'][:200]), [int(x) for x in df['Binary'][:200]]


def advanced_prompt_prediction(pattern, corpus, add_info=None):
    """Predict humor labels from the top-5 mask fillers using similarity only.

    Unlike ``basic_prompt_prediction`` there is no hard-coded yes/no list:
    every candidate word is classified by ``cosine_similarity`` and its
    probability is added to the positive or the negative side.

    Args:
        pattern: cloze template containing ``{0}`` and ``<mask>``.
        corpus: iterable of texts to classify.
        add_info: optional ``[positive_words, negative_words]`` forwarded to
            ``cosine_similarity``.

    Returns:
        List with one 0/1 prediction per text in *corpus*.
    """
    preds = []
    # Iterate over `corpus` alone: the previous zip with the global `labels`
    # silently truncated the output whenever `labels` was shorter.
    for text in corpus:
        tokenization, mask_index = format_with_pattern(tokenizer, pattern, text)
        log_probs = score_with_model(tokenization, mask_index)
        top_log_probs, top_indexes = torch.topk(log_probs, k=5, dim=-1)
        pos_prob, neg_prob = 0, 0
        for log_prob, index in zip(top_log_probs, top_indexes):
            probability = np.exp(log_prob.item())
            # Decoded tokens may carry a leading space; strip all spaces once.
            word = tokenizer.decode([index]).replace(' ', '')
            if cosine_similarity(word, add_info):
                pos_prob += probability
            else:
                neg_prob += probability
        preds.append(int(pos_prob > neg_prob))
    return preds

In [None]:
# Similarity-voting predictions for pattern_5.
preds_5 = advanced_prompt_prediction(pattern_5, corpus)

In [None]:
# Per-class report for pattern_5 (similarity voting).
print(classification_report(labels, preds_5, target_names=target_names, zero_division=0))

In [None]:
# Similarity-voting predictions for pattern_6.
preds_6 = advanced_prompt_prediction(pattern_6, corpus)

In [None]:
# Per-class report for pattern_6 (similarity voting).
print(classification_report(labels, preds_6, target_names=target_names, zero_division=0))

In [None]:
# Similarity-voting predictions for pattern_7. The original call passed
# pattern_6 here, so "pattern 7" results were a duplicate of pattern 6.
preds_7 = advanced_prompt_prediction(pattern_7, corpus)

In [None]:
# Per-class report for the `preds_7` predictions.
print(classification_report(labels, preds_7, target_names=target_names, zero_division=0))

Качество очень случайное: в зависимости от промпта у модели bias то в сторону отсутствия юмора, то наоборот. Но здесь стоит ещё проверить более эвристическим методом: возможно, проблема в векторах spaCy – косинусная близость зачастую высчитывается странно. Попробуем смотреть только на самое вероятное слово, так как промпты допускают грамматичность как *not*, так и синонимов *funny*.



In [None]:
def new_advanced_prompt_prediction(pattern, corpus):
    """Predict humor labels from the single most probable mask filler.

    ``'not'`` maps to 0 and a short list of "funny" synonyms maps to 1. Any
    other word is printed for inspection together with its probability and
    classified by ``cosine_similarity`` with the default reference lists.

    Args:
        pattern: cloze template containing ``{0}`` and ``<mask>``.
        corpus: iterable of texts to classify.

    Returns:
        List with one 0/1 prediction per text in *corpus*.
    """
    funny_words = ('funny', 'hilarious', 'amusing', 'humorous', 'comical')

    preds = []
    # Iterate over `corpus` alone: the previous zip with the global `labels`
    # silently truncated the output whenever `labels` was shorter.
    for text in corpus:
        tokenization, mask_index = format_with_pattern(tokenizer, pattern, text)
        log_probs = score_with_model(tokenization, mask_index)
        top_log_probs, top_indexes = torch.topk(log_probs, k=1, dim=-1)
        probability = np.exp(top_log_probs[0].item())
        # Decoded tokens may carry a leading space; strip all spaces once.
        word = tokenizer.decode([top_indexes[0]]).replace(' ', '')
        if word == 'not':
            pred = 0
        elif word in funny_words:
            pred = 1
        else:
            # Unknown filler: log it so the heuristic can be inspected by eye.
            print(text)
            print(word, probability)
            print()
            pred = cosine_similarity(word)
            print(pred)
        preds.append(pred)
    return preds

In [None]:
# Re-derive the same first-200 sample, then score pattern_5 with the
# top-1 heuristic.
corpus, labels = list(df['Text'][:200]), [int(x) for x in df['Binary'][:200]]
preds_5_new = new_advanced_prompt_prediction(pattern_5, corpus)

In [None]:
# Per-class report for pattern_5 (top-1 heuristic).
print(classification_report(labels, preds_5_new, target_names=target_names, zero_division=0))

In [None]:
# Top-1 heuristic predictions for pattern_6.
preds_6_new = new_advanced_prompt_prediction(pattern_6, corpus)

In [None]:
# Per-class report for pattern_6 (top-1 heuristic).
print(classification_report(labels, preds_6_new, target_names=target_names, zero_division=0))

In [None]:
# Top-1 heuristic predictions for pattern_7.
preds_7_new = new_advanced_prompt_prediction(pattern_7, corpus)

In [None]:
# Per-class report for pattern_7 (top-1 heuristic).
print(classification_report(labels, preds_7_new, target_names=target_names, zero_division=0))

В итоге получается, что качество все равно скорее случайно, лучше всего работает 5 паттерн

`"I'm writing a comedy show. The joke: {0}. Do you find this joke funny? My friend says it's <mask>."`

In [None]:
from sklearn.metrics import f1_score
# Micro-averaged F1 for each pattern on the 200-example sample. Patterns 1-4
# use the top-5 voting predictions, patterns 5-7 the top-1 heuristic ones.
# NOTE(review): micro-averaged F1 over both classes of a single-label task
# coincides with accuracy — confirm this is the intended metric.
y_true = labels
f_1, f_2, f_3, f_4, f_5, f_6, f_7 = (
    f1_score(labels, pattern_preds, average='micro')
    for pattern_preds in (
        preds_1, preds_2, preds_3, preds_4,
        preds_5_new, preds_6_new, preds_7_new,
    )
)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Bar chart of the seven per-pattern scores computed above.
fig, ax = plt.subplots()
x = np.array(["Pattern 1", "Pattern 2", "Pattern 3", "Pattern 4", "Pattern 5", "Pattern 6", "Pattern 7"])
y = np.array([f_1, f_2, f_3, f_4, f_5, f_6, f_7])

ax.bar(x, y)
ax.set_title('Результаты на выборке в 200 примеров')
plt.show()

In [None]:
# Held-out mini-sample: every row from position 199980 onwards.
# `labels` is rebound here, so later cells see the held-out labels.
test_corpus = list(df['Text'][199980:])
labels = [int(x) for x in df['Binary'][199980:]]

# One independent prediction list per pattern.
(preds_1_test, preds_2_test, preds_3_test, preds_4_test,
 preds_5_test, preds_6_test, preds_7_test) = ([] for _ in range(7))

# (target list, predictor, pattern) in evaluation order: patterns 1-4 use
# top-5 voting, patterns 5-7 the top-1 heuristic.
_test_runs = (
    (preds_1_test, basic_prompt_prediction, pattern_1),
    (preds_2_test, basic_prompt_prediction, pattern_2),
    (preds_3_test, basic_prompt_prediction, pattern_3),
    (preds_4_test, basic_prompt_prediction, pattern_4),
    (preds_5_test, new_advanced_prompt_prediction, pattern_5),
    (preds_6_test, new_advanced_prompt_prediction, pattern_6),
    (preds_7_test, new_advanced_prompt_prediction, pattern_7),
)

# Texts are scored one at a time, each wrapped in a single-element list.
for text in test_corpus:
    for bucket, predict, pattern in _test_runs:
        bucket.extend(predict(pattern, [text]))

In [None]:
# Micro-averaged F1 per pattern on the held-out mini-sample; this overwrites
# the f_1..f_7 values computed earlier for the 200-example sample.
y_true = labels
f_1, f_2, f_3, f_4, f_5, f_6, f_7 = (
    f1_score(labels, pattern_preds, average='micro')
    for pattern_preds in (
        preds_1_test, preds_2_test, preds_3_test, preds_4_test,
        preds_5_test, preds_6_test, preds_7_test,
    )
)

In [None]:
# One row per held-out text: the text, its true label, and the prediction of
# each pattern (columns 1..7). Built row-wise and transposed.
results = pd.DataFrame(data=[test_corpus, labels, preds_1_test, preds_2_test, preds_3_test,
                             preds_4_test, preds_5_test, preds_6_test, preds_7_test],
                       index=['Text', 'True', 1, 2, 3, 4, 5, 6, 7]).T

In [None]:
# Notebook display of the per-text comparison table.
results

In [None]:
# Bar chart of the per-pattern scores on the held-out mini-sample.
fig, ax = plt.subplots()

# Names kept from the original cell; they hold score identifiers and values.
fruits = ['f_1', 'f_2', 'f_3', 'f_4', 'f_5', 'f_6', 'f_7']
counts = [f_1, f_2, f_3, f_4, f_5, f_6, f_7]
# Legend labels now name the patterns (they used to be colour names), and
# the unused `bar_colors` list with the invalid colour 'marn' is gone.
bar_labels = ['Pattern 1', 'Pattern 2', 'Pattern 3', 'Pattern 4', 'Pattern 5', 'Pattern 6', 'Pattern 7']
# Exactly one colour per bar (the original list had a surplus eighth entry).
c = ['#1b9e77', '#a9f971', '#fdaa48', '#6890F0', '#A890F0', '#fdaa48', '#6890F0']

ax.bar(fruits, counts, label=bar_labels, color=c)
ax.set_title('F-мера')

plt.show()

## Few-shot

In [None]:
# Hand-written few-shot template: three fixed labelled examples followed by
# the text under test ({0}) and a <mask> slot for the verdict.
test_prompt = """
I'm writing a comedy show. Below are some jokes I've already tested:

The joke: "Why don't skeletons fight each other? They don’t have the guts" It's funny.
The joke: "I told my wife she should embrace her mistakes. She hugged me" It's funny.
The joke: "Abortion goes front and center in alabama senate race" It's not funny.

New joke: "{0}"
Is it funny? My friend says it's <mask>.

"""

In [None]:
# Try the fixed few-shot template on every held-out text and print the true
# label next to the predicted one.
for text, label in zip(results['Text'], results['True']):
    pred = new_advanced_prompt_prediction(test_prompt, [text])
    print(f'Text: {text}\nLabel: {label}\nPredicted label: {pred[0]}\n')

In [None]:
# Few-shot template with pluggable examples: {0} and {1} are funny examples,
# {2} is a non-funny example, {3} is the text under test.
# The second slot used to carry a leftover literal ". She hugged me" from the
# hand-written prompt, which would have been glued onto any example.
new_prompt = """
I'm writing a comedy show. Below are some jokes I've already tested:

The joke: "{0}" It's funny.
The joke: "{1}" It's funny.
The joke: "{2}" It's not funny.

New joke: "{3}"
Is it funny? My friend says it's <mask>.

"""

RAG для подбора примеров для промпта

In [None]:
!pip install chromadb

In [None]:
# Notebook display of the full frame before building the retrieval corpus.
df

In [None]:
# Retrieval corpus: rows 201..199979, each text followed by its verdict
# sentence, so a retrieved example already carries its label.
corpus = [
    f"{text}. It's {'funny.' if int(label) else 'not funny.'}"
    for text, label in zip(df['Text'][201:199980], df['Binary'][201:199980])
]

In [None]:
# `AutoModelWithLMHead` is deprecated in transformers; `AutoModelForMaskedLM`
# is the supported class for a masked-LM checkpoint. Imported locally so this
# cell stays self-contained.
from transformers import AutoModelForMaskedLM

# Masked LM kept under its own name: a later cell rebinds the global `model`
# to a sentence encoder.
# NOTE(review): this loads a second copy of MODEL_NAME onto the GPU —
# consider reusing the first instance to save memory.
bert_model = AutoModelForMaskedLM.from_pretrained(MODEL_NAME).to("cuda")

In [None]:
import chromadb
from sentence_transformers import SentenceTransformer

# In-memory Chroma client and the collection that stores the labelled jokes.
client = chromadb.Client()

collection_name = "corpus_collection"
jokes_collection = client.create_collection(name=collection_name)
# NOTE: this rebinds the global `model` to the sentence encoder; the masked
# LM stays reachable as `bert_model`.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Encode and insert in batches: one encode/add call per document is very
# slow for a corpus of this size. Ids keep the original "text-<position>"
# scheme, and the metadata still mirrors the document text.
INDEX_BATCH_SIZE = 1000
for start in range(0, len(corpus), INDEX_BATCH_SIZE):
    batch = corpus[start:start + INDEX_BATCH_SIZE]
    jokes_collection.add(
        ids=[f"text-{i}" for i in range(start, start + len(batch))],
        documents=batch,
        embeddings=model.encode(batch).tolist(),
        metadatas=[{"text": doc} for doc in batch],
    )


In [None]:
def retrieve_similar_texts(query, model, collection, top_k=3):
    """Return the documents of *collection* nearest to *query*.

    Args:
        query: text to look up.
        model: encoder whose ``encode(query)`` result supports ``.tolist()``.
        collection: store exposing ``query(query_embeddings=, n_results=)``.
        top_k: number of neighbours requested.

    Returns:
        The first entry of the ``'documents'`` field of the query result,
        i.e. the documents found for the single query that was sent.
    """
    embedded_query = model.encode(query).tolist()
    response = collection.query(query_embeddings=[embedded_query], n_results=top_k)
    return response['documents'][0]

# Smoke test: fetch the nearest stored examples for one sample headline.
query = "Mcdonald's will officially kick off all-day breakfast on october 6"
retrieved_texts = retrieve_similar_texts(query, model, jokes_collection)
print(retrieved_texts)

In [None]:
# Print each held-out text followed by its retrieved neighbours.
for text, label in zip(results['Text'], results['True']):
    print(text)
    print(retrieve_similar_texts(text, model, jokes_collection))

Так как лучшее качество было на 5 промпте:



```
pattern_5 = "I'm writing a comedy show. The joke: {0}. Do you find this joke funny? My friend says it's <mask>."
```

Я решила попробовать сначала докрутить его.


In [None]:
def create_prompt(text, retrieval):
    """Build the comedy-show few-shot prompt for *text*.

    Args:
        text: the joke to classify; placed on the "New joke" line.
        retrieval: iterable of retrieved example strings, each rendered on
            its own "The joke: '...'" line.

    Returns:
        The prompt with a single ``<mask>`` slot and a trailing newline.
    """
    # Assemble the lines explicitly. The previous indented triple-quoted
    # f-string leaked four leading spaces into the first example line, the
    # question lines and the tail of the prompt, while later example lines
    # started at column 0.
    lines = ["I'm writing a comedy show. Below are some jokes I've already tested:"]
    lines.extend(f"The joke: '{joke}'" for joke in retrieval)
    lines.append("")
    lines.append(f'New joke: "{text}"')
    lines.append("Is it funny? My friend says it's <mask>.")
    return "\n".join(lines) + "\n"

In [None]:
# Print each held-out text and the retrieval-augmented prompt built for it.
for text, label in zip(results['Text'], results['True']):
    print(text)
    prompt = create_prompt(text, retrieve_similar_texts(text, model, jokes_collection))
    print(prompt)

In [None]:
def score_with_model(tokenization, index, device=None):
    """Return log-probabilities over the vocabulary at one token position.

    This redefinition scores with ``bert_model`` rather than the global
    ``model``.

    Args:
        tokenization: list of input ids for a single prompt.
        index: position inside ``tokenization`` whose distribution is wanted.
        device: where to place the input tensor. ``None`` (default) follows
            the device ``bert_model`` currently lives on, so the function no
            longer breaks when the model is not on "cuda".

    Returns:
        1-D tensor of log-softmax scores, one entry per vocabulary item.
    """
    if device is None:
        device = next(bert_model.parameters()).device
    batch = torch.tensor([tokenization], dtype=torch.long, device=device)
    with torch.no_grad():  # inference only, no autograd bookkeeping
        logits = bert_model(batch).logits[0]
    return torch.log_softmax(logits[index], dim=-1)

In [None]:
def few_shot_prompt_prediction(prompt, text=None):
    """Classify one fully built few-shot prompt by its top-1 mask filler.

    ``'not'`` maps to 0 and a short list of "funny" synonyms maps to 1; any
    other word is printed for inspection and classified by
    ``cosine_similarity`` with the default reference lists.

    Args:
        prompt: complete prompt text containing a literal ``<mask>``.
        text: optional original text, shown in the diagnostic output when
            the filler is unknown; the prompt itself is shown when omitted.

    Returns:
        A single-element list holding the 0/1 prediction.

    Raises:
        ValueError: if the tokenized prompt contains no mask token id.
    """
    # The prompt is already filled in, so only the mask is substituted here.
    # The previous version re-ran `.format(...)` on it with the *global*
    # `text`, which breaks on texts containing '{' or '}' and tied the
    # function to a loop variable of the calling cell.
    tokenization = tokenizer(prompt.replace("<mask>", tokenizer.mask_token))["input_ids"]
    mask_index = tokenization.index(tokenizer.mask_token_id)
    log_probs = score_with_model(tokenization, mask_index)
    top_log_probs, top_indexes = torch.topk(log_probs, k=1, dim=-1)
    probability = np.exp(top_log_probs[0].item())
    # Decoded tokens may carry a leading space; strip all spaces once.
    word = tokenizer.decode([top_indexes[0]]).replace(' ', '')
    if word == 'not':
        pred = 0
    elif word in ('funny', 'hilarious', 'amusing', 'humorous', 'comical'):
        pred = 1
    else:
        # Unknown filler: log it so the heuristic can be inspected by eye.
        print(prompt if text is None else text)
        print(word, probability)
        print()
        pred = cosine_similarity(word)
        print(pred)
    return [pred]

In [None]:
# Run the retrieval-augmented comedy-show prompt on every held-out text and
# print the true label next to the prediction.
for text, label in zip(results['Text'], results['True']):
    prompt = create_prompt(text, retrieve_similar_texts(text, model, jokes_collection))
    print(text)
    print(label, few_shot_prompt_prediction(prompt)[0])

Однако тут, кажется, и без анализа, если честно, видно, что промпт очень неудачный, потому что происходит сильный перекос в юмор (вероятно из-за количества слова 'funny' в примерах, данных в подводке). Можно попробовать просить модель распределять либо на юмор, либо на журналистику (все тексты что-то типа новостных).

Попробуем более классический паттерн.



```
Analyze the given text and determine whether it is 'Journalistic'.' Here are some examples. {0}
Now classify this one: {1}. The text is <mask>.
```



In [None]:
def create_prompt(prompt, text, retrieval):
    """Fill a two-slot template with retrieved examples and the target text.

    Args:
        prompt: template whose ``{0}`` receives the examples block and whose
            ``{1}`` receives *text*.
        text: the text to classify.
        retrieval: iterable of retrieved example strings; each one becomes a
            "Text: '...'" line.

    Returns:
        The formatted prompt string.
    """
    example_lines = []
    for retrieved in retrieval:
        example_lines.append(f"Text: '{retrieved}'")
    examples_block = "\n".join(example_lines)
    return prompt.format(examples_block, text)

In [None]:
# Classification-style few-shot template: {0} receives the retrieved labelled
# examples, {1} the text under test; <mask> is the slot for the class word.
# NOTE(review): "'Journalistic'.'" has a stray quote and names only one of
# the two classes — left untouched because changing the prompt changes the
# measured results; confirm the intended wording.
pattern_new = """Analyze the given text and determine whether it is 'Journalistic'.' Here are some examples. {0}
Now classify this one: {1}. The text is <mask>."""

In [None]:
# Retrieval corpus for the journalistic-vs-humorous prompt: rows
# 10000..24999, each text followed by its class sentence.
# The class word was misspelled "jounalistic", while the prediction code
# matches the filler against 'journalistic'; the examples now use the same
# spelling the code expects.
corpus = [str(text) + " The text is " + ['journalistic.', 'humorous.'][int(label)] for text, label in zip(df['Text'][10000:25000], df['Binary'][10000:25000])]

In [None]:
# Fresh in-memory Chroma client and collection for the journalistic-vs-
# humorous examples; `jokes_collection` and `model` are rebound.
client = chromadb.Client()

collection_name = "my_collection"
jokes_collection = client.create_collection(name=collection_name)
model = SentenceTransformer('all-MiniLM-L6-v2')

# Encode and insert in batches: one encode/add call per document is very
# slow for thousands of documents. Ids keep the original "text-<position>"
# scheme, and the metadata still mirrors the document text.
INDEX_BATCH_SIZE = 1000
for start in range(0, len(corpus), INDEX_BATCH_SIZE):
    batch = corpus[start:start + INDEX_BATCH_SIZE]
    jokes_collection.add(
        ids=[f"text-{i}" for i in range(start, start + len(batch))],
        documents=batch,
        embeddings=model.encode(batch).tolist(),
        metadatas=[{"text": doc} for doc in batch],
    )


In [None]:
# Few-shot predictions on the held-out mini-sample with the
# journalistic-vs-humorous prompt.
preds_fs = []
for text, label in zip(results['Text'], results['True']):
    prompt = create_prompt(pattern_new, text, retrieve_similar_texts(text, model, jokes_collection))
    # The prompt is already filled in, so only the mask is substituted here;
    # re-running `.format` on it would break on texts containing '{' or '}'.
    tokenization = tokenizer(prompt.replace("<mask>", tokenizer.mask_token))["input_ids"]
    mask_index = tokenization.index(tokenizer.mask_token_id)
    log_probs = score_with_model(tokenization, mask_index)
    _, top_indexes = torch.topk(log_probs, k=5, dim=-1)
    print(text)
    # Candidate fillers, most probable first, with spaces stripped.
    candidates = [tokenizer.decode([index]).replace(' ', '') for index in top_indexes]
    pred = None
    for word in candidates:
        if word == 'humorous':
            pred = 1
            break
        if word == 'journalistic':
            pred = 0
            break
    if pred is None:
        # No explicit class word among the top 5: fall back to similarity of
        # the MOST probable candidate. The old loop kept overwriting `pred`
        # and ended up using the fifth, least probable word.
        pred = cosine_similarity(candidates[0], add_info=[['humorous'], ['journalistic']])
    preds_fs.append(pred)
    print(label, pred)
    print("")

In [None]:
# Micro-averaged F1 of the few-shot predictions against `labels`.
# NOTE(review): `y_true` is assigned but not used in the visible code.
y_true = labels
f_few = f1_score(labels, preds_fs, average='micro')
print(f'Результаты на мини-выборке: {f_few}')

In [None]:
final_corpus, final_labels = list(df['Text'][:200]), [int(x) for x in df['Binary'][:200]]


def final_results(final_corpus, pattern_new):
    """Run the retrieval-augmented few-shot classifier over a corpus.

    For each text, the nearest stored examples are retrieved, the prompt is
    built from *pattern_new*, and the five most probable mask fillers are
    scanned for an explicit class word.

    Args:
        final_corpus: iterable of texts to classify.
        pattern_new: template with ``{0}`` (examples), ``{1}`` (text) and a
            literal ``<mask>``.

    Returns:
        List with one prediction per text: 1 for 'humorous', 0 for
        'journalistic'.

    Raises:
        ValueError: if a tokenized prompt contains no mask token id.
    """
    preds_fs = []
    for text in final_corpus:
        prompt = create_prompt(pattern_new, text, retrieve_similar_texts(text, model, jokes_collection))
        # The prompt is already filled in, so only the mask is substituted
        # here; re-running `.format` on it would break on texts containing
        # '{' or '}'.
        tokenization = tokenizer(prompt.replace("<mask>", tokenizer.mask_token))["input_ids"]
        mask_index = tokenization.index(tokenizer.mask_token_id)
        log_probs = score_with_model(tokenization, mask_index)
        _, top_indexes = torch.topk(log_probs, k=5, dim=-1)
        # Candidate fillers, most probable first, with spaces stripped.
        candidates = [tokenizer.decode([index]).replace(' ', '') for index in top_indexes]
        pred = None
        for word in candidates:
            if word == 'humorous':
                pred = 1
                break
            if word == 'journalistic':
                pred = 0
                break
        if pred is None:
            # No explicit class word among the top 5: fall back to similarity
            # of the MOST probable candidate. The old loop kept overwriting
            # `pred` and ended up using the fifth, least probable word.
            pred = cosine_similarity(candidates[0], add_info=[['humorous'], ['journalistic']])
        preds_fs.append(pred)
    return preds_fs

In [None]:
# Few-shot predictions for the first-200 sample.
predictions = final_results(final_corpus, pattern_new)

In [None]:
# Micro-averaged F1 and the per-class report for the few-shot run on the
# first-200 sample.
f_few_final = f1_score(final_labels, predictions, average='micro')
print(f'Финальная f-мера на выборке в 200 примеров: {f_few_final}')
print()
print(classification_report(final_labels, predictions, target_names=target_names, zero_division=0))

In [None]:
# Zero-shot reference on the same 200 texts: pattern_5 with the top-1
# heuristic (`preds_5_new`), for comparison with the few-shot score.
f_5 = f1_score(final_labels, preds_5_new, average='micro')
print(f'Лучшая f-мера на zero-shot: {f_5}')

**Выводы:**


*   На zero-shot качество было все-таки чуть хуже, чем на few-shot.
*   Для задач zero-shot и few-shot пришлось выбирать разные промпты для лучшего качества; для zero-shot приходится немножко сильнее креативить и даже по сути чуток обманывать модель. Для few-shot лучше подходит примитивный промпт, то есть задачу лучше решать как обычную классификацию.
*   Сама задача, кажется, довольно сложная для zero-shot, существует много статей о том, что для ЛЛМ очень сложно восприятие юмора.
*   Для бинарной классификации по юмору, видимо, не стоит давать модели подводку типа "юмор vs не-юмор", так как это сильно перетягивает bias модели в сторону юмора.



