In [1]:
import numpy as np
import pandas as pd
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [2]:
val_y_path = "C:/Users/Mia/Desktop/FINKI/NLP/nlp/data/shakespeare/valid.original.nltktok"
val_x_path = "C:/Users/Mia/Desktop/FINKI/NLP/nlp/data/shakespeare/valid.modern.nltktok"
train_y_path = "C:/Users/Mia/Desktop/FINKI/NLP/nlp/data/shakespeare/train.original.nltktok"
train_x_path = "C:/Users/Mia/Desktop/FINKI/NLP/nlp/data/shakespeare/train.modern.nltktok"
test_y_path = "C:/Users/Mia/Desktop/FINKI/NLP/nlp/data/shakespeare/test.original.nltktok"
test_x_path = "C:/Users/Mia/Desktop/FINKI/NLP/nlp/data/shakespeare/test.modern.nltktok"

In [3]:
val_y = pd.read_table(val_y_path, header=None, names=["Sentences"]).values.tolist()
val_x = pd.read_table(val_x_path, header=None, names=["Sentences"]).values.tolist()
test_y = pd.read_table(test_y_path, header=None, names=["Sentences"]).values.tolist()
test_x = pd.read_table(test_x_path, header=None, names=["Sentences"]).values.tolist()
train_y = pd.read_table(train_y_path, header=None, names=["Sentences"]).values.tolist()
train_x = pd.read_table(train_x_path, header=None, names=["Sentences"]).values.tolist()

In [4]:
model = AutoModelForSeq2SeqLM.from_pretrained('google/flan-t5-base')
tokenizer = AutoTokenizer.from_pretrained('google/flan-t5-base')

In [6]:
from evaluate import load
from nltk.translate import meteor
from nltk import word_tokenize
import nltk
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Mia\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [50]:
from nltk.translate.meteor_score import single_meteor_score

In [64]:
def evaluate_meteor_bleu(pred, ref, prompt_type, train_test_or_val):
    print(train_test_or_val + ": " + prompt_type)
    
    # BLEUscore = nltk.translate.bleu_score.sentence_bleu([ref], pred, weights = (0.5, 0.5))
    metric = load('bleu')
    results = metric.compute(predictions=pred, references=ref)
    score = results['bleu']
    print(f'BLEU score: {score}')

    m_score = 0.0
    for hyp, r in zip(pred, ref):
        m_score += round(meteor([word_tokenize(hyp[0])], word_tokenize(r[0])), 4)
    # m_score = single_meteor_score(ref, pred)

    print(f'Meteor score: {m_score}\n')

Испробајте ги следните prompts:
1. „Here is a text: {text}, which is {label1}. Here is a rewrite of the text: {text}, which 
is {label}. Rewrite the following text: {text} in {label}.“.
2. „Here is a text: {text}, which is {label1}. Here is a rewrite of the text: {text}, which 
is not {label1}. Rewrite the following text: {text} in {label}.“.
3. „Here is a text: {text}, which is not {label2}. Here is a rewrite of the text: {text}, 
which is {label2}. Rewrite the following text: {text} in {label}.

In [55]:
def prompt_with_prompt_type_1(sample_x, sample_y, no_of_examples):
    pred_labels = []

    for sample in sample_x:
        example = []

        label1 = 'english'
        label2 = 'shakespearean'
       
        for i in range(no_of_examples):
            english_example = sample_x[i]
            shakespearean_example = sample_y[i]
            result_example = f'Here is a text: {english_example}, which is {label1}. Here is a rewrite of the text: {shakespearean_example}, which is {label2}.\n'
            example.append(result_example)

        prompt = f'{example}\nRewrite the following text: {sample} in {label2}.'
        
        # print(prompt)

        input_data = tokenizer(prompt, return_tensors='pt')
        input_ids = input_data.input_ids
        
        output = model.generate(input_ids)
        pred_label = tokenizer.decode(output[0])

        pred_labels.append(pred_label)
    
    return pred_labels

In [56]:
def prompt_with_prompt_type_2(sample_x, sample_y, no_of_examples):
    pred_labels = []

    for sample in sample_x:
        example = []

        label1 = 'english'
        label2 = 'shakespearean'
       
        for i in range(no_of_examples):
            english_example = sample_x[i]
            shakespearean_example = sample_y[i]
            result_example = f'Here is a text: {english_example}, which is {label1}. Here is a rewrite of the text: {shakespearean_example}, which is not {label1}.\n'
            example.append(result_example)

        prompt = f'{example}\nRewrite the following text: {sample} into {label2}.'

        input_data = tokenizer(prompt, return_tensors='pt')
        input_ids = input_data.input_ids
        
        output = model.generate(input_ids)
        pred_label = tokenizer.decode(output[0])

        pred_labels.append(pred_label)
    
    return pred_labels

In [57]:
def prompt_with_prompt_type_3(sample_x, sample_y, no_of_examples):
    pred_labels = []

    for sample in sample_x:
        example = []

        label2 = 'shakespearean'
       
        for i in range(no_of_examples):
            english_example = sample_x[i]
            shakespearean_example = sample_y[i]
            result_example = f'Here is a text: {english_example}, which is not {label2}. Here is a rewrite of the text: {shakespearean_example}, which is {label2}.\n'
            example.append(result_example)

        prompt = f'{example}\nRewrite the following text: {sample} into {label2}.'

        input_data = tokenizer(prompt, return_tensors='pt')
        input_ids = input_data.input_ids
        
        output = model.generate(input_ids)
        pred_label = tokenizer.decode(output[0])

        pred_labels.append(pred_label)
    
    return pred_labels

In [58]:
import re

def clean_prediction(pred_label):
    pattern = re.compile('<.*?>')
    pred_list = []

    for pred in pred_label:
        pred = re.sub(pattern, '', pred)
        pred = pred.strip()
        pred = pred.lower()
        # print(pred)
        pred_list.append(pred)

    return pred_list

    Prompt Type 1


In [67]:
def predict_with_few_shot_prompting_prompt_type_1(sample_x, sample_y, train_test_or_val):
    pred_labels_n_1 = prompt_with_prompt_type_1(sample_x, sample_y, 1) 
    pred_labels_n_2 = prompt_with_prompt_type_1(sample_x, sample_y, 2)
    pred_labels_n_3 = prompt_with_prompt_type_1(sample_x, sample_y, 3) 
    pred_labels_n_5 = prompt_with_prompt_type_1(sample_x, sample_y, 5) 
    pred_labels_n_10 = prompt_with_prompt_type_1(sample_x, sample_y, 10) 
    
    evaluate_meteor_bleu(clean_prediction(pred_labels_n_1), sample_y, "N 1", train_test_or_val)
    evaluate_meteor_bleu(clean_prediction(pred_labels_n_2), sample_y, "N 2", train_test_or_val)
    evaluate_meteor_bleu(clean_prediction(pred_labels_n_3), sample_y, "N 3", train_test_or_val)
    evaluate_meteor_bleu(clean_prediction(pred_labels_n_5), sample_y, "N 5", train_test_or_val)
    evaluate_meteor_bleu(clean_prediction(pred_labels_n_10), sample_y, "N 10", train_test_or_val)

    Prompt Type 2

In [68]:
def predict_with_few_shot_prompting_prompt_type_2(sample_x, sample_y, train_test_or_val):

    pred_labels_n_1 = prompt_with_prompt_type_2(sample_x, sample_y, 1) 
    pred_labels_n_2 = prompt_with_prompt_type_2(sample_x, sample_y, 2)
    pred_labels_n_3 = prompt_with_prompt_type_2(sample_x, sample_y, 3) 
    pred_labels_n_5 = prompt_with_prompt_type_2(sample_x, sample_y, 5) 
    pred_labels_n_10 = prompt_with_prompt_type_2(sample_x, sample_y, 10) 

    evaluate_meteor_bleu(clean_prediction(pred_labels_n_1), sample_y, "N 1", train_test_or_val)
    evaluate_meteor_bleu(clean_prediction(pred_labels_n_2), sample_y, "N 2", train_test_or_val)
    evaluate_meteor_bleu(clean_prediction(pred_labels_n_3), sample_y, "N 3", train_test_or_val)
    evaluate_meteor_bleu(clean_prediction(pred_labels_n_5), sample_y, "N 5", train_test_or_val)
    evaluate_meteor_bleu(clean_prediction(pred_labels_n_10), sample_y, "N 10", train_test_or_val)

    Prompt Type 3

In [59]:
def predict_with_few_shot_prompting_prompt_type_3(sample_x, sample_y, train_test_or_val):

    return prompt_with_prompt_type_3(sample_x, sample_y, 1), prompt_with_prompt_type_3(sample_x, sample_y, 2), prompt_with_prompt_type_3(sample_x, sample_y, 3), prompt_with_prompt_type_3(sample_x, sample_y, 5), prompt_with_prompt_type_3(sample_x, sample_y, 10) 
 
    # evaluate_meteor_bleu(clean_prediction(pred_labels_n_1), sample_y, "N 1", train_test_or_val)
    # evaluate_meteor_bleu(clean_prediction(pred_labels_n_2), sample_y, "N 2", train_test_or_val)
    # evaluate_meteor_bleu(clean_prediction(pred_labels_n_3), sample_y, "N 3", train_test_or_val)
    # evaluate_meteor_bleu(clean_prediction(pred_labels_n_5), sample_y, "N 5", train_test_or_val)
    # evaluate_meteor_bleu(clean_prediction(pred_labels_n_10), sample_y, "N 10", train_test_or_val)

In [69]:
predict_with_few_shot_prompting_prompt_type_1(train_x[:100], train_y[:100], "Train")



In [None]:
predict_with_few_shot_prompting_prompt_type_2(train_x[:100], train_y[:100], "Train")

In [61]:
pred_labels_n_1, pred_labels_n_2, pred_labels_n_3, pred_labels_n_5, pred_labels_n_10 = predict_with_few_shot_prompting_prompt_type_3(train_x[:100], train_y[:100], "Train")



In [65]:
evaluate_meteor_bleu(clean_prediction(pred_labels_n_1), train_y[:100], "N 1", "Train")
evaluate_meteor_bleu(clean_prediction(pred_labels_n_2), train_y[:100], "N 2", "Train")
evaluate_meteor_bleu(clean_prediction(pred_labels_n_3), train_y[:100], "N 3", "Train")
evaluate_meteor_bleu(clean_prediction(pred_labels_n_5), train_y[:100], "N 5", "Train")
evaluate_meteor_bleu(clean_prediction(pred_labels_n_10), train_y[:100], "N 10", "Train")

Train: N 1
BLEU score: 0.09310531794506956
Meteor score: 2.8653

Train: N 2
BLEU score: 0.022241370569023488
Meteor score: 0.0

Train: N 3
BLEU score: 0.07677909691148554
Meteor score: 2.3707

Train: N 5
BLEU score: 0.08471203815341927
Meteor score: 2.7043999999999997

Train: N 10
BLEU score: 0.0891663972632699
Meteor score: 3.0614999999999997

