# Spoiler generation

In [1]:
from spoiler_generation.utils.dataset_class import Dataset
import pandas as pd
from spoiler_generation.utils.stats import prepare_stats, calculate_bleu
import mlflow
from datasets import load_dataset

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Mateusz\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Mateusz\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\Mateusz\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


In [None]:
# The dataset is private
dataset = load_dataset("MateuszW/clickbait_spoiling_test")
test_df = pd.DataFrame(dataset["test"])

In [12]:
test_df.shape

(1000, 6)

In [None]:
models_outputs = load_dataset(
    "MateuszW/spoiler_generation",
    data_files={
        "baseline": "models_output/deberta-baseline_output.csv",
        "deberta_paqott": "models_output/deberta-paqott_output.csv",
        "llama_pwot": "models_output/llama-pwot_output.csv",
        "vicuna_pwot": "models_output/vicuna-pwot_output.csv",
        "opt_pwot": "models_output/opt-pwot_output.csv",
        "llama_pwt": "models_output/llama-pwt_output.csv",
        "vicuna_pwt": "models_output/vicuna-pwt_output.csv",
        "opt_pwt": "models_output/opt-pwt_output.csv",
        "vicuna_ppt": "models_output/vicuna-ppt_output.csv",
    },
)

In [5]:
models_outputs

DatasetDict({
    baseline: Dataset({
        features: ['id', 'spoiler'],
        num_rows: 1000
    })
    deberta_paqott: Dataset({
        features: ['id', 'spoiler'],
        num_rows: 1000
    })
    llama_pwot: Dataset({
        features: ['id', 'spoiler'],
        num_rows: 1000
    })
    vicuna_pwot: Dataset({
        features: ['id', 'spoiler'],
        num_rows: 1000
    })
    llama_pwt: Dataset({
        features: ['id', 'spoiler'],
        num_rows: 1000
    })
    vicuna_pwt: Dataset({
        features: ['id', 'spoiler'],
        num_rows: 1000
    })
    opt_pwt: Dataset({
        features: ['id', 'spoiler'],
        num_rows: 1000
    })
    vicuna_ppt: Dataset({
        features: ['id', 'spoiler'],
        num_rows: 1000
    })
})

In [6]:
for output_name in models_outputs.keys():
    print(output_name, calculate_bleu(test_df, pd.DataFrame(models_outputs[output_name])))

baseline 0.3789261019434709
deberta_paqott 0.38212501050366765
llama_pwot 0.3210898228482464
vicuna_pwot 0.3305139476447337
llama_pwt 0.3758915129293975
vicuna_pwt 0.3869613288383001
opt_pwt 0.33757424975291617
vicuna_ppt 0.39901931587160233


### Per type

In [None]:
phrase_df = test_df.loc[test_df["type"] == "phrase"]
prepare_stats(phrase_df, pd.DataFrame(models_outputs["baseline"]))

In [12]:
phrase_df.shape

(423, 14)

In [10]:
bert_score_val = calculate_bertscore(phrase_df, output)
bert_metrics_mean(bert_score_val)

{'precision': 0.9423626642982447,
 'recall': 0.9435541247926987,
 'f1': 0.9425771849375244}

In [11]:
passage_df = test_df.loc[test_df["tags"] == "passage"]
calculate_bleu(passage_df, output)

0.23160457153151287

In [12]:
bert_score_val = calculate_bertscore(passage_df, output)
bert_metrics_mean(bert_score_val)

{'precision': 0.8977760011445796,
 'recall': 0.874899429363885,
 'f1': 0.8857718703172934}

In [13]:
multi_df = test_df.loc[test_df["tags"] == "multi"]
calculate_bleu(multi_df, output)

0.0766925771830039

In [14]:
bert_score_val = calculate_bertscore(multi_df, output)
bert_metrics_mean(bert_score_val)

{'precision': 0.8889400486288399,
 'recall': 0.8302655946249249,
 'f1': 0.8579819113358684}

## Simple transformers

In [10]:
import json

with open("/home/mateusz15wozny/master_thesis/data/test.json", "r") as f:
    test_data = json.load(f)

with open("/home/mateusz15wozny/master_thesis/data/st_output.json", "r") as f:
    output = json.load(f)

In [11]:
spoilers = []

for i in range(len(test_data)):
    spoilers.append(test_data[i]["qas"][0]["answers"][0]["text"])

In [12]:
test_df = pd.DataFrame(spoilers, columns=["spoiler"])

In [13]:
pred_spoilers_lists = []
for i in range(len(output)):
    pred_spoilers_lists.append(sorted(output[i]["answer"], key=lambda x: len(x)))

In [14]:
pred_spoilers = []
map_dict = {"phrase": 0, "passage": 1, "multi": 2}
for i in range(len(output)):
    index = min(map_dict[test_data[i]["tag"]], len(pred_spoilers_lists[i]) - 1)
    if pred_spoilers_lists[i][index] == "" and len(pred_spoilers_lists[i]) < index + 1:
        index += 1
    spoiler = pred_spoilers_lists[i][index]
    pred_spoilers.append(spoiler)

In [15]:
pred_df = pd.DataFrame(pred_spoilers, columns=["spoiler"])

In [17]:
# with mlflow.start_run(run_name="Simple transformers") as run:
#     run_id = run.info.run_id
stats = prepare_stats(test_df, pred_df)
log_to_mlflow("", stats, "afee9b1e11d148c28cbbff407896a53c")
# stats



## OPT

### OPT 1.3B peft

In [16]:
test = Dataset.from_jsonl("/home/mateusz15wozny/master_thesis/data/test.jsonl")
test_df = test.df
output = pd.read_csv("/home/mateusz15wozny/master_thesis/data/opt_generation/opt-1.3B_peft_output.csv").fillna("")

In [10]:
from stats import bert_metrics_mean, calculate_bertscore

with mlflow.start_run(run_name="OPT 1.3B peft") as run:
    run_id = run.info.run_id
    mlflow.log_param("output_dir", "/home/mateusz15wozny/master_thesis/models/opt-peft-v2")
stats = bert_metrics_mean(calculate_bertscore(test_df, output))
log_to_mlflow("", stats, run_id)

{'precision': 0.7948054869174958,
 'recall': 0.7785088661909103,
 'f1': 0.7825760175585746}

In [17]:
meteor = calculate_meteor(test_df, output)
log_to_mlflow("", {"meteor": meteor}, "b665a1fe3e3c49fa9bf54417bc52e7d1")

### OPT 13B peft

In [19]:
test = Dataset.from_jsonl("/home/mateusz15wozny/master_thesis/data/test.jsonl")
test_df = test.df
output = pd.read_csv("/home/mateusz15wozny/master_thesis/models/opt-peft-13B/output_v2.csv").fillna("")

In [16]:
log_to_mlflow(
    "/home/mateusz15wozny/master_thesis/models/opt-peft-13B",
    prepare_stats(test_df, output),
)
# prepare_stats(test_df, output)

In [20]:
meteor = calculate_meteor(test_df, output)
log_to_mlflow("/home/mateusz15wozny/master_thesis/models/opt-peft-13B", {"meteor": meteor})

### OPT 13B with type

In [2]:
test = pd.read_json("/home/mateusz15wozny/master_thesis/data/llama_generation/test.json")
output = pd.read_csv("/home/mateusz15wozny/master_thesis/models/opt-13b-with-type/output.csv")

In [3]:
test["spoiler"] = test["output"]

In [4]:
prepare_stats(test, output)

{'bleu': 0.33858605370621186,
 'precision': 0.8977188437581062,
 'recall': 0.8937850190401078,
 'f1': 0.8950101483464241,
 'exact_match': 0.248,
 'meteor': 0.4345849540181727}

In [5]:
log_to_mlflow("models/opt-13b-with-type", prepare_stats(test, output))

## Llama 13B

In [21]:
test = pd.read_json("/home/mateusz15wozny/master_thesis/data/llama_generation/test.json")
output = pd.read_csv("/home/mateusz15wozny/master_thesis/models/llama-13b-finetuned/output.csv")

In [22]:
test["spoiler"] = test["output"]

In [9]:
log_to_mlflow("models/llama-13b-finetuned", prepare_stats(test, output))



{'bleu': 0.3770354374520436,
 'precision': 0.8946512819528579,
 'recall': 0.9012328633069993,
 'f1': 0.897447925388813,
 'exact_match': 0.268}

#### without type

In [2]:
test = pd.read_json("/home/mateusz15wozny/master_thesis/data/llama_generation/test.json")
output = pd.read_csv("/home/mateusz15wozny/master_thesis/models/llama-13b-without-type/output.csv")
test["spoiler"] = test["output"]

In [4]:
prepare_stats(test, output)

{'bleu': 0.32212797292509027,
 'precision': 0.8899407665133476,
 'recall': 0.8877390567660332,
 'f1': 0.8882314289808273,
 'exact_match': 0.243,
 'meteor': 0.40783805965104264}

In [5]:
log_to_mlflow("models/llama-13b-without-type", prepare_stats(test, output))

## Vicuna 13B

In [24]:
test = pd.read_json("/home/mateusz15wozny/master_thesis/data/llama_generation/test.json")
output = pd.read_csv("/home/mateusz15wozny/master_thesis/models/vicuna-13b-finetuned/output.csv")
test["spoiler"] = test["output"]

#### without typee

In [6]:
test = pd.read_json("/home/mateusz15wozny/master_thesis/data/llama_generation/test.json")
output = pd.read_csv("/home/mateusz15wozny/master_thesis/models/vicuna-13b-without-type/output.csv")
test["spoiler"] = test["output"]

In [7]:
from stats import calculate_bleu

calculate_bleu(test, output)

0.40015284106570886

In [4]:
prepare_stats(test, output)



{'bleu': 0.33163400043252533,
 'precision': 0.8863940492272377,
 'recall': 0.8869956572651863,
 'f1': 0.8860925446748733,
 'exact_match': 0.234,
 'meteor': 0.41757068006087084}

In [5]:
log_to_mlflow("models/vicuna-13b-without-type", prepare_stats(test, output))



#### new prompt

In [26]:
test = pd.read_json("/home/mateusz15wozny/master_thesis/data/llama_generation/test.json")
output = pd.read_csv("/home/mateusz15wozny/master_thesis/models/vicuna-13b-new-prompt/output.csv")
test["spoiler"] = test["output"]

In [3]:
from stats import calculate_bleu

calculate_bleu(test, output)

0.40015284106570886

In [4]:
log_to_mlflow("models/vicuna-13b-new-prompt", prepare_stats(test, output))



In [27]:
meteor = calculate_meteor(test, output)
log_to_mlflow("models/vicuna-13b-new-prompt", {"meteor": meteor})

In [11]:
from stats import calculate_bleu

for typ in ["phrase", "passage", "multi"]:
    bleus["vicuna"].update({typ: calculate_bleu(test[test["type"] == typ], output)})

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  true_spoiler["spoiler"] = true_spoiler["spoiler"].apply(preprocess_func)


In [46]:
vicuna_output = pd.read_csv("/home/mateusz15wozny/master_thesis/models/vicuna-13b-finetuned/output.csv")
test = pd.read_json("/home/mateusz15wozny/master_thesis/data/hf_qa/vicuna/test.json")
original_df = Dataset.from_jsonl("/home/mateusz15wozny/master_thesis/data/test.jsonl").df
output_dir = "/home/mateusz15wozny/master_thesis/models/deberta-v3-trial-3"
deberta_output = pd.read_json(f"{output_dir}/output.jsonl", lines=True)

In [45]:
mixed = original_df.loc[test["id"] - 1].reset_index(drop=True)
vicuna_output = vicuna_output.loc[test["id"] - 1].reset_index(drop=True)
deberta_output = deberta_output.loc[test["id"] - 1].reset_index(drop=True)

In [66]:
merged_df = pd.concat(
    [
        deberta_output[mixed["tags"] != "multi"]["spoiler"],
        vicuna_output[mixed["tags"] == "multi"]["spoiler"],
    ]
).to_frame()

In [67]:
calculate_bleu(mixed, merged_df.sort_index())

0.39447341398784613

In [5]:
df = pd.read_csv("/home/mateusz15wozny/master_thesis/results/all_spoilers_with_gt.csv")

In [12]:
df["Vicuna"] = output["spoiler"]
df["type"] = test["type"]
df.to_csv("/home/mateusz15wozny/master_thesis/results/all_spoilers_with_gt.csv", index=False)

## Roberta

### Roberta squad v2 no Vicuna

In [28]:
import pandas as pd
from stats import prepare_stats, log_to_mlflow

test = pd.read_json("/home/mateusz15wozny/master_thesis/data/hf_qa/test.json")
output_dir = "/home/mateusz15wozny/master_thesis/models/roberta-base-squad2-finetuned-no-vicuna"
output = pd.read_json(f"{output_dir}/output.jsonl", lines=True)

In [30]:
df = pd.DataFrame()
df["spoiler"] = test["answers"].apply(lambda x: " ".join([record["text"][0] for record in x]))

In [15]:
stats = prepare_stats(df, output)
log_to_mlflow(output_dir, stats)

In [31]:
meteor = calculate_meteor(df, output)
log_to_mlflow(output_dir, {"meteor": meteor})

### Roberta squad with vicuna

In [32]:
test = pd.read_json("/home/mateusz15wozny/master_thesis/data/hf_qa/vicuna/test.json")
output_dir = "/home/mateusz15wozny/master_thesis/models/roberta-base-squad2-finetuned"
output = pd.read_json(f"{output_dir}/output.jsonl", lines=True)

In [33]:
df = pd.DataFrame()
df["spoiler"] = test["answers"].apply(lambda x: " ".join([record["text"][0] for record in x]))

In [18]:
stats = prepare_stats(df, output)
log_to_mlflow(output_dir, stats)

In [34]:
meteor = calculate_meteor(df, output)
log_to_mlflow(output_dir, {"meteor": meteor})

### Roberta with concatenated vicuna and clickbait

In [35]:
test = pd.read_json("/home/mateusz15wozny/master_thesis/data/hf_qa/test_concatenated.json")
output_dir = "/home/mateusz15wozny/master_thesis/models/roberta-base_concatenated-v2"
output = pd.read_json(f"{output_dir}/output.jsonl", lines=True)

In [36]:
df = pd.DataFrame()
df["spoiler"] = test["answers"].apply(lambda x: " ".join([record["text"][0] for record in x]))

In [37]:
meteor = calculate_meteor(df, output)
log_to_mlflow(output_dir, {"meteor": meteor})

In [21]:
stats = prepare_stats(df, output)
log_to_mlflow(output_dir, stats)

### Roberta without finetuning

In [50]:
test = pd.read_json("/home/mateusz15wozny/master_thesis/data/hf_qa/vicuna/test.json")
output = pd.read_json(
    "/home/mateusz15wozny/master_thesis/data/hf_qa/roberta_deepset_base_output.jsonl",
    lines=True,
)

In [51]:
df = pd.DataFrame()
df["spoiler"] = test["answers"].apply(lambda x: " ".join([record["text"][0] for record in x]))

In [52]:
# with mlflow.start_run(run_name="Roberta base without finetuning") as run:
#     run_id = run.info.run_id
meteor = calculate_meteor(df, output)
log_to_mlflow("", {"meteor": meteor}, "f8632407526f4f9a87d3e34a97e39381")
# log_to_mlflow("", stats, run_id)

## Deberta

### Deberta with vicuna

In [41]:
test = pd.read_json("/home/mateusz15wozny/master_thesis/data/hf_qa/vicuna/test.json")
original_df = Dataset.from_jsonl("/home/mateusz15wozny/master_thesis/data/test.jsonl").df
output_dir = "/home/mateusz15wozny/master_thesis/models/deberta-v3-trial-3"
output = pd.read_json(f"{output_dir}/output.jsonl", lines=True)

In [6]:
mixed = original_df.loc[test["id"] - 1].reset_index(drop=True)
prepare_stats(mixed[mixed["tags"] != "multi"], output)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  true_spoiler["spoiler"] = true_spoiler["spoiler"].apply(preprocess_func)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  true_spoiler["spoiler"] = true_spoiler["spoiler"].apply(preprocess_func)


{'bleu': 0.42424742287009065,
 'precision': 0.9133387114989963,
 'recall': 0.906181343211003,
 'f1': 0.9091662232940262,
 'exact_match': 0.32186732186732187}

In [40]:
output

Unnamed: 0,uuid,spoiler
0,1,soap
1,2,gwyneth paltrow
2,3,filming the next nicholas spark film
3,4,javale mcgee
4,5,cora
...,...,...
983,996,all along the watchtower
984,997,1 catch it on a good day
985,998,total lunar eclipse
986,999,tami erin


In [33]:
output

Unnamed: 0,uuid,spoiler
0,1,soap
1,2,gwyneth paltrow
2,3,filming the next nicholas spark film
3,4,javale mcgee
4,5,cora
...,...,...
983,996,all along the watchtower
984,997,1 catch it on a good day
985,998,total lunar eclipse
986,999,tami erin


In [42]:
df = pd.DataFrame()
df["spoiler"] = test["answers"].apply(lambda x: " ".join([record["text"][0] for record in x]))

In [29]:
stats = prepare_stats(df, output)
log_to_mlflow(output_dir, stats)

In [43]:
meteor = calculate_meteor(df, output)
log_to_mlflow(output_dir, {"meteor": meteor})

### Deberta without vicuna

In [44]:
test = pd.read_json("/home/mateusz15wozny/master_thesis/data/hf_qa/test.json")
output_dir = "/home/mateusz15wozny/master_thesis/models/deberta-v3-no-vicuna"
output = pd.read_json(f"{output_dir}/output.jsonl", lines=True)
df = pd.DataFrame()
df["spoiler"] = test["answers"].apply(lambda x: " ".join([record["text"][0] for record in x]))

In [52]:
stats = prepare_stats(df, output)
log_to_mlflow(output_dir, stats)

In [45]:
meteor = calculate_meteor(df, output)
log_to_mlflow(output_dir, {"meteor": meteor})

### Deberta with concatenated vicuna and clickbait

In [2]:
test = pd.read_json("/home/mateusz15wozny/master_thesis/data/hf_qa/test_concatenated.json")
output_dir = "/home/mateusz15wozny/master_thesis/models/deberta-base-from-zero"
output = pd.read_json(f"{output_dir}/output.jsonl", lines=True)
df = pd.DataFrame()
df["spoiler"] = test["answers"].apply(lambda x: " ".join([record["text"][0] for record in x]))

In [7]:
with mlflow.start_run(run_name="Deberta from zero with conc post and clickbait") as run:
    pass
log_to_mlflow("", prepare_stats(df, output), run.info.run_id)

### Not multi deberta

In [65]:
test_not_multi = pd.read_json("/home/mateusz15wozny/master_thesis/data/type_based_clf/not_multi/test.json")
output = pd.read_json(
    "/home/mateusz15wozny/master_thesis/models/not-multi-deberta/checkpoint-1293/output.jsonl",
    lines=True,
)
test_multi = pd.read_json("/home/mateusz15wozny/master_thesis/data/hf_qa/vicuna/test.json")

test_not_multi["spoiler"] = test_not_multi["answers"].apply(lambda x: " ".join([record["text"][0] for record in x]))

In [69]:
vicuna_output = pd.read_csv("/home/mateusz15wozny/master_thesis/models/vicuna-13b-finetuned/output.csv")
test_multi = pd.read_json("/home/mateusz15wozny/master_thesis/data/hf_qa/vicuna/test.json")
original_df = Dataset.from_jsonl("/home/mateusz15wozny/master_thesis/data/test.jsonl").df
original_df = original_df.loc[test_multi["id"] - 1].reset_index(drop=True)
vicuna_output = vicuna_output.loc[test_multi["id"] - 1].reset_index(drop=True)[original_df["tags"] == "multi"]
test_multi = test_multi[original_df["tags"] == "multi"]
test_multi["spoiler"] = test_multi["answers"].apply(lambda x: " ".join([record["text"][0] for record in x]))

In [None]:
output["spoiler"], vicuna_output["spoiler"].reset_index(drop=True)

In [70]:
ref = pd.concat([test_not_multi["spoiler"], test_multi["spoiler"]]).to_frame().reset_index(drop=True)
pred = pd.concat([output["spoiler"], vicuna_output["spoiler"]]).to_frame().reset_index(drop=True)

In [71]:
prepare_stats(ref, pred)

{'bleu': 0.4227937201349781,
 'precision': 0.9102973761828804,
 'recall': 0.908094981903972,
 'f1': 0.9086293058115461,
 'exact_match': 0.3026315789473684,
 'meteor': 0.5082325180947119}

In [52]:
log_to_mlflow("models/not-multi-deberta", {"meteor": calculate_meteor(ref, pred)})

### Not multi  deberta on full dataset

In [58]:
test = pd.read_json("/home/mateusz15wozny/master_thesis/data/hf_qa/test.json")
output = pd.read_json(
    "/home/mateusz15wozny/master_thesis/models/not-multi-deberta-v2/full_output.jsonl",
    lines=True,
)
df = pd.DataFrame()
df["spoiler"] = test["answers"].apply(lambda x: " ".join([record["text"][0] for record in x]))

In [61]:
results = prepare_stats(df, pd.DataFrame(output, columns=["spoiler"]))
search_df = pd.DataFrame([results])
path = "/home/mateusz15wozny/master_thesis/results/tables/qa_models_with_generated_questions.csv"
df = pd.read_csv(path)
search_df["model_name"] = "deberta-finetuned-with-post-question"
search_df["description"] = "Model finetune with clickbait post, generated questions and article"
search_df["use_type"] = True
search_df = search_df[["model_name", "bleu", "meteor", "exact_match", "recall", "f1", "precision", "description", "use_type"]]

pd.concat([df, search_df], ignore_index=True).to_csv(path, index=False)

### Deberta without finetuning

In [48]:
test = pd.read_json("/home/mateusz15wozny/master_thesis/data/hf_qa/vicuna/test.json")
output = pd.read_json(
    "/home/mateusz15wozny/master_thesis/data/hf_qa/deberta_deepset_base_output.jsonl",
    lines=True,
)
df = pd.DataFrame()
df["spoiler"] = test["answers"].apply(lambda x: " ".join([record["text"][0] for record in x]))

In [49]:
meteor = calculate_meteor(df, output)
log_to_mlflow("", {"meteor": meteor}, "ce5e2977a5b14ff3bc2aed760d9060eb")

In [47]:
with mlflow.start_run(run_name="Deberta deepset without finetuning") as run:
    run_id = run.info.run_id
stats = prepare_stats(df, output)
log_to_mlflow("", stats, run_id)

# Regressor

In [72]:
from spoiler_generation.utils.dataset_class import Dataset
import pandas as pd

In [94]:
vicuna_output = pd.read_csv("/home/mateusz15wozny/master_thesis/models/vicuna-13b-finetuned/output.csv")
vicunav2_output = pd.read_csv("/home/mateusz15wozny/master_thesis/models/vicuna-13b-new-prompt/output.csv")
llama_output = pd.read_csv("/home/mateusz15wozny/master_thesis/models/llama-13b-finetuned/output.csv")
opt_output = pd.read_csv("/home/mateusz15wozny/master_thesis/models/opt-13b-with-type/output.csv").fillna("")

test = pd.read_json("/home/mateusz15wozny/master_thesis/data/hf_qa/vicuna/test.json")
original_df = Dataset.from_jsonl("/home/mateusz15wozny/master_thesis/data/test.jsonl").df
output_dir = "/home/mateusz15wozny/master_thesis/models/deberta-v3-trial-3"
deberta_output = pd.read_json(f"{output_dir}/output.jsonl", lines=True)
deberta_conc = pd.read_json(
    "/home/mateusz15wozny/master_thesis/models/not-multi-deberta-v2/full_output.jsonl",
    lines=True,
)

In [7]:
deberta_baseline = pd.read_json(
    "/home/mateusz15wozny/master_thesis/data/baseline/baseline2_output.jsonl",
    lines=True,
)
deberta_baseline = deberta_baseline.loc[test["id"] - 1].reset_index(drop=True)

In [97]:
test["spoiler"] = test["answers"].apply(lambda x: " ".join([record["text"][0] for record in x]))
vicuna_output = vicuna_output.loc[test["id"] - 1].reset_index(drop=True)
vicunav2_output = vicunav2_output.loc[test["id"] - 1].reset_index(drop=True)
llama_output = llama_output.loc[test["id"] - 1].reset_index(drop=True)
opt_output = opt_output.loc[test["id"] - 1].reset_index(drop=True)

In [76]:
deberta_conc

Unnamed: 0,uuid,spoiler
0,1,soap
1,2,gwyneth paltrow
2,3,hiddleswift are actually just filming the next...
3,4,javale mcgee
4,5,cora
...,...,...
983,996,all along the watchtower
984,997,1 catch it on a good day
985,998,total lunar eclipse
986,999,tami erin


In [104]:
merged_df = pd.DataFrame(
    zip(
        llama_output["spoiler"].apply(Dataset.preprocess_func),
        vicuna_output["spoiler"].apply(Dataset.preprocess_func),
        opt_output["spoiler"].apply(Dataset.preprocess_func),
        # vicunav2_output["spoiler"].apply(Dataset.preprocess_func)
    )
)

In [99]:
merged_df.shape

(988, 4)

### Selected using regressor between not multi deberta, llama and vicuna

In [117]:
test = pd.read_json("/home/mateusz15wozny/master_thesis/data/hf_qa/test_2.json")
test_not_multi = pd.read_json("/home/mateusz15wozny/master_thesis/data/type_based_clf/not_multi/test.json")

not_multi_vicunav2_output = pd.read_csv("/home/mateusz15wozny/master_thesis/models/vicuna-13b-new-prompt/output.csv")
not_multi_vicunav2_output = (
    not_multi_vicunav2_output.loc[test["id"] - 1].reset_index(drop=True)[test["type"] != "multi"].reset_index(drop=True)
)
multi_vicunav2_output = pd.read_csv("/home/mateusz15wozny/master_thesis/models/vicuna-13b-new-prompt/output.csv")
multi_vicunav2_output = multi_vicunav2_output.loc[test["id"] - 1].reset_index(drop=True)[test["type"] == "multi"].reset_index(drop=True)
multi_vicuna_output = pd.read_csv("/home/mateusz15wozny/master_thesis/models/vicuna-13b-finetuned/output.csv")
multi_vicuna_output = multi_vicuna_output.loc[test["id"] - 1].reset_index(drop=True)[test["type"] == "multi"].reset_index(drop=True)
multi_llama_output = pd.read_csv("/home/mateusz15wozny/master_thesis/models/llama-13b-finetuned/output.csv")
multi_llama_output = multi_llama_output.loc[test["id"] - 1].reset_index(drop=True)[test["type"] == "multi"].reset_index(drop=True)
not_multi_output = pd.read_json(
    "/home/mateusz15wozny/master_thesis/models/not-multi-deberta/checkpoint-1293/output.jsonl",
    lines=True,
)

In [124]:
multi_merged_df = pd.DataFrame(
    zip(
        multi_vicuna_output["spoiler"].apply(Dataset.preprocess_func),
        multi_vicunav2_output["spoiler"].apply(Dataset.preprocess_func),
        # multi_llama_output["spoiler"].apply(Dataset.preprocess_func),
    )
)

In [125]:
not_multi_merged_df = pd.DataFrame(
    zip(
        not_multi_vicunav2_output["spoiler"].apply(Dataset.preprocess_func),
        not_multi_output["spoiler"].apply(Dataset.preprocess_func),
    )
)

##### Check best score selecting by max bleu per example

In [2]:
from evaluate import load

bleu = load("bleu")

In [21]:
test_not_multi["spoiler"] = test_not_multi["answers"].apply(lambda x: " ".join([record["text"][0] for record in x]))
not_multi_merged_df = pd.DataFrame(
    zip(
        not_multi_vicunav2_output["spoiler"].apply(Dataset.preprocess_func),
        not_multi_output["spoiler"].apply(Dataset.preprocess_func),
        test_not_multi["spoiler"],
    )
)


def calc_bleu(true_spoiler, predicted_spoiler):
    bleu_score = []
    for reference, hypothesis in zip(true_spoiler, predicted_spoiler):
        try:
            val = bleu.compute(
                predictions=[hypothesis],
                references=[[reference]],
                max_order=min(4, len(reference.split(" "))),
            )["bleu"]
        except ZeroDivisionError:
            val = 0
        bleu_score.append(val)
    return bleu_score


best_bleu_not_multi = pd.DataFrame(
    zip(
        calc_bleu(test_not_multi["spoiler"], not_multi_vicunav2_output["spoiler"].apply(Dataset.preprocess_func)),
        calc_bleu(test_not_multi["spoiler"], not_multi_output["spoiler"].apply(Dataset.preprocess_func)),
    )
)
best_bleu_multi = pd.DataFrame(
    zip(
        calc_bleu(test_multi["spoiler"], multi_vicuna_output["spoiler"].apply(Dataset.preprocess_func)),
        calc_bleu(test_multi["spoiler"], multi_vicunav2_output["spoiler"].apply(Dataset.preprocess_func)),
        calc_bleu(test_multi["spoiler"], multi_llama_output["spoiler"].apply(Dataset.preprocess_func)),
    )
)

In [23]:
best_bleu_not_multi.mean(axis=0), best_bleu_multi.mean(axis=0)

(0    0.434904
 1    0.458596
 dtype: float64,
 0    0.255524
 1    0.260252
 2    0.255637
 dtype: float64)

In [22]:
best_bleu_not_multi.max(axis=1).mean(), best_bleu_multi.max(axis=1).mean()

(0.5816430706710656, 0.35554409568734235)

In [25]:
best_possible_bleu = pd.concat([best_bleu_not_multi.max(axis=1), best_bleu_multi.max(axis=1)]).mean()

##### Best possible bleu to achive with ~multi deberta and vicuna and multi vicuna v1,v2 and llama

0.5418240204208956

In [89]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer


model_path = "/home/mateusz15wozny/master_thesis/spoiler_generation/regressor/deberta-base-v3"
model = AutoModelForSequenceClassification.from_pretrained(model_path, num_labels=1).to("cuda")
tokenizer = AutoTokenizer.from_pretrained(model_path)

In [79]:
PROMPT = "For given question:\n {} \nanswer:\n {} \ncontext:\n{}"

In [126]:
import torch
from tqdm import tqdm

not_multi_selected_spoilers = []
for n in tqdm(range(not_multi_merged_df.shape[0])):
    spoilers = not_multi_merged_df.loc[n].tolist()
    data = [PROMPT.format(test_not_multi.loc[n, "question"], i, test_not_multi.loc[n, "context"]) for i in spoilers]
    input_ids = tokenizer(data, return_tensors="pt", padding="max_length", truncation=True).to("cuda")

    with torch.no_grad():
        outputs = model(**input_ids)

    not_multi_selected_spoilers.append(spoilers[outputs.logits.argmax()])

100%|██████████| 814/814 [01:39<00:00,  8.16it/s]


In [127]:
import torch
from tqdm import tqdm

multi_selected_spoilers = []
test_multi = test[test["type"] == "multi"]
tmp_multi = test_multi.reset_index(drop=True)
for n in tqdm(range(multi_merged_df.shape[0])):
    spoilers = multi_merged_df.loc[n].tolist()
    data = [PROMPT.format(tmp_multi.loc[n, "question"], i, tmp_multi.loc[n, "context"]) for i in spoilers]
    input_ids = tokenizer(data, return_tensors="pt", padding="max_length", truncation=True).to("cuda")

    with torch.no_grad():
        outputs = model(**input_ids)

    multi_selected_spoilers.append(spoilers[outputs.logits.argmax()])

100%|██████████| 174/174 [00:21<00:00,  7.94it/s]


In [129]:
# vicuna_output = pd.read_csv(
#     "/home/mateusz15wozny/master_thesis/models/vicuna-13b-new-prompt/output.csv"
# )

# vicuna_output = vicuna_output.loc[test["id"] - 1].reset_index(drop=True)[
#     test["type"] == "multi"
# ]
test_multi["spoiler"] = test_multi["answers"].apply(lambda x: " ".join([record["text"][0] for record in x]))
test_not_multi["spoiler"] = test_not_multi["answers"].apply(lambda x: " ".join([record["text"][0] for record in x]))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_multi["spoiler"] = test_multi["answers"].apply(


In [130]:
from stats import prepare_stats

pred = pd.concat(
    [
        pd.DataFrame(not_multi_selected_spoilers, columns=["spoiler"]),
        pd.DataFrame(multi_selected_spoilers, columns=["spoiler"]),
    ]
).reset_index(drop=True)
ref = pd.concat([test_not_multi, test_multi]).reset_index(drop=True)
prepare_stats(ref, pred)

{'bleu': 0.44303314639062596,
 'precision': 0.9096897679422549,
 'recall': 0.913005078852418,
 'f1': 0.9108124781354718,
 'exact_match': 0.3208502024291498,
 'meteor': 0.5301155410177051}

In [109]:
results = prepare_stats(ref, pred)
search_df = pd.DataFrame([results])

In [111]:
path = "/home/mateusz15wozny/master_thesis/results/tables/regressor.csv"
df = pd.read_csv(path)
search_df["model_name"] = "regressor-with-deberta-and-llms-and-new-data"
search_df[
    "description"
] = "Use regressor finetuned on data from best models to select spoiler: for types phrase and passage from deberta and vicuna (prompt per type), for multi from llama and 2 vicuna"
search_df = search_df[["model_name", "bleu", "meteor", "exact_match", "recall", "f1", "precision", "description"]]

pd.concat([df, search_df], ignore_index=True).to_csv(path, index=False)

In [15]:
with mlflow.start_run(run_name="not multi deberta, 2 vicuna, llama and regressor v3") as run:
    run_id = run.info.run_id

log_to_mlflow("", prepare_stats(ref, pred), run_id)

### Others

In [8]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer


model_path = "/home/mateusz15wozny/master_thesis/spoiler_generation/regressor/best-model-deberta-finetune-v2"
model = AutoModelForSequenceClassification.from_pretrained(model_path, num_labels=1).to("cuda")
tokenizer = AutoTokenizer.from_pretrained(model_path)

In [9]:
PROMPT = "For given question:\n {} \nanswer:\n {} \ncontext:\n{}"

In [108]:
n = 21
spoilers = merged_df.loc[n].tolist()
data = [PROMPT.format(test.loc[n, "question"], i, test.loc[n, "context"]) for i in spoilers]
input_ids = tokenizer(data, return_tensors="pt", padding="max_length", truncation=True).to("cuda")

What is the coolest feature that the Oregon Home includes?


In [119]:
tmp_test = test[original_df.loc[test["id"] - 1].reset_index(drop=True)["tags"] != "multi"].reset_index(drop=True)

In [105]:
import torch
from tqdm import tqdm

selected_spoilers = []
for n in tqdm(range(merged_df.shape[0])):
    spoilers = merged_df.loc[n].tolist()
    data = [PROMPT.format(test.loc[n, "question"], i, test.loc[n, "context"]) for i in spoilers]
    input_ids = tokenizer(data, return_tensors="pt", padding="max_length", truncation=True).to("cuda")
    with torch.no_grad():
        outputs = model(**input_ids)

    selected_spoilers.append(spoilers[outputs.logits.argmax()])

100%|██████████| 988/988 [02:57<00:00,  5.56it/s]


In [43]:
test = pd.read_json("/home/mateusz15wozny/master_thesis/data/hf_qa/vicuna/test.json")
original_df = Dataset.from_jsonl("/home/mateusz15wozny/master_thesis/data/test.jsonl").df
tmp = original_df.loc[test["id"] - 1].reset_index(drop=True)
pd.concat([test[tmp["tags"] != "multi"], test[tmp["tags"] == "multi"]])

In [None]:
pred.loc[pd.concat([test[tmp["tags"] != "multi"], test[tmp["tags"] == "multi"]]).index]

In [51]:
from stats import calculate_bleu

calculate_bleu(
    test,
    pred.loc[pd.concat([test[tmp["tags"] != "multi"], test[tmp["tags"] == "multi"]]).index],
)

0.0025475436780943023

In [17]:
with mlflow.start_run(run_name="Use deberta v2 regressor for llama, vicuna and opt") as run:
    run_id = run.info.run_id

In [106]:
results = prepare_stats(test, pd.DataFrame(selected_spoilers, columns=["spoiler"]))

In [107]:
results

{'bleu': 0.3933260781647434,
 'precision': 0.9020928325440719,
 'recall': 0.9053516111634521,
 'f1': 0.9031291687295504,
 'exact_match': 0.2783400809716599,
 'meteor': 0.4940156110653304}

In [108]:
results = prepare_stats(test, pd.DataFrame(selected_spoilers, columns=["spoiler"]))
search_df = pd.DataFrame([results])

In [109]:
search_df

Unnamed: 0,bleu,precision,recall,f1,exact_match,meteor
0,0.393326,0.902093,0.905352,0.903129,0.27834,0.494016


In [110]:
# results = prepare_stats(test, pd.DataFrame(selected_spoilers, columns=["spoiler"]))
# search_df = pd.DataFrame([results])
path = "/home/mateusz15wozny/master_thesis/results/tables/regressor.csv"
df = pd.read_csv(path)
search_df["model_name"] = "regressor-v5"
search_df["description"] = "Use regressor for llama (one common prompt), vicuna (one common prompt) and opt(prompt per type)"
search_df = search_df[["model_name", "bleu", "meteor", "exact_match", "recall", "f1", "precision", "description"]]

pd.concat([df, search_df], ignore_index=True).to_csv(path, index=False)

In [41]:
meteor = calculate_meteor(test, pd.DataFrame(selected_spoilers, columns=["spoiler"]))
log_to_mlflow("", {"meteor": meteor}, "86b8ff84bf2e497e821722d14585904b")

In [42]:
merged_df = pd.concat(
    [
        deberta_baseline[mixed["tags"] != "multi"]["spoiler"].apply(Dataset.preprocess_func),
        vicuna_output[mixed["tags"] == "multi"]["spoiler"].apply(Dataset.preprocess_func),
    ]
).to_frame()

In [136]:
with mlflow.start_run(run_name="Select non multi spoilers from baseline and multi from vicuna") as run:
    run_id = run.info.run_id

In [137]:
log_to_mlflow("", prepare_stats(test, merged_df), run_id)

# Classificator

In [2]:
PROMPT = "For given question:\n{question}\nchoose what answer is better\n\n\n## Answer1:\n{ans1}\n\n## Answer2:\n{ans2}\n\n## Context:\n{context}"

In [3]:
import pandas as pd
import re
from nltk.stem import WordNetLemmatizer


def preprocess_func(x: str) -> str:
    stemmer = WordNetLemmatizer()
    document = re.sub(r"\W", " ", x)
    document = re.sub(r"^b\s+", "", document)

    document = document.lower()
    document = document.split()

    document = [stemmer.lemmatize(word) for word in document]
    return " ".join(document)

In [25]:
from spoiler_generation.utils.dataset_class import Dataset
import pandas as pd

test = pd.read_json("/home/mateusz15wozny/master_thesis/data/hf_qa/test_2.json")
test_not_multi = pd.read_json("/home/mateusz15wozny/master_thesis/data/type_based_clf/not_multi/test.json")

not_multi_vicunav2_output = pd.read_csv("/home/mateusz15wozny/master_thesis/models/vicuna-13b-new-prompt/output.csv")
not_multi_vicunav2_output = (
    not_multi_vicunav2_output.loc[test["id"] - 1].reset_index(drop=True)[test["type"] != "multi"].reset_index(drop=True)
)
multi_vicunav2_output = pd.read_csv("/home/mateusz15wozny/master_thesis/models/vicuna-13b-new-prompt/output.csv")
multi_vicunav2_output = multi_vicunav2_output.loc[test["id"] - 1].reset_index(drop=True)[test["type"] == "multi"].reset_index(drop=True)
multi_vicuna_output = pd.read_csv("/home/mateusz15wozny/master_thesis/models/vicuna-13b-finetuned/output.csv")
multi_vicuna_output = multi_vicuna_output.loc[test["id"] - 1].reset_index(drop=True)[test["type"] == "multi"].reset_index(drop=True)
multi_llama_output = pd.read_csv("/home/mateusz15wozny/master_thesis/models/llama-13b-finetuned/output.csv")
multi_llama_output = multi_llama_output.loc[test["id"] - 1].reset_index(drop=True)[test["type"] == "multi"].reset_index(drop=True)
not_multi_output = pd.read_json(
    "/home/mateusz15wozny/master_thesis/models/not-multi-deberta/checkpoint-1293/output.jsonl",
    lines=True,
)

In [4]:
vicuna_output = pd.read_csv("/home/mateusz15wozny/master_thesis/models/vicuna-13b-finetuned/output.csv")
vicunav2_output = pd.read_csv("/home/mateusz15wozny/master_thesis/models/vicuna-13b-new-prompt/output.csv")
llama_output = pd.read_csv("/home/mateusz15wozny/master_thesis/models/llama-13b-finetuned/output.csv")


test = pd.read_json("/home/mateusz15wozny/master_thesis/data/hf_qa/vicuna/test.json")
original_df = Dataset.from_jsonl("/home/mateusz15wozny/master_thesis/data/test.jsonl").df
deberta_full = pd.read_json(
    "/home/mateusz15wozny/master_thesis/models/not-multi-deberta-v2/full_output.jsonl",
    lines=True,
)

In [5]:
test["spoiler"] = test["answers"].apply(lambda x: " ".join([record["text"][0] for record in x]))
vicuna_output = vicuna_output.loc[test["id"] - 1].reset_index(drop=True)
vicunav2_output = vicunav2_output.loc[test["id"] - 1].reset_index(drop=True)
llama_output = llama_output.loc[test["id"] - 1].reset_index(drop=True)

In [6]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch

model_path = "/home/mateusz15wozny/master_thesis/spoiler_generation/classificator/distilbert-base-v3"
model = AutoModelForSequenceClassification.from_pretrained(model_path, num_labels=2).to("cuda")
tokenizer = AutoTokenizer.from_pretrained(model_path)

In [26]:
llama_test = pd.read_json("/home/mateusz15wozny/master_thesis/data/llama_generation/test.json").loc[test["id"] - 1]
llama_test.rename(columns={"output": "spoiler"}, inplace=True)

In [27]:
llama_test_not_multi = llama_test[llama_test["type"] != "multi"].reset_index(drop=True)

In [28]:
llama_test_multi = llama_test[llama_test["type"] == "multi"].reset_index(drop=True)

In [29]:
not_multi_merged_df = pd.DataFrame(
    zip(not_multi_output["spoiler"].apply(Dataset.preprocess_func), not_multi_vicunav2_output["spoiler"].apply(Dataset.preprocess_func)),
    columns=["ans1", "ans2"],
).reset_index(drop=True)

In [30]:
import torch
from tqdm import tqdm

not_multi_selected_spoilers = []
for n in tqdm(range(not_multi_merged_df.shape[0])):
    spoilers = not_multi_merged_df.loc[n].tolist()
    if spoilers[0] == "":
        not_multi_selected_spoilers.append(spoilers[1])
        continue
    if spoilers[1] == "":
        not_multi_selected_spoilers.append(spoilers[0])
        continue
    data = [PROMPT.format(question=llama_test_not_multi.iloc[n, 1], ans1=spoilers[0], ans2=spoilers[1], context=llama_test.iloc[n, 0])]
    input_ids = tokenizer(data, return_tensors="pt", padding="max_length", truncation=True).to("cuda")
    with torch.no_grad():
        outputs = model(**input_ids)

    not_multi_selected_spoilers.append(spoilers[1 - outputs.logits.argmax()])

100%|██████████| 814/814 [00:17<00:00, 47.46it/s]


In [31]:
llama_test_not_multi.rename(columns={"output": "spoiler"}, inplace=True)
llama_test_not_multi.reset_index(drop=True, inplace=True)

In [42]:
multi_merged_df = pd.DataFrame(
    zip(
        multi_vicunav2_output["spoiler"].apply(Dataset.preprocess_func),
        multi_llama_output["spoiler"].apply(Dataset.preprocess_func),
    ),
    columns=["ans1", "ans2"],
).reset_index(drop=True)

In [43]:
import numpy as np
import itertools

multi_selected_spoilers = []
for n in tqdm(range(multi_merged_df.shape[0])):
    spoilers = multi_merged_df.loc[n].tolist()
    score = [0] * len(spoilers)
    for comb in itertools.combinations(range(len(spoilers)), 2):
        if spoilers[comb[0]] == "":
            score[comb[1]] += 1
            continue
        if spoilers[comb[1]] == "":
            score[comb[0]] += 1
            continue
        data = [
            PROMPT.format(
                question=llama_test_multi.iloc[n, 1], ans1=spoilers[comb[0]], ans2=spoilers[comb[1]], context=llama_test.iloc[n, 0]
            )
        ]
        input_ids = tokenizer(data, return_tensors="pt", padding="max_length", truncation=True).to("cuda")
        with torch.no_grad():
            outputs = model(**input_ids)
        if outputs.logits.argmax() == 1:
            score[comb[0]] += 1
        else:
            score[comb[1]] += 1

    multi_selected_spoilers.append(spoilers[np.argmax(score)])

100%|██████████| 174/174 [00:03<00:00, 44.88it/s]


In [45]:
merged_df = pd.DataFrame(
    zip(
        vicuna_output["spoiler"].apply(Dataset.preprocess_func),
        # vicunav2_output["spoiler"].apply(Dataset.preprocess_func),
        llama_output["spoiler"].apply(Dataset.preprocess_func),
    )
).reset_index(drop=True)

In [64]:
pd.read_csv("/home/mateusz15wozny/master_thesis/results/tables/regressor.csv").fillna("")

Unnamed: 0,regressor-3-models-v0,bleu,meteor,exact_match,recall,f1,precision,description,use_type,train_on_new_data,opt-13B-v1,llama-13B-v1,vicuna-13B-v1,vicuna-13B-v2,deberta-finetuned-v3
0,regressor-v0,0.394793,0.486069,0.281377,0.904139,0.90008,0.897016,Use regressor for llama and vicuna which have ...,False,False,,True,True,,
1,regressor-v1,0.421308,0.517158,0.29251,0.909114,0.90511,0.902059,"Use regressor for llama (one common prompt), v...",False,False,,True,True,True,
2,deberta-for-2-types-and-vicuna-for-1,0.422794,0.506421,0.302632,0.908095,0.908629,0.910297,Use deberta to generate spoilers for types phr...,True,False,,,True,,True
3,regressor-v2,0.448089,0.534837,0.319838,0.91354,0.911827,0.911183,Use regressor to select spoiler: for types phr...,True,False,,True,True,True,True
4,regressor-v3,0.41176,0.509925,0.285425,0.90831,0.904619,0.901942,Use regressor finetuned on data from best mode...,False,True,,True,True,,
5,regressor-v4,0.418017,0.515851,0.289474,0.909038,0.905336,0.902617,Use regressor finetuned on data from best mode...,False,True,,True,True,True,
6,regressor-v5,0.396488,0.493906,0.284413,0.905954,0.904187,0.903616,Use regressor finetuned on data from best mode...,False,True,True,True,True,,
7,regressor-v6,0.401651,0.499391,0.286437,0.907015,0.905197,0.90455,Use regressor finetuned on data from best mode...,False,True,True,True,True,True,
8,regressor-v7,0.444519,0.532024,0.32085,0.913477,0.911488,0.910561,Use regressor finetuned on data from best mode...,True,True,,True,True,True,True


In [46]:
import numpy as np
import itertools
from tqdm import tqdm

selected_spoilers = []
for n in tqdm(range(merged_df.shape[0])):
    spoilers = merged_df.loc[n].tolist()
    score = [0] * len(spoilers)
    for comb in itertools.combinations(range(len(spoilers)), 2):
        if spoilers[comb[0]] == "":
            score[comb[1]] += 1
            continue
        if spoilers[comb[1]] == "":
            score[comb[0]] += 1
            continue
        data = [
            PROMPT.format(question=llama_test.iloc[n, 1], ans1=spoilers[comb[0]], ans2=spoilers[comb[1]], context=llama_test.iloc[n, 0])
        ]
        input_ids = tokenizer(data, return_tensors="pt", padding="max_length", truncation=True).to("cuda")
        with torch.no_grad():
            outputs = model(**input_ids)
        if outputs.logits.argmax() == 1:
            score[comb[0]] += 1
        else:
            score[comb[1]] += 1

    selected_spoilers.append(spoilers[np.argmax(score)])

100%|██████████| 988/988 [00:21<00:00, 45.73it/s]


In [44]:
from stats import prepare_stats

pred = pd.concat(
    [
        pd.DataFrame(not_multi_selected_spoilers, columns=["spoiler"]),
        pd.DataFrame(multi_selected_spoilers, columns=["spoiler"]),
    ]
).reset_index(drop=True)
ref = pd.concat([llama_test_not_multi, llama_test_multi]).reset_index(drop=True)
prepare_stats(ref, pred)

{'bleu': 0.43455372191988867,
 'precision': 0.9114617712584584,
 'recall': 0.9109391218978866,
 'f1': 0.9106571890323268,
 'exact_match': 0.31275303643724695,
 'meteor': 0.5198940468784699}

In [49]:
llama_test.reset_index(drop=True, inplace=True)

In [50]:
results = prepare_stats(llama_test, pd.DataFrame(selected_spoilers, columns=["spoiler"]))
search_df = pd.DataFrame([results])



In [51]:
results

{'bleu': 0.4005121569512152,
 'precision': 0.9002666785287471,
 'recall': 0.9057403748575975,
 'f1': 0.9025039885811478,
 'exact_match': 0.2834008097165992,
 'meteor': 0.4927087598212761}

In [52]:
path = "/home/mateusz15wozny/master_thesis/results/tables/classificator.csv"
df = pd.read_csv(path)
search_df["model_name"] = "classifier-for-2-models-not-use-type"
search_df["description"] = "Use classifier for llama and vicuna which have one common prompt for all spoiler types"
search_df = search_df[["model_name", "bleu", "meteor", "exact_match", "recall", "f1", "precision", "description"]]

pd.concat([df, search_df], ignore_index=True).to_csv(path, index=False)