# Platypus 2 w/ person

## 7B - Context window 1

In [1]:
import os
os.environ['TRANSFORMERS_CACHE'] = "cache_dir"
import transformers
from transformers import pipeline
from transformers import BitsAndBytesConfig
from transformers import AutoModelForCausalLM, AutoTokenizer
import re
import pandas as pd
import torch
import langchain
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
import sklearn
import time
from sacrebleu import BLEU
import tqdm.contrib
df = pd.read_csv("./data/KnowledgeTransfers-one_sent.csv", sep=",")
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16
)

model_id = "garage-bAInd/Platypus2-7B"
tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
model_bf16 = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", quantization_config=quantization_config)
def run_llm(snippets, persons1, persons2):
    outputs = []
    for snippet, person1, person2 in zip(snippets, persons1, persons2):
        pipe = pipeline(
        "text-generation", model=model_bf16, tokenizer=tokenizer, max_new_tokens=200, temperature = 0.001
        )
        hf = HuggingFacePipeline(pipeline=pipe)
        template = """
        ####
        Instruction: You are a literary scholar.
        What is the family relation between {person1} and {person2} in the German text {drama_snippet}?
        The possible family relations are parent, child, uncle, siblings, cousins.
        Answer in a single sentence in the following format: The family relation between {person1} and {person2} is >>correct family relation<<.
        ####
        """
        drama_snippet = str(snippet)
        person1 = str(person1)
        person2 = str(person2)
        prompt = PromptTemplate(
            input_variables=["drama_snippet", "person1", "person2"],
            template=template,
        )
        chain = LLMChain(llm=hf, prompt=prompt)
        output = chain.run({"drama_snippet": {drama_snippet}, "person1": person1, "person2": person2})
        outputs.append(output)
    return outputs
df_sample = df.loc[(df.annotation != "no-transfer") & (df.relation.isin(["parent_of", "child_of", "siblings", "uncle_of", "cousins"]))]
llm_outputs = run_llm(df_sample.context, df_sample.arg1, df_sample.arg2)
predictions_relation = []
for o in llm_outputs:
    if "parent" in o:
        prediction_relation = ["parent_of"]
    elif "child" in o:
        prediction_relation = ["child_of"]
    elif "siblings" in o:
        prediction_relation = ["siblings"]
    elif "uncle" in o:
        prediction_relation = ["uncle_of"]
    elif "cousins" in o:
        prediction_relation = ["cousins"]
    else:
        prediction_relation = ["else"]
    predictions_relation.extend(prediction_relation)
results_relation = pd.DataFrame(list(zip(list(df_sample.relation), predictions_relation)), columns = ["key", "pred"])
pd.DataFrame(list(zip([sklearn.metrics.f1_score(results_relation.key, results_relation.pred, average="weighted")], 
             [sklearn.metrics.recall_score(results_relation.key, results_relation.pred, average="weighted")], 
             [sklearn.metrics.precision_score(results_relation.key, results_relation.pred, average="weighted")],
             [sklearn.metrics.accuracy_score(results_relation.key, results_relation.pred)])), 
             columns = ["F1", "Recall", "Precision", "Accuracy"])

2023-12-12 16:38:07.774487: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,F1,Recall,Precision,Accuracy
0,0.257109,0.190476,0.506378,0.190476


## 7B - Context window 2

In [2]:
import os
os.environ['TRANSFORMERS_CACHE'] = "cache_dir"
import transformers
from transformers import pipeline
from transformers import BitsAndBytesConfig
from transformers import AutoModelForCausalLM, AutoTokenizer
import re
import pandas as pd
import torch
import langchain
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
import sklearn
import time
from sacrebleu import BLEU
import tqdm.contrib
df = pd.read_csv("./data/KnowledgeTransfers-two_sent.csv", sep=",")
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16
)

model_id = "garage-bAInd/Platypus2-7B"
tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
model_bf16 = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", quantization_config=quantization_config)
def run_llm(snippets, persons1, persons2):
    outputs = []
    for snippet, person1, person2 in zip(snippets, persons1, persons2):
        pipe = pipeline(
        "text-generation", model=model_bf16, tokenizer=tokenizer, max_new_tokens=200, temperature = 0.001
        )
        hf = HuggingFacePipeline(pipeline=pipe)
        template = """
        ####
        Instruction: You are a literary scholar.
        What is the family relation between {person1} and {person2} in the German text {drama_snippet}?
        The possible family relations are parent, child, uncle, siblings, cousins.
        Answer in a single sentence in the following format: The family relation between {person1} and {person2} is >>correct family relation<<.
        ####
        """
        drama_snippet = str(snippet)
        person1 = str(person1)
        person2 = str(person2)
        prompt = PromptTemplate(
            input_variables=["drama_snippet", "person1", "person2"],
            template=template,
        )
        chain = LLMChain(llm=hf, prompt=prompt)
        output = chain.run({"drama_snippet": {drama_snippet}, "person1": person1, "person2": person2})
        outputs.append(output)
    return outputs
df_sample = df.loc[(df.annotation != "no-transfer") & (df.relation.isin(["parent_of", "child_of", "siblings", "uncle_of", "cousins"]))]
llm_outputs = run_llm(df_sample.context, df_sample.arg1, df_sample.arg2)
predictions_relation = []
for o in llm_outputs:
    if "parent" in o:
        prediction_relation = ["parent_of"]
    elif "child" in o:
        prediction_relation = ["child_of"]
    elif "siblings" in o:
        prediction_relation = ["siblings"]
    elif "uncle" in o:
        prediction_relation = ["uncle_of"]
    elif "cousins" in o:
        prediction_relation = ["cousins"]
    predictions_relation.extend(prediction_relation)
results_relation = pd.DataFrame(list(zip(list(df_sample.relation), predictions_relation)), columns = ["key", "pred"])
pd.DataFrame(list(zip([sklearn.metrics.f1_score(results_relation.key, results_relation.pred, average="weighted")], 
             [sklearn.metrics.recall_score(results_relation.key, results_relation.pred, average="weighted")], 
             [sklearn.metrics.precision_score(results_relation.key, results_relation.pred, average="weighted")],
             [sklearn.metrics.accuracy_score(results_relation.key, results_relation.pred)])), 
             columns = ["F1", "Recall", "Precision", "Accuracy"])

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,F1,Recall,Precision,Accuracy
0,0.293283,0.333333,0.309524,0.333333


## 13B - Context window 1

In [3]:
import os
os.environ['TRANSFORMERS_CACHE'] = "cache_dir"
import transformers
from transformers import pipeline
from transformers import BitsAndBytesConfig
from transformers import AutoModelForCausalLM, AutoTokenizer
import re
import pandas as pd
import torch
import langchain
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
import sklearn
import time
from sacrebleu import BLEU
import tqdm.contrib
df = pd.read_csv("./data/KnowledgeTransfers-one_sent.csv", sep=",")
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16
)

model_id = "garage-bAInd/Platypus2-13B"
tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
model_bf16 = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", quantization_config=quantization_config)
def run_llm(snippets, persons1, persons2):
    outputs = []
    for snippet, person1, person2 in zip(snippets, persons1, persons2):
        pipe = pipeline(
        "text-generation", model=model_bf16, tokenizer=tokenizer, max_new_tokens=200, temperature = 0.001
        )
        hf = HuggingFacePipeline(pipeline=pipe)
        template = """
        ####
        Instruction: You are a literary scholar.
        What is the family relation between {person1} and {person2} in the German text {drama_snippet}?
        The possible family relations are parent, child, uncle, siblings, cousins.
        Answer in a single sentence in the following format: The family relation between {person1} and {person2} is >>correct family relation<<.
        Do NOT write code.
        Do NOT write anything before or after the answer sentence.
        ####
        """
        drama_snippet = str(snippet)
        person1 = str(person1)
        person2 = str(person2)
        prompt = PromptTemplate(
            input_variables=["drama_snippet", "person1", "person2"],
            template=template,
        )
        chain = LLMChain(llm=hf, prompt=prompt)
        output = chain.run({"drama_snippet": {drama_snippet}, "person1": person1, "person2": person2})
        outputs.append(output)
    return outputs
df_sample = df.loc[(df.annotation != "no-transfer") & (df.relation.isin(["parent_of", "child_of", "siblings", "uncle_of", "cousins"]))]
llm_outputs = run_llm(df_sample.context, df_sample.arg1, df_sample.arg2)
predictions_relation = []
for o in llm_outputs:
    if "parent" in o:
        prediction_relation = ["parent_of"]
    elif "child" in o:
        prediction_relation = ["child_of"]
    elif "siblings" in o:
        prediction_relation = ["siblings"]
    elif "uncle" in o:
        prediction_relation = ["uncle_of"]
    elif "cousins" in o:
        prediction_relation = ["cousins"]
    predictions_relation.extend(prediction_relation)
results_relation = pd.DataFrame(list(zip(list(df_sample.relation), predictions_relation)), columns = ["key", "pred"])
pd.DataFrame(list(zip([sklearn.metrics.f1_score(results_relation.key, results_relation.pred, average="weighted")], 
             [sklearn.metrics.recall_score(results_relation.key, results_relation.pred, average="weighted")], 
             [sklearn.metrics.precision_score(results_relation.key, results_relation.pred, average="weighted")],
             [sklearn.metrics.accuracy_score(results_relation.key, results_relation.pred)])), 
             columns = ["F1", "Recall", "Precision", "Accuracy"])

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]



Unnamed: 0,F1,Recall,Precision,Accuracy
0,0.411558,0.464286,0.49604,0.464286


## 13B - Context window 2

In [4]:
import os
os.environ['TRANSFORMERS_CACHE'] = "cache_dir"
import transformers
from transformers import pipeline
from transformers import BitsAndBytesConfig
from transformers import AutoModelForCausalLM, AutoTokenizer
import re
import pandas as pd
import torch
import langchain
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
import sklearn
import time
from sacrebleu import BLEU
import tqdm.contrib
df = pd.read_csv("./data/KnowledgeTransfers-two_sent.csv", sep=",")
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16
)

model_id = "garage-bAInd/Platypus2-13B"
tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
model_bf16 = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", quantization_config=quantization_config)
def run_llm(snippets, persons1, persons2):
    outputs = []
    for snippet, person1, person2 in zip(snippets, persons1, persons2):
        pipe = pipeline(
        "text-generation", model=model_bf16, tokenizer=tokenizer, max_new_tokens=200, temperature = 0.001
        )
        hf = HuggingFacePipeline(pipeline=pipe)
        template = """
        ####
        Instruction: You are a literary scholar.
        What is the family relation between {person1} and {person2} in the German text {drama_snippet}?
        The possible family relations are parent, child, uncle, siblings, cousins.
        Answer in a single sentence in the following format: The family relation between {person1} and {person2} is >>correct family relation<<.
        Do NOT write code.
        Do NOT write anything before or after the answer sentence.
        ####
        """
        drama_snippet = str(snippet)
        person1 = str(person1)
        person2 = str(person2)
        prompt = PromptTemplate(
            input_variables=["drama_snippet", "person1", "person2"],
            template=template,
        )
        chain = LLMChain(llm=hf, prompt=prompt)
        output = chain.run({"drama_snippet": {drama_snippet}, "person1": person1, "person2": person2})
        outputs.append(output)
    return outputs
df_sample = df.loc[(df.annotation != "no-transfer") & (df.relation.isin(["parent_of", "child_of", "siblings", "uncle_of", "cousins"]))]
llm_outputs = run_llm(df_sample.context, df_sample.arg1, df_sample.arg2)
predictions_relation = []
for o in llm_outputs:
    if "parent" in o:
        prediction_relation = ["parent_of"]
    elif "child" in o:
        prediction_relation = ["child_of"]
    elif "siblings" in o:
        prediction_relation = ["siblings"]
    elif "uncle" in o:
        prediction_relation = ["uncle_of"]
    elif "cousins" in o:
        prediction_relation = ["cousins"]
    predictions_relation.extend(prediction_relation)
results_relation = pd.DataFrame(list(zip(list(df_sample.relation), predictions_relation)), columns = ["key", "pred"])
pd.DataFrame(list(zip([sklearn.metrics.f1_score(results_relation.key, results_relation.pred, average="weighted")], 
             [sklearn.metrics.recall_score(results_relation.key, results_relation.pred, average="weighted")], 
             [sklearn.metrics.precision_score(results_relation.key, results_relation.pred, average="weighted")],
             [sklearn.metrics.accuracy_score(results_relation.key, results_relation.pred)])), 
             columns = ["F1", "Recall", "Precision", "Accuracy"])

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]



Unnamed: 0,F1,Recall,Precision,Accuracy
0,0.419667,0.464286,0.485267,0.464286
