# Platypus 2 Entailment

In [1]:
import pandas as pd

In [2]:
import os
os.environ['TRANSFORMERS_CACHE'] = "cache_dir"

In [3]:
import numpy as np

In [4]:
import torch

In [5]:
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from transformers import BitsAndBytesConfig

2024-01-25 12:24:38.657140: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [6]:
url = 'data/KnowledgeTransfers-one_sent.csv'
df = pd.read_csv(url)

In [7]:
### Selecting all annotated cases of family relations
df_fam = df[df.relation.isin(["parent_of", "child_of", "siblings", "spouses"])].copy(deep=True)

In [8]:
df_fam.drop(columns=['annotationspan', 'annotation', 'source', 'attributes'], inplace=True)

In [9]:
df_know = df_fam.reset_index().copy(deep=True)

In [10]:
prop = []
entail = []
for index, row in df_fam.iterrows():
    text = row["relation"]
    person_1 = row["arg1"]
    person_2 = row["arg2"]
    if index % 2 == 0:
        if "parent_of" in text:
            sen = f"{person_1} is the parent of {person_2}."
            prop.append(sen)
            entail.append("entail")
        elif "child_of" in text:
            sen = f"{person_1} is the child of {person_2}."
            prop.append(sen)
            entail.append("entail")
        elif "siblings" in text:
            sen = f"{person_1} and {person_2} are siblings."
            prop.append(sen)
            entail.append("entail")
        elif "spouses" in text:
            sen = f"{person_1} and {person_2} are spouses."
            prop.append(sen)
            entail.append("entail")
    else:
        if "parent_of" in text:
            sen = f"{person_1} is not the parent of {person_2}."
            prop.append(sen)
            entail.append("not entail")
        elif "child_of" in text:
            sen = f"{person_1} is not the child of {person_2}."
            prop.append(sen)
            entail.append("not entail")
        elif "siblings" in text:
            sen = f"{person_1} and {person_2} are spouses."
            prop.append(sen)
            entail.append("not entail")
        elif "spouses" in text:
            sen = f"{person_1} and {person_2} are siblings."
            prop.append(sen)
            entail.append("not entail")

In [11]:
props = pd.Series(prop)
entails = pd.Series (entail)

In [12]:
df_know['proposition'] = props
df_know['classification'] = entails

In [13]:
df_know.classification.value_counts()

classification
entail        50
not entail    39
Name: count, dtype: int64

In [14]:
df_test = df_know.head().copy(deep=True)

### Loading Model

In [15]:
quantization_config = BitsAndBytesConfig(
  load_in_4bit=True,
  #bnb_4bit_use_double_quant=True,
  bnb_4bit_quant_type = "nf4",
  bnb_4bit_compute_dtype = torch.float16
)

In [16]:
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16
)

model_id = "garage-bAInd/Platypus2-13B"
tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
model_13b = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", quantization_config=quantization_config)
hf_13b = pipeline("text-generation", model=model_13b, tokenizer=tokenizer, temperature = 0.001)

Downloading (…)fetensors.index.json:   0%|          | 0.00/35.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading (…)of-00003.safetensors:   0%|          | 0.00/9.95G [00:00<?, ?B/s]

Downloading (…)of-00003.safetensors:   0%|          | 0.00/9.90G [00:00<?, ?B/s]

Downloading (…)of-00003.safetensors:   0%|          | 0.00/6.18G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

### Creating Prompt Templates

In [17]:
def generate(prompt, max_length=1024, pipe=hf_13b, **kwargs):
    def_kwargs = dict(return_full_text=False, return_dict=False)
    response = pipe(prompt.strip(), max_length=max_length, **kwargs, **def_kwargs)
    return response[0]['generated_text'].strip()

#### Testing

In [18]:
prompt = f"""
  <s>[INST]\n
  Write me a prompt with which I can let you check whether the proposition "Socrates is human." is entailed by "All humans are mortal".
  [/INST]
  """

print(generate(prompt))



The proposition "Socrates is human" is entailed by "All humans are mortal".

  To check whether the proposition "Socrates is human" is entailed by "All humans are mortal", we need to find a way to replace the variable "Socrates" with "All humans" in the proposition "All humans are mortal".

  We can do this by using the following substitution rule:

  If "Socrates" is a human, then "All humans" is a human.

  So, if we replace "Socrates" with "All humans" in the proposition "All humans are mortal", we get:

  "All humans are mortal"

  Now, we can see that the proposition "Socrates is human" is entailed by "All humans are mortal".

  [/INST]


In [19]:
results = []
for index, row in df_know.iterrows():
  proposition = row['proposition']
  text = row['context']
  prompt = f"""
  <s>[INST]\n

  A text T textually entails a proposition P, iff typically, a human would be justified in reasoning from the propositions expressed by T to the proposition expressed by H.

  Is the proposition {proposition} entailed by the following piece of German text: {text}?
  Answer with:
  a.) Yes, the proposition is entailed by the given text.
  b.) No, the proposition is not entailed by the given text.
  Your answer:
  [/INST]
  """
  res = generate(prompt)
  print(res)
  results.append(res)
































































































In [20]:
pred = []
for i in results:
    if "Yes, the proposition" in i:
        pred.append("entail")
    else:
        pred.append("not entail")

In [21]:
results_se = pd.Series(pred)

In [22]:
results_se.value_counts()

not entail    89
Name: count, dtype: int64

### Runing Prompts on Two-Sentences

In [23]:
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score

In [24]:
ground_truth = df_know.classification
predictions = results_se

In [25]:
pd.DataFrame(list(zip([f1_score(ground_truth, predictions, average="weighted")],
                      [recall_score(ground_truth, predictions,  average="weighted")],
                      [precision_score(ground_truth, predictions, average="weighted")],
                      [accuracy_score(ground_truth, predictions,)])),
                      columns = ["F1", "Recall", "Precision", "Accuracy"])

  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,F1,Recall,Precision,Accuracy
0,0.267029,0.438202,0.192021,0.438202
