In [1]:
import os

# Switch the kernel's working directory to the project checkout before any
# other cell runs. Presumably this is so relative paths (and the currently
# disabled `factue` import below) resolve against the repo -- confirm.
project_root = '/mnt/openfact/users/msawinski/factue-task2'
os.chdir(project_root)
# from factue.methods.llm_langchain.llm import Llm


In [2]:
from langchain_huggingface import ChatHuggingFace, HuggingFacePipeline
from transformers import BitsAndBytesConfig

In [3]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import pipeline

In [4]:
# Single source of truth for the checkpoint, so the tokenizer and the model
# are always loaded from the same repository.
MODEL_ID = "meta-llama/Llama-3.1-8B-Instruct"

# Load tokenizer first.
# `trust_remote_code` is intentionally not passed: Llama is implemented
# natively in transformers, so there is no reason to allow execution of
# repository-supplied Python code.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Fix pad_token if needed: fall back to the EOS token when the tokenizer
# does not define a dedicated padding token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# 4-bit NF4 quantization with double quantization; computation in float16.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype="float16",
    bnb_4bit_use_double_quant=True,
)

# Load model with the quantization settings above.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    quantization_config=quantization_config,
    # device_map="auto"
)

# Build HF pipeline. Decoding is greedy (do_sample=False), and only the newly
# generated text is returned (return_full_text=False), not the echoed prompt.
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    do_sample=False,
    # temperature=0.0,
    repetition_penalty=1.03,
    return_full_text=False,
)

# Now build LangChain LLM around the pipeline...
llm = HuggingFacePipeline(pipeline=pipe)

# ...and expose it through the chat-message interface.
chat_model = ChatHuggingFace(llm=llm)


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Device set to use cuda:0


In [5]:

# quantization_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_quant_type="nf4",
#     bnb_4bit_compute_dtype="float16",
#     bnb_4bit_use_double_quant=True,
# )

# llm = HuggingFacePipeline.from_model_id(
#     model_id="meta-llama/Llama-3.1-8B-Instruct",
#     task="text-generation",
    
#     pipeline_kwargs=dict(
#         max_new_tokens=512,
#         do_sample=False,
#         repetition_penalty=1.03,
#         return_full_text=False,
#         tokenizer=tokenizer,
#     ),
#     model_kwargs={"quantization_config": quantization_config},
# )

# chat_model = ChatHuggingFace(llm=llm)

In [8]:
from langchain.prompts import ChatPromptTemplate

# Output contract appended to the system message. The braces are doubled
# because this text is parsed as a prompt template, where single braces mark
# template variables. The JSON example is well-formed (closing quote on the
# "alternative" value) so the model is not shown malformed JSON to imitate.
promt_format = """\nOutput ONLY JSON: in the format {{"rating": 0-10, "reason": "reason for rating","alternative": "claim text that better represents the text of post"}}."""

prmpt_system = """You are an expert journalist. Your task is to validate if a claim was accuratley extracted from a post. Compare it and determine how accurately the claim reflects post. You outout rating  from 0 to 10, where 0 means the claim is completely different from original text and 10 means the claim most accurately reflects the post. Do not include additional information. Compare ONLY the claim text with post text."""

# A template, NOT an f-string: {post} and {claim} are filled in by
# `prompt_template.invoke(...)` below. Pre-formatting the text here would make
# the template parser treat any "{" or "}" inside a post or claim as a
# template variable, and would leave the values passed to `invoke` unused.
prmpt_user = """POST:\n{post}\n\nCLAIM:\n{claim}"""

# Example input pair used to exercise the prompt.
claim = """Photo shows Louis Armstrong as a child"""
post = """The Karnofsky Jewish family, who immigrated to the United States from Lithuania, employed a 7-year-old boy and adopted (so to speak) him into their home.  He was originally given homework to get food because he was a starving kid.  He remained under the Jewish families employ, until he was 12  Karnofsky gave him money to buy his first instrument, which was a common instrument in Jewish families.  They really admired his musical talent.Later, when he became a professional"""

prompt_template = ChatPromptTemplate.from_messages(
    [
        ("system", prmpt_system + promt_format),
        ("human", prmpt_user),
    ]
)

# Render the messages for this specific post/claim pair.
prompt = prompt_template.invoke(
    {
        "post": post,
        "claim": claim,
    }
)

# The reply content is JSON text; print() keeps its line breaks readable.
print(chat_model.invoke(prompt).content)



{
  "rating": 0,
  "reason": "The claim does not mention the Karnofsky family or the boy's name, but rather shows a photo of Louis Armstrong as a child.",
  "alternative": "The Karnofsky Jewish family, who immigrated to the United States from Lithuania, employed a 7-year-old boy and adopted him into their home."
}
