## **Question Answering with Llama 3.2 1B Model**




In [1]:
#load Model
import torch
from unsloth import FastLanguageModel
# Options for loading the pre-quantised Llama 3.2 1B Instruct checkpoint.
# NOTE(review): dtype=None presumably lets Unsloth choose the dtype itself —
# confirm against the Unsloth documentation.
load_options = {
    "model_name": "unsloth/Llama-3.2-1B-Instruct-bnb-4bit",
    "max_seq_length": 2048,
    "dtype": None,
    "load_in_4bit": True,
}

# from_pretrained hands back the model together with its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(**load_options)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2025.2.12: Fast Llama patching. Transformers: 4.46.1.
   \\   /|    GPU: NVIDIA GeForce RTX 4070 Ti SUPER. Max memory: 15.693 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 8.9. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [73]:
from unsloth.chat_templates import get_chat_template
import re
import base64
from PIL import Image
import io
import numpy as np
from torchvision import transforms

# Put both models into inference mode before any generate() call.
# NOTE(review): FastVisionModel and modelV are not defined in the cells shown
# here; they are assumed to come from an earlier cell — confirm.
FastLanguageModel.for_inference(model)  # text model
FastVisionModel.for_inference(modelV)  # vision model

# ShareGPT-style message keys ("from"/"value") and role names ("human"/"gpt")
# translated to the role/content/user/assistant names the template expects.
sharegpt_mapping = {
    "role": "from",
    "content": "value",
    "user": "human",
    "assistant": "gpt",
}

# Re-wrap the text tokenizer with the ChatML template.
# Other templates named by the original author: zephyr, mistral, llama,
# alpaca, vicuna, vicuna_old, unsloth.
tokenizer = get_chat_template(
    tokenizer,
    chat_template="chatml",
    mapping=sharegpt_mapping,
    map_eos_token=True,  # original note: maps <|im_end|> to </s> instead
)

def get_summary(text):
    """Summarize ``text`` with the chat model and return the assistant's reply.

    The prompt is a fixed system message plus a "Summarize the following
    text: ..." user message in ShareGPT form ("from"/"value" keys). It is
    rendered with the module-level ``tokenizer`` and generated with the
    module-level ``model`` on the "cuda" device.

    Args:
        text: The passage to summarize; interpolated into the user message.

    Returns:
        The stripped text that follows the first ``<|im_start|>assistant``
        header in the decoded output, up to the next ``<|im_end|>`` marker.
        If generation stopped before emitting ``<|im_end|>`` (for example
        because the 512-token budget ran out), the partial reply up to the end
        of the output is returned instead of being discarded. ``None`` is
        returned only when no assistant header is present at all.
    """
    messages = [
        {"from": "system", "value": "You are a helpful AI assistant."},
        {"from": "human", "value": f"Summarize the following text: {text}"},
    ]
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to("cuda")

    # Gradients are not needed for generation.
    with torch.no_grad():
        outputs = model.generate(input_ids=inputs, max_new_tokens=512, use_cache=True)

    # The decoded string holds the whole conversation (prompt + completion).
    response = tokenizer.batch_decode(outputs)[0]

    # Capture what follows the assistant header. The closing <|im_end|> is
    # optional (\Z = end of string) so a truncated generation still yields its
    # partial summary rather than None.
    pattern = r"<\|im_start\|>assistant\n(.*?)(?:<\|im_end\|>|\Z)"
    match = re.search(pattern, response, re.DOTALL)
    if match is None:
        return None
    return match.group(1).strip()
            
def base64_to_tensor(base64_string):
    """Decode a base64-encoded image into an RGB PIL image.

    Despite the name, the value returned is whatever ``Image.open(...)
    .convert("RGB")`` produces (a PIL image), not a tensor.

    Args:
        base64_string: Base64 text holding the encoded image file bytes.

    Returns:
        The decoded image converted to RGB mode.
    """
    raw_bytes = base64.b64decode(base64_string)
    # PIL reads from a file-like object, so wrap the bytes in a buffer.
    buffer = io.BytesIO(raw_bytes)
    decoded = Image.open(buffer)
    return decoded.convert("RGB")
    
def analyze_image(image_tensor):
    """Ask the vision model for a short description of an image.

    A single user turn (an image placeholder plus a fixed instruction) is
    rendered with the module-level ``tokenizerV``, combined with the image,
    and generated with the module-level ``modelV`` on the "cuda" device.

    Args:
        image_tensor: The image handed to ``tokenizerV`` as its first
            argument. NOTE(review): ``summarize_row`` passes the PIL image
            returned by ``base64_to_tensor`` here, not a tensor.

    Returns:
        Everything after the first ``"assistant\\n\\n"`` marker in the decoded
        output, stripped. If the marker is absent, the decoded output is
        returned unchanged. The full remainder is kept even when the
        description itself contains the marker text again.
    """
    instruction = "Describe accurately and concisely what you see in this image."

    messages = [
        {"role": "user", "content": [
            {"type": "image"},
            {"type": "text", "text": instruction}
        ]}
    ]

    input_text = tokenizerV.apply_chat_template(messages, add_generation_prompt=True)

    # add_special_tokens=False: presumably the rendered template already
    # carries the special tokens — TODO confirm.
    inputs = tokenizerV(
        image_tensor,
        input_text,
        add_special_tokens=False,
        return_tensors="pt",
    ).to("cuda")

    # Gradients are not needed for generation.
    with torch.no_grad():
        outputs = modelV.generate(**inputs, max_new_tokens=300, temperature=0.5)
        response = tokenizerV.decode(outputs[0], skip_special_tokens=True)

    # The decoded text includes the prompt. Split once on the assistant header
    # so later occurrences of the same text inside the reply are preserved.
    _, marker, reply = response.partition("assistant\n\n")
    return reply.strip() if marker else response
    
def summarize_row(row):
    """Produce a summary for one extracted element, chosen by its type.

    Args:
        row: A mapping-like record with an "element_type" entry and a
            "content" entry.

    Returns:
        ``get_summary(content)`` for "text" rows; ``analyze_image`` applied to
        the base64-decoded content for "image" rows; ``None`` for any other
        element type.
    """
    kind = row["element_type"]

    if kind == "text":
        return get_summary(row["content"])

    if kind == "image":
        picture = base64_to_tensor(row["content"])
        return analyze_image(picture)

    # Any other element type is left without a summary.
    return None
        
# --- Uncomment the two lines below to generate the summaries with Llama and save them to Parquet ---
# df["summary"] = df.apply(summarize_row, axis=1)
# df.to_parquet(os.path.join(output_dir, "extracted_data_summary_llama.parquet"), index=False)


In [74]:
# Location of the saved Llama summaries inside output_dir.
summary_parquet_path = os.path.join(output_dir, "extracted_data_summary_llama.parquet")

# Read the summaries back into a DataFrame.
df = pd.read_parquet(summary_parquet_path)

# Leave the frame as the cell's last expression so the notebook renders it.
df

Unnamed: 0,pdf_name,element_type,content,summary
0,Sample_Table.pdf,table,gASV6wIAAAAAAAB9lCiMBHR5cGWUjAVUYWJsZZSMCmVsZW...,
1,chap_1_content.pdf,text,1 www.tntextbooks.in LAWS OF MOTION\n\nLearnin...,Here's a summary of the text:\n\nThe text disc...
2,chap_1_content.pdf,text,in this unit.\n\n1\n\n| | 10th_Science_Unit-1....,The text describes the concepts of mechanics i...
3,chap_1_content.pdf,text,1 .2 .1 T ypes of I nertia\n\na) Inertia of re...,The text appears to be a summary of concepts r...
4,chap_1_content.pdf,text,forces.\n\n(a) Like parallel forces: Two or mo...,The text describes the concept of forces and r...
5,chap_1_content.pdf,text,1 .4 .5 Rotating Effect of Force\n\nThe door c...,Here's a summary of the text:\n\nThe rotating ...
6,chap_1_content.pdf,text,1 .4 .7 Application of T orque\n\n1. Gears:\n\...,The text appears to be a summary of the laws o...
7,chap_1_content.pdf,text,1 .5 NE W T ON’ S S E C OND L A W OF MOT I ON\...,The given text is a summary of Newton's Second...
8,chap_1_content.pdf,text,1 .7 NE W T ON’ S T H I R D L A W OF MOT I ON\...,The text explains Newton's third law of motion...
9,chap_1_content.pdf,text,Figure 1.7 Conservation of\n\nlinear momentum\...,The text is a proof of the law of conservation...
