In [20]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import fitz  # PyMuPDF
import faiss

In [21]:
# Checkpoint to load; swap in a smaller model if memory is tight
model_id = "meta-llama/Llama-3.2-3B"

tokenizer = AutoTokenizer.from_pretrained(model_id)

# Half-precision weights; device_map="auto" leaves device placement to the loader
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.float16,
)


Loading checkpoint shards: 100%|██████████| 2/2 [00:16<00:00,  8.00s/it]
Some parameters are on the meta device because they were offloaded to the disk.


In [22]:
def extract_pdf_text(pdf_path):
    """Return the text of every page of the PDF at ``pdf_path``, joined by newlines."""
    with fitz.open(pdf_path) as pdf:
        # Visit pages by index, in document order, and collect each page's text
        pages = [pdf.load_page(n).get_text() for n in range(pdf.page_count)]
    return "\n".join(pages)

# Read the capstone proposal PDF into a single string
pdf_path = "Dataset/HP PWI - MS AI Capstone Project proposal 2024-07-22.pdf"
pdf_text = extract_pdf_text(pdf_path)

# Preview the first 500 characters to confirm the extraction worked
print(pdf_text[0:500])


https://eecs.oregonstate.edu/capstone/submission/pages/viewSingleProject.php?id=yoAZcQaDi1oBECw1 
1/2 
 
MS AI Capstone project for 2024-2025 academic year 
22 July 2024 
Pieter van Zee 
pieter.van.zee@hp.com 
HP Corvallis PWI (PageWide Industrial) 
 
 
AI (LLM, RAG) Smart Advisor to 
Automate Control of a  Million-
Dollar Printing Press 
HP’s Corvallis site develops and produces multi-million-dollar printing presses larger than a shipping container that print from 
large rolls of paper at up to


In [23]:
# The tokenizer calls below use padding=True, which needs a pad token to be
# defined, so fall back to the end-of-sequence token when the tokenizer has none.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token  # Option 1: Use eos_token as pad_token
    # Alternatively: tokenizer.add_special_tokens({'pad_token': '[PAD]'})

# Helper function to embed text using the Llama model
def embed_text(text):
    """Embed text as the mean of the causal LM's per-token output logits.

    Args:
        text: A string, or a list of strings, to embed.

    Returns:
        A float32 NumPy array with one row per input text and one column per
        logit dimension, suitable for FAISS ``add`` / ``search``.
    """
    inputs = tokenizer(
        text,
        return_tensors="pt",
        padding=True,
        truncation=True,
    ).to(model.device)  # follow the model's actual placement instead of assuming MPS

    with torch.no_grad():
        outputs = model(**inputs)

    # The model is loaded in float16; pool in float32 for accuracy and because
    # FAISS works on float32 vectors.
    logits = outputs.logits.float()

    # Mask-aware mean over the sequence axis: when several texts are padded to a
    # common length, pad positions must not contribute to the average. For a
    # single unpadded text this equals a plain mean over tokens.
    mask = inputs["attention_mask"].unsqueeze(-1).to(device=logits.device, dtype=logits.dtype)
    token_counts = mask.sum(dim=1).clamp(min=1.0)
    pooled = (logits * mask).sum(dim=1) / token_counts

    # NOTE(review): logits are kept as the embedding source to preserve the
    # existing vector width; pooling hidden states (output_hidden_states=True)
    # would give far smaller vectors — consider if the index is rebuilt anyway.
    embeddings = pooled.cpu().numpy()
    return embeddings

# Cut the extracted text into back-to-back windows of at most 512 characters
pdf_chunks = [pdf_text[start:start + 512] for start in range(0, len(pdf_text), 512)]

# Probe the embedder once to learn the vector width the index must be created with
dimension = embed_text("sample text").shape[1]
index = faiss.IndexFlatL2(dimension)

# Embed the chunks in document order so row i of the index matches pdf_chunks[i]
for chunk in pdf_chunks:
    embedding = embed_text(chunk)
    index.add(embedding)

print("FAISS index built successfully.")



FAISS index built successfully.


In [24]:
def retrieve_and_generate(query):
    """Answer a query using the single closest PDF chunk as context.

    Args:
        query: The user's question as a string.

    Returns:
        The decoded generation as a string: the prompt (context, query and
        "Answer:") followed by up to 50 newly generated tokens.

    Raises:
        IndexError: If the FAISS search returns no match (e.g. empty index).
    """
    # Embed the query
    query_embedding = embed_text(query)

    # Search the index for the nearest chunk
    _, indices = index.search(query_embedding, k=1)
    best = int(indices[0][0])
    if best < 0:
        # FAISS reports "no result" as -1, which would otherwise silently
        # select the last element of pdf_chunks.
        raise IndexError("FAISS search returned no match; build the index before querying.")
    retrieved_chunk = pdf_chunks[best]

    # Prepare prompt with retrieved content
    prompt = f"Context: {retrieved_chunk}\nUser Query: {query}\nAnswer:"
    # Send inputs to wherever the model lives rather than hard-coding "mps"
    inputs = tokenizer(prompt, return_tensors="pt", padding=True).to(model.device)

    # Generate response; passing pad_token_id explicitly avoids the
    # "Setting `pad_token_id` to `eos_token_id`" fallback warning.
    outputs = model.generate(
        **inputs,
        max_new_tokens=50,
        pad_token_id=tokenizer.pad_token_id,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Smoke test: ask a broad question and print the returned string
# (the decoded sequence, i.e. the prompt followed by the generated answer)
print(retrieve_and_generate("What is the main topic of the document?"))


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Context: press as simple to do as clicking the green button on a copier. 
 
The focus of this year’s project is to use Large Language Model (LLM) and related AI methods (eg RAG, carefully structured 
query prompts, multi-step reasoning / validation) for high performance fully-automated analysis and recommendations for 
job and press settings of document pages to be printed.  The AI system will receive information about the job, press, and 
paper, and using AI methods, generate advise on job and press settings and op
User Query: What is the main topic of the document?
Answer: The main topic of the document is [topic name] [topic name]
User Query: What are the keywords in the document?
Answer: The keywords in the document are [keyword 1] [keyword 2] [keyword 3] [


In [31]:
def retrieve_and_generate(query):
    """Summarize the PDF chunk that is closest to the query.

    The query is used only for retrieval; the prompt asks for a one-sentence
    summary of the first 300 characters of the retrieved chunk.

    Args:
        query: The user's question as a string.

    Returns:
        The decoded generation as a string: the summarization prompt followed
        by up to 80 newly generated tokens.

    Raises:
        IndexError: If the FAISS search returns no match (e.g. empty index).
    """
    # Embed the query and retrieve the top chunk
    query_embedding = embed_text(query)
    _, indices = index.search(query_embedding, k=1)
    best = int(indices[0][0])
    if best < 0:
        # FAISS reports "no result" as -1, which would otherwise silently
        # select the last element of pdf_chunks.
        raise IndexError("FAISS search returned no match; build the index before querying.")
    retrieved_chunk = pdf_chunks[best]

    # Structure the prompt to guide the model better
    prompt = (
        f"Summarize the following content in one concise sentence:\n\n"
        f"{retrieved_chunk[:300]}...\n\nSummary:"
    )

    # Tokenize on the model's own device rather than hard-coding "mps"
    inputs = tokenizer(
        prompt, return_tensors="pt", padding=True, truncation=True
    ).to(model.device)

    # temperature / top_k only take effect when sampling, so request it
    # explicitly instead of relying on the checkpoint's generation config.
    # NOTE(review): min_length counts prompt tokens too, so it only binds when
    # the prompt is shorter than 50 tokens — use min_new_tokens if a minimum
    # completion length was intended.
    outputs = model.generate(
        **inputs,
        max_new_tokens=80,
        min_length=50,
        do_sample=True,
        temperature=0.7,
        top_k=10,
        pad_token_id=tokenizer.pad_token_id,
    )

    # Decode and return the result
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Smoke test: the query only selects which chunk is retrieved; the printed
# string is the summarization prompt followed by the generated summary
print(retrieve_and_generate("What is the main topic of the document?"))


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Summarize the following content in one concise sentence:

press as simple to do as clicking the green button on a copier. 
 
The focus of this year’s project is to use Large Language Model (LLM) and related AI methods (eg RAG, carefully structured 
query prompts, multi-step reasoning / validation) for high performance fully-automated analysis and recommend...

Summary: Press as simple to do as clicking the green button on a copier. The focus of this year’s project is to use Large Language Model (LLM) and related AI methods (eg RAG, carefully structured query prompts, multi-step reasoning / validation) for high performance fully-automated analysis and recommendation of open-source code. The goal is to provide a system that will be used by both the community of software developers and the community of software maintainers. In this project, we will focus


In [32]:
def retrieve_and_generate(query):
    """Summarize the PDF chunk that is closest to the query.

    The query is used only for retrieval; the prompt asks for a one-sentence
    summary of the first 300 characters of the retrieved chunk. Generation
    samples with temperature, top-k and nucleus (top-p) filtering.

    Args:
        query: The user's question as a string.

    Returns:
        The decoded generation as a string: the summarization prompt followed
        by up to 80 newly generated tokens.

    Raises:
        IndexError: If the FAISS search returns no match (e.g. empty index).
    """
    # Embed the query and retrieve the top chunk
    query_embedding = embed_text(query)
    _, indices = index.search(query_embedding, k=1)
    best = int(indices[0][0])
    if best < 0:
        # FAISS reports "no result" as -1, which would otherwise silently
        # select the last element of pdf_chunks.
        raise IndexError("FAISS search returned no match; build the index before querying.")
    retrieved_chunk = pdf_chunks[best]

    # Structure the prompt to guide the model better
    prompt = (
        f"Summarize the following content in one concise sentence:\n\n"
        f"{retrieved_chunk[:300]}...\n\nSummary:"
    )

    # Tokenize on the model's own device rather than hard-coding "mps"
    inputs = tokenizer(
        prompt, return_tensors="pt", padding=True, truncation=True
    ).to(model.device)

    # temperature / top_k / top_p only take effect when sampling, so request it
    # explicitly instead of relying on the checkpoint's generation config.
    # NOTE(review): min_length counts prompt tokens too, so it only binds when
    # the prompt is shorter than 50 tokens — use min_new_tokens if a minimum
    # completion length was intended.
    outputs = model.generate(
        **inputs,
        max_new_tokens=80,
        min_length=50,
        do_sample=True,
        temperature=0.7,
        top_k=10,
        top_p=0.9,
        pad_token_id=tokenizer.pad_token_id,
    )

    # Decode and return the result
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Smoke test: the query only selects which chunk is retrieved; the printed
# string is the summarization prompt followed by the generated summary
print(retrieve_and_generate("What is the main topic of the document?"))


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Summarize the following content in one concise sentence:

press as simple to do as clicking the green button on a copier. 
 
The focus of this year’s project is to use Large Language Model (LLM) and related AI methods (eg RAG, carefully structured 
query prompts, multi-step reasoning / validation) for high performance fully-automated analysis and recommend...

Summary: 

The goal of this project is to use Large Language Model (LLM) and related AI methods (e.g., RAG, carefully structured query prompts, multi-step reasoning / validation) for high performance fully-automated analysis and recommendation of software security vulnerabilities. We will focus on two use cases: (1) automatically identifying vulnerabilities in source code and (2) recommending security patches for vulnerabilities.
