In [1]:
import ollama
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
from typing import List
import PyPDF2

# Load the sentence-embedding model once at module level. The same model is
# used to encode both the manual pages and each incoming query, so document
# and query vectors live in the same embedding space.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

def extract_text_from_pdf(pdf_path: str) -> List[str]:
    """Read a PDF and return its text as one chunk per page.

    Pages are visited in document order. A page whose text extraction
    yields nothing (``None`` or an empty string) is left out, so the
    returned list may be shorter than the page count.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The extracted text of every page that produced any, in page order.
    """
    with open(pdf_path, "rb") as pdf_file:
        # Extraction must happen while the file is still open.
        page_texts = [
            page.extract_text() for page in PyPDF2.PdfReader(pdf_file).pages
        ]
    return [page_text for page_text in page_texts if page_text]

# Load troubleshooting manual from a PDF (one text chunk per page)
pdf_path = "all-new-r15-en.pdf"  # Replace with actual PDF file
manuals = extract_text_from_pdf(pdf_path)

# A PDF without a text layer (e.g. a scan) yields no chunks. Fail here with a
# clear message instead of an opaque error further down, where the index is
# sized from the embedding array's second dimension.
if not manuals:
    raise ValueError(
        f"No extractable text found in {pdf_path!r}; cannot build the search index."
    )

# Convert documents to embeddings; the index below is fed float32 vectors
document_embeddings = embedding_model.encode(manuals)
document_embeddings = np.array(document_embeddings, dtype=np.float32)

# Create FAISS index (exact L2 search) sized to the embedding dimension,
# then add one vector per manual page so index ids line up with `manuals`.
index = faiss.IndexFlatL2(document_embeddings.shape[1])
index.add(document_embeddings)

def retrieve_docs(query: str, top_k: int = 2) -> List[str]:
    """Retrieve up to top-k relevant documents for a given query.

    The query is embedded with the same model as the documents and matched
    against the module-level FAISS ``index``; the corresponding entries of
    ``manuals`` are returned in the order FAISS reports them.

    Args:
        query: Free-text question to search for.
        top_k: Maximum number of documents to return.

    Returns:
        At most ``top_k`` document texts. Fewer are returned when the index
        holds fewer documents, and an empty list when ``top_k`` is not
        positive or the index is empty.
    """
    # Never ask FAISS for more neighbours than it stores: missing results
    # come back as id -1, which would silently index the *last* manual page.
    k = min(top_k, index.ntotal)
    if k <= 0:
        return []

    query_embedding = embedding_model.encode([query])
    query_embedding = np.array(query_embedding, dtype=np.float32)
    _distances, indices = index.search(query_embedding, k)

    # indices has one row per query; keep only real hits (defensive -1 filter).
    return [manuals[i] for i in indices[0] if i >= 0]

def generate_answer(query: str) -> str:
    """Answer a question with Ollama, using retrieved manual text as context.

    The documents returned by ``retrieve_docs`` are joined with newlines into
    a context section, combined with a fixed instruction and the question
    into a single user message, and sent to the ``wizardlm2`` model.

    Args:
        query: The rider's question.

    Returns:
        The text content of the model's reply message.
    """
    instruction = "As a bike service expert working in bike service center, your role is to provide accurate answer to questions asked by the rider. Data present in the context part is your knowledge about the bike. Stick to the context to generate the results. And also you can use your knowledge to answer if the required data is not available in context"
    context = "\n".join(retrieve_docs(query))
    prompt = f"Context:\n{context}\nInstruction: {instruction}\nQuestion: {query}\nAnswer:"

    # Single-turn chat: the whole prompt travels as one user message.
    messages = [{"role": "user", "content": prompt}]
    reply = ollama.chat(model='wizardlm2', messages=messages)
    return reply["message"]["content"]

# Example query: run the retrieve-then-generate pipeline once and print the reply
query = "give some intersting facts about the bike?"
answer = generate_answer(query)
print(f"Answer: {answer}")


Answer: Based on the provided context, which details the specifications and components of a motorcycle with the model code UB9BEYE, here are some interesting facts about this bike:

1. **Model and Year**: The bike is from the year 2017, as indicated by the document creation date (Wednesday, August 2, 2017).

2. **Dimensions**: It has an overall length of 1990 mm (78.3 in), overall width of 725 mm (28.5 in), overall height of 1135 mm (44.7 in), and a seat height of 815 mm (32.1 in). The ground clearance is 170 mm (6.69 in), which helps in navigating over obstacles without scraping the underside of the vehicle.

3. **Wheelbase**: The wheelbase measures 1325 mm (52.2 in), which contributes to the bike's stability and handling characteristics.

4. **Engine Capacity and Type**: The bike is powered by a single-cylinder, 4-stroke, SOHC engine with a displacement of 155 cm³. It has a bore x stroke of 58.0 × 58.7 mm (2.28 × 2.31 in) and a compression ratio of 11.6:1.

5. **Cooling System**: The