In [5]:
import ollama
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
from typing import List
import PyPDF2

# Load the sentence-embedding model once at module level. The same instance
# is used to encode both the manual pages and the user queries, so document
# and query vectors are produced by the same model.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

def extract_text_from_pdf(pdf_path: str) -> List[str]:
    """Extract the text of a PDF file, returning one string per page.

    Pages whose extracted text is falsy or consists only of whitespace
    (e.g. pages without a text layer) are skipped, so they do not end up
    as empty documents in the search index.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The extracted page texts in page order. The list is empty when no
        page yields any text.
    """
    with open(pdf_path, "rb") as file:
        reader = PyPDF2.PdfReader(file)
        # Extract inside the ``with`` block so the file is still open
        # while the pages are being read.
        page_texts = [page.extract_text() for page in reader.pages]
    # ``text and text.strip()`` rejects both missing and blank-only pages
    # while keeping the original, unstripped text of the pages we retain.
    return [text for text in page_texts if text and text.strip()]

# Load troubleshooting manual from a PDF (one document per page)
pdf_path = "all-new-r15-en.pdf"  # Replace with actual PDF file
manuals = extract_text_from_pdf(pdf_path)
if not manuals:
    # With zero documents there are no embeddings to take the vector
    # dimension from, so building the index below cannot succeed. Fail here
    # with a message that names the actual cause instead.
    raise ValueError(
        f"No extractable text found in {pdf_path!r}; cannot build the search index."
    )

# Convert documents to embeddings: one row per page, cast to float32,
# which is the dtype the FAISS index is fed with.
document_embeddings = embedding_model.encode(manuals)
document_embeddings = np.array(document_embeddings, dtype=np.float32)

# Create FAISS index: exact (brute-force) L2 search whose dimensionality
# is taken from the embedding width; row i of the index corresponds to
# manuals[i].
index = faiss.IndexFlatL2(document_embeddings.shape[1])
index.add(document_embeddings)

def retrieve_docs(query: str, top_k: int = 2) -> List[str]:
    """Retrieve the documents most similar to ``query`` from the FAISS index.

    Args:
        query: Free-text question to search for.
        top_k: Maximum number of documents to return. Values larger than
            the number of indexed documents are clamped; values <= 0
            yield an empty list.

    Returns:
        Up to ``top_k`` entries of ``manuals``, in the order returned by
        the index search (nearest first).
    """
    # Never ask FAISS for more neighbours than it holds vectors: missing
    # results come back as index -1, and ``manuals[-1]`` would silently
    # return the *last* page as if it were a real hit.
    k = min(top_k, index.ntotal)
    if k <= 0:
        return []

    query_embedding = embedding_model.encode([query])
    query_embedding = np.array(query_embedding, dtype=np.float32)
    _, indices = index.search(query_embedding, k)

    # indices has one row per query; we searched a single query, so row 0.
    # The ``>= 0`` filter is a second line of defence against -1 padding.
    return [manuals[i] for i in indices[0] if i >= 0]

def generate_answer(query: str, model: str = "mistral") -> str:
    """Answer ``query`` with an Ollama chat model, grounded in retrieved pages.

    The documents returned by ``retrieve_docs(query)`` are joined with
    newlines and placed in the prompt as context, followed by a fixed
    instruction and the question itself.

    Args:
        query: The rider's question.
        model: Name of the Ollama model to chat with. Defaults to
            ``"mistral"``, the model this function previously hard-coded.

    Returns:
        The text content of the model's reply message.
    """
    # The instruction text is sent to the model verbatim; it is only split
    # across lines here for readability.
    instruction = (
        "As a bike service expert working in bike service center, your role "
        "is to provide accurate answer to questions asked by the rider. "
        "Data present in the context part is your knowledge about the bike. "
        "Stick to the context to generate the results. "
        "And also you can use your knowledge to answer if the required data "
        "is not available in context"
    )
    retrieved_docs = retrieve_docs(query)
    context = "\n".join(retrieved_docs)
    prompt = f"Context:\n{context}\nInstruction: {instruction}\nQuestion: {query}\nAnswer:"
    # Everything (context, instruction, question) goes in a single user turn.
    response = ollama.chat(model=model, messages=[{"role": "user", "content": prompt}])
    return response["message"]["content"]

# Example query: run the full retrieve-then-generate pipeline once and
# print the model's reply.
# NOTE(review): "intersting" is a typo for "interesting". It is left as-is
# here because the string is passed verbatim to the retriever and the LLM,
# so changing it changes runtime behaviour.
query = "give some intersting facts about the bike?"
answer = generate_answer(query)
print("Answer:", answer)


Answer: 1. The bike, model 3-13EAU10411, has a single cylinder engine with a displacement of 155 cm³.
2. It is equipped with a liquid-cooled SOHC (Single Overhead Camshaft) engine and uses regular unleaded gasoline (Gasohol [E10] acceptable).
3. The bike has a 6-speed constant mesh transmission with a primary reduction ratio of 3.042 and a final drive chain.
4. Its dimensions are: overall length - 1990 mm (78.3 in), overall width - 725 mm (28.5 in), overall height - 1135 mm (44.7 in), seat height - 815 mm (32.1 in), wheelbase - 1325 mm (52.2 in), ground clearance - 170 mm (6.69 in), and minimum turning radius - 2.8 m (9.19 ft).
5. The bike weighs approximately 137 kg (302 lb) with a fuel tank capacity of 11 L (2.9 US gal, 2.4 Imp.gal) and a reserve amount of 1.9 L (0.50 US gal, 0.42 Imp.gal).
6. The spark plug model used is NGK/MR8E9 with a gap of 0.8–0.9 mm (0.031–0.035 in).
7. The recommended engine oil for this bike is YAMALUBE SAE viscosity grades: 10W-40, API service SG type or hi