In [1]:
from langchain_ollama.llms import OllamaLLM
from langchain.document_loaders import PyPDFLoader
from langchain.prompts import PromptTemplate
import time
from typing import Dict, List, Tuple
import logging

# Set up logging: configure the root logger at INFO level and create the
# module-level logger that the rest of this file logs through.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class PDFAnalyzer:
    """Compare several Ollama-served models on fixed questions about one PDF.

    The whole PDF is loaded, flattened into a single page-tagged context
    string, and sent together with each question to every configured model.
    Responses and timings are collected so they can be compared by hand with
    the expected answers stored in ``questions_and_answers``.
    """

    def __init__(self, pdf_path: str):
        """Initialize the PDF analyzer with the path to the PDF file.

        Args:
            pdf_path: Location of the PDF that every question is asked about.
        """
        self.pdf_path = pdf_path

        # Model name -> LLM client; every question is run against each entry.
        self.models = {
            "llama3.2:1b": OllamaLLM(model="llama3.2:1b"),
            "llama3.2": OllamaLLM(model="llama3.2"),
            "llama3.1": OllamaLLM(model="llama3.1")
        }

        # Question -> expected answer. The expected answer is only echoed in
        # the report for manual comparison; nothing scores it automatically.
        self.questions_and_answers = {
            "At which flowrate of fertilizer in kg/min is it advised to use the fine application": 
                "40kg/min; page 67",
            "What is the maximum operating angle of the coupling shaft?": 
                "30 degrees; Page 54",
            "What materials are required to perform the tray test?": 
                "You'll need a measuring tape or ruler, a spirit level, 7 troughs, 7 graduated tubes, a funnel, a notebook, pen, calculator, this manual, and the software's instruction manual. Page 77",
            "What is the general formula for calculating the required flowrate of fertilizer in kg/min?":
                "Working width (m) x Driving speed (km/h) x Application rate (kg/ha) / 600; Page 68"
        }

        # Template for zero-shot learning
        self.prompt_template = PromptTemplate(
            input_variables=["context", "question"],
            template="""
            Below is a Context from a technical manual. Please answer the question at the end based only on the context provided.
            The context includes much unnecessary information. Therefore ignore tables. Include the page number and relevant context in your answer, and only answer the question if you are very sure.

            Context:
            {context}
            End of Context.
            Question: {question}

            Answer: Let me analyze the provided context and answer your question...
            """
        )

    def load_pdf(self) -> List[Dict]:
        """Load the PDF and return one record per page.

        Returns:
            A list of dicts with the keys ``'content'`` (the page text) and
            ``'page'`` (the loader's page index, taken unchanged from the
            document metadata; PyPDFLoader reports it 0-based).

        Raises:
            Exception: Any error raised while loading or reading the pages is
                logged and re-raised unchanged.
        """
        try:
            documents = PyPDFLoader(self.pdf_path).load()
            return [
                {'content': doc.page_content, 'page': doc.metadata['page']}
                for doc in documents
            ]
        except Exception as e:
            logger.error("Error loading PDF: %s", e)
            raise

    @staticmethod
    def _build_context(pages: List[Dict]) -> str:
        """Join page records into one string, tagging each page as ``[Page N]``.

        The stored page index is 0-based, so it is shifted by one here; the
        tag then matches the 1-based position of the page in the PDF, which
        is what the prompt asks the model to cite.
        """
        return " ".join(
            f"[Page {entry['page'] + 1}] {entry['content']}" for entry in pages
        )

    def run_model_evaluation(self) -> Dict:
        """Run evaluation across all models for given questions.

        Returns:
            A dict mapping each model name to a list of result dicts with the
            keys ``'question'``, ``'expected_answer'``, ``'response'`` and
            ``'time_taken'`` (seconds, or ``None`` when the call failed, in
            which case ``'response'`` holds the error text).

        Raises:
            Exception: Propagated from ``load_pdf`` if the PDF cannot be read.
                Failures of individual model calls are recorded in the
                results instead of being raised.
        """
        results = {}
        pages = self.load_pdf()

        # NOTE: the entire document is sent as one context. Nothing here
        # truncates or chunks it, so whether it fits a model's context window
        # depends on the model and the PDF.
        combined_context = self._build_context(pages)

        for model_name, model in self.models.items():
            logger.info("Evaluating model: %s", model_name)
            model_results = []

            for question, expected_answer in self.questions_and_answers.items():
                try:
                    prompt = self.prompt_template.format(
                        context=combined_context,
                        question=question
                    )
                    # perf_counter is monotonic, so the measured duration
                    # cannot be skewed by system clock adjustments.
                    started = time.perf_counter()
                    response = model.invoke(prompt)
                    elapsed = time.perf_counter() - started

                    model_results.append({
                        'question': question,
                        'expected_answer': expected_answer,
                        'response': response,
                        'time_taken': elapsed
                    })

                except Exception as e:
                    # One failing question must not abort the whole run:
                    # record the error as the response and carry on.
                    logger.error("Error with model %s on question: %s", model_name, question)
                    logger.error(str(e))
                    model_results.append({
                        'question': question,
                        'expected_answer': expected_answer,
                        'response': f"Error: {str(e)}",
                        'time_taken': None
                    })

            results[model_name] = model_results

        return results

    def format_results(self, results: Dict) -> str:
        """Format the results into a readable report.

        Args:
            results: Mapping as returned by ``run_model_evaluation``.

        Returns:
            The report text: a header, then one section per model listing
            each question, its expected answer, the model response and, when
            a timing was recorded, the time taken.
        """
        parts = ["PDF Analysis Results\n", "=" * 20, "\n\n"]

        for model_name, model_results in results.items():
            parts.append(f"Model: {model_name}\n" + "-" * 50 + "\n")

            for result in model_results:
                parts.append(f"Question: {result['question']}\n")
                parts.append(f"Expected Answer: {result['expected_answer']}\n")
                parts.append(f"Model Response: {result['response']}\n")
                # None marks a failed call; 0.0 is a valid measurement and
                # must still be reported.
                if result['time_taken'] is not None:
                    parts.append(f"Time taken: {result['time_taken']:.2f} seconds\n")
                parts.append("\n")

            parts.append("\n")

        return "".join(parts)

def main():
    """Evaluate all models on the manual, print the report and save it to disk.

    Errors raised while evaluating, formatting or writing the report are
    logged with their traceback rather than propagated.
    """
    # Initialize analyzer
    pdf_path = "PDF/EN-A148703540-2.pdf"
    analyzer = PDFAnalyzer(pdf_path)

    try:
        # Run evaluation
        results = analyzer.run_model_evaluation()

        # Format and display results
        report = analyzer.format_results(results)
        print(report)

        # Save results to file. Model responses can contain arbitrary Unicode,
        # so write UTF-8 explicitly instead of relying on the platform's
        # locale default, which may not be able to encode them.
        with open("analysis_results_complete_PDF_input.txt", "w", encoding="utf-8") as f:
            f.write(report)

    except Exception as e:
        # logger.exception records the traceback along with the message.
        logger.exception("Error in main execution: %s", e)

# Run the evaluation only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()

INFO:__main__:Evaluating model: llama3.2:1b
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
INFO:__main__:Evaluating model: llama3.2
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
INFO:__main__:Evaluating model: llama3.1
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http

PDF Analysis Results

Model: llama3.2:1b
--------------------------------------------------
Question: At which flowrate of fertilizer in kg/min is it advised to use the fine application
Expected Answer: 40kg/min; page 67
Model Response: At which flowrate of fertilizer in kg/min is it advised to use the fine application?

According to the provided context, a flow rate of 0.7 kg/min is recommended for fine application.

The correct text from the original document states:

"Fine application - should be done at 0.7 kg/min (6.33 psi)"

Therefore, the answer to your question is: **0.7 kg/min**
Time taken: 60.79 seconds

Question: What is the maximum operating angle of the coupling shaft?
Expected Answer: 30 degrees; Page 54
Model Response: The maximum operating angle of the coupling shaft depends on various factors, such as:

* The type of fertiliser being applied (e.g., granular, mineral, etc.)
* The specific application rate setting
* The working method used (e.g., ExactLine, full field, e