In [None]:
from langchain_ollama.llms import OllamaLLM
from langchain.document_loaders import PyPDFLoader
from langchain.prompts import PromptTemplate
import time
from typing import Dict, List, Tuple
import logging

# Set up logging: configure the root logger at INFO level and create this
# module's logger, which the analyzer methods and main() use for progress
# and error messages.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class PDFAnalyzer:
    """Evaluate several Ollama models on fixed questions about one PDF.

    For every configured model and every question, the single PDF page named
    alongside the expected answer is passed to the model as context. The
    model's response and the time it took to produce it are recorded.
    """

    def __init__(self, pdf_path: str):
        """Initialize the PDF analyzer with the path to the PDF file."""
        self.pdf_path = pdf_path
        self.models = {
            "llama3.2:1b": OllamaLLM(model="llama3.2:1b"),
            "llama3.2": OllamaLLM(model="llama3.2"),
            "llama3.1": OllamaLLM(model="llama3.1")
        }
        
        # Format: question, then expected answers + page number inside a list
        # (the page number is 1-based; it is converted to 0-based on lookup).
        self.questions_and_answers = {
            "At which flowrate of fertilizer in kg/min is it advised to use the fine application?": 
                ["40kg/min", 67],
            "What is the maximum operating angle of the coupling shaft?": 
                ["30 degrees", 54],
            "What materials are required to perform the tray test?": 
                ["You'll need a measuring tape or ruler, a spirit level, 7 troughs, 7 graduated tubes, a funnel, a notebook, pen, calculator, this manual, and the software's instruction manual.", 77],
            "What is the general formula for calculating the required flowrate of fertilizer in kg/min?":
                ["Working width (m) x Driving speed (km/h) x Application rate (kg/ha) / 600", 68]
        }
        
        # Template for zero-shot learning
        self.prompt_template = PromptTemplate(
            input_variables=["context", "question"],
            template="""
            Below is a Context from a technical manual. Please answer the question at the end based only on the context provided.
            The context includes much unnecessary information. Therefore ignore tables. Include the relevant sentence in your answer, and only answer the question if you are very sure.

            Context:
            {context}
            End of Context.
            Question: {question}

            Answer: Let me analyze the provided context and answer your question...
            """
        )

    def load_pdf(self) -> List[dict]:
        """Load the PDF and return one record per page.

        Returns:
            A list of dicts with keys ``'content'`` (the page text) and
            ``'page'`` (the page number taken from the loader's metadata).

        Raises:
            Exception: any error raised while loading or processing the PDF
                is logged and re-raised unchanged.
        """
        try:
            loader = PyPDFLoader(self.pdf_path)
            pages = loader.load()

            # Keep only the text and the page number of each page.
            return [
                {
                    'content': page.page_content,
                    'page': page.metadata['page']
                }
                for page in pages
            ]

        except Exception as e:
            logger.error("Error loading PDF: %s", e)
            raise

    def get_page_content(self, pages: List[dict], target_page: int) -> str:
        """Return the labelled text of the page numbered ``target_page``.

        Args:
            pages: page records as produced by ``load_pdf``.
            target_page: page number to look up, compared against each
                record's ``'page'`` value.

        Returns:
            ``"[Page N] <content>"`` for the first matching record, or an
            empty string when no record matches.
        """
        match = next((page for page in pages if page['page'] == target_page), None)
        if match is None:
            return ""
        return f"[Page {match['page']}] {match['content']}"

    def _evaluate_question(self, model_name: str, model, pages: List[dict],
                           question: str, expected_answer: list) -> dict:
        """Ask one model one question and return the result record.

        The record has the keys ``'question'``, ``'expected_answer'``,
        ``'response'`` and ``'time_taken'``. On any failure the response is
        ``"Error: <message>"`` and ``'time_taken'`` is ``None``.
        """
        try:
            # Get only the specific page content mentioned in the answer
            target_page = expected_answer[1] - 1  # 0-based index
            page_content = self.get_page_content(pages, target_page)
            if not page_content:
                # A found page always yields a non-empty "[Page N] ..." string,
                # so an empty result means the page is missing from the PDF.
                logger.warning(
                    "Page %s not found in %s; prompting with an empty context",
                    expected_answer[1], self.pdf_path
                )

            # Prepare prompt with only the relevant page
            prompt = self.prompt_template.format(
                context=page_content,
                question=question
            )

            # Time the response with a monotonic clock so that system clock
            # adjustments cannot distort (or make negative) the duration.
            start_time = time.perf_counter()
            response = model.invoke(prompt)
            time_taken = time.perf_counter() - start_time

        except Exception as e:
            # Best effort: record the failure and let the evaluation continue.
            logger.exception("Error with model %s on question: %s", model_name, question)
            return {
                'question': question,
                'expected_answer': expected_answer,
                'response': f"Error: {str(e)}",
                'time_taken': None
            }

        return {
            'question': question,
            'expected_answer': expected_answer,
            'response': response,
            'time_taken': time_taken
        }

    def run_model_evaluation(self) -> Dict:
        """Run evaluation across all models for given questions.

        Returns:
            A dict mapping each model name to a list of result records, one
            per question, in the order the questions are defined.

        Raises:
            Exception: whatever ``load_pdf`` raises; failures of individual
                questions are captured in the records instead of raised.
        """
        results = {}
        pages = self.load_pdf()
        
        for model_name, model in self.models.items():
            logger.info("Evaluating model: %s", model_name)
            results[model_name] = [
                self._evaluate_question(model_name, model, pages, question, expected_answer)
                for question, expected_answer in self.questions_and_answers.items()
            ]
        
        return results

    def format_results(self, results: Dict) -> str:
        """Format the results into a readable report.

        Args:
            results: mapping of model name to result records as returned by
                ``run_model_evaluation``.

        Returns:
            The report text. A "Time taken" line is written for every record
            whose ``'time_taken'`` is not ``None`` (including ``0.0``).
        """
        parts = ["PDF Analysis Results\n" + "="*20 + "\n\n"]
        
        for model_name, model_results in results.items():
            parts.append(f"Model: {model_name}\n" + "-"*50 + "\n")
            
            for result in model_results:
                parts.append(f"Question: {result['question']}\n")
                parts.append(f"Expected Answer: {result['expected_answer']}\n")
                parts.append(f"Model Response: {result['response']}\n")
                # Compare against None explicitly: a duration of 0.0 is valid
                # and must not be dropped by a truthiness test.
                if result['time_taken'] is not None:
                    parts.append(f"Time taken: {result['time_taken']:.2f} seconds\n")
                parts.append("\n")
            
            parts.append("\n")
            
        return "".join(parts)

def main():
    """Evaluate all models on the manual, print the report and save it to disk.

    Errors raised while running the evaluation, formatting the report or
    writing the output file are logged with their traceback and suppressed.
    """
    # Initialize analyzer
    pdf_path = "PDF/EN-A148703540-2.pdf"
    analyzer = PDFAnalyzer(pdf_path)
    
    try:
        # Run evaluation
        results = analyzer.run_model_evaluation()
        
        # Format and display results
        report = analyzer.format_results(results)
        print(report)
        
        # Save results to file. The encoding is fixed to UTF-8 because model
        # responses may contain non-ASCII characters that the platform's
        # default encoding cannot represent.
        with open("analysis_results_one_page_input.txt", "w", encoding="utf-8") as f:
            f.write(report)
            
    except Exception as e:
        # Top-level boundary: keep the traceback so failures can be diagnosed.
        logger.exception("Error in main execution: %s", e)

# Run the evaluation only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()

INFO:__main__:Evaluating model: llama3.2:1b
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
INFO:__main__:Evaluating model: llama3.2
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
INFO:__main__:Evaluating model: llama3.1
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http

PDF Analysis Results

Model: llama3.2:1b
--------------------------------------------------
Question: At which flowrate of fertilizer in kg/min is it advised to use the fine application?
Expected Answer: ['40kg/min', 67]
Model Response: Based on the provided context, I am very sure that at a flowrate of 40 kg/min or more, it is advised to use the fine application. According to the text, "With a low application rate and/or a low driving speed the dosing openings are only opened a slight bit so that  very little fertiliser would stream through the dosing openings of the spreader."
Time taken: 34.03 seconds

Question: What is the maximum operating angle of the coupling shaft?
Expected Answer: ['30 degrees', 54]
Model Response: Based on the provided context, the maximum operating angle of the coupling shaft is 30 degrees when lifted and 30 degrees when lowered.
Time taken: 16.74 seconds

Question: What materials are required to perform the tray test?
Expected Answer: ["You'll need a measur