In [3]:
import json
import re
import time
from typing import Dict, List

import requests

class RDReportGenerator:
    """Build section prompts for an R&D tax-claim report and generate text via an Ollama endpoint.

    Attributes:
        llm_url: URL the generation request is POSTed to.
        model: Model name sent in the request payload.
        timeout: Seconds to wait for the HTTP response before the attempt fails.
        retry_delay: Seconds to sleep between failed attempts.
    """

    def __init__(
        self,
        llm_url: str = "http://localhost:11434/api/generate",
        model: str = "llama3.1",
        timeout: float = 300.0,
        retry_delay: float = 1.0,
    ):
        """Initialize the report generator with Ollama API endpoint

        Args:
            llm_url: Endpoint that accepts the generation request.
            model: Model name placed in the request payload.
            timeout: Per-request timeout in seconds. Without one, a stalled
                server would block the calling cell indefinitely.
            retry_delay: Pause in seconds between failed attempts.
        """
        self.llm_url = llm_url
        self.model = model
        self.timeout = timeout
        self.retry_delay = retry_delay

    def generate_section(self, prompt: str, max_retries: int = 3) -> str:
        """Generate content using Ollama API with retry logic

        Args:
            prompt: Full prompt text sent to the model.
            max_retries: Total number of attempts; must be at least 1.

        Returns:
            The ``'response'`` field of the JSON body returned by the endpoint.

        Raises:
            ValueError: If ``max_retries`` is smaller than 1.
            RuntimeError: If every attempt fails; the last underlying error is
                attached as ``__cause__``.
        """
        if max_retries < 1:
            # An empty retry loop would otherwise fall through and return None.
            raise ValueError("max_retries must be at least 1")

        payload = {
            "model": self.model,
            "prompt": prompt,
            "stream": False
        }

        last_error = None
        for attempt in range(max_retries):
            try:
                response = requests.post(self.llm_url, json=payload, timeout=self.timeout)
                # Surface HTTP errors directly instead of a KeyError on the
                # missing 'response' field of an error body.
                response.raise_for_status()
                return response.json()['response']
            except Exception as e:
                # Deliberately broad: network errors, HTTP errors and malformed
                # bodies are all treated as a failed attempt and retried.
                last_error = e
                print(f"Attempt {attempt + 1} failed: {str(e)}")
                if attempt < max_retries - 1:
                    time.sleep(self.retry_delay)  # Wait before retry

        raise RuntimeError(
            f"Failed to generate content after {max_retries} attempts"
        ) from last_error

    def create_section_prompt(self, section_name: str, project_data: Dict, context: str) -> str:
        """Create detailed prompts for each section

        Args:
            section_name: Name of the report section, inserted verbatim into the prompt.
            project_data: Mapping with the keys 'Project Name' (str) and
                'Project Objectives', 'Technologies Used', 'Baseline Technology',
                'Technological Challenges' (each an iterable of strings that is
                joined with ', ').
            context: Section-specific guidance appended after the project information.

        Returns:
            The complete prompt text, ending with "Response:".

        Raises:
            KeyError: If any of the keys listed above is missing from ``project_data``.
        """
        base_prompt = f"""You are a technical writer specializing in R&D tax claims for software projects. 
Write a detailed {section_name} section for an HMRC R&D tax claim report.

Project Information:
- Name: {project_data['Project Name']}
- Objectives: {', '.join(project_data['Project Objectives'])}
- Technologies: {', '.join(project_data['Technologies Used'])}
- Baseline: {', '.join(project_data['Baseline Technology'])}
- Challenges: {', '.join(project_data['Technological Challenges'])}

{context}

Write a detailed, persuasive response that:
- Uses clear, non-technical language suitable for HMRC reviewers
- Emphasizes genuine technological uncertainty and innovation
- Provides specific examples and details
- Maintains a professional tone
- Is at least 3 paragraphs long

Response:"""
        return base_prompt

In [4]:
def load_project_data(json_file: str) -> Dict:
    """Load project data from JSON file

    Args:
        json_file: Path of the JSON file to read.

    Returns:
        The parsed JSON content. The content is also pretty-printed to stdout.

    Raises:
        FileNotFoundError: If ``json_file`` does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    print(f"Loading project data from {json_file}...")
    # Read as UTF-8 explicitly so the result does not depend on the
    # platform's default encoding.
    with open(json_file, 'r', encoding='utf-8') as f:
        data = json.load(f)
    print("\nLoaded project data:")
    print(json.dumps(data, indent=2))
    return data

# Load the project description that is passed to every section prompt.
# The path is relative, so it resolves against the current working directory.
json_file = "../data/project_data.json"
project_data = load_project_data(json_file)

Loading project data from ../data/project_data.json...

Loaded project data:
{
  "Project Name": "NextGen AI Drug Discovery",
  "Project Objectives": [
    "To develop a machine learning model that predicts drug interactions.",
    "To identify potential drug candidates for COVID-19 treatment.",
    "To improve the efficiency of drug discovery pipelines."
  ],
  "Technologies Used": [
    "Python",
    "TensorFlow",
    "Scikit-learn",
    "Pandas",
    "Jupyter Notebook"
  ],
  "Baseline Technology": [
    "Traditional high-throughput screening methods combined with molecular modeling."
  ],
  "Technological Advancements": [
    "AI models for predicting drug interactions.",
    "Deep learning models for drug discovery.",
    "Automated data processing pipelines."
  ],
  "Technological Challenges": [
    "Data scarcity and quality issues.",
    "Model interpretability and explainability.",
    "Integration with existing drug discovery workflows."
  ],
  "Attempts": [
    "Developed a 

In [5]:
# Per-section guidance text. Each key is used both as the section name passed
# to the prompt builder and as the key under which the generated text is stored
# in `content`; each value is inserted into the prompt as its context block.
section_contexts = {
    "project_objective": """
Context: Explain the project's objectives and why they represent genuine R&D rather than routine development. 
Focus on: Technical complexity, innovation required, and advancement beyond current capabilities.
""",
    "baseline_investigation": """
Context: Describe how the company investigated existing solutions and their limitations.
Focus on: Systematic investigation, technical limitations discovered, and why existing solutions were inadequate.
""",
    "baseline_measurement": """
Context: Explain what specific technological capabilities are being measured against.
Focus on: Concrete metrics, technical limitations, and quantifiable improvements sought.
""",
    "baseline_limitations": """
Context: Detail the specific limitations in existing technology that necessitated R&D.
Focus on: Technical constraints, performance limitations, and why these couldn't be solved with existing methods.
""",
    "advancement_sought": """
Context: Describe the specific technological advancements the project aims to achieve.
Focus on: Innovation beyond current capabilities, technical challenges to overcome, and potential impact.
""",
    "uncertainties_nature": """
Context: Explain why success was not guaranteed from the outset.
Focus on: Technical uncertainties, complexity of challenges, and why standard approaches were insufficient.
""",
    "uncertainties_timing": """
Context: Describe when and how uncertainties were discovered during the project.
Focus on: Systematic approach to identifying uncertainties and their technical nature.
""",
    "uncertainties_type": """
Context: Explain whether the uncertainties were about feasibility, practical application, or both.
Focus on: Specific technical challenges and why they represent genuine uncertainty.
""",
    "resolution_methods": """
Context: Describe why typical methods were insufficient and what novel approaches were required.
Focus on: Technical limitations of standard approaches and innovation in solutions.
""",
    "resolution_timing": """
Context: Explain the current status of uncertainty resolution and future measures.
Focus on: Concrete metrics for success and systematic approach to resolution.
"""
}

In [6]:
# Set up the generator and an empty store for the generated text
generator = RDReportGenerator()
content = {}

# Walk through every section, echoing the prompt and the model's answer
for section, context in section_contexts.items():
    print(f"\nGenerating {section}...", "-" * 50, sep="\n")

    # Build the prompt for this section and display it
    prompt = generator.create_section_prompt(section, project_data, context)
    print("\nPrompt:", prompt, sep="\n")

    # Ask the model for the section text and record it under the section key
    generated_content = generator.generate_section(prompt)
    content[section] = generated_content

    # Display the answer followed by a closing rule
    print("\nGenerated content:", generated_content, "-" * 50, sep="\n")


Generating project_objective...
--------------------------------------------------

Prompt:
You are a technical writer specializing in R&D tax claims for software projects. 
Write a detailed project_objective section for an HMRC R&D tax claim report.

Project Information:
- Name: NextGen AI Drug Discovery
- Objectives: To develop a machine learning model that predicts drug interactions., To identify potential drug candidates for COVID-19 treatment., To improve the efficiency of drug discovery pipelines.
- Technologies: Python, TensorFlow, Scikit-learn, Pandas, Jupyter Notebook
- Baseline: Traditional high-throughput screening methods combined with molecular modeling.
- Challenges: Data scarcity and quality issues., Model interpretability and explainability., Integration with existing drug discovery workflows.


Context: Explain the project's objectives and why they represent genuine R&D rather than routine development. 
Focus on: Technical complexity, innovation required, and advancem

KeyboardInterrupt: 

In [None]:
# Headings exactly as they are expected to appear in the LaTeX template,
# mapped to the key in `content` that holds the text for that heading.
_SECTION_HEADINGS = {
    r"\section{Project Objective}": "project_objective",
    r"\subsection{What was/were the pre-existing solution(s), and how did the company investigate the pre-existing solution(s)?}":
        "baseline_investigation",
    r"\subsection{What is the baseline that the advance is being measured against?}":
        "baseline_measurement",
    r"\subsection{What was the limitation of the existing technology/science baseline at the start of the R\&D project?}":
        "baseline_limitations",
    r"\subsection{What is the advance achieved (or sought to be achieved) by the company at the end of the R\&D activity?}":
        "advancement_sought",
    r"\subsection{What was it about the state of science or technology that made it uncertain it could not be made to do what the company wanted it to do?}":
        "uncertainties_nature",
    r"\subsection{When was the technological uncertainty identified?}":
        "uncertainties_timing",
    r"\subsection{Is the issue a matter of feasibility, or, is the issue a matter of practical application, or is it both ?}":
        "uncertainties_type",
    r"\subsection{What would be the typical methods that would be applied to resolving this particular scientific/technological uncertainty by a competent professional in the field and why were these not applicable to overcome the uncertainty?}":
        "resolution_methods",
    r"\subsection{When were the uncertainties resolved, or, if not resolved what is the measure the company is planning on using to identify when it will be considered resolved?}":
        "resolution_timing",
}

# Single-pass translation table for characters that are special in LaTeX text.
_LATEX_ESCAPES = str.maketrans({
    "\\": r"\textbackslash{}",
    "&": r"\&",
    "%": r"\%",
    "$": r"\$",
    "#": r"\#",
    "_": r"\_",
    "{": r"\{",
    "}": r"\}",
    "~": r"\textasciitilde{}",
    "^": r"\textasciicircum{}",
})


def generate_latex(template_file: str, content: Dict[str, str], escape: bool = True) -> str:
    """Generate LaTeX document from template and content

    Each known section heading found in the template is followed by the
    matching text from ``content``. Headings are matched literally; the
    previous implementation passed regex-escaped patterns to ``str.replace``,
    which never matched, so the template came back unchanged.

    Args:
        template_file: Path of the LaTeX template to read (UTF-8).
        content: Mapping from section key to the text for that section. All
            ten section keys must be present.
        escape: When True (default), LaTeX special characters in the section
            text are escaped so plain prose such as "R&D" or "100%" does not
            break compilation. Pass False if the text is already valid LaTeX.

    Returns:
        The template text with section content inserted after each heading.

    Raises:
        FileNotFoundError: If ``template_file`` does not exist.
        KeyError: If ``content`` lacks any of the expected section keys.
    """
    print("Generating LaTeX document...")
    with open(template_file, 'r', encoding='utf-8') as f:
        template = f.read()

    missing = [key for key in _SECTION_HEADINGS.values() if key not in content]
    if missing:
        raise KeyError(f"content is missing sections: {', '.join(missing)}")

    # Make a heading mismatch visible instead of silently producing a
    # document with empty sections.
    for heading in _SECTION_HEADINGS:
        if heading not in template:
            print(f"Warning: heading not found in template: {heading}")

    # One pass over the template: inserted text is never re-scanned, so
    # generated content cannot be mistaken for a heading. Newlines directly
    # after a heading are consumed and re-emitted in a normalized form.
    heading_pattern = re.compile(
        "(" + "|".join(re.escape(heading) for heading in _SECTION_HEADINGS) + r")\n*"
    )

    def _insert_section(match):
        heading = match.group(1)
        body = content[_SECTION_HEADINGS[heading]]
        if escape:
            body = body.translate(_LATEX_ESCAPES)
        return f"{heading}\n\n{body}\n\n"

    # A function replacement keeps backslashes in the text literal.
    return heading_pattern.sub(_insert_section, template)

def save_latex(content: str, output_file: str) -> None:
    """Save LaTeX content to file

    Args:
        content: Complete LaTeX document text.
        output_file: Destination path; an existing file is overwritten.
    """
    print(f"Saving LaTeX document to {output_file}...")
    # Write UTF-8 explicitly: the text may contain non-ASCII characters that
    # the platform's default encoding cannot represent.
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(content)

In [None]:
# Render the report from the template and write it to disk
template_file = "template.tex"
output_file = "output.tex"

latex_document = generate_latex(template_file, content)
save_latex(latex_document, output_file)

# Show the first 500 characters as a quick check, then report completion
print(
    "\nPreview of generated LaTeX:",
    latex_document[:500] + "...",
    "\nReport generation complete!",
    sep="\n",
)