# Llama2-Powered AI Solution for Clinical Study Reports (CSR)
This notebook automates **extraction, summarization, and generation** of Clinical Study Reports using **Llama2**.
It also integrates **Langfuse** for observability, real-time monitoring, and debugging.

In [None]:
# Install Required Packages
!pip install torch transformers fastapi uvicorn PyPDF2 langfuse

In [None]:
import os
import PyPDF2  # PDF extraction
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from fastapi import FastAPI, UploadFile, File
import langfuse
import uvicorn

In [None]:
# Initialize the Langfuse client.
# The secret key is read from the LANGFUSE_SECRET_KEY environment variable so
# real credentials never have to be stored in the notebook; the original
# placeholder string is kept as a fallback for backward compatibility.
LANGFUSE_SECRET_KEY = os.environ.get("LANGFUSE_SECRET_KEY", "your_langfuse_secret_key")
# The SDK is used through a `Langfuse` client object rather than a
# module-level `init()` call. The public key and host are taken from the
# LANGFUSE_PUBLIC_KEY / LANGFUSE_HOST environment variables when not passed.
langfuse_client = langfuse.Langfuse(secret_key=LANGFUSE_SECRET_KEY)

In [None]:
# Load the Llama 2 chat checkpoint together with its tokenizer.
MODEL_NAME = "meta-llama/Llama-2-7b-chat-hf"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# Half-precision weights; device placement is delegated to device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16,
    device_map="auto",
)

In [None]:
# Extract text from PDF
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of the PDF stored at ``pdf_path``.

    Pages for which no text is extracted are skipped. The remaining page
    texts are separated by newlines, and leading/trailing whitespace is
    stripped from the combined result.
    """
    with open(pdf_path, "rb") as pdf_file:
        reader = PyPDF2.PdfReader(pdf_file)
        # Read all pages while the file handle is still open.
        page_texts = [page.extract_text() for page in reader.pages]
    return "\n".join(chunk for chunk in page_texts if chunk).strip()

In [None]:
# Summarization using Llama2
def summarize_text(text, max_length=512):
    """Summarize a Clinical Study Report with the loaded Llama 2 model.

    Args:
        text: Raw report text. Only the first 4000 characters are placed
            in the prompt.
        max_length: Maximum number of tokens to generate for the summary.

    Returns:
        The decoded summary, without the echoed prompt.
    """
    prompt = f"Summarize the following Clinical Study Report: {text[:4000]}"
    # The model is loaded with device_map="auto", so send the inputs to the
    # device the model actually lives on instead of assuming CUDA exists.
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True).to(model.device)
    # max_new_tokens bounds only the generated continuation. Passing the
    # budget as max_length would also count the prompt tokens, and a
    # 4000-character prompt can use up the budget before anything is generated.
    summary_ids = model.generate(**inputs, max_new_tokens=max_length)
    # A causal LM returns prompt + continuation; keep only the continuation.
    prompt_token_count = inputs["input_ids"].shape[-1]
    return tokenizer.decode(summary_ids[0][prompt_token_count:], skip_special_tokens=True)

In [None]:
# Generate Clinical Study Report
def generate_text(text, max_length=1024):
    """Generate a Clinical Study Report from source text with Llama 2.

    Args:
        text: Source material for the report. Only the first 4000
            characters are placed in the prompt.
        max_length: Maximum number of tokens to generate for the report.

    Returns:
        The decoded report, without the echoed prompt.
    """
    prompt = f"Generate a comprehensive Clinical Study Report based on: {text[:4000]}"
    # The model is loaded with device_map="auto", so send the inputs to the
    # device the model actually lives on instead of assuming CUDA exists.
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True).to(model.device)
    # max_new_tokens bounds only the generated continuation; max_length would
    # also count the prompt tokens and shrink the room left for the report.
    output_ids = model.generate(**inputs, max_new_tokens=max_length)
    # A causal LM returns prompt + continuation; keep only the continuation.
    prompt_token_count = inputs["input_ids"].shape[-1]
    return tokenizer.decode(output_ids[0][prompt_token_count:], skip_special_tokens=True)

In [None]:
# Langfuse Logging
# Record one sample event per pipeline stage (summary, generated report).
# NOTE(review): `log` is called on the imported `langfuse` module — confirm
# that the installed SDK version actually exposes this function.
for event_name, sample_output in (
    ("llama2_summary", "sample summary"),
    ("llama2_generated_report", "sample report"),
):
    langfuse.log(event_name, input_text="sample input", output_text=sample_output)

## Next Steps
- Fine-tune Llama2 for medical NLP
- Optimize prompt engineering
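- Expose the pipeline as a FastAPI service (FastAPI and uvicorn are imported above, but no endpoints are defined yet) and deploy it on AWS/GCP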
- Implement document indexing for structured CSR handling