# AI-Orchestrated Report Generator (using Agentic LLM API developed by SiHun Lee, CAE G., MX div., SEC.)

This notebook uses the **LLM API's agentic capabilities** to automatically generate comprehensive PDF and PowerPoint reports from warpage data.

## How It Works

1. **Phase 1:** Analyze data → finds outliers, calculates statistics
2. **Phase 2:** Generate charts → uses Phase 1 findings (not raw files)
3. **Phase 3:** Build PDF Report → comprehensive, beautiful PDF document
4. **Phase 4:** Build PowerPoint → comprehensive presentation with same content

**Key Advantage:** Each phase reuses conversation memory, avoiding redundant file processing.

## 1. Setup & Configuration

In [None]:
import httpx
import json
from pathlib import Path
from datetime import datetime
import time
from IPython.display import display, Latex

# Build Universal LLM API Client
class LLMApiClient:
    """Small HTTP client for the agentic LLM API used by this notebook.

    Covers the login, model-listing, chat-completion and session-artifact
    endpoints.  Once ``login`` has succeeded, the returned bearer token is
    attached to every subsequent request.
    """

    def __init__(self, base_url: str, timeout: float = 360000.0):
        """
        Args:
            base_url: Root URL of the API server; a trailing slash is stripped.
            timeout: Seconds allowed for the read, write and pool phases of
                chat requests.  The connect phase is limited to 50 seconds.
        """
        self.base_url = base_url.rstrip("/")
        self.token = None  # populated by login()
        self.timeout = httpx.Timeout(50.0, read=timeout, write=timeout, pool=timeout)

    def _headers(self):
        """Return the Authorization header, or an empty dict before login."""
        return {"Authorization": f"Bearer {self.token}"} if self.token else {}

    def login(self, username: str, password: str):
        """Authenticate, remember the access token, and return the response JSON.

        Raises:
            httpx.HTTPStatusError: If the server answers with an error status.
        """
        r = httpx.post(f"{self.base_url}/api/auth/login",
                      json={"username": username, "password": password}, timeout=10.0)
        r.raise_for_status()
        payload = r.json()  # parse once; reused for the token and the return value
        self.token = payload["access_token"]
        return payload

    def list_models(self):
        """Return the JSON body of the ``/v1/models`` endpoint."""
        r = httpx.get(f"{self.base_url}/v1/models", headers=self._headers(), timeout=10.0)
        r.raise_for_status()
        return r.json()

    def _chat(self, data: dict, files: list = None):
        """Post one chat-completion request, uploading ``files`` as multipart parts.

        Args:
            data: Form fields of the request.
            files: Optional list of local paths to upload alongside the message.

        Returns:
            tuple: ``(assistant message content, session id)``.
        """
        files_to_upload = []
        try:
            # Open the files inside the try block so that handles which were
            # already opened are still closed when a later path cannot be opened.
            for file_path in files or []:
                handle = open(file_path, "rb")
                files_to_upload.append(("files", (Path(file_path).name, handle)))

            r = httpx.post(f"{self.base_url}/v1/chat/completions", data=data,
                          files=files_to_upload if files_to_upload else None,
                          headers=self._headers(), timeout=self.timeout)
            r.raise_for_status()
            result = r.json()
            return result["choices"][0]["message"]["content"], result["x_session_id"]
        finally:
            for _, (_, handle) in files_to_upload:
                handle.close()

    def chat_new(self, model: str, user_message: str, agent_type: str = "auto", files: list = None):
        """Send ``user_message`` without a session id.

        Returns:
            tuple: ``(assistant message content, session id)``; pass the
            session id to ``chat_continue`` for follow-up messages.
        """
        messages = [{"role": "user", "content": user_message}]
        data = {"model": model, "messages": json.dumps(messages), "agent_type": agent_type}
        return self._chat(data, files)

    def chat_continue(self, model: str, session_id: str, user_message: str, 
                     agent_type: str = "auto", files: list = None):
        """Send ``user_message`` together with an existing ``session_id``.

        Returns:
            tuple: ``(assistant message content, session id)``.
        """
        messages = [{"role": "user", "content": user_message}]
        data = {"model": model, "messages": json.dumps(messages), 
                "session_id": session_id, "agent_type": agent_type}
        return self._chat(data, files)

    def get_session_artifacts(self, session_id: str):
        """Get list of files generated during the session"""
        r = httpx.get(f"{self.base_url}/api/chat/sessions/{session_id}/artifacts",
                     headers=self._headers(), timeout=10.0)
        r.raise_for_status()
        return r.json()

    def download_artifact(self, session_id: str, filename: str, save_to: str = None):
        """
        Download a generated artifact file to local disk.
        
        Args:
            session_id: The session ID that generated the file
            filename: Name of the file to download (can include subdirectory, e.g., 'temp_charts/chart.png')
            save_to: Local path to save the file (default: current directory with original filename)
        
        Returns:
            str: Path to the downloaded file
        
        Example:
            client.download_artifact(session_id, "Warpage_Report_20250126.pptx", "./downloads/report.pptx")
        """
        r = httpx.get(
            f"{self.base_url}/api/chat/sessions/{session_id}/artifacts/{filename}",
            headers=self._headers(),
            timeout=60.0
        )
        r.raise_for_status()
        
        # Determine local save path
        if save_to is None:
            save_to = Path(filename).name  # Use just the filename, not subdirectory
        
        # Create parent directories if needed
        save_path = Path(save_to)
        save_path.parent.mkdir(parents=True, exist_ok=True)
        
        # Write file content
        with open(save_path, "wb") as f:
            f.write(r.content)
        
        return str(save_path)

# Configuration
# Each setting can be overridden through an environment variable so that the
# endpoint and credentials do not have to be edited in (or committed with) the
# notebook.  The defaults are the values this notebook originally hard-coded.
# NOTE(review): consider removing the password default once the environment
# variable is set everywhere this notebook runs.
import os

API_BASE_URL = os.environ.get("LLM_API_BASE_URL", 'http://localhost:10007')
USERNAME = os.environ.get("LLM_API_USERNAME", "leesihun")
PASSWORD = os.environ.get("LLM_API_PASSWORD", "s.hun.lee")

# Initialize and login
client = LLMApiClient(API_BASE_URL, timeout=36000.0)# 10 hours
client.login(USERNAME, PASSWORD)
models = client.list_models()

# Use the first model the server reports; fail with a clear message when the
# listing is empty instead of an opaque IndexError/KeyError.
available_models = models.get("data") or []
if not available_models:
    raise RuntimeError(f"No models available at {API_BASE_URL}/v1/models")
MODEL = available_models[0]["id"]

print(f"✓ Logged in as: {USERNAME}")
print(f"✓ Using model: {MODEL}")

## 2. Configure Data Files

In [None]:
# Warpage statistics files that every phase of the report works from
stats_paths = [
    Path(name)
    for name in ("B8_1021_stats.json", "B8_1027_stats.json")
]

# Report, per configured file, whether it is present and how large it is
print(f"Configured {len(stats_paths)} data file(s):\n")
for idx, stats_file in enumerate(stats_paths, start=1):
    if not stats_file.exists():
        print(f"  [{idx}] {stats_file.name} - ✗ NOT FOUND")
        continue
    kilobytes = stats_file.stat().st_size / 1024
    print(f"  [{idx}] {stats_file.name} ({kilobytes:.1f} KB) - ✓")

# String form of the paths, as passed to the client's `files` argument
file_paths_str = list(map(str, stats_paths))

## 3. Phase 1: Data Analysis

The AI will analyze your data and identify key patterns.

In [None]:
# Phase 1 prompt.  This is an f-string: the only interpolated value is the
# number of configured stats files (len(stats_paths)); everything else is sent
# to the model verbatim.
analysis_prompt = f"""
Analyze {len(stats_paths)} warpage measurement JSON files attached.

Input Data Structure:
- Each file contain warpage statistics per PCB board
- Statistics: min, max, range (warpage value), mean, median, std, skewness, kurtosis
- PCA values (pc1, pc2) calculated within each source_pdf
- Filenames contain acquisition date/time (e.g., 1021 = October 21th)
- Note that usually, mean, median is not important. To assess warpage, range is the single most important feature.

Tasks:
1. Calculate overall statistics (mean, std, min, max of range across all files)
2. Identify PCA-based outliers using pc1, pc2 values. Look for PCA values that are quite a far from others
3. Compare production dates - which is better quality and why?
4. List specific outlier filenames with reasons
5. Save your results to a numpy array locally

**Required Output:**
- Total measurements count
- Outlier list with full filenames
- Production date comparison (winner + reason)
- Key concerns or patterns

Think HARD!
"""
# Phase banner
print("=" * 80, "PHASE 1: DATA ANALYSIS", "=" * 80, sep="\n")

# Start a fresh session with the stats files attached; the returned session id
# is what the later phases pass to chat_continue.
start = time.time()
analysis_result, session_id = client.chat_new(
    model=MODEL,
    user_message=analysis_prompt,
    agent_type="auto",
    files=file_paths_str,
)
elapsed = time.time() - start

# Show the timing, then the model's answer framed by separator lines
print(f"\n✓ Analysis completed in {elapsed:.1f}s\n")
print("=" * 80)
display(Latex(analysis_result))
print("=" * 80)

## 4. Phase 2: Generate Visualizations

**Key:** The AI reuses Phase 1 findings from conversation memory (not raw files).

In [None]:
# Phase 2 prompt.  This is an f-string: the only interpolated value is the
# number of configured stats files (len(stats_paths)); everything else is sent
# to the model verbatim.
viz_prompt = f"""
**PRIORITY: Use your Phase 1 analysis from conversation memory and saved numpy array.**

In Phase 1, you already:
- Analyzed {len(stats_paths)} datasets and loaded all data
- Identified PCA outliers with pc1, pc2 values
- Compared production dates
- Listed specific outlier filenames

**Avoid re-analyze raw files if possible. Use your Phase 1 findings and file.**
Files attached are ONLY for verification if needed.

**Task:** Create visualizations and classify outliers

**Outlier Classification:**
- **BAD outliers:** High mean/std/range (critical quality issues)
- **GOOD outliers:** Unusual PCA position but acceptable metrics
- **Normal:** Within PCA cluster, standard metrics

**Required Charts** (save to temp_charts/):
1. `pca_outliers_classified.png` - PC1 vs PC2 scatter (Blue=normal, Orange=good outlier, RED=bad outlier)
2. `bad_outliers_detail.png` - Bar chart comparing bad outliers vs average
3. `production_comparison.png` - Production date quality comparison
4. Additional charts as appropriate (distributions, trends, control charts, etc.)

**Style:** 300 DPI, seaborn whitegrid, professional colors

**Required Output:**
- List of generated chart files
- Bad outlier summary (file IDs + reasons)
- Production date insights

THINK HARD!
"""

# Phase banner
print("=" * 80, "PHASE 2: VISUALIZATION GENERATION", "=" * 80, sep="\n")

# Continue the Phase 1 session; the session id returned here is not needed.
start = time.time()
viz_result, _ = client.chat_continue(
    model=MODEL,
    session_id=session_id,
    user_message=viz_prompt,
    agent_type="auto",
    files=file_paths_str,
)
elapsed = time.time() - start

# Show the timing, then the model's answer framed by separator lines
print(f"\n✓ Visualizations completed in {elapsed:.1f}s\n")
print("=" * 80)
display(Latex(viz_result))
print("=" * 80)

## 5. Phase 3: PDF Report Assembly

**Key:** The AI uses Phase 1 & 2 findings from conversation memory to create a beautiful, comprehensive PDF.

In [None]:
# Sum the number of entries under the 'files' key across all stats files;
# a stats file without that key contributes zero.
total_files = 0
for stats_file in stats_paths:
    with open(stats_file, 'r') as handle:
        payload = json.load(handle)
    total_files += len(payload.get('files', []))

# Phase 3: PDF Report Generation
#
# The prompt is an f-string, so every single-brace field is evaluated HERE, in
# the notebook, when the prompt is built:
#   - {total_files} and {len(stats_paths)} insert the measured counts;
#   - the {datetime.now().strftime(...)} fields insert the timestamp at which
#     the prompt is built.
# Fields that belong to the template code the agent should run must be escaped
# with doubled braces ({{...}}) so they reach the agent as literal text.  The
# loop/local names of the template (point, chart_title, rec, filename) do not
# exist in the notebook; leaving them unescaped raises NameError before any
# request is sent.
pdf_prompt = f"""
**PRIORITY: Use Phase 1 & 2 findings from conversation memory and files.**

You have:
- Phase 1: Statistics, outlier IDs, production date comparison
- Phase 2: Images useful for report generation, bad outlier classifications

**Avoid re-analyze raw files if possible. Use conversation context.**
Files attached are ONLY for verification if needed.

**Task:** Create comprehensive, beautiful PDF report using ReportLab library.

**CRITICAL: Use the following structured template approach to ensure consistent formatting**

## Installation
```python
# Ensure ReportLab is installed
import subprocess
subprocess.run(['pip', 'install', 'reportlab'], check=False)
```

## PDF Configuration (MUST USE THESE EXACT SETTINGS)

```python
from reportlab.lib.pagesizes import A4
from reportlab.lib.units import cm
from reportlab.lib import colors
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak, Image, Table, TableStyle
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_JUSTIFY
from datetime import datetime
from pathlib import Path

# Page setup - CONSISTENT A4 PORTRAIT ONLY
PAGE_WIDTH, PAGE_HEIGHT = A4  # 595.27 x 841.89 points (210mm x 297mm)
MARGIN = 2.5 * cm  # 25mm margins on all sides
CONTENT_WIDTH = PAGE_WIDTH - 2 * MARGIN  # ~16cm usable width

# Image sizing - CONSISTENT across ALL pages
IMAGE_MAX_WIDTH = CONTENT_WIDTH  # Full content width
IMAGE_MAX_HEIGHT = 12 * cm  # 12cm max height for consistency
```

## Helper Functions (MUST IMPLEMENT EXACTLY AS SHOWN)

```python
def resize_image_to_fit(image_path, max_width, max_height):
    \"\"\"
    Resize image maintaining aspect ratio to fit within max dimensions.
    Centers the image horizontally.
    \"\"\"
    from reportlab.platypus import Image
    
    img = Image(image_path)
    img_width = img.imageWidth
    img_height = img.imageHeight
    aspect = img_height / img_width
    
    # Calculate target dimensions
    target_width = max_width
    target_height = target_width * aspect
    
    # If too tall, scale down by height
    if target_height > max_height:
        target_height = max_height
        target_width = target_height / aspect
    
    # Set final dimensions
    img.drawWidth = target_width
    img.drawHeight = target_height
    img.hAlign = 'CENTER'  # Center horizontally
    
    return img

def add_header_footer(canvas, doc):
    \"\"\"Add header and footer to all pages except cover page\"\"\"
    canvas.saveState()
    
    if doc.page > 1:  # Skip cover page
        # Header
        canvas.setFont('Helvetica', 9)
        canvas.setFillColor(colors.grey)
        canvas.drawString(MARGIN, PAGE_HEIGHT - 1.5*cm, "Warpage Analysis Report")
        
        # Footer with page number
        canvas.setFont('Helvetica', 9)
        page_num_text = f"Page {{doc.page - 1}}"  # Exclude cover from count
        canvas.drawCentredString(PAGE_WIDTH / 2, 1.5*cm, page_num_text)
    
    canvas.restoreState()
```

## Style Definitions (USE THESE EXACT STYLES)

```python
styles = getSampleStyleSheet()

# Custom styles for consistency
style_title = ParagraphStyle(
    'CustomTitle',
    parent=styles['Title'],
    fontSize=24,
    textColor=colors.HexColor('#1f4788'),
    alignment=TA_CENTER,
    spaceAfter=12,
    leading=30
)

style_subtitle = ParagraphStyle(
    'CustomSubtitle',
    parent=styles['Normal'],
    fontSize=14,
    textColor=colors.HexColor('#1f4788'),
    alignment=TA_CENTER,
    spaceAfter=6
)

style_heading1 = ParagraphStyle(
    'CustomHeading1',
    parent=styles['Heading1'],
    fontSize=18,
    textColor=colors.HexColor('#1f4788'),
    spaceAfter=12,
    spaceBefore=0
)

style_heading2 = ParagraphStyle(
    'CustomHeading2',
    parent=styles['Heading2'],
    fontSize=14,
    textColor=colors.HexColor('#1f4788'),
    spaceAfter=8,
    spaceBefore=0
)

style_body = ParagraphStyle(
    'CustomBody',
    parent=styles['BodyText'],
    fontSize=11,
    alignment=TA_JUSTIFY,
    spaceAfter=8,
    leading=14
)

style_bullet = ParagraphStyle(
    'CustomBullet',
    parent=styles['BodyText'],
    fontSize=11,
    leftIndent=20,
    spaceAfter=6,
    leading=14,
    bulletIndent=10
)
```

## Document Structure (FOLLOW THIS EXACT ORDER)

```python
# Initialize document
filename = f'Warpage_Report_{datetime.now().strftime("%Y%m%d_%H%M%S")}.pdf'
doc = SimpleDocTemplate(
    filename,
    pagesize=A4,
    leftMargin=MARGIN,
    rightMargin=MARGIN,
    topMargin=MARGIN,
    bottomMargin=MARGIN
)

# Story list - holds all content
story = []

# === PAGE 1: COVER PAGE ===
story.append(Spacer(1, 6*cm))
story.append(Paragraph("Automatic Warpage Analysis Report", style_title))
story.append(Spacer(1, 1*cm))
story.append(Paragraph(f"Analysis of {total_files} Measurements ({len(stats_paths)} Production Dates)", style_subtitle))
story.append(Spacer(1, 0.5*cm))
story.append(Paragraph(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M')}", style_body))
story.append(PageBreak())

# === PAGE 2: TABLE OF CONTENTS ===
story.append(Paragraph("Table of Contents", style_heading1))
story.append(Spacer(1, 0.5*cm))

toc_data = [
    ["Section", "Page"],
    ["Executive Summary", "3"],
    ["PCA Outlier Classification", "4"],
    ["Bad Outlier Details", "5"],
    ["Production Comparison", "6"],
    ["Additional Analysis", "7+"],
    ["Recommendations", "Last"]
]

toc_table = Table(toc_data, colWidths=[CONTENT_WIDTH * 0.7, CONTENT_WIDTH * 0.3])
toc_table.setStyle(TableStyle([
    ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#1f4788')),
    ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
    ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
    ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
    ('FONTSIZE', (0, 0), (-1, 0), 12),
    ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
    ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
    ('FONTNAME', (0, 1), (-1, -1), 'Helvetica'),
    ('FONTSIZE', (0, 1), (-1, -1), 11),
    ('ROWBACKGROUNDS', (0, 1), (-1, -1), [colors.white, colors.HexColor('#f0f0f0')])
]))
story.append(toc_table)
story.append(PageBreak())

# === PAGE 3: EXECUTIVE SUMMARY ===
story.append(Paragraph("Executive Summary", style_heading1))
story.append(Spacer(1, 0.5*cm))
story.append(Paragraph("Key findings from the warpage analysis:", style_body))
story.append(Spacer(1, 0.3*cm))

# Add bullet points summarizing Phase 1 & 2 findings
summary_points = [
    f"Total measurements analyzed: {total_files}",
    "PCA-based outlier detection identified critical quality issues",
    "Production date comparison reveals significant quality variations",
    "Bad outliers show elevated range, mean, and standard deviation",
]

for point in summary_points:
    story.append(Paragraph(f"• {{point}}", style_bullet))

story.append(PageBreak())

# === PAGE 4: PCA OUTLIER CLASSIFICATION ===
story.append(Paragraph("PCA Outlier Classification", style_heading2))
story.append(Spacer(1, 0.3*cm))
story.append(Paragraph(
    "The scatter plot below shows PC1 vs PC2 values with outliers classified into three categories: "
    "normal (blue), good outliers with unusual PCA position but acceptable metrics (orange), "
    "and bad outliers with critical quality issues (red).",
    style_body
))
story.append(Spacer(1, 0.5*cm))

img = resize_image_to_fit("temp_charts/pca_outliers_classified.png", IMAGE_MAX_WIDTH, IMAGE_MAX_HEIGHT)
story.append(img)
story.append(PageBreak())

# === PAGE 5: BAD OUTLIER DETAILS ===
story.append(Paragraph("Bad Outlier Details", style_heading2))
story.append(Spacer(1, 0.3*cm))
story.append(Paragraph(
    "This chart compares bad outliers against average metrics, showing elevated range, "
    "mean, and standard deviation values that indicate critical warpage issues.",
    style_body
))
story.append(Spacer(1, 0.5*cm))

img = resize_image_to_fit("temp_charts/bad_outliers_detail.png", IMAGE_MAX_WIDTH, IMAGE_MAX_HEIGHT)
story.append(img)
story.append(PageBreak())

# === PAGE 6: PRODUCTION COMPARISON ===
story.append(Paragraph("Production Comparison", style_heading2))
story.append(Spacer(1, 0.3*cm))
story.append(Paragraph(
    "Quality comparison between production dates reveals which batch demonstrated better performance "
    "and lower warpage variability.",
    style_body
))
story.append(Spacer(1, 0.5*cm))

img = resize_image_to_fit("temp_charts/production_comparison.png", IMAGE_MAX_WIDTH, IMAGE_MAX_HEIGHT)
story.append(img)
story.append(PageBreak())

# === PAGES 7+: ADDITIONAL CHARTS ===
# Find all charts in temp_charts/ directory
import glob
all_charts = glob.glob("temp_charts/*.png")
required_charts = ["pca_outliers_classified.png", "bad_outliers_detail.png", "production_comparison.png"]

for chart_path in sorted(all_charts):
    chart_filename = Path(chart_path).name
    
    # Skip already included charts
    if chart_filename in required_charts:
        continue
    
    # Generate title from filename
    chart_title = chart_filename.replace("_", " ").replace(".png", "").title()
    
    story.append(Paragraph(chart_title, style_heading2))
    story.append(Spacer(1, 0.3*cm))
    story.append(Paragraph(f"Additional analysis: {{chart_title}}", style_body))
    story.append(Spacer(1, 0.5*cm))
    
    img = resize_image_to_fit(chart_path, IMAGE_MAX_WIDTH, IMAGE_MAX_HEIGHT)
    story.append(img)
    story.append(PageBreak())

# === LAST PAGE: RECOMMENDATIONS ===
story.append(Paragraph("Recommendations", style_heading1))
story.append(Spacer(1, 0.5*cm))
story.append(Paragraph("Based on the analysis, the following actions are recommended:", style_body))
story.append(Spacer(1, 0.3*cm))

recommendations = [
    "Investigate root causes of bad outliers identified in PCA analysis",
    "Implement stricter quality controls for production batches with higher warpage",
    "Focus process improvements on reducing range variability",
    "Consider real-time PCA monitoring for early outlier detection",
    "Review and optimize manufacturing parameters for problematic production dates"
]

for rec in recommendations:
    story.append(Paragraph(f"• {{rec}}", style_bullet))

# === BUILD PDF ===
doc.build(story, onFirstPage=add_header_footer, onLaterPages=add_header_footer)

print(f"✓ PDF generated: {{filename}}")
```

## Critical Requirements

**MUST follow these rules:**
1. **Portrait orientation ONLY** - Never change page orientation
2. **Consistent image sizing** - Always use `resize_image_to_fit()` with IMAGE_MAX_WIDTH and IMAGE_MAX_HEIGHT
3. **One section per page** - Always add `PageBreak()` after each section
4. **Use defined styles** - Only use the custom styles defined above
5. **Maintain aspect ratios** - Images must scale proportionally
6. **Center images** - Use `img.hAlign = 'CENTER'`
7. **Consistent spacing** - Use Spacer elements as shown

## Expected Output

Filename: `Warpage_Report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.pdf`

**Verify before completing:**
- [ ] All images are same width and properly centered
- [ ] All pages are portrait orientation
- [ ] Each section starts on a new page
- [ ] Headers/footers appear on all pages except cover
- [ ] TOC is properly formatted
- [ ] Professional color scheme (#1f4788 for headings)

IMPLEMENT THIS EXACTLY AS SPECIFIED!
"""

# Phase banner
print("=" * 80, "PHASE 3: PDF REPORT ASSEMBLY", "=" * 80, sep="\n")

# Continue the same session so the agent can draw on the earlier phases;
# the session id returned here is not needed.
start = time.time()
pdf_result, _ = client.chat_continue(
    model=MODEL,
    session_id=session_id,
    user_message=pdf_prompt,
    agent_type="auto",
    files=file_paths_str,
)
elapsed = time.time() - start

# Show the timing, then the model's answer framed by separator lines
print(f"\n✓ PDF report completed in {elapsed:.1f}s\n")
print("=" * 80)
display(Latex(pdf_result))
print("=" * 80)