# AI-Orchestrated Report Generator (using Agentic LLM API developed by SiHun Lee, CAE G., MX div., SEC.)

This notebook uses the **LLM API's agentic capabilities** to automatically generate comprehensive PDF and PowerPoint reports from warpage data.

## How It Works

1. **Phase 1:** Analyze data → find outliers, calculate statistics
2. **Phase 2:** Generate charts → reuse Phase 1 findings (not raw files)
3. **Phase 3:** Build PDF Report → comprehensive, beautiful PDF document
4. **Phase 4:** Build PowerPoint → comprehensive presentation with same content

**Key Advantage:** Each phase reuses conversation memory, avoiding redundant file processing.

## 1. Setup & Configuration

In [None]:
import json
import os
import time
from datetime import datetime
from pathlib import Path

import httpx
from IPython.display import display, Latex

# Build Universal LLM API Client
class LLMApiClient:
    """Small HTTP client for the LLM API server used by this notebook.

    Covers login, model listing, chat completions (new and continued
    sessions) and retrieval of session artifacts. Each call is an
    independent ``httpx`` request; once :meth:`login` has stored a token it
    is sent as a bearer token on every subsequent request.
    """

    def __init__(self, base_url: str, timeout: float = 360000.0):
        """Remember the server address and build the chat timeout policy.

        Args:
            base_url: Root URL of the API server; trailing "/" is removed.
            timeout: Seconds allowed for the read, write and pool phases of
                a chat completion request.
        """
        self.base_url = base_url.rstrip("/")
        self.token = None
        # 50 s is the default for any phase not overridden below, which
        # leaves it governing connection establishment only.
        self.timeout = httpx.Timeout(50.0, read=timeout, write=timeout, pool=timeout)

    def _headers(self):
        """Return the Authorization header dict, or ``{}`` before login."""
        return {"Authorization": f"Bearer {self.token}"} if self.token else {}

    def login(self, username: str, password: str):
        """Authenticate and store the returned access token.

        Returns:
            The decoded JSON body of the login response.

        Raises:
            httpx.HTTPStatusError: If the server answers with an error status.
            KeyError: If the response has no ``access_token`` field.
        """
        r = httpx.post(f"{self.base_url}/api/auth/login",
                       json={"username": username, "password": password}, timeout=10.0)
        r.raise_for_status()
        payload = r.json()  # decode once; used for both the token and the return value
        self.token = payload["access_token"]
        return payload

    def list_models(self):
        """Return the decoded JSON body of ``GET /v1/models``."""
        r = httpx.get(f"{self.base_url}/v1/models", headers=self._headers(), timeout=10.0)
        r.raise_for_status()
        return r.json()

    @staticmethod
    def _close_uploads(uploads):
        """Close every file handle held in a list built by ``_open_uploads``."""
        for _, (_, handle) in uploads:
            handle.close()

    @staticmethod
    def _open_uploads(files):
        """Open each path in *files* for binary reading as multipart parts.

        Returns:
            A list of ``("files", (basename, handle))`` tuples; empty when
            *files* is ``None`` or empty.

        If opening any file fails, the handles opened so far are closed
        before the exception propagates, so nothing is leaked.
        """
        uploads = []
        try:
            for file_path in files or []:
                part_name = Path(file_path).name
                handle = open(file_path, "rb")
                uploads.append(("files", (part_name, handle)))
        except Exception:
            LLMApiClient._close_uploads(uploads)
            raise
        return uploads

    def _post_chat(self, data: dict, files: list = None):
        """POST one chat completion request and unpack the reply.

        Shared by :meth:`chat_new` and :meth:`chat_continue`, which differ
        only in the form fields they send.

        Returns:
            Tuple ``(content, session_id)`` taken from
            ``choices[0].message.content`` and ``x_session_id`` of the
            response body.
        """
        uploads = self._open_uploads(files)
        try:
            r = httpx.post(f"{self.base_url}/v1/chat/completions", data=data,
                           files=uploads or None,
                           headers=self._headers(), timeout=self.timeout)
            r.raise_for_status()
            result = r.json()
            return result["choices"][0]["message"]["content"], result["x_session_id"]
        finally:
            # Always release the upload handles, whether or not the request succeeded.
            self._close_uploads(uploads)

    def chat_new(self, model: str, user_message: str, agent_type: str = "auto", files: list = None):
        """Send *user_message* as the first turn of a new session.

        Args:
            model: Model identifier to use.
            user_message: Text of the user turn.
            agent_type: Value of the ``agent_type`` form field.
            files: Optional list of local file paths to upload with the turn.

        Returns:
            Tuple ``(content, session_id)``; pass ``session_id`` to
            :meth:`chat_continue` to keep the conversation going.
        """
        messages = [{"role": "user", "content": user_message}]
        data = {"model": model, "messages": json.dumps(messages), "agent_type": agent_type}
        return self._post_chat(data, files)

    def chat_continue(self, model: str, session_id: str, user_message: str,
                      agent_type: str = "auto", files: list = None):
        """Send *user_message* as a further turn of an existing session.

        Takes the same arguments as :meth:`chat_new` plus *session_id*, the
        identifier returned by an earlier call.

        Returns:
            Tuple ``(content, session_id)`` from the response.
        """
        messages = [{"role": "user", "content": user_message}]
        data = {"model": model, "messages": json.dumps(messages),
                "session_id": session_id, "agent_type": agent_type}
        return self._post_chat(data, files)

    def get_session_artifacts(self, session_id: str):
        """Get list of files generated during the session"""
        r = httpx.get(f"{self.base_url}/api/chat/sessions/{session_id}/artifacts",
                      headers=self._headers(), timeout=10.0)
        r.raise_for_status()
        return r.json()

    def download_artifact(self, session_id: str, filename: str, save_to: str = None):
        """
        Download a generated artifact file to local disk.

        Args:
            session_id: The session ID that generated the file
            filename: Name of the file to download (can include subdirectory, e.g., 'temp_charts/chart.png')
            save_to: Local path to save the file (default: current directory with original filename)

        Returns:
            str: Path to the downloaded file

        Raises:
            httpx.HTTPStatusError: If the server answers with an error status.

        Example:
            client.download_artifact(session_id, "Warpage_Report_20250126.pptx", "./downloads/report.pptx")
        """
        r = httpx.get(
            f"{self.base_url}/api/chat/sessions/{session_id}/artifacts/{filename}",
            headers=self._headers(),
            timeout=60.0
        )
        r.raise_for_status()

        # Determine local save path
        if save_to is None:
            save_to = Path(filename).name  # Use just the filename, not subdirectory

        # Create parent directories if needed
        save_path = Path(save_to)
        save_path.parent.mkdir(parents=True, exist_ok=True)

        # Write file content
        with open(save_path, "wb") as f:
            f.write(r.content)

        return str(save_path)

# Configuration
# Each value can be overridden with an environment variable so that real
# credentials do not have to be written into the notebook. The literals are
# kept only as fallbacks so the notebook behaves as before when nothing is set.
# NOTE(review): a plaintext password in source is a security risk - prefer
# setting LLM_API_PASSWORD and removing the fallback value.
API_BASE_URL = os.environ.get("LLM_API_BASE_URL", 'http://localhost:10007')
USERNAME = os.environ.get("LLM_API_USERNAME", "leesihun")
PASSWORD = os.environ.get("LLM_API_PASSWORD", "s.hun.lee")

# Initialize and login
client = LLMApiClient(API_BASE_URL, timeout=36000.0)  # 10 hours
client.login(USERNAME, PASSWORD)
models = client.list_models()

# Use the first model the server advertises; fail with a readable message
# instead of a bare IndexError/KeyError when the list is empty or missing.
available_models = models.get("data") or []
if not available_models:
    raise RuntimeError(f"No models available from {API_BASE_URL}/v1/models")
MODEL = available_models[0]["id"]

print(f"✓ Logged in as: {USERNAME}")
print(f"✓ Using model: {MODEL}")

## 2. Configure Data Files

In [None]:
# Warpage statistics files to hand to the agent (edit this list as needed)
stats_paths = [
    Path("B8_1021_stats.json"),
    Path("B8_1027_stats.json"),
]

# Report, per configured file, whether it is present and how large it is
print(f"Configured {len(stats_paths)} data file(s):\n")
for i, path in enumerate(stats_paths, 1):
    if not path.exists():
        print(f"  [{i}] {path.name} - ✗ NOT FOUND")
        continue
    size_kb = path.stat().st_size / 1024
    print(f"  [{i}] {path.name} ({size_kb:.1f} KB) - ✓")

# The API client takes plain string paths
file_paths_str = list(map(str, stats_paths))

## 3. Phase 1: Data Analysis

The AI will analyze your data and identify key patterns.

In [None]:
# Phase 1 prompt: sent together with the raw stats files to open a new session.
analysis_prompt = f"""
Analyze {len(stats_paths)} warpage measurement JSON files attached.

Input Data Structure:
- Each file contain warpage statistics per PCB board
- Statistics: min, max, range (warpage value), mean, median, std, skewness, kurtosis
- PCA values (pc1, pc2) calculated within each source_pdf
- Filenames contain acquisition date/time (e.g., 1021 = October 21th)
- Note that usually, mean, median is not important. To assess warpage, range is the single most important feature.

Tasks:
1. Calculate overall statistics (mean, std, min, max of range across all files)
2. Identify PCA-based outliers using pc1, pc2 values. Look for PCA values that are quite a far from others
3. Compare production dates - which is better quality and why?
4. List specific outlier filenames with reasons
5. Save your results to a numpy array locally

**Required Output:**
- Total measurements count
- Outlier list with full filenames
- Production date comparison (winner + reason)
- Key concerns or patterns

Think HARD!
"""
# Banner: rule, title, rule - one line each
print("=" * 80, "PHASE 1: DATA ANALYSIS", "=" * 80, sep="\n")

# Start a fresh session; the returned session_id is what later phases continue.
start = time.time()
analysis_result, session_id = client.chat_new(
    model=MODEL,
    user_message=analysis_prompt,
    agent_type="auto",
    files=file_paths_str,
)
elapsed = time.time() - start

print(f"\n✓ Analysis completed in {elapsed:.1f}s\n")
print("=" * 80)
display(Latex(analysis_result))
print("=" * 80)

## 4. Phase 2: Generate Visualizations

**Key:** AI reuses Phase 1 findings from conversation memory (not raw files).

In [None]:
# Phase 2 prompt: asks for charts built from the Phase 1 findings in the same session.
viz_prompt = f"""
**PRIORITY: Use your Phase 1 analysis from conversation memory and saved numpy array.**

In Phase 1, you already:
- Analyzed {len(stats_paths)} datasets and loaded all data
- Identified PCA outliers with pc1, pc2 values
- Compared production dates
- Listed specific outlier filenames

**Avoid re-analyze raw files if possible. Use your Phase 1 findings and file.**
Files attached are ONLY for verification if needed.

**Task:** Create visualizations and classify outliers

**Outlier Classification:**
- **BAD outliers:** High mean/std/range (critical quality issues)
- **GOOD outliers:** Unusual PCA position but acceptable metrics
- **Normal:** Within PCA cluster, standard metrics

**Required Charts** (save to temp_charts/):
1. `pca_outliers_classified.png` - PC1 vs PC2 scatter (Blue=normal, Orange=good outlier, RED=bad outlier)
2. `bad_outliers_detail.png` - Bar chart comparing bad outliers vs average
3. `production_comparison.png` - Production date quality comparison
4. Additional charts as appropriate (distributions, trends, control charts, etc.)

**Style:** 300 DPI, seaborn whitegrid, professional colors

**Required Output:**
- List of generated chart files
- Bad outlier summary (file IDs + reasons)
- Production date insights

THINK HARD!
"""

# Banner: rule, title, rule - one line each
print("=" * 80, "PHASE 2: VISUALIZATION GENERATION", "=" * 80, sep="\n")

# Continue the Phase 1 session; the session id returned here is not needed again.
start = time.time()
viz_result, _ = client.chat_continue(
    model=MODEL,
    session_id=session_id,
    user_message=viz_prompt,
    agent_type="auto",
    files=file_paths_str,
)
elapsed = time.time() - start

print(f"\n✓ Visualizations completed in {elapsed:.1f}s\n")
print("=" * 80)
display(Latex(viz_result))
print("=" * 80)

## 5. Phase 3: PDF Report Assembly

**Key:** AI uses Phase 1 & 2 findings from conversation memory to create a beautiful, comprehensive PDF.

In [None]:
# Get total file count: sum the lengths of each stats file's 'files' list
# (a file without that key contributes 0).
total_files = 0
for path in stats_paths:
    # Binary mode lets json.load detect the encoding (UTF-8/16/32) itself
    # instead of decoding with the platform's locale encoding.
    with open(path, 'rb') as f:
        data = json.load(f)
    total_files += len(data.get('files', []))

# Phase 3: PDF Report Generation
# The report filename embeds the timestamp at which this cell is run.
pdf_prompt = f"""
**PRIORITY: Use Phase 1 & 2 findings from conversation memory and files.**

You have:
- Phase 1: Statistics, outlier IDs, production date comparison
- Phase 2: Images useful for report generation, bad outlier classifications

**Avoid re-analyze raw files if possible. Use conversation context.**
Files attached are ONLY for verification if needed.

**Task:** Create comprehensive, beautiful PDF report for the attached files.

**Report:**
- Title: "Automatic Warpage Analysis Report"
- Subtitle: "Analysis of {total_files} Measurements ({len(stats_paths)} Production Dates)"
- Filename: `Warpage_Report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.pdf`
- Format: A4 size (or Letter), professional margins

**Document Structure:**

1. **Cover Page** - Report title, subtitle, generation date, professional styling

2. **Executive Summary**

3. **PCA Outlier Classification** - with image

4. **Bad Outlier Details** - with image

5. **Production Comparison** -with image

6-N. **Additional Charts** - with images

N+1. **Recommendations** 

**Style Requirements:**
- Professional color scheme
- High-quality image embedding (maintain 300 DPI quality)
- Professional typography (use standard fonts: Helvetica, Times, Courier)
- Proper page breaks between sections
- Headers/footers with page numbers
- Table of Contents with page numbers
- Beautiful, publication-quality output

"""

print("=" * 80)
print("PHASE 3: PDF REPORT ASSEMBLY")
print("=" * 80)

# Continue the same session so the agent can draw on Phase 1 & 2 context.
start = time.time()
pdf_result, _ = client.chat_continue(
    MODEL, session_id, pdf_prompt, agent_type="auto", files=file_paths_str
)

print(f"\n✓ PDF report completed in {time.time() - start:.1f}s\n")
print("=" * 80)
display(Latex(pdf_result))
print("=" * 80)