# PhytoChem-Tracker (Jupyter Edition)

**Version:** 1.0  
**Author:** rpwdoc2025  
**Powered by:** Google Gemini 3.0 & Agentic Python Workflow

## Overview
This tool is an agentic AI platform that helps biochemists track phytochemicals, spectra, pathways, and gene data. It simulates an orchestration layer that connects the PubChem, GNPS, and Phytozome data sources, and uses Google Gemini to synthesize the results into a summary.

### How to Run
1. Click the **Open in Colab** badge above (if viewing on GitHub).
2. Run the **Setup** cell to install dependencies.
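3. Run the **Import Libraries & Configure Agents** cell, then the **PhytoChem-Tracker Dashboard** cell, to launch the dashboard interface.
4. Paste your **Google Gemini API Key** into the dashboard's *API Key* field (without a key, the summary is a simulation placeholder) and click **START ANALYSIS**.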

In [1]:
# @title 1. Setup Environment
# Installs the third-party packages imported by the next cell:
#   google-generativeai (Gemini client), ipywidgets (dashboard UI),
#   fpdf (PDF report generation) and pandas (result table).
# The leading "!" hands the line to the notebook's shell; "-q" asks pip for quiet output.
!pip install -q google-generativeai ipywidgets fpdf pandas

  Preparing metadata (setup.py) ... done
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.6/1.6 MB 8.8 MB/s eta 0:00:00
  Building wheel for fpdf (setup.py) ... done


In [2]:
# @title 2. Import Libraries & Configure Agents
import ipywidgets as widgets
from IPython.display import display, clear_output, HTML
import google.generativeai as genai
import pandas as pd
from fpdf import FPDF
import time
import json
from datetime import datetime

# --- MOCK DATA AGENT ---
# Simulating database returns from PubChem/KNApSAcK
# These are hard-coded stand-in records; nothing in this cell queries a live
# database. Every record uses the same keys:
#   "Rank", "Name", "Formula", "MW", "Papers", "SMILES"
# generate_pdf() prints all of them except "SMILES" (its table labels "MW" as
# g/mol), and the dashboard shows the full records as a pandas DataFrame.

# Returned by on_start_click() when the query contains "tea" or "camellia".
MOCK_TEA_DATA = [
    {"Rank": 1, "Name": "Epigallocatechin gallate (EGCG)", "Formula": "C22H18O11", "MW": 458.37, "Papers": 14502, "SMILES": "C1[C@@H]([C@H](OC2=CC(=CC(=C21)O)O)C3=CC(=C(C(=C3)O)O)O)OC(=O)C4=CC(=C(C(=C4)O)O)O"},
    {"Rank": 2, "Name": "Caffeine", "Formula": "C8H10N4O2", "MW": 194.19, "Papers": 32100, "SMILES": "CN1C=NC2=C1C(=O)N(C(=O)N2C)C"},
    {"Rank": 3, "Name": "L-Theanine", "Formula": "C7H14N2O3", "MW": 174.20, "Papers": 1250, "SMILES": "CCN=C(O)CC[C@H](N)C(=O)O"},
    {"Rank": 4, "Name": "Kaempferol", "Formula": "C15H10O6", "MW": 286.23, "Papers": 8900, "SMILES": "C1=CC(=CC=C1C2=C(C(=O)C3=C(C=C(C=C3O2)O)O)O)O"},
    {"Rank": 5, "Name": "Quercetin", "Formula": "C15H10O7", "MW": 302.23, "Papers": 22100, "SMILES": "C1=CC(=C(C=C1C2=C(C(=O)C3=C(C=C(C=C3O2)O)O)O)O)O"}
]

# Fallback returned for every other query.
# NOTE(review): the Formula/MW and SMILES values of these "Simulated" rows do
# not appear to describe the same molecule -- presumably placeholders only;
# confirm before anything parses the SMILES.
MOCK_GENERIC_DATA = [
    {"Rank": 1, "Name": "Simulated Compound A", "Formula": "C10H12O4", "MW": 196.2, "Papers": 50, "SMILES": "CC(C)C1=CC=C(C=C1)O"},
    {"Rank": 2, "Name": "Simulated Compound B", "Formula": "C12H22O11", "MW": 342.3, "Papers": 12, "SMILES": "OCC1OC(CO)C(O)C1O"}
]

# --- PDF AGENT ---
class ReportPDF(FPDF):
    """FPDF document carrying the PhytoChem-Tracker page header and footer.

    ``header`` and ``footer`` override the FPDF hooks of the same name, which
    the library invokes itself for each page; callers never call them
    directly.
    """

    def header(self):
        """Draw the green title band and the report title at the top of a page."""
        self.set_fill_color(16, 185, 129)  # Emerald Green
        # Use the real page width rather than a hard-coded 210 (A4 in mm) so
        # the band still spans the page for other page formats or units.
        self.rect(0, 0, self.w, 20, 'F')
        self.set_font('Arial', 'B', 15)
        self.set_text_color(255, 255, 255)
        self.cell(0, 5, 'PhytoChem-Tracker Report', 0, 1, 'L')
        # Push the body content below the 20-unit-high band.
        self.ln(15)

    def footer(self):
        """Print the centred, grey page number near the bottom of a page."""
        self.set_y(-15)  # negative value: measured from the bottom edge
        self.set_font('Arial', 'I', 8)
        self.set_text_color(128)
        self.cell(0, 10, f'Page {self.page_no()}', 0, 0, 'C')

def _latin1_safe(text):
    """Return *text* as a str in which every non-Latin-1 character is '?'."""
    return str(text).encode("latin-1", "replace").decode("latin-1")


def generate_pdf(species, summary, data, filename="report.pdf"):
    """Write a one-table PDF report and return the path it was saved to.

    Args:
        species: Query string shown in the report title.
        summary: Free text printed under the "Executive Summary" heading.
        data: Iterable of records with the keys "Rank", "Name", "Formula",
            "MW" and "Papers" (one table row each).
        filename: Output path handed to ``FPDF.output``.

    Returns:
        ``filename`` unchanged.

    Raises:
        KeyError: If a record lacks one of the table keys.

    All text is passed through ``_latin1_safe`` first: the built-in "Arial"
    font only covers Latin-1, so characters an LLM commonly emits (en dashes,
    Greek letters, typographic quotes) would otherwise abort PDF generation
    with an encoding error.
    """
    pdf = ReportPDF()
    pdf.add_page()

    # Title
    pdf.set_text_color(0, 0, 0)
    pdf.set_font("Arial", "B", 16)
    pdf.cell(0, 10, _latin1_safe(f"Analysis: {species}"), 0, 1)
    pdf.set_font("Arial", "", 10)
    pdf.set_text_color(100, 100, 100)
    pdf.cell(0, 10, f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", 0, 1)

    # Summary
    pdf.ln(5)
    pdf.set_text_color(0)
    pdf.set_font("Arial", "B", 12)
    pdf.cell(0, 10, "Gemini 3.0 Executive Summary", 0, 1)
    pdf.set_font("Arial", "", 10)
    pdf.multi_cell(0, 5, _latin1_safe(summary))

    # Table
    pdf.ln(10)
    pdf.set_font("Arial", "B", 12)
    pdf.cell(0, 10, "Top Phytochemicals", 0, 1)

    # One spec drives both the header and the rows so widths and alignment
    # cannot drift apart: (record key, column label, width, alignment).
    columns = (
        ("Rank", "Rank", 15, "C"),
        ("Name", "Name", 60, "L"),
        ("Formula", "Formula", 40, "L"),
        ("MW", "MW (g/mol)", 30, "L"),
        ("Papers", "Papers", 30, "C"),
    )
    last = len(columns) - 1

    # Table Header (shaded cells; ln=1 on the last column ends the line)
    pdf.set_fill_color(240, 240, 240)
    pdf.set_font("Arial", "B", 10)
    for index, (_, label, width, align) in enumerate(columns):
        pdf.cell(width, 8, label, 1, int(index == last), align, 1)

    # Table Rows
    pdf.set_font("Arial", "", 9)
    for item in data:
        for index, (key, _, width, align) in enumerate(columns):
            text = _latin1_safe(item[key])
            if key == "Name":
                # Long names would overflow the fixed-width cell.
                text = text[:25]
            pdf.cell(width, 8, text, 1, int(index == last), align)

    pdf.output(filename)
    return filename

# --- GEMINI AGENT ---
def get_gemini_summary(api_key, query, count, model_name='gemini-2.0-flash-exp'):
    """Ask Gemini for a short scientific summary of the search subject.

    Args:
        api_key: Google Gemini API key. ``None``, an empty string or a
            whitespace-only string counts as "no key".
        query: Species or compound the user searched for.
        count: Number of compounds found, quoted in the prompt.
        model_name: Gemini model identifier passed to
            ``genai.GenerativeModel``.

    Returns:
        The model's text on success; a "[Simulation] ..." placeholder when no
        key was supplied; or a "Gemini Error: ..." message if the call fails.
        The function never raises, so the dashboard can always render the
        result as the summary.
    """
    # Keys are pasted into the widget, so tolerate stray spaces/newlines and
    # treat a blank paste as a missing key instead of sending it to the API.
    api_key = (api_key or "").strip()
    if not api_key:
        return "[Simulation] Gemini API Key missing. Please enter key to get real AI analysis."

    try:
        genai.configure(api_key=api_key)
        model = genai.GenerativeModel(model_name)
        prompt = f"""
        You are the 'PhytoChem-Tracker' AI brain.
        The user is searching for phytochemical data regarding: "{query}".
        We have identified {count} top compounds found in this source.

        Please provide a concise, scientific summary (max 100 words) suitable for a biochemist.
        Focus on the major therapeutic or industrial significance of this plant/compound class.
        """
        response = model.generate_content(prompt)
        return response.text
    except Exception as e:
        # Deliberately broad: this is the UI boundary, and any failure is
        # reported to the user in place of the summary.
        return f"Gemini Error: {e}"

print("Agents Loaded Successfully.")

Agents Loaded Successfully.


In [3]:
# @title 3. PhytoChem-Tracker Dashboard

# --- WIDGET DEFINITIONS ---
# Module-level widgets; the click handler and the layout below read them by name.
# 'initial' lets each description take its natural width instead of being clipped.
style = {'description_width': 'initial'}

# Header (the leaf emoji was stored as mojibake; restored to the intended character)
header = widgets.HTML("<h1>🌿 PhytoChem-Tracker <span style='color:#10b981'>Agentic Platform</span></h1>")

# API Key Input (Password widget masks the typed value)
api_key_input = widgets.Password(
    placeholder='Paste Google Gemini API Key',
    description='<b>API Key:</b>',
    style=style,
    layout=widgets.Layout(width='50%')
)

# 1. Input Source
input_type = widgets.ToggleButtons(
    options=['Plant Species', 'Compound', 'Spectrogram'],
    description='<b>1. Input Source:</b>',
    disabled=False,
    button_style='success',
    style=style
)

# 2. Search Query
query_input = widgets.Text(
    value='Camellia sinensis',
    placeholder='Enter species or compound...',
    description='<b>2. Identification:</b>',
    style=style,
    layout=widgets.Layout(width='60%')
)

# 3. Preferences (Dropdowns)
compound_class = widgets.Dropdown(options=['All Classes', 'Alkaloids', 'Flavonoids', 'Terpenoids'], description='Class:', style=style)
solvent_pref = widgets.Dropdown(options=['Water', 'Ethanol', 'Methanol', 'DMSO'], description='Solvent:', style=style)
output_fmt = widgets.Dropdown(options=['Interactive Dashboard', 'PDF Report', 'CSV'], description='Format:', style=style)

pref_box = widgets.HBox([compound_class, solvent_pref, output_fmt])

# 4. Start Button
start_btn = widgets.Button(
    description='START ANALYSIS',
    disabled=False,
    button_style='success', # 'success', 'info', 'warning', 'danger' or ''
    tooltip='Activate Agents',
    icon='flask',
    layout=widgets.Layout(width='100%', height='50px')
)

# Logs & Output
# NOTE(review): 'background_color' and 'color' do not look like ipywidgets
# Layout traits, so the dark/green terminal styling is presumably ignored --
# confirm against the installed ipywidgets version.
log_output = widgets.Output(layout={
    'border': '1px solid #334155',
    'height': '150px',
    'overflow_y': 'scroll',
    'padding': '10px',
    'background_color': '#0f172a',
    'color': '#00ff00',
})
result_output = widgets.Output()

# --- EVENT LOGIC ---
def log(message):
    """Print *message* into the agent log panel, prefixed with the current HH:MM:SS."""
    stamp = datetime.now().strftime('%H:%M:%S')
    # Anything printed inside the Output context is captured by that widget.
    with log_output:
        print(f"[{stamp}] {message}")

def _download_link(filename, label):
    """Return the HTML for a green download button pointing at *filename*."""
    from html import escape

    # NOTE(review): a relative href only works if the notebook front end
    # serves the working directory -- verify on the target platform.
    return (
        f"<br><b><a href='{escape(filename, quote=True)}' target='_blank' "
        "style='background-color:#10b981; color:white; padding:10px; "
        f"text-decoration:none; border-radius:5px;'>⬇️ {escape(label)}</a></b>"
    )


def on_start_click(b):
    """Run the simulated agent workflow when START ANALYSIS is clicked.

    Clears both output panels, picks the mock data set for the query, asks
    Gemini for a summary, renders the summary and table, and writes a PDF or
    CSV file when that output format is selected.

    Args:
        b: The clicked button, as passed by ``Button.on_click`` (unused).
    """
    from html import escape  # stdlib; local so the cell's import list is untouched

    log_output.clear_output()
    result_output.clear_output()

    # 1. Validation
    query = query_input.value.strip()
    api_key = api_key_input.value
    if not query:
        # Nothing to analyse; bail out before disabling the button.
        log("ERROR: Please enter a species or compound to analyze.")
        return

    # 2. Workflow Simulation
    start_btn.disabled = True
    start_btn.description = "AGENTS RUNNING..."

    try:
        log("Orchestrator: Initializing PhytoChem-Tracker...")
        time.sleep(0.5)

        log(f"Input Agent: Analyzing {input_type.value} '{query}'...")
        time.sleep(0.8)

        # 3. Data Selection
        lowered = query.lower()
        if "tea" in lowered or "camellia" in lowered:
            data = MOCK_TEA_DATA
        else:
            data = MOCK_GENERIC_DATA

        log("PubChem Agent: Querying REST API...")
        time.sleep(0.5)
        log(f"PubChem Agent: Resolved {len(data)} compounds.")

        log(f"Spectral Agent: Accessing GNPS ({solvent_pref.value})...")
        time.sleep(0.5)

        # 4. Gemini Call
        log("Gemini Agent: Synthesizing executive summary...")
        summary = get_gemini_summary(api_key, query, len(data))
        log("Gemini Agent: Summary generation complete.")

        # 5. Render Results
        with result_output:
            # Display Summary. The text comes from the model (or an error
            # message), so escape it before embedding it in markup, and keep
            # its line breaks visible.
            summary_html = escape(str(summary)).replace("\n", "<br>")
            display(HTML(f"<div style='background-color:#e6fffa; padding:15px; border-left: 5px solid #10b981; border-radius: 5px; color: #333'><h3>Gemini AI Summary</h3><p>{summary_html}</p></div>"))

            # Display Table
            df = pd.DataFrame(data)
            display(HTML("<h3>Top Identified Phytochemicals</h3>"))
            display(df)

            # Handle file exports
            if output_fmt.value == 'PDF Report':
                log("PDF Agent: Compiling report...")
                filename = generate_pdf(query, summary, data)
                log(f"PDF Agent: Saved to {filename}")
                display(HTML(_download_link(filename, "Download PDF Report")))
            elif output_fmt.value == 'CSV':
                # The dropdown offers CSV, so honour it instead of ignoring it.
                log("CSV Agent: Exporting table...")
                csv_name = "report.csv"
                df.to_csv(csv_name, index=False)
                log(f"CSV Agent: Saved to {csv_name}")
                display(HTML(_download_link(csv_name, "Download CSV")))

    except Exception as e:
        # UI boundary: report any failure in the log panel rather than
        # leaving a traceback in the widget callback.
        log(f"ERROR: {e}")

    finally:
        # Always re-enable the button, even after an error.
        start_btn.disabled = False
        start_btn.description = "START ANALYSIS"

# --- LAYOUT ---
# Stack the dashboard top to bottom: title, key entry, inputs, filters,
# start button, agent log and finally the results area.
ui = widgets.VBox(children=[
    header,
    api_key_input,
    widgets.HTML("<hr>"),
    input_type,
    query_input,
    widgets.HTML("<b>3. Analysis Filters:</b>"),
    pref_box,
    widgets.HTML("<br>"),
    start_btn,
    widgets.HTML("<br><b>Agent Logs:</b>"),
    log_output,
    widgets.HTML("<hr>"),
    result_output,
])

# Hook the handler up before the dashboard becomes visible.
start_btn.on_click(on_start_click)

display(ui)

VBox(children=(HTML(value="<h1>🌿 PhytoChem-Tracker <span style='color:#10b981'>Agentic Platform</span></h1>"),…