## Identify tech stack and generate diagram

In [None]:
# Install the third-party packages this notebook imports (google-genai, python-dotenv).
# NOTE(review): versions are unpinned, so a fresh run may pick up different releases — consider pinning.
%pip install google-genai python-dotenv

In [7]:
import os
from google import genai 
from dotenv import load_dotenv
 
# Load the .env file so that GOOGLE_API_KEY can be read from os.environ below
# (presumably from a .env next to the notebook — confirm where the file lives).
load_dotenv() 

# Initialize the Gemini client. os.environ[...] raises KeyError when
# GOOGLE_API_KEY is not set, so a missing key fails here rather than on the first API call.
client = genai.Client(api_key=os.environ["GOOGLE_API_KEY"])

In [3]:
## Create XML from code

import os

# Configuration
# Directories that are never descended into (VCS metadata, dependencies, build output).
IGNORE_DIRS = {'.git', 'node_modules', '__pycache__', '.next', 'dist'}
# Individual files that are skipped even if their extension is allowed.
IGNORE_FILES = {'package-lock.json', 'yarn.lock', '.DS_Store'}
# Only include relevant code extensions to save tokens.
# '.yml' is included alongside '.yaml' so docker-compose / Spring / CI files are not dropped.
ALLOWED_EXT = {'.js', '.ts', '.tsx', '.py', '.md', '.json', '.css', '.yaml', '.yml', '.gradle', '.java', '.properties', '.xml'}
# Extension-less files that still describe the stack and must be matched by exact name.
ALLOWED_FILES = {'Dockerfile'}


def generate_xml_prompt(path):
    """Concatenate the text files under ``path`` into one XML-like prompt string.

    Walks ``path`` recursively, skipping ``IGNORE_DIRS`` and ``IGNORE_FILES``,
    and keeps files whose name ends with one of ``ALLOWED_EXT`` or is listed in
    ``ALLOWED_FILES``. Each kept file becomes a
    ``<file path="relative/path">...</file>`` element; elements are joined by a
    blank line. Directories and files are visited in sorted order so the same
    tree always produces the same prompt.

    Args:
        path: Root directory of the code base to export.

    Returns:
        The joined prompt string, or ``""`` when nothing matched (including
        when ``path`` does not exist, since ``os.walk`` then yields nothing).

    Notes:
        Files that cannot be opened or decoded as UTF-8 are reported on stdout
        and skipped. File content is embedded verbatim (no XML escaping).
    """
    allowed_suffixes = tuple(ALLOWED_EXT)  # str.endswith accepts a tuple of suffixes
    output = []
    for root, dirs, files in os.walk(path):
        # Assign in place so os.walk prunes ignored directories; sort for a stable order.
        dirs[:] = sorted(d for d in dirs if d not in IGNORE_DIRS)

        for file in sorted(files):
            if file in IGNORE_FILES:
                continue
            if not (file in ALLOWED_FILES or file.endswith(allowed_suffixes)):
                continue

            full_path = os.path.join(root, file)
            rel_path = os.path.relpath(full_path, path)

            try:
                with open(full_path, 'r', encoding='utf-8') as f:
                    content = f.read()
            except (OSError, UnicodeDecodeError) as e:
                # Best effort: an unreadable or non-UTF-8 file must not abort the export.
                print(f"Could not read {rel_path}: {e}")
                continue

            # Wrap content in XML tags as requested
            output.append(f'<file path="{rel_path}">\n{content}\n</file>')

    return "\n\n".join(output)

In [None]:
# Folder containing the code base to analyze, and the file the export is written to.
CODE_FOLDER = "../spring-boot-dockerized"

OUTPUT_FILE = 'repo_context.xml'

# Fail fast on a wrong path: os.walk silently yields nothing for a missing
# directory, which would otherwise produce an empty export and a misleading
# success message.
if not os.path.isdir(CODE_FOLDER):
    raise FileNotFoundError(f"CODE_FOLDER is not an existing directory: {CODE_FOLDER}")

# Execute and save
xml_content = generate_xml_prompt(CODE_FOLDER)
if not xml_content:
    raise ValueError(f"No matching source files found under {CODE_FOLDER}")

with open(OUTPUT_FILE, 'w', encoding='utf-8') as f:
    f.write(xml_content)

print(f"Successfully generated prompt in {OUTPUT_FILE}")


Successfully generated prompt in repo_context.xml


In [5]:
# Prompt asking the model to identify the tech stack of the exported code base.
# This is an f-string, so `xml_content` must already be defined when the cell runs;
# re-run this cell after regenerating `xml_content`.
TECH_FINDING_PROMPT_STACK = f""" I have an XML file containing exported source code. Analyze the following XML snippet and identify:

Identify the Primary Stack

       *   **Primary Programming Languages:**
              File extensions (`.py`, `.java`, `.ts`, `.js`, `.go`, `.cs`, etc.)
              Syntax, keywords, and common constructs within the `<file>` tags (e.g., `import`, `package`, `def`, `class`, `function`).
       *   **Frameworks & Libraries:**
              Specific import statements (e.g., `import React from 'react'`, `from flask import Flask`, `import org.springframework`).
              Configuration files (e.g., `package.json`, `pom.xml`, `requirements.txt`).
              Common class names, annotations, or function calls associated with known frameworks (e.g., `ReactDOM.render`, `@RestController`, `app.use()`).
       *   **Database/Storage Technologies:**
              Connection strings or configuration for databases (e.g., `jdbc:postgresql`, `mongodb://`, `mysql://`).
              SQL queries or ORM (Object-Relational Mapping) configurations (e.g., `Hibernate`, `SQLAlchemy`, `Mongoose`).
              Keywords related to specific database types (e.g., `PostgreSQL`, `MySQL`, `MongoDB`, `Redis`, `DynamoDB`).
       *   **Infrastructure/Deployment Tools:**
              `Dockerfile` or `docker-compose.yml` content.
              Cloud provider-specific tags, APIs, or configuration (e.g., `aws`, `gcp`, `azure`, `s3`, `ec2`, `lambda`).
              Kubernetes manifests (`.yaml` files with `apiVersion`, `kind`, `metadata`, `spec`).
              Terraform (`.tf` files) or Ansible (`.yml` playbooks).

{xml_content}
"""

In [8]:
## Finding tech stack

# Ask Gemini to identify the tech stack from the exported code base.
response = client.models.generate_content(
    model='gemini-2.5-flash', contents=TECH_FINDING_PROMPT_STACK
)

# response.text can be None when the model returns no text part (for example a
# blocked or empty candidate); writing None would raise an opaque TypeError.
if not response.text:
    raise RuntimeError("Gemini returned no text for the tech-stack prompt; nothing was saved")

# Define the filename
filename = "response-gem-stack.md"

# Save the response.text to the file
with open(filename, "w", encoding="utf-8") as f:
    f.write(response.text)

print(f"Response successfully saved to {filename} for tech stack")

Response successfully saved to response-gem-stack.md for tech stack


In [9]:
# Prompt asking the model to emit a Draw.io diagram of the architecture.
# This is an f-string, so `xml_content` must already be defined when the cell runs;
# re-run this cell after regenerating `xml_content`.
DRAWIO_ARCH_FROM_CODE_XML_PROMPT = f"""
You are a Senior Full-Stack Cloud Architect and expert in the Draw.io XML schema.

TASK:
Analyze the provided source code XML. Infer the system's structural components and generate a valid, importable Draw.io Raw XML diagram representing the architecture.

ARCHITECTURAL BLUEPRINT:
Organize the components into a top-down hierarchical tree within these specific swimlanes:
    1. Client/Frontend (Top)
    2. API/Controller Layer
    3. Service/Logic Layer
    4. Database/Persistence Layer (Bottom)
    5. Database Information
    6. Infrastructure/Deployment (Separate swimlane on the right)


STRICT XML SCHEMA REQUIREMENTS:
- Root Structure: Must follow the exact hierarchy: <mxfile><diagram><mxGraphModel><root>...
- Mandatory IDs: The first two elements in <root> MUST be <mxCell id="0"/> and <mxCell id="1" parent="0"/>. All subsequent elements must have unique IDs and reference parent="1".
- Geometry: Every shape must set vertex="1" and include a child <mxGeometry as="geometry"> element with explicit x, y, width, and height attributes to avoid overlapping.
- Connections: Use <mxCell> with 'source' and 'target' attributes for edges. Use 'edge="1"' and 'style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;"'. Every edge must contain <mxGeometry relative="1" as="geometry"/>.
- Styling: Use 'style="rounded=1;whiteSpace=wrap;html=1;fillColor=#dae8fe;strokeColor=#6c8ebf;"' for blocks.

OUTPUT FORMAT:
- Provide ONLY the raw XML string.
- No markdown code blocks (no ```xml).
- No conversational text, preambles, or post-analysis.
- Start with: <mxfile>
- End with: </mxfile>

SOURCE CODE TO ANALYZE:
{xml_content}
"""

In [10]:
## Generate Diagram

# Ask Gemini to generate the Draw.io architecture diagram.
response = client.models.generate_content(
    model='gemini-2.5-flash', contents=DRAWIO_ARCH_FROM_CODE_XML_PROMPT
)

# response.text can be None when the model returns no text part, so normalize first.
diagram_xml = (response.text or "").strip()

# Models sometimes wrap the answer in a Markdown fence despite the instructions;
# a fenced file cannot be imported into Draw.io, so unwrap it.
if diagram_xml.startswith("```"):
    _, _, diagram_xml = diagram_xml.partition("\n")  # drop the opening ``` / ```xml line
    diagram_xml = diagram_xml.rsplit("```", 1)[0].strip()  # drop the closing fence, if any

if not diagram_xml:
    raise RuntimeError("Gemini returned no text for the diagram prompt; nothing was saved")

# Define the filename
filename = "response-gem-arch.xml"

# Save the diagram XML to the file
with open(filename, "w", encoding="utf-8") as f:
    f.write(diagram_xml)

print(f"Response successfully saved to {filename} architecture diagram")

Response successfully saved to response-gem-arch.xml architecture diagram
