In [26]:
import cv2
import numpy as np
import easyocr
import json
import os
from openai import AzureOpenAI  # For Azure OpenAI integration
from matplotlib import pyplot as plt  # For visualization (optional)

In [27]:
def preprocess_image(image_path):
    # Read image
    img = cv2.imread(image_path)
    
    # Convert to grayscale
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    
    # Apply adaptive thresholding
    thresh = cv2.adaptiveThreshold(
        gray, 255, 
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C, 
        cv2.THRESH_BINARY_INV, 11, 2
    )
    
    # Denoise
    denoised = cv2.fastNlMeansDenoising(thresh, h=10)
    
    return denoised, img  # Return processed and original image

In [28]:
def detect_shapes(image):
    # Find contours (for boxes)
    contours, _ = cv2.findContours(
        image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )
    
    boxes = []
    for cnt in contours:
        # Approximate to polygon
        epsilon = 0.01 * cv2.arcLength(cnt, True)
        approx = cv2.approxPolyDP(cnt, epsilon, True)
        
        # If quadrilateral, consider it a box
        if len(approx) == 4:
            x, y, w, h = cv2.boundingRect(approx)
            boxes.append({
                "type": "box",
                "coordinates": [x, y, x+w, y+h]
            })
    
    # Detect lines (for arrows)
    lines = cv2.HoughLinesP(
        image, 1, np.pi/180, 
        threshold=50, 
        minLineLength=30, 
        maxLineGap=10
    )
    
    arrows = []
    if lines is not None:
        for line in lines:
            x1, y1, x2, y2 = line[0]
            arrows.append({
                "type": "arrow",
                "coordinates": [x1, y1, x2, y2]
            })
    
    return {"boxes": boxes, "arrows": arrows}

In [29]:
def extract_text(image, reader):
    # Use EasyOCR to detect text
    results = reader.readtext(image)
    
    extracted_text = []
    for (bbox, text, prob) in results:
        if prob > 0.4:  # Confidence threshold
            extracted_text.append({
                "text": text,
                "bounding_box": bbox.tolist(),  # Convert numpy array to list
                "confidence": float(prob)
            })
    
    return extracted_text

In [30]:
def match_text_to_shapes(text_elements, shapes):
    matched_data = []
    
    for box in shapes["boxes"]:
        x1, y1, x2, y2 = box["coordinates"]
        box_center = ((x1 + x2) / 2, (y1 + y2) / 2)
        
        # Find text inside or near the box
        associated_text = []
        for text in text_elements:
            text_bbox = text["bounding_box"]
            text_center = (
                (text_bbox[0][0] + text_bbox[2][0]) / 2,
                (text_bbox[0][1] + text_bbox[2][1]) / 2
            )
            
            # Check if text is inside the box
            if (x1 <= text_center[0] <= x2 and y1 <= text_center[1] <= y2):
                associated_text.append(text["text"])
        
        if associated_text:
            matched_data.append({
                "box": box["coordinates"],
                "text": associated_text
            })
    
    return matched_data

In [31]:
def generate_json_output(shapes, text, matched_data):
    return {
        "metadata": {
            "source": "Azure Architecture Diagram",
            "processing_method": "OpenCV + EasyOCR"
        },
        "shapes": shapes,
        "text_elements": text,
        "matched_data": matched_data  # Text inside boxes
    }

In [None]:
def analyze_with_azure_openai(json_data, api_key, endpoint):
    client = AzureOpenAI(
        api_key=api_key,
        api_version="2023-12-01-preview",
        azure_endpoint=endpoint
    )
    
    prompt = f"""
    Analyze this Azure architecture diagram and describe the components:
    {json.dumps(json_data, indent=2)}
    
    Provide:
    1. A summary of the architecture.
    2. Key components (VMs, storage, networking).
    3. Possible data flows.
    """
    
    response = client.chat.completions.create(
        model="gpt-4",  # Use your deployment name
        messages=[{"role": "user", "content": prompt}],
        temperature=0.7,
        max_tokens=500
    )
    
    return response.choices[0].message.content

In [32]:
def process_azure_diagram(image_path, azure_openai_key=None, azure_openai_endpoint=None):
    # 1. Preprocess image
    processed_img, original_img = preprocess_image(image_path)
    
    # 2. Initialize EasyOCR (CPU mode)
    reader = easyocr.Reader(['en'], gpu=False)
    
    # 3. Extract shapes
    shapes = detect_shapes(processed_img)
    
    # 4. Extract text
    text_elements = extract_text(original_img, reader)
    
    # 5. Match text to shapes
    matched_data = match_text_to_shapes(text_elements, shapes)
    
    # 6. Generate JSON
    json_output = generate_json_output(shapes, text_elements, matched_data)
    
    # 7. (Optional) Send to Azure OpenAI
    if azure_openai_key and azure_openai_endpoint:
        analysis = analyze_with_azure_openai(
            json_output, 
            azure_openai_key, 
            azure_openai_endpoint
        )
        print("Azure OpenAI Analysis:")
        print(analysis)
    
    return json_output

In [34]:
# Example usage
if __name__ == "__main__":
    image_path = "protect-apis.png"
    
    # Run without Azure OpenAI
    extracted_data = process_azure_diagram(image_path)
    
    # Save JSON output
    with open("extracted_diagram.json", "w") as f:
        json.dump(extracted_data, f, indent=2)
    
    print("Extraction complete! JSON saved.")

Using CPU. Note: This module is much faster with a GPU.


AttributeError: 'list' object has no attribute 'tolist'

In [None]:
import easyocr
import cv2
import json

def extract_text_from_diagram(image_path, languages=['en'], confidence_threshold=0.4):
    """
    Extracts text from Azure architecture diagrams with EasyOCR.
    
    Args:
        image_path (str): Path to the image file
        languages (list): Languages for OCR (default: English)
        confidence_threshold (float): Minimum confidence score (0-1)
    
    Returns:
        List of dictionaries with text and metadata
    """
    # Initialize EasyOCR reader (CPU mode)
    reader = easyocr.Reader(languages, gpu=False)
    
    # Read image
    img = cv2.imread(image_path)
    
    # Extract text
    results = reader.readtext(img)
    
    # Process results
    extracted_text = []
    for (bbox, text, prob) in results:
        if prob >= confidence_threshold:
            extracted_text.append({
                "text": text.strip(),
                "confidence": float(prob),
                #"bounding_box": [[int(x), int(y)] for [x, y] in bbox]  # Convert to integers
            })
    
    return extracted_text

def save_text_to_json(text_data, output_file="extracted_text.json"):
    """Saves extracted text to JSON file"""
    with open(output_file, 'w') as f:
        json.dump(text_data, f, indent=2)
    print(f"Text saved to {output_file}")

# Example usage
if __name__ == "__main__":
    # Configure as needed
    image_path = "protect-apis.png"
    output_json = "azure_resources.json"
    
    # Extract text
    extracted_text = extract_text_from_diagram(image_path)
    
    # Save to JSON
    save_text_to_json(extracted_text, output_json)
    
    # Print results
    print("Extracted Azure Resources:")
    # for item in extracted_text:
    #     print(f"- {item['text']} (Confidence: {item['confidence']:.2f})")

Using CPU. Note: This module is much faster with a GPU.


Text saved to azure_resources.json
Extracted Azure Resources:


In [3]:
import os
import json
from openai import AzureOpenAI
from dotenv import load_dotenv

# Load environment variables
load_dotenv()
extracted_resources = {}


with open("azure_resources.json") as f:
    extracted_resources = json.load(f)

def analyze_with_azure_openai(extracted_resources):
    # Initialize Azure OpenAI client
    client = AzureOpenAI(
        api_key=os.getenv("AZURE_OPENAI_KEY"),
        api_version="2024-02-01",
        azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT")
    )
    
    # Prepare the prompt
    prompt = f"""
    Analyze these Azure resources extracted from an architecture diagram:
    {json.dumps(extracted_resources, indent=2)}

    Generate a comprehensive Azure cloud deployment plan based on the provided JSON file containing resources from an architecture diagram. Please structure the plan as follows:
    Resource Group Creation:
    Create all resources within a single resource group following the naming convention 'rg-<environment>-<application_name>-<location>'.
    Deployment Order:
    Outline a suggested deployment order considering dependencies between resources.
    Resource Configuration:
    For each resource, provide the recommended configuration settings, including but not limited to:
    Compute resources (VMs, Functions, etc.)
    Storage resources (Blobs, Disks, etc.)
    Networking resources (VNets, Subnets, etc.)
    Database resources (Cosmos DB, SQL Database, etc.)
    Dependencies:
    Clearly define required dependencies between resources, ensuring a smooth deployment process.
    Cost Optimization Suggestions:
    Provide potential cost optimization strategies for each resource, including but not limited to:
    Reserved Instances
    Right-sizing resources
    Using Azure Hybrid Benefit
    Auto-shutdown and auto-start for non-production environments
    Ensure the plan is well-structured, easy to follow, and adaptable to different environments (dev, staging, prod).
    """
    
    # Make the API call
    response = client.chat.completions.create(
        model=os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME"),
        messages=[
            {"role": "system", "content": "You are an Azure cloud architect expert in creating deployment plans and ARM templates."},
            {"role": "user", "content": prompt}
        ],
        temperature=0.7,
        max_tokens=3000
    )
    
    return response.choices[0].message.content

def save_output_to_files(response_content):
    # Split the response into plan and template
    deployment_plan = response_content.split("ARM TEMPLATE:")[0].strip()
    arm_template = "ARM TEMPLATE:" + response_content.split("ARM TEMPLATE:")[1].strip()
    
    # Save to files
    with open("deployment_plan.md", "w") as f:
        f.write(deployment_plan)
    
    # Try to extract clean JSON if possible
    try:
        template_json = arm_template.split("```json")[1].split("```")[0].strip()
        with open("arm_template.json", "w") as f:
            f.write(template_json)
    except:
        with open("arm_template.txt", "w") as f:
            f.write(arm_template)
    
    print("Saved deployment_plan.md and arm_template.json")

# Example usage
if __name__ == "__main__":
    # Load your extracted resources (from previous step)
    with open("azure_resources.json") as f:
        extracted_resources = json.load(f)
    
    # Get analysis from Azure OpenAI
    analysis_result = analyze_with_azure_openai(extracted_resources)
    
    # Save the results
    save_output_to_files(analysis_result)
    
    print("Analysis complete! Check the generated files.")

IndexError: list index out of range

In [5]:
import os
import json
import time
from openai import AzureOpenAI, APIConnectionError, RateLimitError, APIStatusError
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

class AzureDeploymentPlanner:
    def __init__(self):
        self.client = self._initialize_openai_client()
        self.deployment_plan = None
        self.arm_template = None
    
    def _initialize_openai_client(self):
        """Initialize Azure OpenAI client with validation"""
        required_vars = [
            "AZURE_OPENAI_ENDPOINT",
            "AZURE_OPENAI_KEY",
            "AZURE_OPENAI_DEPLOYMENT_NAME"
        ]
        
        missing_vars = [var for var in required_vars if not os.getenv(var)]
        if missing_vars:
            raise ValueError(f"Missing environment variables: {', '.join(missing_vars)}")
        
        return AzureOpenAI(
            api_key=os.getenv("AZURE_OPENAI_KEY"),
            api_version="2024-02-01",
            azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
            max_retries=3
        )
    
    def _call_openai_with_retry(self, prompt, is_template_generation=False):
        """Make API call with retry logic"""
        max_retries = 3
        retry_delay = 2  # seconds
        
        for attempt in range(max_retries):
            try:
                messages = [
                    {
                        "role": "system", 
                        "content": "You are an Azure cloud architect specializing in deployment planning and ARM templates."
                    },
                    {"role": "user", "content": prompt}
                ]
                
                response = self.client.chat.completions.create(
                    model=os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME"),
                    messages=messages,
                    temperature=0.7 if not is_template_generation else 0.5,
                    max_tokens=2500 if not is_template_generation else 3000
                )
                return response.choices[0].message.content
                
            except APIConnectionError as e:
                if attempt == max_retries - 1:
                    raise ConnectionError(f"Failed to connect to Azure OpenAI after {max_retries} attempts: {str(e)}")
                print(f"Connection error (attempt {attempt + 1}), retrying in {retry_delay} seconds...")
                time.sleep(retry_delay)
                retry_delay *= 2  # Exponential backoff
                
            except RateLimitError:
                if attempt == max_retries - 1:
                    raise RuntimeError("Rate limit exceeded. Please try again later.")
                print(f"Rate limit hit (attempt {attempt + 1}), retrying in {retry_delay} seconds...")
                time.sleep(retry_delay)
                
            except APIStatusError as e:
                raise RuntimeError(f"API error: {e.status_code} - {e.message}")
    
    def generate_deployment_plan(self, extracted_resources):
        """Generate initial deployment plan for human review"""
        prompt = f"""
        Analyze these Azure resources extracted from an architecture diagram:
        {json.dumps(extracted_resources, indent=2)}

        Generate a comprehensive Azure cloud deployment plan with this structure:
        
        ## Resource Group Strategy
        - Naming convention: 'rg-<environment>-<application_name>-<location>'
        - Recommended locations
        
        ## Deployment Sequence
        - Ordered list of resources to deploy
        - Dependencies between resources
        
        ## Resource Configuration
        - Recommended settings for each resource type
        - Sizing recommendations
        
        ## Cost Optimization
        - Reserved instances opportunities
        - Right-sizing suggestions
        - Auto-shutdown recommendations
        
        ## Security Considerations
        - Network security recommendations
        - Identity and access management
        - Data protection suggestions
        
        Output in markdown format only - DO NOT include any ARM template yet.
        """
        
        try:
            self.deployment_plan = self._call_openai_with_retry(prompt)
            self._save_to_file("deployment_plan.md", self.deployment_plan)
            return self.deployment_plan
        except Exception as e:
            print(f"Error generating deployment plan: {str(e)}")
            raise
    
    def generate_arm_template(self, feedback=None):
        """Generate ARM template after plan approval"""
        if not self.deployment_plan:
            raise ValueError("No deployment plan exists. Generate plan first.")
            
        prompt = f"""
        Deployment Plan:
        {self.deployment_plan}
        
        {f"User Feedback: {feedback}" if feedback else ""}
        
        Create a complete ARM template with:
        1. All necessary resources in JSON format
        2. Parameterized values for environment customization
        3. Comments explaining key sections
        4. Follow Azure best practices
        
        Output ONLY the ARM template in JSON format within ```json ``` markers.
        """
        
        try:
            template_content = self._call_openai_with_retry(prompt, is_template_generation=True)
            self.arm_template = self._extract_json(template_content)
            self._save_to_file("arm_template.json", json.dumps(self.arm_template, indent=2))
            return self.arm_template
        except Exception as e:
            print(f"Error generating ARM template: {str(e)}")
            raise
    
    def _extract_json(self, content):
        """Extract JSON from markdown code blocks"""
        try:
            if "```json" in content:
                return json.loads(content.split("```json")[1].split("```")[0].strip())
            elif "```" in content:
                return json.loads(content.split("```")[1].split("```")[0].strip())
            return json.loads(content)
        except json.JSONDecodeError:
            return {"error": "Could not parse JSON", "raw_content": content}
    
    def _save_to_file(self, filename, content):
        """Save content to file with validation"""
        try:
            with open(filename, "w") as f:
                if isinstance(content, dict):
                    json.dump(content, f, indent=2)
                else:
                    f.write(content)
            print(f"Successfully saved {filename}")
        except IOError as e:
            print(f"Error saving file {filename}: {str(e)}")

def get_human_feedback():
    """Interactive prompt for human review"""
    print("\n=== DEPLOYMENT PLAN REVIEW ===")
    print("1. Approve and generate ARM template")
    print("2. Request modifications")
    print("3. Exit")
    
    while True:
        try:
            choice = input("Enter your choice (1-3): ").strip()
            if choice in ("1", "2", "3"):
                return choice
            print("Invalid input. Please enter 1, 2, or 3.")
        except KeyboardInterrupt:
            print("\nOperation cancelled by user.")
            return "3"

def main():
    try:
        # Validate environment before starting
        if not os.path.exists("azure_resources.json"):
            raise FileNotFoundError("azure_resources.json not found in current directory")
        
        # Load extracted resources
        with open("azure_resources.json") as f:
            extracted_resources = json.load(f)
        
        planner = AzureDeploymentPlanner()
        
        # Generate initial deployment plan
        print("Generating deployment plan...")
        plan = planner.generate_deployment_plan(extracted_resources)
        print("\nGenerated Deployment Plan Preview:")
        print(plan[:1000] + "...")  # Print first part for preview
        
        # Human review loop
        while True:
            choice = get_human_feedback()
            
            if choice == "1":  # Approved
                print("\nGenerating ARM template...")
                template = planner.generate_arm_template()
                print("\nARM Template generated successfully!")
                break
                
            elif choice == "2":  # Needs modification
                feedback = input("\nEnter your modification requests: ").strip()
                print("\nRegenerating deployment plan with your feedback...")
                plan = planner.generate_deployment_plan(extracted_resources)
                print("\nUpdated Deployment Plan Preview:")
                print(plan[:1000] + "...")
                
            elif choice == "3":  # Exit
                print("Exiting without generating ARM template.")
                return
        
        print("\nProcess completed. Check the generated files.")
        
    except Exception as e:
        print(f"\nFatal error: {str(e)}")
        print("Please check your configuration and try again.")

if __name__ == "__main__":
    main()

Generating deployment plan...
Successfully saved deployment_plan.md

Generated Deployment Plan Preview:
## Resource Group Strategy
- **Naming convention:** 'rg-\<environment\>-\<application_name\>-\<location\>'
- **Recommended locations:** 
  - East US
  - West US
  - North Europe
  - West Europe

## Deployment Sequence
1. Virtual Network (VNet)
   - `apim-subnet`
   - `ase-subnet`
   - `sglmi-subnet`
   - `ag-subnet`
   - `aks-subnet`
2. SQL Managed Instance
   - `aumanager-sqlmi`
3. Application Gateway
   - `aumanager-ag`
4. API Management
   - `aumanager-apim`
5. App Service Environment (ASE)
   - `aumanager-ase-linux`
6. AKS (Azure Kubernetes Service)
   - `aumanager-aks`
7. Web Application
   - `api`, `portal.<sore-domain>`
8. External Domain Configuration
   - `api.<some-domain>/external/apil`
   - `portal.<sore-domain>`
9. Security Services
   - Web Application Firewall
   - DDoS Protection

### Dependencies between resources
- VNet and subnets must be created first to host othe