In [None]:
import os
from dotenv import load_dotenv
from crewai import Agent, LLM, Task, Crew, Process

# Load environment variables from a local .env file (if present) into os.environ
load_dotenv()

# Option D: Azure OpenAI
# Deployment used when AZURE_OPENAI_MODEL_NAME is unset or blank; this keeps
# the notebook's original behavior as the default.
DEFAULT_AZURE_DEPLOYMENT = "gpt-4o-mini"

openai_api_key = os.getenv("AZURE_OPENAI_API_KEY")
openai_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
openai_api_version = os.getenv("AZURE_OPENAI_API_VERSION")
# Previously this value was read but never used, so changing the variable had
# no effect on which model the crew actually called.
openai_model_name = (os.getenv("AZURE_OPENAI_MODEL_NAME") or "").strip() or DEFAULT_AZURE_DEPLOYMENT

# Accept either "<deployment>" or "azure/<deployment>" in the environment
# variable; the model string passed to LLM always carries the "azure/" prefix.
if openai_model_name.startswith("azure/"):
    azure_model = openai_model_name
else:
    azure_model = f"azure/{openai_model_name}"

llm = LLM(
    model=azure_model,
    api_key=openai_api_key,
    base_url=openai_endpoint,
    api_version=openai_api_version,
    azure=True
)

# Only claim success when the settings this cell reads are actually present;
# otherwise name the missing ones so a later authentication failure is easy
# to trace back to configuration. This is a warning, not an error, so setups
# that supply credentials some other way keep working.
azure_settings = {
    "AZURE_OPENAI_API_KEY": openai_api_key,
    "AZURE_OPENAI_ENDPOINT": openai_endpoint,
    "AZURE_OPENAI_API_VERSION": openai_api_version,
}
missing_settings = [name for name, value in azure_settings.items() if not value]

if missing_settings:
    print(f"⚠️ Missing environment variables: {', '.join(missing_settings)}")
else:
    print("🚀 Environment configured!")
print(f"LLM Model: {llm.model}")

## Multimodal Agents

In [2]:
# Product-quality inspection agent, configured to run in multimodal mode
quality_inspector = Agent(
    llm=llm,
    multimodal=True,  # switch on the agent's multimodal capabilities
    verbose=True,
    role="Product Quality Inspector",
    goal="Analyze and assess the quality of product images",
    backstory=(
        "An experienced manufacturing quality control expert who specializes in "
        "detecting defects and ensuring compliance."
    ),
)

In [3]:
from crewai import Task

# Prompt for the inspection task. The {image_url} placeholder is left as-is
# here; the kickoff call supplies a value under the "image_url" input key.
inspection_description = """
    Inspect the product image at {image_url}.
    Identify any visible defects such as scratches, dents, misalignment, or color inconsistencies.
    Provide a structured quality assessment report.
    """

# What the finished report is expected to contain
inspection_expected_output = "A detailed report highlighting detected issues and overall quality score."

# Image-inspection task, assigned to the quality inspector agent
inspection_task = Task(
    agent=quality_inspector,
    description=inspection_description,
    expected_output=inspection_expected_output,
)


In [None]:
from crewai import Crew

# Assemble a crew made of the single inspector agent and its single task
quality_inspection_crew = Crew(
    tasks=[inspection_task],
    agents=[quality_inspector],
    verbose=True,
)

# Image to inspect, passed to the crew under the "image_url" input key
image_url = "https://s.marketwatch.com/public/resources/images/MW-HT101_nerd_d_ZG_20191010165334.jpg"
crew_inputs = {"image_url": image_url}

# Execute the workflow and keep the result for the following cells
result = quality_inspection_crew.kickoff(inputs=crew_inputs)

# Print a banner line followed by the raw text of the final report
print("\n=== Final Product Quality Report ===", result.raw, sep="\n")

In [None]:
from IPython.display import Markdown
# Show the report's raw text as rendered Markdown; as the last expression in
# the cell, the Markdown object becomes the cell's output.
Markdown(result.raw)

### NG: Note on the output
The output appears to be wrong; the likely cause is that gpt-4o-mini did not process the image properly. The intent of this tutorial is to show how to use multimodal models in CrewAI, which is demonstrated here. Since the objective is not to improve the quality of the output, no time was spent optimizing the results by replacing the model.