In [1]:
!pip install openai



In [2]:
# Required packages to install
# pip install openai requests pydantic

# Required imports
import json
import os
from typing import Dict, List, Optional

import requests
from openai import OpenAI  # For chat completions API
from pydantic import BaseModel, Field

# Initialize the OpenAI-compatible client against the Fireworks endpoint
# (the same base URL test_image_processing uses). The key is read from the
# environment so that no credential is stored in the notebook.
FIREWORKS_API_KEY = os.environ.get("FIREWORKS_API_KEY")
if not FIREWORKS_API_KEY:
    raise RuntimeError(
        "Set the FIREWORKS_API_KEY environment variable before running this notebook."
    )

client = OpenAI(
    base_url="https://api.fireworks.ai/inference/v1",
    api_key=FIREWORKS_API_KEY,
)

# For the vision model, you need either:
# Option 1: If using OpenAI's Vision model
# (already included in the OpenAI package)

# Option 2: If using another vision model like Google's Vertex AI
# pip install google-cloud-aiplatform
# from vertexai.vision_models import ImageCaptioningModel
# vision_model = ImageCaptioningModel.from_pretrained("your-model-name")

In [3]:
# Define the schema for product search parameters
class AmazonSearchQuery(BaseModel):
    """Structured product details extracted from an image, used to build an Amazon search.

    Only ``product_name`` is required; every other field defaults to ``None``.
    """
    # Required: the main product name.
    product_name: str = Field(..., description="Main product name extracted from the image")
    # Optional descriptors that narrow the search when present.
    brand: Optional[str] = Field(None, description="Brand name if visible in the image")
    product_size: Optional[str] = Field(None, description="Size/amount/weight information (e.g., '16oz', '500ml')")
    # Mapping of identifier name -> value (e.g. UPC, EAN, ASIN per the description).
    identifiers: Optional[Dict[str, str]] = Field(None, description="Product identifiers such as UPC, EAN, ASIN if visible")
    key_features: Optional[List[str]] = Field(None, description="Key product features or descriptors visible in the image")
    
# Define the schema for Amazon search results verification
class ProductVerification(BaseModel):
    """Outcome of comparing the product seen in an image against Amazon search results.

    ``match_found`` and ``confidence_score`` are required; the remaining fields
    default to ``None`` and are only meaningful when a candidate product exists.
    """
    match_found: bool = Field(..., description="Whether a matching product was found on Amazon")
    # The documented 0-1 range is enforced so that out-of-range values
    # (e.g. a percentage such as 85) fail validation instead of passing silently.
    confidence_score: float = Field(..., ge=0.0, le=1.0, description="Confidence score for the match (0-1)")
    matched_product: Optional[Dict] = Field(None, description="Details of the matched product")
    discrepancies: Optional[List[str]] = Field(None, description="List of discrepancies between image and found product")
    amazon_link: Optional[str] = Field(None, description="Link to the Amazon product page if found")

In [4]:
def amazon_search(search_parameters, api_key=None):
    """
    Search Amazon (through the Rainforest API) for products matching parameters
    extracted from a product image.

    Lookup strategy, in priority order:
      1. ``asin``              -> direct product lookup
      2. ``upc``/``ean``/``isbn`` -> GTIN product lookup
      3. otherwise             -> text search built from brand, product name,
         ``product_size``, ``key_features`` and any ``attributes`` values

    Identifiers are read from the top level of ``search_parameters`` first and
    then from an optional nested ``identifiers`` mapping (keys matched
    case-insensitively). Empty or ``None`` values are ignored.

    Args:
        search_parameters: dict of structured data from the image analysis.
            ``product_name`` is required when no identifier is available.
        api_key: Rainforest API key. When omitted, the ``RAINFOREST_API_KEY``
            environment variable is used.

    Returns:
        dict with ``query_info`` (the input), ``results`` (a list of product
        dicts, at most 5 for text searches) and, on failure, ``error``.

    Raises:
        KeyError: if a text search is needed and ``product_name`` is missing.
    """
    import os
    import requests

    api_key = api_key or os.environ.get("RAINFOREST_API_KEY")
    if not api_key:
        return {
            "query_info": search_parameters,
            "error": "Rainforest API key missing: pass api_key or set RAINFOREST_API_KEY",
            "results": []
        }

    base_url = "https://api.rainforestapi.com/request"

    # Configure request parameters
    params = {
        "api_key": api_key,
        "amazon_domain": "amazon.com"
    }

    # Identifiers may also arrive nested (e.g. {"identifiers": {"UPC": "..."}}).
    identifiers = {
        str(key).lower(): value
        for key, value in (search_parameters.get("identifiers") or {}).items()
    }

    def _identifier(name):
        # A top-level value wins; falsy values ("" / None) count as absent.
        return search_parameters.get(name) or identifiers.get(name)

    asin = _identifier("asin")
    gtin = _identifier("upc") or _identifier("ean") or _identifier("isbn")

    # Determine search approach based on available information
    if asin:
        params["type"] = "product"
        params["asin"] = asin
    elif gtin:
        params["type"] = "product"
        params["gtin"] = gtin
    else:
        # Text-based search using extracted information
        params["type"] = "search"

        terms = [search_parameters["product_name"]]
        brand = search_parameters.get("brand")
        if brand:
            terms.insert(0, brand)
        if search_parameters.get("product_size"):
            terms.append(search_parameters["product_size"])

        features = search_parameters.get("key_features") or []
        if isinstance(features, str):
            features = [features]
        terms.extend(feature for feature in features if feature)

        attributes = search_parameters.get("attributes") or {}
        terms.extend(value for value in attributes.values() if value)

        params["search_term"] = " ".join(str(term) for term in terms)

    # Make API request
    try:
        # A timeout keeps a stalled connection from hanging the notebook forever.
        response = requests.get(base_url, params=params, timeout=30)
        response.raise_for_status()  # Raise exception for HTTP errors
        data = response.json()

        # Process results based on request type
        results = []
        if params["type"] == "product":
            product = data.get("product")
            if product:
                results.append({
                    "asin": product.get("asin"),
                    "title": product.get("title"),
                    "brand": product.get("brand"),
                    # `or {}` guards against explicit nulls in the payload.
                    "price": (product.get("buybox_winner") or {}).get("price"),
                    "rating": product.get("rating"),
                    "ratings_total": product.get("ratings_total"),
                    "main_image": (product.get("main_image") or {}).get("link"),
                    "link": product.get("link"),
                    "dimensions": product.get("dimensions"),
                    "weight": product.get("weight")
                })
        else:
            for item in (data.get("search_results") or [])[:5]:  # Limit to top 5 results
                results.append({
                    "asin": item.get("asin"),
                    "title": item.get("title"),
                    "link": item.get("link"),
                    "image": item.get("image"),
                    "price": item.get("price")
                })

        return {
            "query_info": search_parameters,
            "results": results
        }
    except Exception as e:
        # Best-effort: report the failure instead of raising. The request URL
        # (and therefore the API key) can appear in HTTP error messages, so
        # the key is redacted before the message is returned.
        return {
            "query_info": search_parameters,
            "error": str(e).replace(str(api_key), "[REDACTED]"),
            "results": []
        }

In [12]:
def _extract_inline_function_call(content):
    """Parse a ``<function=NAME>{json}</function>`` call embedded in plain text.

    Returns a ``(name, arguments)`` tuple, or ``None`` when ``content`` holds
    no well-formed inline call.
    """
    import json
    import re

    if not content:
        return None
    match = re.search(r"<function=([\w.-]+)>\s*(\{.*?\})\s*</function>", content, re.DOTALL)
    if match is None:
        return None
    try:
        arguments = json.loads(match.group(2))
    except json.JSONDecodeError:
        return None
    return match.group(1), arguments


def process_image_with_function_calling(image_path, client, rainforest_api_key,
                                        model="accounts/fireworks/models/qwen2p5-72b-instruct"):
    """
    Process an image using document inlining and function calling.

    The image is base64-encoded into a data URL carrying the
    ``#transform=inline`` suffix and sent to ``model`` together with two tool
    definitions (``amazon_search`` and ``customer_support``). The function the
    model selects is then dispatched locally.

    Args:
        image_path: Path to the image file. ``.png`` maps to ``image/png``;
            every other extension is sent as ``image/jpeg``.
        client: OpenAI-compatible client exposing ``chat.completions.create``.
        rainforest_api_key: Accepted for interface compatibility; currently
            not forwarded to ``amazon_search``.
        model: Identifier of the chat model to call.

    Returns:
        dict whose ``type`` is one of:
          * ``"product_search"``   - ``extracted_info`` and ``search_results``
          * ``"customer_support"`` - ``order_details`` and ``support_info``
            (plus ``error`` when no ``customer_support`` handler is defined)
          * ``"unknown_function"`` - the model named an unsupported function
          * ``"unknown"``          - no usable function call in the reply
    """
    import base64
    import json
    import time
    
    # Define tools for function calling
    tools = [
        {
            "type": "function",
            "function": {
                "name": "amazon_search",
                "description": "Search Amazon for products matching parameters extracted from a product image",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "product_name": {
                            "type": "string", 
                            "description": "Main product name extracted from the image"
                        },
                        "brand": {
                            "type": "string",
                            "description": "Brand name if visible in the image"
                        },
                        "product_size": {
                            "type": "string",
                            "description": "Size/amount/weight information (e.g., '16oz', '500ml')"
                        },
                        "key_features": {
                            "type": "array",
                            "items": {"type": "string"},
                            "description": "Key product features or descriptors visible in the image"
                        }
                    },
                    "required": ["product_name"]
                }
            }
        },
        {
            "type": "function",
            "function": {
                "name": "customer_support",
                "description": "Provide customer support for an order based on receipt or delivery image",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "order_id": {
                            "type": "string",
                            "description": "Order ID or reference number from the receipt/delivery image"
                        },
                        "order_date": {
                            "type": "string",
                            "description": "Date the order was placed"
                        },
                        "items": {
                            "type": "array",
                            "items": {"type": "string"},
                            "description": "Items included in the order"
                        }
                    },
                    "required": ["order_id"]
                }
            }
        }
    ]
    
    # Prepare image with document inlining
    with open(image_path, "rb") as image_file:
        image_content = image_file.read()
        
    base64_image = base64.b64encode(image_content).decode('utf-8')
    
    # Determine MIME type based on file extension
    if image_path.lower().endswith(('.jpg', '.jpeg')):
        mime_type = "image/jpeg"
    elif image_path.lower().endswith('.png'):
        mime_type = "image/png"
    else:
        mime_type = "image/jpeg"  # Default to JPEG
    
    # Create image URL with document inlining transform
    image_url = f"data:{mime_type};base64,{base64_image}#transform=inline"
    
    # Create a structured prompt that explicitly describes the functions
    system_content = f"""
You have access to the following functions:

Function Name: '{tools[0]["function"]["name"]}'
Purpose: '{tools[0]["function"]["description"]}'
Parameters Schema: {json.dumps(tools[0]["function"]["parameters"], indent=4)}

Function Name: '{tools[1]["function"]["name"]}'
Purpose: '{tools[1]["function"]["description"]}'
Parameters Schema: {json.dumps(tools[1]["function"]["parameters"], indent=4)}

Instructions for Using Functions:
1. Analyze the image to determine if it shows a PRODUCT or an ORDER RECEIPT.
2. For PRODUCT images, call the amazon_search function with all visible product details.
3. For ORDER RECEIPT images, call the customer_support function with all visible order details.
4. ALWAYS make a function call - do not respond with text.
5. Adhere strictly to the parameters schema. Ensure all required fields are provided.

Examples:
- For a product image showing "ITO EN Tea" bottle, call:
  <function=amazon_search>{{"product_name": "Green Tea", "brand": "ITO EN", "product_size": "16.9 fl oz", "key_features": ["Unsweetened"]}}</function>
  
- For an order receipt showing Order #123456, call:
  <function=customer_support>{{"order_id": "123456", "order_date": "2023-05-15", "items": ["Book", "Headphones"]}}</function>
"""
    
    # Set up messages
    messages = [
        {"role": "system", "content": system_content},
        {"role": "user", "content": [
            {"type": "image_url", "image_url": {"url": image_url}},
            {"type": "text", "text": "Analyze this image and call the appropriate function based on what you see."}
        ]}
    ]
    
    print("Sending request to model...")
    start_time = time.time()
    
    # Make the API call with function calling enabled
    response = client.chat.completions.create(
        model=model,
        messages=messages,
        tools=tools,
        tool_choice="auto",
        temperature=0.1,
        max_tokens=1000
    )
    
    print(f"Model response time: {time.time() - start_time:.2f} seconds")
    print(response.choices[0].message.model_dump_json(indent=4))

    # Get the response message
    message = response.choices[0].message
    tool_calls = getattr(message, 'tool_calls', None)
    print(f"Response content: {message.content if message.content else 'No content'}")
    print(f"Has tool calls: {bool(tool_calls)}")
    
    if tool_calls:
        # Structured tool call returned by the API.
        tool_call = tool_calls[0]
        function_name = tool_call.function.name
        try:
            function_args = json.loads(tool_call.function.arguments)
        except json.JSONDecodeError as exc:
            return {
                "type": "unknown",
                "message": message.content,
                "error": f"Tool call arguments were not valid JSON: {exc}"
            }
    else:
        # The system prompt demonstrates the "<function=...>{...}</function>"
        # text form, so the model may answer that way instead of populating
        # tool_calls; recover the call from the text when possible.
        inline_call = _extract_inline_function_call(message.content)
        if inline_call is None:
            # If no function was called, return the model's message
            return {"type": "unknown", "message": message.content}
        function_name, function_args = inline_call

    print(f"Function called: {function_name}")
    print(f"Arguments: {function_args}")
    
    # Call the appropriate function based on the model's choice
    if function_name == "amazon_search":
        result = amazon_search(function_args)
        return {
            "type": "product_search",
            "extracted_info": function_args,
            "search_results": result
        }
    
    if function_name == "customer_support":
        # NOTE(review): no customer_support function is visible in this
        # notebook, so it is looked up at call time and a structured error is
        # returned instead of a NameError when it is missing.
        handler = globals().get("customer_support")
        if not callable(handler):
            return {
                "type": "customer_support",
                "order_details": function_args,
                "support_info": {},
                "error": "customer_support handler is not defined"
            }
        return {
            "type": "customer_support",
            "order_details": function_args,
            "support_info": handler(function_args)
        }

    return {"type": "unknown_function", "function_name": function_name, "args": function_args}

In [13]:
def test_image_processing():
    """
    Test the process_image_with_function_calling function with tea.png
    """
    from openai import OpenAI
    import json
    import time
    
    # Initialize API client with Fireworks
    client = OpenAI(
        base_url="https://api.fireworks.ai/inference/v1",
        api_key="fw_3ZYLXEz2c3YWcN3CAkyaVedA"  # Replace with your actual API key
    )
    
    # Rainforest API key
    rainforest_api_key = "YOUR_RAINFOREST_API_KEY"  # Replace with your actual API key
    
    # Path to image
    image_path = "tea.png"
    
    print(f"Processing image: {image_path}")
    start_time = time.time()
    
    try:
        # Call the function to process the image
        results = process_image_with_function_calling(
            image_path=image_path,
            client=client,
            rainforest_api_key=rainforest_api_key
        )
        
        # Print results
        print(f"\nProcessing completed in {time.time() - start_time:.2f} seconds")
        print("\n=== Results ===")
        print(f"Result type: {results.get('type', 'unknown')}")
        
        if results.get('type') == 'product_search':
            print("\nExtracted Product Information:")
            print(json.dumps(results.get('extracted_info', {}), indent=2))
            
            print("\nTop Search Results:")
            search_results = results.get('search_results', {}).get('results', [])
            for i, result in enumerate(search_results[:3]):  # Show top 3 results
                print(f"\nResult #{i+1}:")
                print(f"Title: {result.get('title', 'N/A')}")
                print(f"ASIN: {result.get('asin', 'N/A')}")
                print(f"Price: {result.get('price', 'N/A')}")
                if 'link' in result:
                    print(f"Link: {result.get('link')}")
            
        elif results.get('type') == 'customer_support':
            print("\nOrder Details:")
            print(json.dumps(results.get('order_details', {}), indent=2))
            
            print("\nSupport Information:")
            support_info = results.get('support_info', {})
            print(f"Greeting: {support_info.get('greeting', 'N/A')}")
            print(f"Support message: {support_info.get('support_message', 'N/A')}")
            print("\nRecommendations:")
            for rec in support_info.get('recommendations', []):
                print(f"- {rec}")
        
        else:
            print("\nRaw Results:")
            print(json.dumps(results, indent=2, default=str))
        
        print("\n=== End of Results ===")
        
    except Exception as e:
        print(f"Error processing image: {str(e)}")
        import traceback
        traceback.print_exc()
    
    return results

# Run the test function.
# NOTE: running this cell performs a live chat-completions request and reads
# "tea.png" from the current working directory.
results = test_image_processing()

Processing image: tea.png
Sending request to model...
Model response time: 3.15 seconds
{
    "content": "Based on the provided context, it seems like the image is of a product, specifically a bottle or container of ITO EN Golden Oolong Tea. Since the context includes details such as the brand name, product type, and volume, it would be appropriate to use the `amazon_search` function to find matching products on Amazon.\n\n",
    "refusal": null,
    "role": "assistant",
    "audio": null,
    "function_call": null,
    "tool_calls": null
}
Response content: Based on the provided context, it seems like the image is of a product, specifically a bottle or container of ITO EN Golden Oolong Tea. Since the context includes details such as the brand name, product type, and volume, it would be appropriate to use the `amazon_search` function to find matching products on Amazon.


Has tool calls: False

Processing completed in 3.15 seconds

=== Results ===
Result type: unknown

Raw Results:
{
 

## Amazon Search Test

In [7]:
# Example payload shaped like the output of a vision model that has analysed
# a product photo; used to exercise amazon_search by hand.
search_parameters = dict(
    product_name="Pro-V Daily Moisture Renewal Shampoo",
    brand="Pantene",
    attributes=dict(
        size="25.4 fl oz",
        color="white bottle with gold cap",
        description="For dry hair",
    ),
)

In [8]:
# # Import necessary libraries
# import json
# import requests

# # Call the function with sample parameters
# results = amazon_search(search_parameters)

# # Print the results in a readable format
# print(json.dumps(results, indent=2))

In [9]:
import requests
import json

def simple_rainforest_test(api_key, timeout=30):
    """
    Simple test of the Rainforest API using a basic search query.

    Args:
        api_key: Rainforest API key sent as the ``api_key`` query parameter.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        On HTTP 200: dict with ``request_info``, ``total_results`` (number of
        entries in ``search_results``) and ``first_result`` (first entry, or
        ``None`` when there are none).
        Otherwise: dict with ``error`` (status code message) and ``response``
        (raw body text).
    """
    # Base URL for Rainforest API
    base_url = "https://api.rainforestapi.com/request"

    # Simple search parameters
    params = {
        "api_key": api_key,
        "type": "search",
        "amazon_domain": "amazon.com",
        "search_term": "iPhone charger",  # Simple, common product
        "sort_by": "featured"
    }

    # A timeout prevents a stalled connection from blocking the notebook.
    response = requests.get(base_url, params=params, timeout=timeout)

    if response.status_code != 200:
        return {
            "error": f"Request failed with status code {response.status_code}",
            "response": response.text
        }

    data = response.json()

    # `or []` also covers an explicit null, which would otherwise break len().
    search_results = data.get("search_results") or []

    return {
        "request_info": data.get("request_info", {}),
        "total_results": len(search_results),
        "first_result": search_results[0] if search_results else None
    }

# # Example usage (the key is read from the environment, never hardcoded):
# import os
# results = simple_rainforest_test(os.environ["RAINFOREST_API_KEY"])
# print(json.dumps(results, indent=2))