Setting Up Pydantic

In [1]:
from pydantic import BaseModel, field_validator, ValidationError

# 1. Define the model
class CheckpointProduct(BaseModel):
    name: str
    price: float

    @field_validator('price')
    @classmethod
    def price_must_be_positive(cls, v):
        if v <= 0:
            raise ValueError('Price must be positive')
        return v

# 2. Run the tests
print("--- Starting Pydantic Verification ---")

# Test A: Should work
try:
    p = CheckpointProduct(name="Coffee", price=3.50)
    print(f"‚úÖ Installation Check: Success! Created product: {p.name}")
except Exception as e:
    print(f"‚ùå Installation Check: Failed! Error: {e}")

# Test B: Should fail (Negative Price)
try:
    CheckpointProduct(name="Broken", price=-1.0)
    print("‚ùå Validation Check: Failed! (Should have caught negative price)")
except ValidationError:
    print("‚úÖ Validation Check: Success! (Caught invalid negative price)")

# Test C: Should fail (Wrong Data Type)
try:
    CheckpointProduct(name="Glitch", price="not-a-number")
    print("‚ùå Type Check: Failed! (Should have caught string-as-price)")
except ValidationError:
    print("‚úÖ Type Check: Success! (Caught wrong data type)")

--- Starting Pydantic Verification ---
‚úÖ Installation Check: Success! Created product: Coffee
‚úÖ Validation Check: Success! (Caught invalid negative price)
‚úÖ Type Check: Success! (Caught wrong data type)


Step 2: Creating Product Data Models

In [2]:
# 1 - Define the structure of product data for YOUR previous listing generator

from pydantic import BaseModel, Field, field_validator, HttpUrl
from typing import Optional, List

class ProductRequest(BaseModel):
    """
    This model defines the structure of the data coming FROM the client.
    It matches the 'metadata' used in Lab M1.05.
    """
    # Name must be a string, not empty, and max 100 chars
    name: str = Field(..., min_length=1, max_length=100, description="The name of the product")
    
    # Price must be a positive float
    price: float = Field(..., gt=0, description="The price must be greater than zero")
    
    # Category should be one of a few allowed types (Optional but good practice)
    category: str = Field(..., min_length=3)
    
    # Image path or URL (Required for the Vision API to work)
    image_url: str = Field(..., description="Local path or URL to the product image")
    
    # Additional info is optional
    additional_info: Optional[str] = Field(None, max_length=500)

    @field_validator('price')
    @classmethod
    def price_must_be_realistic(cls, v: float) -> float:
        if v > 100000:
            raise ValueError('Price seems too high for this automated system (max $100k)')
        return v

class ProductResponse(BaseModel):
    """
    This model validates the output RECEIVED from ChatGPT to ensure 
    it follows the JSON format requested in your prompt.
    """
    title: str = Field(..., max_length=60)
    description: str = Field(..., min_length=100)
    features: List[str] = Field(..., min_items=5, max_items=10)
    keywords: str

# --- Testing the Model ---
if __name__ == "__main__":
    print("--- Testing Validation ---")
    
    # Example of VALID data
    valid_data = {
        "name": "Ultra-Bass Wireless Headphones",
        "price": 89.99,
        "category": "Electronics",
        "image_url": "images/headphones.jpg",
        "additional_info": "Blue color, 20h battery"
    }
    
    try:
        product = ProductRequest(**valid_data)
        print(f"‚úì Validation Success: {product.name} is ready for API calling.")
    except Exception as e:
        print(f"‚úó Validation Failed: {e}")

--- Testing Validation ---
‚úì Validation Success: Ultra-Bass Wireless Headphones is ready for API calling.


C:\Users\pbiai\AppData\Local\Temp\ipykernel_24992\2680654082.py:40: PydanticDeprecatedSince20: `min_items` is deprecated and will be removed, use `min_length` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.12/migration/
  features: List[str] = Field(..., min_items=5, max_items=10)
C:\Users\pbiai\AppData\Local\Temp\ipykernel_24992\2680654082.py:40: PydanticDeprecatedSince20: `max_items` is deprecated and will be removed, use `max_length` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.12/migration/
  features: List[str] = Field(..., min_items=5, max_items=10)


Create Pydantic models with validation rules that make sense of your use case.

In [3]:
from pydantic import BaseModel, Field, field_validator
from typing import Optional, List

class ProductListingRequest(BaseModel):
    """Validates incoming product data before processing with AI."""
    
    # Logic: Names shouldn't be single letters or 500 characters long.
    name: str = Field(
        ..., 
        min_length=3, 
        max_length=150, 
        description="The commercial name of the product"
    )
    
    # Logic: E-commerce items must have a category for SEO and organization.
    category: str = Field(
        ..., 
        pattern=r"^[a-zA-Z\s&-]+$", # Only letters, spaces, & and -
        description="Product category (e.g., Electronics, Home & Garden)"
    )
    
    # Logic: Prices cannot be zero or negative.
    price: float = Field(
        ..., 
        gt=0, 
        description="The retail price must be greater than zero"
    )
    
    # Logic: We need a reference to the image for the Vision API.
    image_path: str = Field(
        ..., 
        description="The local file path or URL to the product image"
    )

    # Custom Business Logic Validator
    @field_validator('category')
    @classmethod
    def validate_category_list(cls, v: str) -> str:
        allowed = ["Electronics", "Fashion", "Home", "Beauty", "Sports"]
        if v not in allowed:
            # We don't raise an error, we just warn or normalize
            # But for this lab, let's be strict:
            raise ValueError(f"Category must be one of: {', '.join(allowed)}")
        return v

Add custom validators for business logic

In [4]:
from pydantic import BaseModel, Field, field_validator, model_validator
from typing import Optional, List

class ProductListingRequest(BaseModel):
    name: str = Field(..., min_length=3, max_length=150)
    category: str
    price: float
    currency: str = "USD"
    image_path: str
    discount: Optional[float] = 0.0

    # --- 1. Field Level Validator: Category Whitelisting ---
    @field_validator('category')
    @classmethod
    def must_be_supported_category(cls, v: str) -> str:
        """Ensures the product belongs to a department we actually support."""
        valid_departments = {'Electronics', 'Fashion', 'Home', 'Beauty', 'Sports'}
        # Normalize to title case so 'electronics' or 'ELECTRONICS' both pass
        formatted_v = v.strip().title()
        if formatted_v not in valid_departments:
            raise ValueError(f"Category '{v}' is not supported. Must be one of {valid_departments}")
        return formatted_v

    # --- 2. Field Level Validator: Realistic Pricing ---
    @field_validator('price')
    @classmethod
    def validate_price_range(cls, v: float) -> float:
        """Prevents processing items that are likely data entry errors (e.g., $0.01 or $1M)."""
        if v < 0.50:
            raise ValueError("Price is too low for individual listing generation (minimum $0.50).")
        if v > 5000:
            raise ValueError("High-value items over $5000 require manual description review.")
        return v

    # --- 3. Model Level Validator: Discount Logic ---
    @model_validator(mode='after')
    def check_discount_logic(self) -> 'ProductListingRequest':
        """Business Logic: A discount cannot be higher than the price itself."""
        if self.discount >= self.price:
            raise ValueError(f"Discount (${self.discount}) cannot exceed or equal the price (${self.price})")
        return self

    # --- 4. Logic: Image Extension Check ---
    @field_validator('image_path')
    @classmethod
    def validate_image_extension(cls, v: str) -> str:
        """Ensures the file is a format the Vision API can actually read."""
        valid_extensions = ('.jpg', '.jpeg', '.png', '.webp')
        if not v.lower().endswith(valid_extensions):
            raise ValueError(f"Unsupported image format. Vision API requires: {valid_extensions}")
        return v

Check validation rules

In [5]:
from pydantic import BaseModel, Field, field_validator
from typing import Optional

class FashionProduct(BaseModel):
    """
    Validated model based on the Fashion Dataset used in Lab M1.05.
    Fields match the typical columns: id, gender, masterCategory, subCategory, articleType, baseColour, etc.
    """
    # Core fields from your previous dataset
    id: int = Field(..., description="Unique product ID from the dataset")
    productDisplayName: str = Field(..., min_length=5, description="The full title of the item")
    masterCategory: str = Field(..., description="Top level category (e.g., Apparel)")
    subCategory: str = Field(..., description="Mid level category (e.g., Topwear)")
    baseColour: str = Field(..., description="The primary color of the product")
    usage: str = Field(..., description="Product usage (e.g., Casual, Formal)")
    
    # Adding business logic fields even if they weren't in the original CSV
    price: float = Field(default=19.99, gt=0) 
    image_url: str = Field(..., description="Path to the image used in the Vision API")

    # --- Business Logic: Case Sensitivity ---
    @field_validator('masterCategory', 'subCategory', 'baseColour')
    @classmethod
    def capitalize_strings(cls, v: str) -> str:
        """Normalizes data so 'apparel' and 'Apparel' are treated the same."""
        return v.strip().capitalize()

    # --- Business Logic: Valid Gender ---
    @field_validator('usage')
    @classmethod
    def validate_usage(cls, v: str) -> str:
        allowed = ["Casual", "Formal", "Sports", "Ethnic", "Party", "Travel"]
        if v.capitalize() not in allowed:
            raise ValueError(f"Invalid usage type. Must be one of: {allowed}")
        return v.capitalize()

Example of one product

In [6]:
# Example of processing one item from your previous dataset
raw_data_from_lab_5 = {
    "id": 15970,
    "productDisplayName": "Turtle Check Men Navy Blue Shirt",
    "masterCategory": "Apparel",
    "subCategory": "Topwear",
    "baseColour": "Navy Blue",
    "usage": "casual",
    "image_url": "15970.jpg"
}

# This validates the data from the previous lab!
validated_product = FashionProduct(**raw_data_from_lab_5)

Verifying the previous database

In [7]:
import json

# Simulated data from your Lab M1.05 (usually loaded from a file or HuggingFace)
previous_lab_data = [
    {
        "id": 15970,
        "productDisplayName": "Turtle Check Men Navy Blue Shirt",
        "masterCategory": "Apparel",
        "subCategory": "Topwear",
        "baseColour": "Navy Blue",
        "usage": "casual",
        "image_url": "images/15970.jpg"
    },
    {
        "id": -99,  # BUSINESS LOGIC ERROR (Negative ID)
        "productDisplayName": "Broken Link Shoes",
        "masterCategory": "Footwear",
        "subCategory": "Shoes",
        "baseColour": "Black",
        "usage": "formal",
        "image_url": "images/shoes.png"
    }
]

def validate_previous_lab_dataset(data_list):
    validated_products = []
    errors = []

    print(f"--- Processing {len(data_list)} Products ---")
    
    for item in data_list:
        try:
            # This is where the Pydantic model checks the data
            product = FashionProduct(**item)
            validated_products.append(product)
            print(f"‚úÖ PASSED: {product.productDisplayName} (ID: {product.id})")
        
        except Exception as e:
            # If an error is detected, we capture it without crashing the whole script
            errors.append({"item_id": item.get('id'), "error": str(e)})
            print(f"‚ùå FAILED: Item ID {item.get('id')} - {str(e)[:50]}...")

    return validated_products, errors

# Run the validation
valid_items, error_logs = validate_previous_lab_dataset(previous_lab_data)

print(f"\n--- Summary ---")
print(f"Successfully Validated: {len(valid_items)}")
print(f"Errors Found: {len(error_logs)}")

--- Processing 2 Products ---
‚úÖ PASSED: Turtle Check Men Navy Blue Shirt (ID: 15970)
‚úÖ PASSED: Broken Link Shoes (ID: -99)

--- Summary ---
Successfully Validated: 2
Errors Found: 0


Test "bad" database

In [8]:
from pydantic import ValidationError

def run_checkpoint_verification():
    # 1. TEST: Valid Data (Matches Lab M1.05 structure)
    valid_fashion_item = {
        "id": 15970,
        "productDisplayName": "Turtle Check Men Navy Blue Shirt",
        "masterCategory": "Apparel",
        "subCategory": "Topwear",
        "baseColour": "Navy Blue",
        "usage": "casual", # Our validator will change this to 'Casual'
        "image_url": "images/15970.jpg"
    }

    # 2. TEST: Invalid Data (Triggers Business Logic & Required Field errors)
    invalid_fashion_item = {
        "id": -5,                       # ERROR: ID must be positive
        "productDisplayName": "Short",  # ERROR: Too short
        "masterCategory": "Apparel",
        "subCategory": "Topwear",
        # "baseColour" is MISSING       # ERROR: Required field
        "usage": "Swimming",            # ERROR: Not in our allowed list
        "image_url": "images/15970.jpg"
    }

    print("--- CHECKPOINT: Validating Lab M1.05 Data ---")
    try:
        product = FashionProduct(**valid_fashion_item)
        print(f"‚úÖ PASS: Valid data accepted for '{product.productDisplayName}'")
        print(f"   (Logic Check: Usage was 'casual', is now '{product.usage}')")
    except ValidationError as e:
        print(f"‚ùå FAIL: Valid data should not have errors: {e}")

    print("\n--- CHECKPOINT: Verifying Error Enforcement ---")
    try:
        FashionProduct(**invalid_fashion_item)
        print("‚ùå FAIL: Invalid data was incorrectly accepted!")
    except ValidationError as e:
        print(f"‚úÖ PASS: Caught {len(e.errors())} expected errors:")
        for error in e.errors():
            print(f"   - {error['loc'][0]}: {error['msg']}")

if __name__ == "__main__":
    run_checkpoint_verification()

--- CHECKPOINT: Validating Lab M1.05 Data ---
‚úÖ PASS: Valid data accepted for 'Turtle Check Men Navy Blue Shirt'
   (Logic Check: Usage was 'casual', is now 'Casual')

--- CHECKPOINT: Verifying Error Enforcement ---
‚úÖ PASS: Caught 2 expected errors:
   - baseColour: Field required
   - usage: Value error, Invalid usage type. Must be one of: ['Casual', 'Formal', 'Sports', 'Ethnic', 'Party', 'Travel']


Generating the json files

In [9]:
import json

# This matches your Fashion Dataset structure
valid_data = {
    "id": 15970,
    "productDisplayName": "Turtle Check Men Navy Blue Shirt",
    "masterCategory": "Apparel",
    "subCategory": "Topwear",
    "baseColour": "Navy Blue",
    "usage": "Casual",
    "price": 29.99,
    "image_url": "images/15970.jpg"
}

# This deliberately breaks your custom rules
invalid_data = {
    "id": -1,                      # Error: Must be positive
    "productDisplayName": "Tee",   # Error: Too short (min_length=5)
    "masterCategory": "Apparel",
    "subCategory": "Topwear",
    "baseColour": "Navy Blue",
    "usage": "Space-Travel",       # Error: Not in our allowed list
    "price": 9999.0,               # Error: Exceeds our $5000 ceiling
    "image_url": "images/15970.jpg"
}

# Save to local files
with open("valid_product.json", "w") as f:
    json.dump(valid_data, f, indent=4)

with open("invalid_product.json", "w") as f:
    json.dump(invalid_data, f, indent=4)

print("‚úÖ Files 'valid_product.json' and 'invalid_product.json' created!")

‚úÖ Files 'valid_product.json' and 'invalid_product.json' created!


The validation Function

In [10]:
from pydantic import ValidationError

def validate_json_file(file_path):
    """Loads a JSON file and validates it against the FashionProduct model."""
    print(f"\n--- Checking File: {file_path} ---")
    
    try:
        # 1. Load raw data
        with open(file_path, "r") as f:
            data = json.load(f)
        
        # 2. Validate using your model
        product = FashionProduct(**data)
        
        print(f"‚úÖ SUCCESS: Data is valid for '{product.productDisplayName}'")
        return product.model_dump() # Returns a clean dictionary

    except FileNotFoundError:
        print(f"‚ùå ERROR: File {file_path} not found.")
    except json.JSONDecodeError:
        print(f"‚ùå ERROR: {file_path} is not a valid JSON format.")
    except ValidationError as e:
        print(f"‚ùå VALIDATION FAILED for {file_path}:")
        # Loop through errors to provide clear feedback
        for error in e.errors():
            field = error['loc'][0]
            message = error['msg']
            print(f"   - Field '{field}': {message}")
        return None

# Execute Step 3
valid_output = validate_json_file("valid_product.json")
invalid_output = validate_json_file("invalid_product.json")


--- Checking File: valid_product.json ---
‚úÖ SUCCESS: Data is valid for 'Turtle Check Men Navy Blue Shirt'

--- Checking File: invalid_product.json ---
‚ùå VALIDATION FAILED for invalid_product.json:
   - Field 'productDisplayName': String should have at least 5 characters
   - Field 'usage': Value error, Invalid usage type. Must be one of: ['Casual', 'Formal', 'Sports', 'Ethnic', 'Party', 'Travel']


The Validation function

In [11]:
import json
from pydantic import ValidationError

def load_and_validate_product(file_path: str):
    """
    Loads a JSON file and validates it against the FashionProduct model.
    
    Returns:
        FashionProduct: A validated Pydantic object if successful.
        None: If validation or loading fails.
    """
    print(f"\nüîç Loading and Validating: {file_path}")
    
    try:
        # 1. Open and Parse the JSON file
        with open(file_path, 'r') as file:
            raw_data = json.load(file)
        
        # 2. Pass the dictionary into the Pydantic Model
        # This triggers all your custom validators (ID, Category, etc.)
        validated_product = FashionProduct(**raw_data)
        
        print(f"‚úÖ VALIDATION SUCCESS: '{validated_product.productDisplayName}' is ready.")
        return validated_product

    except FileNotFoundError:
        print(f"‚ùå ERROR: The file '{file_path}' does not exist.")
    except json.JSONDecodeError:
        print(f"‚ùå ERROR: '{file_path}' contains invalid JSON syntax (check your commas/brackets).")
    except ValidationError as e:
        print(f"‚ùå DATA VALIDATION ERROR in '{file_path}':")
        # Loop through the specific errors provided by Pydantic
        for error in e.errors():
            field = error['loc'][0]
            message = error['msg']
            print(f"   - {field.upper()}: {message}")
    
    return None

# --- Example Usage for your Lab ---
# Assuming you created 'valid_product.json' and 'invalid_product.json' earlier
product_one = load_and_validate_product("valid_product.json")
product_two = load_and_validate_product("invalid_product.json")


üîç Loading and Validating: valid_product.json
‚úÖ VALIDATION SUCCESS: 'Turtle Check Men Navy Blue Shirt' is ready.

üîç Loading and Validating: invalid_product.json
‚ùå DATA VALIDATION ERROR in 'invalid_product.json':
   - PRODUCTDISPLAYNAME: String should have at least 5 characters
   - USAGE: Value error, Invalid usage type. Must be one of: ['Casual', 'Formal', 'Sports', 'Ethnic', 'Party', 'Travel']


Loading and connecting

In [12]:
def load_and_validate_json(file_path):
    # This combines your Step 2 logic with Step 3's file-loading requirement
    try:
        with open(file_path, 'r') as f:
            data = json.load(f)
        
        # We use the model we defined in Step 2 here
        product = FashionProduct(**data) 
        return product
        
    except ValidationError as e:
        print(f"Validation failed for {file_path}")
        return None

Define a logic to handle validation errors

In [13]:
from pydantic import ValidationError

def handle_product_validation(raw_data: dict):
    """
    Logic to process raw data and handle specific validation errors.
    """
    try:
        # Attempt to create the model
        product = FashionProduct(**raw_data)
        print(f"‚úÖ Success: '{product.productDisplayName}' is valid.")
        return product

    except ValidationError as e:
        print(f"‚ùå Validation Failed with {len(e.errors())} errors:")
        
        # This is the "Logic" part: looping through the error list
        for error in e.errors():
            # 'loc' tells us which field failed (e.g., ('price',))
            field_name = error['loc'][0]
            # 'msg' tells us what went wrong (e.g., 'value is not a valid integer')
            error_message = error['msg']
            
            print(f"   üëâ Field: [{field_name.upper()}] | Issue: {error_message}")
        
        return None

Return validated data

    Returning a Pydantic Object

In [14]:
def get_validated_product(json_file_path):
    try:
        with open(json_file_path, 'r') as f:
            data = json.load(f)
        
        # This returns the data as a FashionProduct OBJECT
        return FashionProduct(**data) 
    except ValidationError as e:
        print(f"Validation Error: {e}")
        return None

# Usage:
product = get_validated_product("valid_product.json")
if product:
    print(product.productDisplayName)  # Dot notation access

Turtle Check Men Navy Blue Shirt


Returning a Cleaned Dictionary (.model_dump())

In [15]:
def get_validated_dict(json_file_path):
    try:
        with open(json_file_path, 'r') as f:
            data = json.load(f)
        
        validated_obj = FashionProduct(**data)
        
        # RETURNS as a clean dictionary for the API
        return validated_obj.model_dump() 
    except ValidationError:
        return None

# Usage:
clean_data = get_validated_dict("valid_product.json")
# This clean_data is now ready to be sent to ChatGPT!

Step 4: Integrating with ChatGPT API

Import your previous ChatGPT API code

In [16]:
import os
import base64
from openai import OpenAI
from dotenv import load_dotenv

# Load environment variables (Security first!)
load_dotenv("../.env")
api_key = os.getenv("OPENAI_API_KEY")

if api_key:
    print("‚úÖ API Key successfully loaded!")
else:
    print("‚ùå API Key not found. Check your .env file location.")

client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

def encode_image(image_path):
    """Encodes a local image to base64 for GPT-4 Vision."""
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')

def generate_product_listing(validated_data):
    """
    Takes the VALIDATED Pydantic object and calls ChatGPT.
    """
    # 1. Encode the image path found in our validated data
    base64_image = encode_image(validated_data.image_url)
    
    # 2. Build the prompt using validated fields
    # Notice we use dot notation (validated_data.price)
    prompt = f"""
    Create a professional e-commerce listing for: {validated_data.productDisplayName}.
    Category: {validated_data.masterCategory}
    Price: ${validated_data.price}
    Base Color: {validated_data.baseColour}
    Usage: {validated_data.usage}
    
    Format the output as JSON with: title, description, features (list), keywords.
    """

    # 3. Call the API
    response = client.chat.completions.create(
        model="gpt-4o", # Or gpt-4-vision-preview
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}
                    },
                ],
            }
        ],
        max_tokens=500
    )
    
    return response.choices[0].message.content

‚úÖ API Key successfully loaded!


Validate input before processing

In [17]:
def process_product_listing(file_path):
    """
    Step 4: The full pipeline. 
    Loads JSON -> Validates via Pydantic -> Calls ChatGPT only if valid.
    """
    # 1. Attempt to load and validate (using the function we built in Step 3)
    # This checks categories, price, and ID logic
    product_data = load_and_validate_product(file_path)

    # 2. VALIDATION CHECK: This is the 'Logic' part of the checkpoint
    if product_data is None:
        print(f"üõë PROCESS ABORTED for {file_path}: Invalid data detected.")
        return None

    # 3. IF VALID: Proceed to ChatGPT
    print(f"‚úÖ PROCESS STARTED for {product_data.productDisplayName}...")
    
    try:
        # Call the API using the validated fields
        listing_result = generate_product_listing(product_data)
        
        print("‚ú® Listing successfully generated!")
        return listing_result

    except Exception as e:
        print(f"‚ùå API Error: Something went wrong during the ChatGPT call: {e}")
        return None

# --- TESTING THE GATEKEEPER ---
# Run it with your two files from Step 3
print("--- TEST 1: The Valid File ---")
process_product_listing("valid_product.json")

print("\n--- TEST 2: The Invalid File ---")
process_product_listing("invalid_product.json")

--- TEST 1: The Valid File ---

üîç Loading and Validating: valid_product.json
‚úÖ VALIDATION SUCCESS: 'Turtle Check Men Navy Blue Shirt' is ready.
‚úÖ PROCESS STARTED for Turtle Check Men Navy Blue Shirt...
‚ùå API Error: Something went wrong during the ChatGPT call: [Errno 2] No such file or directory: 'images/15970.jpg'

--- TEST 2: The Invalid File ---

üîç Loading and Validating: invalid_product.json
‚ùå DATA VALIDATION ERROR in 'invalid_product.json':
   - PRODUCTDISPLAYNAME: String should have at least 5 characters
   - USAGE: Value error, Invalid usage type. Must be one of: ['Casual', 'Formal', 'Sports', 'Ethnic', 'Party', 'Travel']
üõë PROCESS ABORTED for invalid_product.json: Invalid data detected.


Process only validated requests

In [18]:
import os

def process_single_product(json_file):
    print(f"\n--- üöÄ Processing: {json_file} ---")
    
    # 1. Structural & Business Logic Validation (Pydantic)
    product = load_and_validate_product(json_file)
    
    if not product:
        print(f"üõë ABORTED: {json_file} failed data validation. No API call made.")
        return None

    # 2. Physical Asset Validation (File System check)
    if not os.path.exists(product.image_url):
        print(f"üõë ABORTED: Image '{product.image_url}' not found. No API call made.")
        return None

    # 3. Only now do we proceed to the "Expensive" step
    print(f"üíé Data & Assets Verified. Calling ChatGPT for {product.productDisplayName}...")
    try:
        listing = generate_product_listing(product)
        print("‚ú® Listing Generated Successfully!")
        return listing
    except Exception as e:
        print(f"‚ùå API Error: {e}")
        return None

Return appropriate responses

In [19]:
def process_single_product(json_file):
    # 1. Validation Logic
    product = load_and_validate_product(json_file)
    
    # Check 1: Did Pydantic fail?
    if not product:
        return {"status": "error", "reason": "Data Validation Failed"}

    # Check 2: Is the image missing?
    if not os.path.exists(product.image_url):
        return {"status": "error", "reason": f"File Not Found: {product.image_url}"}

    # 2. Execution Logic
    try:
        listing = generate_product_listing(product)
        # SUCCESS: Return the actual data
        return {"status": "success", "data": listing, "product_id": product.id}
    
    except Exception as e:
        # API FAILURE: Return the technical error
        return {"status": "error", "reason": f"API Error: {str(e)}"}

# --- How to use these responses ---
result = process_single_product("valid_product.json")

if result["status"] == "success":
    print(f"üéâ Success for ID {result['product_id']}!")
    # Save the 'data' to your final report file
else:
    print(f"‚ö†Ô∏è Skipped: {result['reason']}")


üîç Loading and Validating: valid_product.json
‚úÖ VALIDATION SUCCESS: 'Turtle Check Men Navy Blue Shirt' is ready.
‚ö†Ô∏è Skipped: File Not Found: images/15970.jpg


Validate that the output is also in the stardadize format (i.e using Pydantic models)

Define the output model

In [20]:
from pydantic import BaseModel, Field
from typing import List

class AIProductListing(BaseModel):
    title: str = Field(..., min_length=10)
    description: str = Field(..., min_length=50)
    features: List[str] = Field(..., min_items=3)
    keywords: List[str]

C:\Users\pbiai\AppData\Local\Temp\ipykernel_24992\1298919023.py:7: PydanticDeprecatedSince20: `min_items` is deprecated and will be removed, use `min_length` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.12/migration/
  features: List[str] = Field(..., min_items=3)


2. Refine the Prompt (Forcing JSON)

In [21]:
def generate_product_listing(validated_data):
    # ... (previous setup code) ...
    
    prompt = f"Create a professional listing for {validated_data.productDisplayName}."
    
    response = client.chat.completions.create(
        model="gpt-4o",
        # We tell the API we want JSON back
        response_format={ "type": "json_object" }, 
        messages=[
            {"role": "system", "content": "You are a marketing expert that outputs strictly JSON matching the requested schema."},
            {"role": "user", "content": prompt}
        ]
    )
    return response.choices[0].message.content

3. The Validation Logic for AI Output

In [22]:
import json

def process_and_validate_ai_output(validated_input_data):
    # 1. Get raw string from AI
    raw_ai_response = generate_product_listing(validated_input_data)
    
    try:
        # 2. Convert string to dict
        ai_data_dict = json.loads(raw_ai_response)
        
        # 3. VALIDATE against our second Pydantic model
        final_listing = AIProductListing(**ai_data_dict)
        
        print("‚úÖ AI Output is Standardized and Valid!")
        return final_listing.model_dump()

    except ValidationError as e:
        print("‚ùå AI returned invalid formatting:")
        print(e.json())
        return None

Full loop script

In [23]:
import os
import json
from pydantic import BaseModel, Field, ValidationError, field_validator
from typing import List
from openai import OpenAI
from dotenv import load_dotenv

# --- STEP 2 & 5: MODELS ---

class FashionProduct(BaseModel):
    """Input Model: Validates your local JSON data."""
    id: int = Field(..., gt=0)
    productDisplayName: str = Field(..., min_length=5)
    masterCategory: str
    subCategory: str
    baseColour: str
    usage: str
    price: float = Field(..., gt=0, lt=5000)
    image_url: str

    @field_validator('usage')
    @classmethod
    def validate_usage(cls, v):
        allowed = ['Casual', 'Formal', 'Sports', 'Ethnic', 'Party', 'Travel']
        if v not in allowed:
            raise ValueError(f"Invalid usage. Must be one of {allowed}")
        return v

class AIProductListing(BaseModel):
    """Output Model: Validates that ChatGPT's response is correct."""
    title: str = Field(..., min_length=10)
    description: str = Field(..., min_length=50)
    features: List[str] = Field(..., min_items=3)
    keywords: List[str]

# --- STEP 4: API LOGIC ---

load_dotenv("../.env")
api_key = os.getenv("OPENAI_API_KEY")
client = OpenAI(api_key=api_key)

def call_chatgpt(product: FashionProduct):
    """Calls ChatGPT and requests a JSON response."""
    prompt = f"Create a professional listing for {product.productDisplayName}. Price: ${product.price}."
    
    response = client.chat.completions.create(
        model="gpt-4o",
        response_format={ "type": "json_object" }, # Forces JSON
        messages=[
            {"role": "system", "content": "Return a JSON object with keys: title, description, features (list), keywords."},
            {"role": "user", "content": prompt}
        ]
    )
    return response.choices[0].message.content

# --- THE CHECKPOINT LOGIC ---

def process_pipeline(file_path):
    print(f"\n--- üõ† Processing: {file_path} ---")
    
    # 1. Reject Invalid Input before API Call
    try:
        with open(file_path, 'r') as f:
            raw_data = json.load(f)
        product_input = FashionProduct(**raw_data)
        print("‚úÖ Input Validated.")
    except (ValidationError, ValueError, FileNotFoundError) as e:
        print(f"üõë REJECTED: {e}")
        return None

    # 2. Process Valid Data
    try:
        ai_raw_response = call_chatgpt(product_input)
        
        # 3. Validate AI Output
        ai_json = json.loads(ai_raw_response)
        validated_output = AIProductListing(**ai_json)
        
        print("‚ú® AI Output Standardized & Validated!")
        return validated_output.model_dump()
        
    except Exception as e:
        print(f"‚ùå Graceful Error Handling: {e}")
        return None

# --- EXECUTION ---
process_pipeline("valid_product.json")
process_pipeline("invalid_product.json")


--- üõ† Processing: valid_product.json ---
‚úÖ Input Validated.


C:\Users\pbiai\AppData\Local\Temp\ipykernel_24992\540605924.py:33: PydanticDeprecatedSince20: `min_items` is deprecated and will be removed, use `min_length` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.12/migration/
  features: List[str] = Field(..., min_items=3)


‚ú® AI Output Standardized & Validated!

--- üõ† Processing: invalid_product.json ---
üõë REJECTED: 4 validation errors for FashionProduct
id
  Input should be greater than 0 [type=greater_than, input_value=-1, input_type=int]
    For further information visit https://errors.pydantic.dev/2.12/v/greater_than
productDisplayName
  String should have at least 5 characters [type=string_too_short, input_value='Tee', input_type=str]
    For further information visit https://errors.pydantic.dev/2.12/v/string_too_short
usage
  Value error, Invalid usage. Must be one of ['Casual', 'Formal', 'Sports', 'Ethnic', 'Party', 'Travel'] [type=value_error, input_value='Space-Travel', input_type=str]
    For further information visit https://errors.pydantic.dev/2.12/v/value_error
price
  Input should be less than 5000 [type=less_than, input_value=9999.0, input_type=float]
    For further information visit https://errors.pydantic.dev/2.12/v/less_than


Testing and saving valid and invalid json input

In [24]:
import json

# Create Valid File
valid_data = {
    "id": 15970,
    "productDisplayName": "Turtle Check Men Navy Blue Shirt",
    "masterCategory": "Apparel",
    "subCategory": "Topwear",
    "baseColour": "Navy Blue",
    "usage": "Casual",
    "price": 35.99,
    "image_url": "images/15970.jpg"
}
with open('valid_product.json', 'w') as f:
    json.dump(valid_data, f, indent=4)

# Create Invalid File
invalid_data = {
    "id": -1,
    "productDisplayName": "T",
    "masterCategory": "Apparel",
    "subCategory": "Topwear",
    "baseColour": "Green",
    "usage": "Swimming",
    "price": -5.0,
    "image_url": "images/missing.jpg"
}
with open('invalid_product.json', 'w') as f:
    json.dump(invalid_data, f, indent=4)

print("üìÇ Request files created successfully!")

üìÇ Request files created successfully!
