# Library Imports

In [44]:
# ================================
# Library Imports
# ================================

from dotenv import load_dotenv # Used to load environment variables from a .env file

import os          # Used to access environment variables (e.g., API keys)
import json        # Used to parse and generate JSON data


# ================================
# OpenAI SDK Imports
# ================================

from openai import OpenAI      # Main client class to interact with OpenAI API
from openai import APIError    # Exception class to handle API-related errors


# ================================
# Pydantic Imports (Data Validation)
# ================================

from pydantic import BaseModel         # Base class for creating data models
from pydantic import field_validator   # Used to create custom validation logic for model fields
from pydantic import ValidationError   # Exception raised when validation fails


# ================================
# Typing Imports (Type Hints)
# ================================

from typing import List       # Used to define list types (e.g., List[str])
from typing import Optional   # Used to define optional fields (e.g., Optional[str])



# Load environment variables

In [45]:
# Load environment variables
load_dotenv()
print(os.getenv("OPENAI_API_KEY") is not None)



True


# OpenAI (for testing only)

In [46]:
client = OpenAI() 
response = client.responses.create( model="gpt-4.1-mini", input="Hello! Respond with one short sentence." ) 
print(response.output_text)

Hello! How can I assist you today?


# Product Data Model

## Purpose

Defines a structured product schema using Pydantic to guarantee:

- Strict type validation
- Required field enforcement
- Business rule validation (e.g., positive price)
- Clear and structured error reporting

---

## What It Does

The `Product` class represents a validated product entity.

When a `Product` object is created:

1. Pydantic checks all field types.
2. Ensures all required fields are present.
3. Executes custom validators (e.g., price validation).
4. Raises a structured `ValidationError` if any rule is violated.

This prevents invalid data from entering the processing pipeline.

---

## Inputs

A valid product must include:

- `id` (str) — Unique product identifier  
- `name` (str) — Product name  
- `category` (str) — Product category  
- `price` (float) — Must be strictly greater than zero  
- `features` (List[str]) — A list of feature descriptions  

Example:

```python
Product(
    id="101",
    name="Smartphone",
    category="Electronics",
    price=699.99,
    features=["5G", "128GB storage"]
)


In [47]:
class Product(BaseModel):
    """
    Product data model with strict validation rules.

    This model ensures that every product:
    - Has the correct data types
    - Contains all required fields
    - Respects business rules (e.g., positive price)
    """

    # Unique product identifier
    id: str

    # Product display name
    name: str

    # Product category (e.g., Electronics, Stationery)
    category: str

    # Product price (must be a positive number)
    price: float

    # List of product features (can be empty, but must be a list of strings)
    features: List[str]

    @field_validator("price")
    @classmethod
    def price_must_be_positive(cls, value: float) -> float:
        """
        Validates that the product price is strictly positive.

        Args:
            value (float): The price value provided during model creation.

        Returns:
            float: The validated price.

        Raises:
            ValueError: If the price is zero or negative.
        """
        if value <= 0:
            raise ValueError("Price must be positive")

        return value


# Helpers

# load_json_file()

## Purpose

Loads and parses a JSON file while providing clear, structured, and actionable error messages.

This function ensures that file system and JSON parsing errors are handled explicitly and never fail silently.

---

## What It Does

- Opens a file using UTF-8 encoding.
- Parses its contents as JSON.
- Returns the parsed content as a Python dictionary.
- If an error occurs, prints a detailed contextual message.
- Re-raises the exception to allow higher-level error handling.

---

## Inputs

- `file_path` (str): The path to the JSON file to load.

---

## Outputs

If successful:
- Returns a dictionary containing the parsed JSON data.

If an error occurs:
- Prints a structured error message.
- Re-raises the original exception.

---

## Error Handling

### FileNotFoundError
Triggered when the file does not exist.

The message includes:
- Function name
- File path
- System error message
- Current working directory
- Suggestion for correction

---

### JSONDecodeError
Triggered when the file contains invalid JSON syntax.

The message includes:
- File name
- Line number
- Column number
- Parsing message
- Suggestion to fix the JSON structure

---

### Generic Exception
Handles unexpected issues such as:
- Permission errors
- Corrupted file content
- Encoding problems

The error is reported with context and re-raised.

---

## Design Decision

The exception is re-raised after printing the message to ensure:

- No silent failures
- Proper propagation to the main orchestration layer
- Accurate detection during integration testing


In [48]:
def load_json_file(file_path: str) -> dict:
    """
    Load and parse a JSON file with structured error handling.

    Args:
        file_path (str): Path to the JSON file to be loaded.

    Returns:
        dict: Parsed JSON content as a Python dictionary.

    Raises:
        FileNotFoundError: If the file does not exist.
        json.JSONDecodeError: If the file contains invalid JSON.
        Exception: For any other unexpected errors.
    """

    function_name = "load_json_file"

    try:
        # Open the file in read mode with UTF-8 encoding
        with open(file_path, "r", encoding="utf-8") as f:
            # Parse and return JSON content
            return json.load(f)

    except FileNotFoundError as e:
        # Handle missing file errors
        error_msg = (
            f"ERROR in {function_name}(): {type(e).__name__}\n"
            f"  Location: File '{file_path}' not found\n"
            f"  Message: {str(e)}\n"
            f"  Suggestion: Verify the file path and ensure the file exists "
            f"(Current directory: {os.getcwd()})"
        )
        print(error_msg)
        raise

    except json.JSONDecodeError as e:
        # Handle malformed JSON syntax
        error_msg = (
            f"ERROR in {function_name}(): {type(e).__name__}\n"
            f"  Location: File '{file_path}', line {e.lineno}, column {e.colno}\n"
            f"  Message: {e.msg}\n"
            f"  Suggestion: Fix JSON syntax near line {e.lineno}"
        )
        print(error_msg)
        raise

    except Exception as e:
        # Catch any other unexpected errors
        error_msg = (
            f"ERROR in {function_name}(): {type(e).__name__}\n"
            f"  Location: File '{file_path}'\n"
            f"  Message: {str(e)}\n"
            f"  Suggestion: Check file permissions or content integrity"
        )
        print(error_msg)
        raise


# validate_product_data()

## Purpose

Validates raw product data using the `Product` Pydantic model and provides structured, field-level error reporting.

This function ensures that invalid product entries are clearly identified and safely excluded from further processing.

---

## What It Does

- Attempts to construct a validated `Product` instance from a dictionary.
- If validation succeeds:
  - Returns the validated `Product` object.
- If validation fails:
  - Extracts detailed field-level error information.
  - Prints a structured, user-friendly error message.
  - Returns `None` to indicate failure.

---

## Inputs

- `product_dict` (dict): Raw product data loaded from JSON.

---

## Outputs

If valid:
- Returns a `Product` instance.

If invalid:
- Prints a structured error message.
- Returns `None`.

---

## Error Handling

### ValidationError (from Pydantic)

Triggered when:
- Required fields are missing.
- Field types are incorrect.
- Custom validators (e.g., price validation) fail.

The function:

- Identifies the product ID (if available).
- Extracts field-specific error messages.
- Combines them into a readable format.
- Prints a structured diagnostic message.
- Returns `None` instead of raising the exception.

---

## Design Decision

Unlike file-loading errors, validation errors do not stop the entire system.

Instead:
- Invalid products are skipped.
- Valid products continue to be processed.
- The system remains resilient and fault-tolerant.

This enables partial success while still providing full diagnostic transparency.


In [49]:
def validate_product_data(product_dict: dict) -> Optional[Product]:
    """
    Validate raw product data using the Product Pydantic model.

    Args:
        product_dict (dict): Dictionary containing raw product data.

    Returns:
        Optional[Product]:
            - A validated Product instance if data is valid.
            - None if validation fails.
    """

    function_name = "validate_product_data"

    try:
        # Attempt to create a validated Product instance
        return Product(**product_dict)

    except ValidationError as e:
        # Extract product identifier for clearer diagnostics
        product_id = product_dict.get("id", "unknown")

        # Build detailed field-level error messages
        field_errors = []
        for error in e.errors():
            # Extract nested field path (if applicable)
            field = ".".join(str(loc) for loc in error["loc"])
            message = error["msg"]
            field_errors.append(f"{field}: {message}")

        # Combine all field errors into a single readable message
        combined_message = "; ".join(field_errors)

        error_msg = (
            f"ERROR in {function_name}(): {type(e).__name__}\n"
            f"  Location: Product ID '{product_id}'\n"
            f"  Message: {combined_message}\n"
            f"  Suggestion: Fix the invalid fields listed above"
        )

        print(error_msg)

        # Return None to signal validation failure without stopping execution
        return None



# create_product_prompt()

## Purpose

Generates a structured and persuasive prompt for the OpenAI API using validated product data.

This function transforms structured product information into a natural-language instruction suitable for AI-based description generation.

---

## What It Does

- Extracts product attributes (name, category, price, features).
- Converts the features list into readable text.
- Builds a formatted prompt string.
- Returns the prompt for use in the API request.

---

## Inputs

- `product` (Product): A validated `Product` instance.

The function assumes that validation has already been performed.

---

## Outputs

If successful:
- Returns a formatted string containing the AI prompt.

If an error occurs:
- Prints a structured error message.
- Raises a `ValueError`.

---

## Error Handling

Although the input should already be validated, the function includes defensive error handling to catch unexpected issues such as:

- Missing attributes
- Incorrect data types
- Formatting errors

If such an error occurs:

- A detailed message is printed including the product ID.
- A `ValueError` is raised to propagate the failure upward.

---

## Design Decision

This function focuses only on prompt construction.

It does not:
- Validate product data (handled earlier)
- Call the API
- Process responses

This separation of concerns improves modularity and testability.


In [50]:
def create_product_prompt(product: Product) -> str:
    """
    Generate a structured prompt for OpenAI based on a validated Product.

    Args:
        product (Product): A validated Product instance.

    Returns:
        str: A formatted prompt ready to be sent to the OpenAI API.

    Raises:
        ValueError: If prompt generation fails due to invalid product data.
    """

    function_name = "create_product_prompt"

    try:
        # Convert features list into readable text
        features_text = (
            ", ".join(product.features)
            if product.features
            else "No specific features listed"
        )

        # Build structured prompt for AI generation
        prompt = (
            "Create a compelling product description for the following product:\n\n"
            f"Name: {product.name}\n"
            f"Category: {product.category}\n"
            f"Price: ${product.price:.2f}\n"
            f"Features: {features_text}\n\n"
            "The description should be persuasive, clear, and suitable for an online store."
        )

        return prompt

    except Exception as e:
        # Catch unexpected attribute or formatting errors
        error_msg = (
            f"ERROR in {function_name}(): {type(e).__name__}\n"
            f"  Location: Product '{getattr(product, 'id', 'unknown')}'\n"
            f"  Message: Failed to generate prompt due to invalid or incomplete product data\n"
            f"  Suggestion: Ensure the product object contains valid attributes "
            f"(name, category, price, features)"
        )

        print(error_msg)

        # Re-raise as ValueError to maintain consistent error handling
        raise ValueError(error_msg) from e


# parse_api_response()

## Purpose

Extracts and validates the generated text content from an OpenAI API response.

This function ensures that the response structure is correct and that the returned content is usable.

---

## What It Does

- Attempts to access `choices[0].message.content` from the API response.
- Verifies that the content is not empty.
- Returns a cleaned version of the text.
- Detects and reports malformed or unexpected response structures.

---

## Inputs

- `response`: The raw response object returned by the OpenAI API.

---

## Outputs

If valid:
- Returns a stripped string containing the generated description.

If invalid:
- Prints a structured error message.
- Raises a `ValueError`.

---

## Error Handling

### Empty Content

If the API returns an empty string:

- Prints a structured diagnostic message.
- Raises a `ValueError`.
- Suggests verifying model output.

---

### Malformed Response Structure

If the response does not contain the expected structure:

- Catches `AttributeError`, `IndexError`, or `KeyError`.
- Prints a detailed structural error message.
- Raises a standardized `ValueError`.

---

## Design Decision

This function acts as a protective boundary between:

- External API responses
- Internal system logic

By validating structure and content explicitly, it prevents:

- Silent failures
- Corrupted downstream data
- Unexpected runtime crashes


In [51]:
def parse_api_response(response) -> str:
    """
    Extract and validate content from an OpenAI API response.

    Args:
        response: Raw response object returned by the OpenAI API.

    Returns:
        str: Cleaned text content generated by the model.

    Raises:
        ValueError: If the response structure is invalid or content is empty.
    """

    function_name = "parse_api_response"

    try:
        # Attempt to extract generated content
        content = response.choices[0].message.content

        # Validate non-empty content
        if not content:
            error_msg = (
                f"ERROR in {function_name}(): ValueError\n"
                f"  Location: API response content\n"
                f"  Message: API response content is empty\n"
                f"  Suggestion: Verify that the model returned a valid completion"
            )
            print(error_msg)
            raise ValueError(error_msg)

        # Return cleaned content
        return content.strip()

    except (AttributeError, IndexError, KeyError) as e:
        # Handle unexpected or malformed response structure
        error_msg = (
            f"ERROR in {function_name}(): {type(e).__name__}\n"
            f"  Location: API response structure (choices[0].message.content)\n"
            f"  Message: Unexpected or malformed response structure\n"
            f"  Suggestion: Ensure the API response contains "
            f"'choices[0].message.content'"
        )

        print(error_msg)

        # Raise standardized ValueError for consistent upstream handling
        raise ValueError(error_msg) from e



# format_output()

## Purpose

Formats the final processed product data into a structured dictionary suitable for saving or exporting.

This function standardizes the output structure of successfully processed products.

---

## What It Does

- Takes a validated `Product` instance.
- Takes a generated description string.
- Cleans the description by removing leading and trailing whitespace.
- Returns a structured dictionary with selected fields.

---

## Inputs

- `product` (Product): A validated product object.
- `description` (str): The AI-generated product description.

---

## Outputs

Returns a dictionary containing:

- `product_id`
- `name`
- `description`

The description is always returned in trimmed form.

---

## Error Handling

This function assumes:

- The product has already been validated.
- The description has already been parsed and validated.

No explicit error handling is included because:

- Validation occurs earlier in the pipeline.
- This function focuses solely on formatting.

---

## Design Decision

This function enforces a clear separation between:

- Data validation
- API interaction
- Output formatting

By isolating formatting logic, the system becomes easier to maintain and test.


In [52]:
def format_output(product: Product, description: str) -> dict:
    """
    Format the final structured output for a processed product.

    Args:
        product (Product): A validated Product instance.
        description (str): Generated product description text.

    Returns:
        dict: Structured output containing product ID, name, and cleaned description.
    """

    # Ensure description is clean and trimmed
    cleaned_description = description.strip()

    return {
        "product_id": product.id,
        "name": product.name,
        "description": cleaned_description
    }



# Modular Functions

## Function 1 - load_and_validate_products

# load_and_validate_products()

## Purpose

Loads product data from a JSON file and validates each product entry.

This function bridges raw JSON input and structured `Product` objects used by the system.

---

## What It Does

- Calls `load_json_file()` to retrieve raw JSON data.
- Verifies that the JSON contains a top-level `"products"` key.
- Ensures `"products"` is a list.
- Iterates through each product entry.
- Validates each product using `validate_product_data()`.
- Skips invalid products while continuing processing.
- Returns a list of valid `Product` instances.

---

## Inputs

- `json_path` (str): Path to the JSON file containing product data.

---

## Outputs

If successful:
- Returns a list of validated `Product` objects.
- Invalid products are excluded.

If structural errors occur:
- Prints a structured error message.
- Raises an exception.

---

## Error Handling

### Missing "products" Key

If the JSON file does not contain a `"products"` key:

- Prints a structured error message.
- Raises a `ValueError`.

---

### Incorrect "products" Type

If `"products"` is not a list:

- Prints a structured error message.
- Raises a `TypeError`.

---

### Product-Level Validation Errors

If an individual product fails validation:

- The error is printed by `validate_product_data()`.
- The product is skipped.
- Processing continues with remaining products.

---

### Unexpected Structural Errors

Any unexpected issue:

- Is reported with context.
- Is re-raised to allow upstream handling.

---

## Design Decision

This function is fault-tolerant at the product level:

- Invalid products do not stop the entire system.
- Structural errors (missing keys, wrong types) do stop execution.
- This balances resilience with strict schema enforcement.


In [53]:
def load_and_validate_products(json_path: str) -> List[Product]:
    """
    Load product data from a JSON file and validate each product.

    Args:
        json_path (str): Path to the JSON file containing product data.

    Returns:
        List[Product]: A list of validated Product instances.

    Raises:
        ValueError: If required JSON structure is missing.
        TypeError: If 'products' is not a list.
        Exception: For unexpected structural errors.
    """

    function_name = "load_and_validate_products"

    try:
        # Load raw JSON data (includes its own structured error handling)
        data = load_json_file(json_path)

        # Validate presence of top-level "products" key
        if "products" not in data:
            error_msg = (
                f"ERROR in {function_name}(): ValueError\n"
                f"  Location: File '{json_path}'\n"
                f"  Message: 'products' key not found in JSON data\n"
                f"  Suggestion: Ensure the JSON file contains a top-level 'products' list"
            )
            print(error_msg)
            raise ValueError(error_msg)

        # Ensure "products" is a list
        if not isinstance(data["products"], list):
            error_msg = (
                f"ERROR in {function_name}(): TypeError\n"
                f"  Location: File '{json_path}'\n"
                f"  Message: 'products' must be a list\n"
                f"  Suggestion: Ensure 'products' is defined as a list of product objects"
            )
            print(error_msg)
            raise TypeError(error_msg)

        validated_products: List[Product] = []

        # Validate each product individually
        for index, product_dict in enumerate(data["products"], start=1):
            product = validate_product_data(product_dict)

            if product:
                validated_products.append(product)
            else:
                # Product failed validation but system continues
                error_msg = (
                    f"ERROR in {function_name}(): ValidationError\n"
                    f"  Location: File '{json_path}', product index {index}\n"
                    f"  Message: Product failed validation and was skipped\n"
                    f"  Suggestion: Review validation errors above for details"
                )
                print(error_msg)
                continue

        return validated_products

    except Exception as e:
        # Catch unexpected structural errors and propagate
        error_msg = (
            f"ERROR in {function_name}(): {type(e).__name__}\n"
            f"  Location: File '{json_path}'\n"
            f"  Message: {str(e)}\n"
            f"  Suggestion: Verify JSON structure and product schema"
        )
        print(error_msg)
        raise



## Function 2 - generate_description

# generate_description()

## Purpose

Generates a product description by interacting with the OpenAI API.

This function connects validated internal product data to the external AI service and ensures structured error handling.

---

## What It Does

- Builds a prompt using `create_product_prompt()`.
- Sends a request to the OpenAI API.
- Parses the API response using `parse_api_response()`.
- Returns the generated description.

---

## Inputs

- `product` (Product): A validated product object.
- `api_client`: A configured OpenAI API client.

---

## Outputs

If successful:
- Returns a generated product description string.

If an error occurs:
- Prints a structured error message.
- Re-raises the exception.

---

## Error Handling

### APIError

Triggered when:
- Authentication fails.
- Rate limits are exceeded.
- Network errors occur.
- The API service returns an error.

The function:

- Prints detailed context including product name and ID.
- Suggests corrective actions.
- Re-raises the exception.

---

### Generic Exception

Triggered by:
- Internal logic errors.
- Unexpected API response structures.
- Downstream parsing failures.

The function:

- Prints contextual diagnostic information.
- Re-raises the error.

---

## Design Decision

This function acts as a controlled boundary between:

- Internal system logic
- External AI service

It ensures:
- Clear separation of concerns
- Contextualized error reporting
- Proper error propagation for integration testing


In [54]:
def generate_description(product: Product, api_client) -> str:
    """
    Generate a product description using the OpenAI API.

    Args:
        product (Product): A validated Product instance.
        api_client: Configured OpenAI API client.

    Returns:
        str: Generated product description text.

    Raises:
        APIError: If the API request fails.
        Exception: For unexpected internal or parsing errors.
    """

    function_name = "generate_description"

    try:
        # Step 1: Create structured prompt from product data
        prompt = create_product_prompt(product)

        # Step 2: Send request to OpenAI API
        response = api_client.chat.completions.create(
            model="gpt-4",
            messages=[{"role": "user", "content": prompt}]
        )

        # Step 3: Extract and validate response content
        return parse_api_response(response)

    except APIError as e:
        # Handle API-level failures (authentication, rate limits, network issues)
        error_msg = (
            f"ERROR in {function_name}(): {type(e).__name__}\n"
            f"  Location: Product '{product.name}' (ID: {product.id})\n"
            f"  Message: {str(e)}\n"
            f"  Suggestion: Check API key, rate limits, network connection, or try again later"
        )
        print(error_msg)
        raise

    except Exception as e:
        # Handle unexpected processing or response parsing errors
        error_msg = (
            f"ERROR in {function_name}(): {type(e).__name__}\n"
            f"  Location: Product '{product.name}' (ID: {product.id})\n"
            f"  Message: {str(e)}\n"
            f"  Suggestion: Check API response structure or internal processing logic"
        )
        print(error_msg)
        raise



## Function 3 - process_products

# process_products()

## Purpose

Processes a list of validated products by generating AI descriptions and formatting the results.

This function coordinates the main processing loop of the system.

---

## What It Does

For each product:

1. Calls `generate_description()` to request an AI-generated description.
2. Formats the result using `format_output()`.
3. Appends the formatted output to the results list.
4. If an error occurs, logs the error and continues processing the remaining products.

---

## Inputs

- `products` (List[Product]): A list of validated product objects.
- `api_client`: A configured OpenAI API client.

---

## Outputs

Returns a list of dictionaries containing:

- Product ID
- Product name
- Generated description

Only successfully processed products are included.

---

## Error Handling

If a product fails during processing:

- The error is printed with contextual information.
- The failure does not stop the entire process.
- The system continues with remaining products.

This ensures fault tolerance at the product level.

---

## Design Decision

This function is intentionally resilient:

- One product failure does not break the whole batch.
- Partial success is allowed.
- Errors are visible and traceable.

This design improves robustness in real-world scenarios where:
- API calls may fail intermittently.
- Some products may contain unexpected issues.


In [55]:
def process_products(products: List[Product], api_client) -> List[dict]:
    """Process all products and generate descriptions."""
    function_name = "process_products"

    results: List[dict] = []

    for product in products:
        try:
            description = generate_description(product, api_client)
            formatted_output = format_output(product, description)
            results.append(formatted_output)

        except Exception as e:
            error_msg = (
                f"ERROR in {function_name}(): {type(e).__name__}\n"
                f"  Location: Product '{product.name}' (ID: {product.id})\n"
                f"  Message: {str(e)}\n"
                f"  Suggestion: Check product data or API response handling"
            )
            print(error_msg)
            continue

    return results



## Function 4 - save_results

# save_results()

## Purpose

Saves processed product descriptions to a JSON file.

This function handles the final persistence step of the system.

---

## What It Does

- Opens the specified output file in write mode.
- Serializes the results list into JSON format.
- Writes formatted JSON with indentation for readability.
- Supports UTF-8 encoding to preserve special characters.

---

## Inputs

- `results` (List[dict]): A list of formatted product dictionaries.
- `output_path` (str): The file path where results will be saved.

---

## Outputs

- Creates or overwrites a JSON file at the specified path.
- Returns nothing.

---

## Error Handling

### OSError

Triggered when:
- The file path is invalid.
- The program lacks write permissions.
- There is insufficient disk space.

The function:

- Prints a structured error message.
- Includes the output path and system error.
- Suggests possible corrective actions.
- Re-raises the error for upstream handling.

---

## Design Decision

This function isolates file system interaction from business logic.

Benefits:
- Clear separation of concerns.
- Easier debugging of file-related issues.
- Clean propagation of persistence errors to the orchestration layer.


In [56]:
def save_results(results: List[dict], output_path: str) -> None:
    """
    Save processed product results to a JSON file.

    Args:
        results (List[dict]): List of formatted product output dictionaries.
        output_path (str): Path to the output JSON file.

    Raises:
        OSError: If the file cannot be written.
    """

    function_name = "save_results"

    try:
        # Open output file in write mode (overwrites existing file)
        with open(output_path, "w", encoding="utf-8") as f:
            # Write JSON with indentation for readability
            json.dump(results, f, indent=2, ensure_ascii=False)

    except OSError as e:
        # Handle file system errors (permissions, disk space, invalid path)
        error_msg = (
            f"ERROR in {function_name}(): {type(e).__name__}\n"
            f"  Location: Output file '{output_path}'\n"
            f"  Message: {str(e)}\n"
            f"  Suggestion: Check file permissions, disk space, or file path"
        )

        print(error_msg)

        # Re-raise standardized OSError
        raise OSError(error_msg) from e




# main function

# main()

## Purpose

Acts as the primary orchestration layer of the application.

This function coordinates the entire product description generation pipeline from input loading to output persistence.

---

## What It Does

1. Configures the OpenAI API client using an environment variable.
2. Loads and validates product data from a JSON file.
3. Processes validated products to generate AI descriptions.
4. Saves the processed results to an output file.
5. Reports successful completion.

---

## Inputs

- `input_file` (str): Path to the JSON file containing product data.
- `output_file` (str): Path where generated descriptions will be saved.

Both parameters have default values.

---

## Outputs

- Writes processed results to the specified output file.
- Prints a summary message indicating the number of successfully processed products.
- Returns nothing.

---

## Error Handling

### EnvironmentError

Raised if the required API key is not configured in the environment.

---

### Propagated Errors

This function intentionally does not suppress exceptions from:

- File loading
- JSON parsing
- Product validation
- API calls
- File writing

This ensures:

- Clear error visibility
- Proper integration test detection
- No silent failures

---

## Design Decision

The `main()` function focuses exclusively on orchestration.

It does not:

- Contain business logic
- Perform validation directly
- Handle low-level errors

All detailed logic and error handling are delegated to modular functions.

This design ensures:

- High modularity
- Clear separation of concerns
- Strong testability
- Maintainable architecture


In [57]:
def main(input_file: str = "products.json",
         output_file: str = "results.json") -> None:
    """
    Main orchestration function for the product description pipeline.

    Args:
        input_file (str): Path to the input JSON file containing product data.
        output_file (str): Path to the output JSON file for processed results.

    Raises:
        EnvironmentError: If the required API key is not configured.
        Exception: Propagates errors from underlying modules.
    """

    # Step 1: Configure API client
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        raise EnvironmentError(
            "OPENAI_API_KEY environment variable is not set."
        )

    api_client = OpenAI(api_key=api_key)

    # Step 2: Load and validate product data
    products = load_and_validate_products(input_file)

    # Stop execution if no valid products were found
    if not products:
        print("No valid products to process.")
        return

    # Step 3: Generate descriptions for each product
    results = process_products(products, api_client)

    # Step 4: Save processed results to file
    save_results(results, output_file)

    # Step 5: Report successful completion
    print(f"Successfully processed {len(results)} product(s).")



# INTERGARTION TESTING

# Integration Test Logger

## Purpose

Provides a dedicated logging mechanism for integration testing.

This logger ensures that:

- All test runs are recorded.
- Previous logs are preserved.
- Test results are auditable.
- Console output is mirrored into a persistent log file.

---

## What It Does

- Creates a named logger (`integration_tests`).
- Configures it to log messages at the INFO level.
- Writes logs to `integration_test_results.log`.
- Uses append mode to prevent overwriting previous test runs.
- Prevents duplicate handlers when the notebook cell is re-executed.
- Provides a helper function to both print and log messages.

---

## Outputs

- Console output for immediate visibility.
- Persistent log entries including:
  - Timestamp
  - Log level
  - Message content

---

## Error Handling

This logger setup does not raise exceptions.

It safely initializes once and avoids duplicate logging handlers during notebook re-runs.

---

## Design Decision

A dedicated logger is used instead of the root logger to:

- Avoid interfering with application-level logging.
- Keep integration test evidence separate.
- Maintain clear separation between system behavior and testing infrastructure.

Append mode ensures the log file serves as:

- Proof of executed test cases.
- A historical audit trail.


In [58]:
# Dedicated logger for integration testing
test_logger = logging.getLogger("integration_tests")
test_logger.setLevel(logging.INFO)

# Prevent duplicate handlers when re-running the notebook cell
if not test_logger.handlers:

    # Create file handler in append mode (preserves previous test runs)
    file_handler = logging.FileHandler(
        "integration_test_results.log",
        mode="a",            # Append mode (never overwrite existing logs)
        encoding="utf-8"
    )

    # Define log message format
    formatter = logging.Formatter(
        "%(asctime)s - %(levelname)s - %(message)s"
    )

    file_handler.setFormatter(formatter)
    test_logger.addHandler(file_handler)


def log_and_print(message: str, level: str = "info") -> None:
    """
    Print a message to the console and log it to the integration test log file.

    Args:
        message (str): The message to display and log.
        level (str): Log level ("info" or "error").

    Returns:
        None
    """

    # Always print to console
    print(message)

    # Log message with appropriate severity level
    if level == "info":
        test_logger.info(message)
    elif level == "error":
        test_logger.error(message)



# run_integration_test()

## Purpose

Executes a full integration test of the product description pipeline.

This function captures all console output during execution and logs it to a persistent log file for audit and verification purposes.

---

## What It Does

1. Prints and logs a structured test header.
2. Redirects standard output to a temporary buffer.
3. Executes `main()` with the provided input file.
4. Captures all printed output from:
   - Helper functions
   - Modular functions
   - Error handling messages
5. Prints the captured output to the notebook.
6. Logs the full output to the integration test log file.
7. Logs the final test result.

---

## Inputs

- `test_name` (str): A descriptive label for the test case.
- `input_file` (str): Path to the input JSON file used in the test.

---

## Outputs

- Console display of full execution output.
- Persistent log entries including:
  - Test header
  - All printed system messages
  - Final test result

Returns nothing.

---

## Error Handling

If an exception occurs during execution:

- The output generated before the exception is still captured.
- The captured output is logged.
- The exception type is recorded as part of the test result.
- The system does not crash the notebook.

---

## Design Decision

This function provides controlled integration testing by:

- Capturing complete runtime output.
- Preserving structured error messages.
- Maintaining separation between application logic and testing logic.
- Providing verifiable test evidence.

Using `redirect_stdout` ensures that:

- All `print()` statements are captured.
- No modification of helper functions is required.
- Logging remains consistent and complete.


In [59]:
def run_integration_test(test_name: str, input_file: str) -> None:
    """
    Execute an integration test for the full pipeline.

    Captures all console output generated during execution
    and logs it to the integration test log file.

    Args:
        test_name (str): Descriptive name of the test case.
        input_file (str): Path to the input JSON file.

    Returns:
        None
    """

    # Print and log test header
    log_and_print("==========================================")
    log_and_print(f"INTEGRATION TEST — {test_name}")
    log_and_print("==========================================")

    # Buffer to capture printed output
    buffer = io.StringIO()

    try:
        # Capture all printed output from main() and helper functions
        with redirect_stdout(buffer):
            main(input_file)

        # Retrieve captured output
        full_output = buffer.getvalue()

        # Display captured output in notebook
        print(full_output)

        # Persist full captured output to log file
        test_logger.info(full_output.strip())

        log_and_print("TEST RESULT: SUCCESS")

    except Exception as e:
        # Capture output generated before the exception occurred
        full_output = buffer.getvalue()

        # Display captured output
        print(full_output)

        # Log full captured output
        test_logger.info(full_output.strip())

        # Log structured test result
        log_and_print(
            f"TEST RESULT: SUCCESS — Caught {type(e).__name__}"
        )



## TEST CASE 1: Valid Json

### Purpose: Verify the full pipeline works:
- Loads JSON
- Validates
- Calls API
- Saves results
- No errors raised

In [60]:
run_integration_test("VALID JSON", "products.json")




INTEGRATION TEST — VALID JSON
Successfully processed 3 product(s).

TEST RESULT: SUCCESS


# TEST CASE 2 — FILE NOT FOUND

## Purpose - Verify system handles missing file properly:

- Raises FileNotFoundError
- Helper prints detailed message
- Exception propagates

In [61]:
run_integration_test("FILE NOT FOUND", "missing_file.json")

INTEGRATION TEST — FILE NOT FOUND
ERROR in load_json_file(): FileNotFoundError
  Location: File 'missing_file.json' not found
  Message: [Errno 2] No such file or directory: 'missing_file.json'
  Suggestion: Verify the file path and ensure the file exists (Current directory: c:\Users\marco\ironhack-labs\week2\lab202v2)
ERROR in load_and_validate_products(): FileNotFoundError
  Location: File 'missing_file.json'
  Message: [Errno 2] No such file or directory: 'missing_file.json'
  Suggestion: Verify JSON structure and product schema

TEST RESULT: SUCCESS — Caught FileNotFoundError


# TEST CASE 3 — INVALID JSON

## Purpose: Verify malformed JSON is handled:

- Raises JSONDecodeError
- Shows line and column
- Propagates correctly

In [62]:
run_integration_test("INVALID JSON", "malformed.json")

INTEGRATION TEST — INVALID JSON
ERROR in load_json_file(): JSONDecodeError
  Location: File 'malformed.json', line 11, column 7
  Message: Expecting ',' delimiter
  Suggestion: Fix JSON syntax near line 11
ERROR in load_and_validate_products(): JSONDecodeError
  Location: File 'malformed.json'
  Message: Expecting ',' delimiter: line 11 column 7 (char 189)
  Suggestion: Verify JSON structure and product schema

TEST RESULT: SUCCESS — Caught JSONDecodeError


# TEST CASE 4 — INVALID PRODUCTS

## Purpose: Verify Pydantic validation errors:

- Raises ValidationError
- Shows invalid fields
- Propagates properly

In [63]:
run_integration_test("INVALID PRODUCT DATA", "invalid_products.json")


INTEGRATION TEST — INVALID PRODUCT DATA
ERROR in validate_product_data(): ValidationError
  Location: Product ID 'P100'
  Message: price: Value error, Price must be positive
  Suggestion: Fix the invalid fields listed above
ERROR in load_and_validate_products(): ValidationError
  Location: File 'invalid_products.json', product index 1
  Message: Product failed validation and was skipped
  Suggestion: Review validation errors above for details
ERROR in validate_product_data(): ValidationError
  Location: Product ID 'P101'
  Message: name: Field required
  Suggestion: Fix the invalid fields listed above
ERROR in load_and_validate_products(): ValidationError
  Location: File 'invalid_products.json', product index 2
  Message: Product failed validation and was skipped
  Suggestion: Review validation errors above for details
ERROR in validate_product_data(): ValidationError
  Location: Product ID 'unknown'
  Message: id: Field required
  Suggestion: Fix the invalid fields listed above
ERROR

# TEST CASE 5 — API ERROR

## Purpose: Verify API error handling during description generation:

- Raises API-related exception (e.g., APIError or AuthenticationError)
- Shows product name
- Shows error type
- Shows suggestion
- Propagates properly


In [64]:
log_and_print("==========================================")
log_and_print("INTEGRATION TEST — API ERROR")
log_and_print("==========================================")

# Save real key
real_api_key = os.getenv("OPENAI_API_KEY")

try:
    # Set invalid key
    os.environ["OPENAI_API_KEY"] = "invalid_key_for_testing"

    run_integration_test("API ERROR", "products.json")

finally:
    # Restore real key
    if real_api_key:
        os.environ["OPENAI_API_KEY"] = real_api_key


INTEGRATION TEST — API ERROR
INTEGRATION TEST — API ERROR


ERROR in generate_description(): AuthenticationError
  Location: Product 'Wireless Bluetooth Headphones' (ID: P001)
  Message: Error code: 401 - {'error': {'message': 'Incorrect API key provided: invalid_***********ting. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
  Suggestion: Check API key, rate limits, network connection, or try again later
ERROR in process_products(): AuthenticationError
  Location: Product 'Wireless Bluetooth Headphones' (ID: P001)
  Message: Error code: 401 - {'error': {'message': 'Incorrect API key provided: invalid_***********ting. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
  Suggestion: Check product data or API response handling
ERROR in generate_description(): AuthenticationError
  Location: Product 'Smart Watch' (ID: P002)
  Message: Error co