# Image Variant Generation with File-Based Prompts

This notebook:
1. Queries a database for deal IDs
2. Loads prompts from text files based on category/subcategory
3. Generates variant images using OpenAI
4. Stores images in S3
5. Implements async processing for efficiency


In [15]:
import pandas as pd
import os
import psycopg2
from dotenv import load_dotenv
import boto3
import time
import base64
import asyncio
import aiohttp
import tempfile
import sys
from botocore.exceptions import NoCredentialsError
from concurrent.futures import ThreadPoolExecutor
import subprocess
import json
from io import BytesIO
import requests
from openai import OpenAI
import random
import urllib.request
import logging
from tqdm.notebook import tqdm
import glob

# Load environment variables (AWS, Redshift and OpenAI settings are read from .env / the process environment)
load_dotenv()

# Configure AWS credentials
s3_client = boto3.client(
    's3',
    aws_access_key_id=os.getenv('AWS_ACCESS_KEY_ID'),
    aws_secret_access_key=os.getenv('AWS_SECRET_ACCESS_KEY')
)

# Set up logging to control verbosity
logging.basicConfig(level=logging.WARNING)

# Initialize OpenAI client (skipped when this cell is re-run and `client` already exists)
if 'client' not in locals():
    # Fail early with a readable message instead of a TypeError from slicing None.
    if not os.getenv('OPEN_AI_API_KEY'):
        raise RuntimeError("OPEN_AI_API_KEY is not set; add it to the environment or the .env file")
    client = OpenAI(api_key=os.getenv('OPEN_AI_API_KEY'))
    # Never echo any part of the key: cell output is saved with the notebook.
    print("OpenAI client initialized")


## Prompt Management System

Load prompts from text files based on category, subcategory, or vertical


In [16]:
# Pool of subject descriptions for the {subject} prompt placeholder.
# Each entry is a dict with a "subject_type" and a "hair" description;
# the 13 female entries come first, followed by the 12 male entries.
subjects = [
    {"subject_type": subject_type, "hair": hair}
    for subject_type, hairstyles in (
        (
            "solo_female",
            (
                "long, flowing, brunette",
                "short, pixie cut, blonde",
                "braided, shoulder length, black",
                "tied back, ponytail, red",
                "wavy, loose, auburn",
                "straight, long, dark brown",
                "bob cut, chestnut",
                "curly, tied up, light brown",
                "layered, shoulder length, blonde",
                "straight, medium length, black",
                "bob cut, red",
                "curly, long, dark brown",
                "wavy, shoulder length, light brown",
            ),
        ),
        (
            "solo_male",
            (
                "curly, medium length, dark blonde",
                "buzz cut, black",
                "wavy, shoulder length, brown",
                "short, spiky, blonde",
                "curly, tied back, auburn",
                "buzz cut, dark brown",
                "long, flowing, black",
                "medium length, tousled, light brown",
                "shaved, dark",
                "curly, tied up, chestnut",
                "short, neat, grey",
                "medium length, slicked back, black",
            ),
        ),
    )
    for hair in hairstyles
]

class PromptManager:
    """File-backed store of image-generation prompt templates.

    Every ``*.txt`` file in ``prompts_folder`` is read once at construction time
    and cached under a key derived from its file name (see ``normalize_key``).
    ``get_prompt`` then picks the most specific template available for a deal
    (sub-category, then category, then vertical, then ``default``) and fills in
    the ``{email_subject}``, ``{formatted_highlights}`` and ``{subject}``
    placeholders.
    """

    def __init__(self, prompts_folder="prompts"):
        """Remember the folder, attach the subject pool and load every template."""
        self.prompts_folder = prompts_folder
        self.prompts_cache = {}
        # Pool used for the {subject} placeholder: the module-level `subjects` list.
        self.subjects = subjects
        self.load_all_prompts()

    def load_all_prompts(self):
        """Load all prompt files into memory for faster access.

        If the folder does not exist it is created and nothing is loaded.
        Files that cannot be read are reported with ``print`` and skipped.
        """
        if not os.path.exists(self.prompts_folder):
            os.makedirs(self.prompts_folder)
            return

        for file_path in glob.glob(os.path.join(self.prompts_folder, "*.txt")):
            # splitext drops only the trailing extension; the cache key goes through
            # the same normalisation that lookups use, so the two cannot drift apart.
            stem = os.path.splitext(os.path.basename(file_path))[0]
            key = self.normalize_key(stem)

            try:
                with open(file_path, 'r', encoding='utf-8') as f:
                    self.prompts_cache[key] = f.read().strip()
            except (OSError, UnicodeDecodeError) as e:
                print(f"Error loading {file_path}: {str(e)}")

    def normalize_key(self, text):
        """Normalize text to match the file naming convention.

        Lower-cases the text and turns spaces and hyphens into underscores.
        Empty or non-string input (``None``, or the NaN pandas can produce for a
        missing value) yields ``""`` instead of raising.
        """
        if not isinstance(text, str) or not text:
            return ""
        return text.lower().replace(' ', '_').replace('-', '_')

    def get_random_subject_description(self):
        """Return one randomly chosen subject formatted as ``Label: value`` lines.

        Returns ``""`` when the subject pool is empty. Only the fields present
        (and non-empty) on the chosen subject are emitted, in a fixed order.
        """
        if not self.subjects:
            return ""

        subject = random.choice(self.subjects)

        labelled_fields = (
            ("Subject", "subject_type"),
            ("Pose", "pose"),
            ("Look", "look"),
            ("Attire", "attire"),
            ("Hair", "hair"),
        )
        return "\n".join(
            f"{label}: {subject[field]}"
            for label, field in labelled_fields
            if subject.get(field)
        )

    def get_prompt(self, vertical=None, category_name=None, sub_category_name=None,
                   email_subject="", formatted_highlights=""):
        """Get the appropriate prompt based on deal characteristics.

        Lookup priority: sub_category_name, category_name, vertical, then the
        ``default`` template. Placeholders that a template does not use are
        simply ignored.

        Returns:
            ``(prompt, source)`` where ``source`` is the cache key of the template
            that was used, or ``"fallback"`` when no template matched and a
            generic one-line prompt was produced instead.
        """
        candidates = []
        for name in (sub_category_name, category_name, vertical):
            key = self.normalize_key(name)
            if key:
                candidates.append(key)

        # Add default fallback
        candidates.append('default')

        for candidate in candidates:
            if candidate in self.prompts_cache:
                prompt = self.prompts_cache[candidate]

                prompt = prompt.replace('{email_subject}', email_subject or '')
                prompt = prompt.replace('{formatted_highlights}', formatted_highlights or '')

                # Draw a random subject only when the template asks for one; every
                # {subject} occurrence in the template gets the same description.
                if '{subject}' in prompt:
                    subject_description = self.get_random_subject_description()
                    prompt = prompt.replace('{subject}', subject_description)

                return prompt, candidate

        # If no prompt found, return a basic one
        basic_prompt = f"Create a high-quality promotional image for: {email_subject}"
        return basic_prompt, "fallback"

    def list_available_prompts(self):
        """List the cache keys of all loaded prompt files."""
        return list(self.prompts_cache.keys())

# Build the shared prompt manager; it reads every template from the "prompts" folder
prompt_manager = PromptManager(prompts_folder="prompts")


## Query Database for Deal IDs

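Execute SQL to get deal information including:
- deal ID (`id`) and `email_subject`
- position-0 image ID, URL and file extension (`image_id_pos_0`, `image_url_pos_0`, `extension`)
- category info (`category_name`, `sub_category_name`, `vertical`)
- revenue data for the last 7 days (`revenue_last_7_days`, `revenue_rank`)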


In [19]:
def get_deals_for_processing_revenue():
    """Fetch the deals to process from Redshift, ordered by recent revenue.

    Connection settings are read from the REDSHIFT_* environment variables.
    The query returns at most 250 rows with the columns id, email_subject,
    category_name, vertical, sub_category_name, revenue_last_7_days,
    revenue_rank, image_id_pos_0, image_url_pos_0 and extension.

    Returns:
        pandas.DataFrame with one row per selected deal.
    """
    # Establish connection to Redshift
    conn = psycopg2.connect(
        host=os.environ.get("REDSHIFT_HOST"),
        port=os.environ.get("REDSHIFT_PORT"),
        dbname=os.environ.get("REDSHIFT_DBNAME"),
        user=os.environ.get("REDSHIFT_USER"),
        password=os.environ.get("REDSHIFT_PASSWORD")
    )
    
    # Query to get deals by revenue
    query = """
WITH deal_revenue AS (
    SELECT
        t.deal_id,
        SUM(t.net) AS total_revenue
    FROM real.transactions t
    JOIN real.deal_voucher dv ON t.deal_id = dv.id
    WHERE t.order_date > TRUNC(SYSDATE - 8)
      AND dv.currency = 'GBP'
      AND t.brand_id = 1
      AND t.domain = 'WOWCHER'
    GROUP BY t.deal_id
)
SELECT
    CAST(dv.id AS INTEGER) AS id,
    CASE 
        WHEN dvc.canonical_path_type = 'NATIONAL' THEN dv.email_subject 
        WHEN dvc.canonical_path_type = 'LOCAL' THEN dv.email_subject 
        WHEN dvc.canonical_path_type = 'TRAVEL' THEN dv.email_subject 
    END AS email_subject,
    dvc.name AS category_name,
    dvc.canonical_path_type as vertical,
    dvsc.name AS sub_category_name,
    CAST(COALESCE(dr.total_revenue, 0) AS DECIMAL(10,2)) AS revenue_last_7_days,
    CAST(rank() OVER (ORDER BY COALESCE(dr.total_revenue, 0) DESC) AS INTEGER) AS revenue_rank,
    dvi.id AS image_id_pos_0,  -- Remove CAST to ensure it comes through as an integer
    'https://static.wowcher.co.uk/images/deal/' || dvi.deal_voucher_id || '/' || dvi.id || '.' || dvi.extension AS image_url_pos_0,
    dvi.extension
FROM real.deal_voucher dv
LEFT JOIN real.product p ON p.id = dv.id
LEFT JOIN deal_revenue dr ON dr.deal_id = dv.id
LEFT JOIN real.deal_voucher_site dvs ON dvs.deal_voucher_id = dv.id
LEFT JOIN real.deal_voucher_image dvi ON dvi.deal_voucher_id = dv.id AND dvi.position = 0
LEFT JOIN real.deal_voucher_category dvc ON dvc.id = dv.category_id
LEFT JOIN real.deal_voucher_sub_category dvsc ON dvsc.id = dv.sub_category_id
LEFT JOIN real.site s ON s.id = dv.deal_location_id AND s.site_name = 'National Deal'
WHERE trunc(dv.closing_date) >= trunc(sysdate) + 21
and p.status_id = 1
AND dv.currency = 'GBP'
AND dv.business_id not in (16456891,16512447)
AND NOT EXISTS (
    SELECT 1
      FROM temp.opt_image_variants oiv
      WHERE oiv.deal_voucher_id = dv.id
      AND (
      ((batch_name ILIKE '%manual%' AND status IN (1,3))
        or (batch_name = 'OPEN AI Images' AND status IN (1,3))
       OR (batch_name NOT IN ('Manual Opt', 'OPEN AI Images') AND status = 1)
        )
    )
)
AND dv.email_subject NOT ILIKE '%mystery%' 
and dv.email_subject not ilike '%lafufu%'
and dvc.canonical_path_type = 'NATIONAL'
GROUP BY dv.id, dv.email_subject, dvc.name, dvsc.name, dvc.canonical_path_type, dvi.id, dvi.deal_voucher_id, dvi.extension, dr.total_revenue, dv.deal_product
ORDER BY COALESCE(dr.total_revenue, 0) DESC
limit 250;

    """
    # Close the connection even when the query fails, so a bad run does not
    # leave a Redshift session open for the lifetime of the kernel.
    try:
        df = pd.read_sql(query, conn)
    finally:
        conn.close()
    return df

# Fetch the candidate deals, preview the first rows and report how many came back
deals_df = get_deals_for_processing_revenue()
display(deals_df.head())
print(f"{deals_df.shape[0]} deals found")


  df = pd.read_sql(query, conn)


Unnamed: 0,id,email_subject,category_name,vertical,sub_category_name,revenue_last_7_days,revenue_rank,image_id_pos_0,image_url_pos_0,extension
0,39323077,AirPods Gen 3 with Charging Case,Electronics,NATIONAL,Headphones & Earphones,3760.35,1,1702697,https://static.wowcher.co.uk/images/deal/39323...,jpg
1,39790815,Emma Original Mattress,Home,NATIONAL,Beds & Mattresses,1907.83,2,1702692,https://static.wowcher.co.uk/images/deal/39790...,jpg
2,32708136,3PC Hard Shell Suitcase Set,Luggage & Travel,NATIONAL,Suitcases & Sets,1255.68,3,1701076,https://static.wowcher.co.uk/images/deal/32708...,jpg
3,41154460,Alivio Kids Trampoline with Enclosure,Children and Baby,NATIONAL,Toys,1003.88,4,1704286,https://static.wowcher.co.uk/images/deal/41154...,jpg
4,39618758,Outdoor Garden Pergola,Garden,NATIONAL,Garden Furniture,850.4,5,1597790,https://static.wowcher.co.uk/images/deal/39618...,jpg


250 deals found


## S3 Upload Functions


In [20]:
def upload_to_s3(file_content, bucket_name, s3_key):
    """
    Store binary content in S3 with caching disabled.

    The Content-Type is chosen from the extension of ``s3_key`` (jpg/jpeg, png
    and webp are recognised; anything else is sent as application/octet-stream).

    Parameters:
    - file_content: Binary content of the file
    - bucket_name: S3 bucket name
    - s3_key: Path in S3 where file will be stored

    Returns:
    - Public URL of the object, always built as
      https://static.wowcher.co.uk/<s3_key> regardless of ``bucket_name``,
      or None when AWS credentials are not available
    """
    mime_by_suffix = {
        '.jpg': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.png': 'image/png',
        '.webp': 'image/webp',
    }
    try:
        _, suffix = os.path.splitext(s3_key)
        content_type = mime_by_suffix.get(suffix.lower(), 'application/octet-stream')

        s3_client.put_object(
            Body=file_content,
            Bucket=bucket_name,
            Key=s3_key,
            ContentType=content_type,
            CacheControl='no-cache, no-store, must-revalidate',
            Expires=0
        )
    except NoCredentialsError:
        print("Credentials not available")
        return None

    return f"https://static.wowcher.co.uk/{s3_key}"

## Image Generation Functions


In [21]:
def get_deal_data_for_image(deal_id, vertical, category_name, sub_category_name):
    """Get deal data needed for image generation.

    Runs three Redshift queries for ``deal_id`` (cleaned email subject, image
    URLs, up to three highlights) and resolves the prompt through the
    module-level ``prompt_manager``.

    Parameters:
    - deal_id: deal voucher id to look up
    - vertical: "NATIONAL" (up to 10 images), "LOCAL" (up to 5), anything else (up to 3);
      also used as a prompt lookup key
    - category_name / sub_category_name: prompt lookup keys

    Returns:
    - dict with keys 'prompt', 'prompt_source', 'image_urls',
      'original_extension' ("png" when the deal has no images),
      'highlights' and 'formatted_highlights'
    """
    # Establish connection to Redshift
    conn = psycopg2.connect(
        host=os.environ.get("REDSHIFT_HOST"),
        port=os.environ.get("REDSHIFT_PORT"),
        dbname=os.environ.get("REDSHIFT_DBNAME"),
        user=os.environ.get("REDSHIFT_USER"),
        password=os.environ.get("REDSHIFT_PASSWORD")
    )

    # try/finally so the connection is released even when one of the queries fails;
    # this function runs once per deal on a worker thread, so leaks add up quickly.
    try:
        # Get email subject with the 'PRICE DROP!' and '24HR' markers removed.
        # The inner REPLACE previously targeted 'email_subject DROP!', which the
        # CASE condition never matches, so 'PRICE DROP!' was left in the subject.
        email_subject_query = """
        SELECT 
            CASE 
                WHEN POSITION('PRICE DROP!' IN email_subject) > 0 
                THEN REPLACE(REPLACE(email_subject, 'PRICE DROP!', ''), '24HR', '') 
                ELSE REPLACE(email_subject, '24HR', '') 
            END as email_subject 
        FROM wowdwhprod.real.deal_voucher
        WHERE id = %s
        """
        with conn.cursor() as cur:
            cur.execute(email_subject_query, (deal_id,))
            email_subject_result = cur.fetchone()
            email_subject = email_subject_result[0] if email_subject_result else "Deal"

        # Set image limit based on vertical
        image_limit = 10 if vertical == "NATIONAL" else (5 if vertical == "LOCAL" else 3)

        # Get image URLs and extract extension information
        image_query = """
        SELECT 
            'https://static.wowcher.co.uk/images/deal/' || deal_voucher_id || '/' || id || '.' || extension AS image_url,
            extension
        FROM wowdwhprod.real.deal_voucher_image
        WHERE deal_voucher_id = %s
        ORDER BY position
        LIMIT %s
        """
        with conn.cursor() as cur:
            cur.execute(image_query, (deal_id, image_limit))
            image_results = cur.fetchall()
            image_urls = [row[0] for row in image_results]
            extensions = [row[1] for row in image_results]
            original_extension = extensions[0] if extensions else "png"

        # Get highlights (text before the first ':' of each highlight).
        # NOTE(review): there is no ORDER BY, so which three highlights are
        # returned is up to the database — confirm whether an ordering column exists.
        highlights_query = """
        SELECT
        SPLIT_PART(highlight, ':', 1) AS highlight
        FROM wowdwhprod.real.deal_voucher_highlight
        WHERE deal_voucher_id = %s
        LIMIT 3;
        """
        with conn.cursor() as cur:
            cur.execute(highlights_query, (deal_id,))
            highlights_results = cur.fetchall()
            highlights = [row[0] for row in highlights_results]
    finally:
        conn.close()

    # Format highlights for use in prompts
    formatted_highlights = ""
    if highlights:
        formatted_highlights = "\n".join([f"• {h}" for h in highlights])

    # Get the appropriate prompt using the prompt manager with all placeholders
    prompt, prompt_source = prompt_manager.get_prompt(
        vertical=vertical,
        category_name=category_name,
        sub_category_name=sub_category_name,
        email_subject=email_subject,
        formatted_highlights=formatted_highlights
    )

    return {
        'prompt': prompt,
        'prompt_source': prompt_source,
        'image_urls': image_urls,
        'original_extension': original_extension,
        'highlights': highlights,
        'formatted_highlights': formatted_highlights
    }

def download_image_to_file(url, filename, timeout=30):
    """Download an image from URL and save to file.

    Parameters:
    - url: address of the image
    - filename: local path the response body is written to
    - timeout: seconds to wait for the server before giving up; without it a
      stalled connection would block the calling worker thread indefinitely

    Returns:
    - filename, once the file has been written

    Raises:
    - Exception when the server answers with any status other than 200
    - requests exceptions (including timeouts) are propagated unchanged
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f"Failed to download image from {url}")

    with open(filename, 'wb') as f:
        f.write(response.content)
    return filename


In [22]:
def generate_image_integrated(deal_id, original_id, temp_dir, vertical, category_name, sub_category_name, verbose=False):
    """
    Generate a variant image for one deal using OpenAI's image edit API with file-based prompts.

    Steps: look up the deal's prompt and source image URLs, download the source
    images into ``temp_dir``, call ``client.images.edit`` and write the decoded
    result to ``variant_<deal_id>_<original_id>.<original_extension>`` in ``temp_dir``.

    Parameters:
    - deal_id: deal voucher id
    - original_id: id of the original image; only used in the output file name
    - temp_dir: existing directory for downloads and the generated file
    - vertical, category_name, sub_category_name: forwarded to the prompt lookup
    - verbose: when True, progress and error details are printed

    Returns:
    - (output_filename, original_extension, token_info, prompt, prompt_source) on success;
      token_info is a dict of stringified token counts plus an estimated "Cost",
      or {} when the response carries no usage data
    - (None, None, None, None, None) on any failure; this function never raises

    Downloaded source images and their open handles are always released before
    returning, whether the call succeeded or not.
    """
    image_files = []     # open handles handed to the API; closed in `finally`
    temp_filenames = []  # downloaded source images; removed in `finally`
    try:
        if verbose:
            print(f"Processing deal {deal_id}, vertical: {vertical}, category: {category_name}, subcategory: {sub_category_name}")

        # Get data for the deal
        try:
            deal_data = get_deal_data_for_image(deal_id, vertical, category_name, sub_category_name)
            if verbose:
                print(f"Deal data retrieved successfully for {deal_id} using prompt: {deal_data['prompt_source']}")
        except Exception as e:
            if verbose:
                print(f"Error getting deal data for {deal_id}: {str(e)}")
            raise

        prompt = deal_data['prompt']
        prompt_source = deal_data['prompt_source']
        image_urls = deal_data['image_urls']
        original_extension = deal_data['original_extension']

        if verbose:
            print(f"Found {len(image_urls)} images for deal {deal_id}")
            print(f"Prompt length: {len(prompt)} characters")

        # Create output filename.
        # NOTE(review): the file takes the ORIGINAL image's extension, while the API
        # call below does not request a specific output format — confirm the returned
        # bytes really are in that format before relying on the extension.
        output_filename = os.path.join(temp_dir, f"variant_{deal_id}_{original_id}.{original_extension}")

        if not image_urls:
            raise Exception("No images found for this deal")

        # Download images. At most 16 are sent (presumably the API's per-request
        # ceiling — TODO confirm). Each temp file is named .png whatever the source format.
        for idx, url in enumerate(image_urls[:16]):
            try:
                temp_filename = os.path.join(temp_dir, f"temp_image_{deal_id}_{idx}.png")
                # Track the path before downloading so a partially written file is cleaned up too.
                temp_filenames.append(temp_filename)
                download_image_to_file(url, temp_filename)
                image_files.append(open(temp_filename, "rb"))
                if verbose:
                    print(f"Downloaded image {idx+1}/{len(image_urls)} for deal {deal_id}")
            except Exception as e:
                if verbose:
                    print(f"Error downloading image {idx} for deal {deal_id}: {str(e)}")
                raise

        # Call OpenAI API
        try:
            if verbose:
                print(f"Calling OpenAI API for deal {deal_id}...")
            result = client.images.edit(
                model="gpt-image-1",
                image=image_files,
                prompt=prompt,
                size="1536x1024",
                quality="high",
                background="auto",
                n=1
            )
            if verbose:
                print(f"OpenAI API call successful for deal {deal_id}")
        except Exception as e:
            if verbose:
                print(f"OpenAI API error for deal {deal_id}: {str(e)}")
            raise

        # Process and save the response
        image_base64 = result.data[0].b64_json
        image_bytes = base64.b64decode(image_base64)
        with open(output_filename, "wb") as f:
            f.write(image_bytes)

        if verbose:
            print(f"Image saved successfully for deal {deal_id}")

        # Token accounting is best-effort: the image is already on disk, so missing
        # usage fields must not turn a successful generation into a failure.
        token_info = {}
        usage = getattr(result, 'usage', None)
        if usage is not None:
            try:
                output_tokens = usage.output_tokens
                input_text_tokens = usage.input_tokens_details.text_tokens
                input_image_tokens = usage.input_tokens_details.image_tokens

                # Cost in dollars from per-million-token rates: 5 for text input,
                # 10 for image input, 40 for output.
                cost = (input_text_tokens * 5 + input_image_tokens * 10 + output_tokens * 40) / 1000000

                token_info = {
                    "Total tokens": str(usage.total_tokens),
                    "Input tokens": str(usage.input_tokens),
                    "Output tokens": str(output_tokens),
                    "Input text tokens": str(input_text_tokens),
                    "Input image tokens": str(input_image_tokens),
                    "Cost": f"${cost:.6f}",
                }

                if verbose:
                    print(f"Cost for deal {deal_id}: ${cost:.6f}")
            except (AttributeError, TypeError) as e:
                token_info = {}
                if verbose:
                    print(f"Token usage unavailable for deal {deal_id}: {str(e)}")

        return output_filename, original_extension, token_info, prompt, prompt_source

    except Exception as e:
        if verbose:
            print(f"FINAL ERROR for deal {deal_id}: {str(e)}")
            import traceback
            print(f"Full traceback: {traceback.format_exc()}")
        return None, None, None, None, None

    finally:
        # Close handles first, then delete the files they pointed at.
        for f in image_files:
            f.close()
        for filename in temp_filenames:
            try:
                os.remove(filename)
            except OSError:
                # Already gone or never created; the enclosing temp dir is disposable anyway.
                pass


## Async Processing


In [23]:
async def process_deals_async(deals_df, max_workers=50, verbose=False):
    """
    Process multiple deals concurrently with file-based prompts.

    Every row of ``deals_df`` is submitted to a thread pool that runs
    ``generate_image_integrated``; each generated image is then uploaded to S3
    under ``images/deal/<deal_id>/<original_id>_variant.<extension>``.

    Parameters:
    - deals_df: DataFrame with at least an 'id' column; 'image_id_pos_0',
      'vertical', 'category_name' and 'sub_category_name' are used when present
    - max_workers: size of the thread pool
    - verbose: when True, diagnostics are printed

    Returns:
    - DataFrame with one row per submitted deal, in submission order: the input
      columns plus 'status' ('success' or 'failed'), 'processed_timestamp' and
      either the success fields ('s3_url', 'token_info', 'extension', 'prompt',
      'prompt_source') or an 'error' message. Rows without a deal id are skipped.
    """
    results = []

    def failed_record(row, message):
        """Build the result row for a deal that could not be processed."""
        record = row.to_dict()
        record.update({
            'status': 'failed',
            'error': message,
            'processed_timestamp': pd.Timestamp.now()
        })
        return record

    if verbose:
        print(f"Available columns in dataframe: {list(deals_df.columns)}")

    # Create a temporary directory for image files
    with tempfile.TemporaryDirectory() as temp_dir:
        # Use ThreadPoolExecutor for parallelization
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            # Create tasks for all deals
            futures = []
            for _, row in deals_df.iterrows():
                deal_id = row.get('id')
                original_id = row.get('image_id_pos_0', 'main')
                vertical = row.get('vertical', 'LOCAL')  # Default to LOCAL if not specified
                category_name = row.get('category_name', '')
                sub_category_name = row.get('sub_category_name', '')

                if deal_id is None:
                    if verbose:
                        print(f"Warning: Could not find deal ID in row: {row}")
                    continue

                future = executor.submit(
                    generate_image_integrated,
                    deal_id,
                    original_id,
                    temp_dir,
                    vertical,
                    category_name,
                    sub_category_name,
                    verbose
                )
                futures.append((future, deal_id, original_id, row))

            # Create progress bar
            print(f"Processing {len(futures)} deals...")
            progress_bar = tqdm(total=len(futures), desc="Overall progress")

            try:
                # Results are collected in submission order so the output keeps
                # the ordering of deals_df.
                for future, deal_id, original_id, row in futures:
                    try:
                        # Await the worker instead of blocking on future.result(),
                        # so the event loop stays responsive while threads run.
                        image_path, extension, token_info, prompt, prompt_source = await asyncio.wrap_future(future)

                        if not image_path:
                            results.append(failed_record(row, 'Image generation failed'))
                        else:
                            # Read the image file
                            with open(image_path, 'rb') as img_file:
                                img_content = img_file.read()

                            # Upload to S3 with correct extension
                            s3_key = f"images/deal/{deal_id}/{original_id}_variant.{extension}"
                            s3_url = upload_to_s3(img_content, 'static.wowcher.co.uk', s3_key)

                            if s3_url is None:
                                # upload_to_s3 signals missing credentials with None; do not
                                # report a deal as successful without a stored image.
                                results.append(failed_record(row, 'S3 upload failed'))
                            else:
                                result_row = row.to_dict()
                                result_row.update({
                                    'status': 'success',
                                    's3_url': s3_url,
                                    'token_info': token_info,
                                    'extension': extension,
                                    'processed_timestamp': pd.Timestamp.now(),
                                    'prompt': prompt,
                                    'prompt_source': prompt_source
                                })
                                results.append(result_row)

                    except Exception as e:
                        if verbose:
                            print(f"Error processing deal {deal_id}: {str(e)}")
                        results.append(failed_record(row, str(e)))

                    # Update progress bar
                    progress_bar.update(1)
            finally:
                # Close the bar even if the loop is interrupted.
                progress_bar.close()

    return pd.DataFrame(results)


## Run the Process


In [24]:
# Select deals to process
deals_sample = deals_df

print(f"Selected {len(deals_sample)} deals")

# Process deals asynchronously
results_df = await process_deals_async(deals_sample, max_workers=50, verbose=False)

# Filter to get only successful results (the "winners")
winners_df = results_df[results_df['status'] == 'success'].copy()

print(f"\nSuccessful generations: {len(winners_df)} out of {len(results_df)}")

# Display the winners
display(winners_df)

# Calculate total cost for successful generations
if 'token_info' in winners_df.columns:
    total_cost = 0.0
    for _, row in winners_df.iterrows():
        if 'token_info' in row and row['token_info'] and 'Cost' in row['token_info']:
            cost_str = row['token_info']['Cost'].replace('$', '')
            try:
                total_cost += float(cost_str)
            except:
                pass
    print(f"Total cost for successful generations: ${total_cost:.4f}")

# Show which prompts were used
if 'prompt_source' in winners_df.columns:
    prompt_usage = winners_df['prompt_source'].value_counts()
    print(f"\nPrompt usage:")
    for prompt_name, count in prompt_usage.items():
        print(f"  {prompt_name}: {count} deal(s)")


Selected 250 deals
Processing 250 deals...


Overall progress:   0%|          | 0/250 [00:00<?, ?it/s]


Successful generations: 243 out of 250


Unnamed: 0,id,email_subject,category_name,vertical,sub_category_name,revenue_last_7_days,revenue_rank,image_id_pos_0,image_url_pos_0,extension,status,s3_url,token_info,processed_timestamp,prompt,prompt_source,error
0,39323077,AirPods Gen 3 with Charging Case,Electronics,NATIONAL,Headphones & Earphones,3760.35,1,1702697,https://static.wowcher.co.uk/images/deal/39323...,jpg,success,https://static.wowcher.co.uk/images/deal/39323...,"{'Total tokens': '10255', 'Input tokens': '404...",2025-07-22 14:52:57.473015,<imageRequest>\n <product>\n <name>AirPods...,headphones_&_earphones,
1,39790815,Emma Original Mattress,Home,NATIONAL,Beds & Mattresses,1907.83,2,1702692,https://static.wowcher.co.uk/images/deal/39790...,jpg,success,https://static.wowcher.co.uk/images/deal/39790...,"{'Total tokens': '10183', 'Input tokens': '397...",2025-07-22 14:52:57.612510,<imageRequest>\n <product>\n <name>Emma Or...,beds_&_mattresses,
2,32708136,3PC Hard Shell Suitcase Set,Luggage & Travel,NATIONAL,Suitcases & Sets,1255.68,3,1701076,https://static.wowcher.co.uk/images/deal/32708...,jpg,success,https://static.wowcher.co.uk/images/deal/32708...,"{'Total tokens': '10517', 'Input tokens': '430...",2025-07-22 14:53:18.445845,<imageRequest>\n <product>\n <name>3PC Har...,suitcases_&_sets,
3,41154460,Alivio Kids Trampoline with Enclosure,Children and Baby,NATIONAL,Toys,1003.88,4,1704286,https://static.wowcher.co.uk/images/deal/41154...,jpg,success,https://static.wowcher.co.uk/images/deal/41154...,"{'Total tokens': '9490', 'Input tokens': '3282...",2025-07-22 14:53:23.357782,<imageRequest>\n <product>\n <name>Alivio ...,toys,
4,39618758,Outdoor Garden Pergola,Garden,NATIONAL,Garden Furniture,850.40,5,1597790,https://static.wowcher.co.uk/images/deal/39618...,jpg,success,https://static.wowcher.co.uk/images/deal/39618...,"{'Total tokens': '9464', 'Input tokens': '3256...",2025-07-22 14:53:23.948637,<imageRequest>\n <product>\n <name>Outdoor...,garden_furniture,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
245,41009926,Hestia Smart Telescope,Electronics,NATIONAL,Camera & Photo,38.96,246,1676775,https://static.wowcher.co.uk/images/deal/41009...,jpg,success,https://static.wowcher.co.uk/images/deal/41009...,"{'Total tokens': '10538', 'Input tokens': '433...",2025-07-22 15:01:45.227588,<imageRequest>\n <product>\n <name>Hestia ...,camera_&_photo,
246,37343597,Large Electric Racing Track,Children and Baby,NATIONAL,Toys,38.64,247,1459076,https://static.wowcher.co.uk/images/deal/37343...,jpg,success,https://static.wowcher.co.uk/images/deal/37343...,"{'Total tokens': '10527', 'Input tokens': '431...",2025-07-22 15:01:45.650386,<imageRequest>\n <product>\n <name>Large E...,toys,
247,30825536,Children's Simulated Fryer Toy Set,Home,NATIONAL,Kitchen Appliances,38.34,248,1272795,https://static.wowcher.co.uk/images/deal/30825...,jpg,success,https://static.wowcher.co.uk/images/deal/30825...,"{'Total tokens': '8476', 'Input tokens': '2268...",2025-07-22 15:01:45.971237,<imageRequest>\n <product>\n <name>Childre...,kitchen_appliances,
248,34062283,Square BBQ Fire Pit,Garden,NATIONAL,BBQ & Picnic,38.22,249,1389893,https://static.wowcher.co.uk/images/deal/34062...,jpg,success,https://static.wowcher.co.uk/images/deal/34062...,"{'Total tokens': '8475', 'Input tokens': '2267...",2025-07-22 15:01:46.327087,<imageRequest>\n <product>\n <name>Square ...,bbq_&_picnic,


Total cost for successful generations: $66.9617

Prompt usage:
  garden_furniture: 33 deal(s)
  beds_&_mattresses: 23 deal(s)
  software: 11 deal(s)
  garden_tools: 10 deal(s)
  tablets_&_ipads: 9 deal(s)
  sofas_&_sofa_beds: 9 deal(s)
  bedding: 8 deal(s)
  toys: 8 deal(s)
  garden_games_&_leisure: 8 deal(s)
  bbq_&_picnic: 7 deal(s)
  beauty_electricals: 7 deal(s)
  bedroom_furniture: 7 deal(s)
  gym_equipment: 7 deal(s)
  watches: 7 deal(s)
  handbags: 6 deal(s)
  laptops_&_macbooks: 6 deal(s)
  gardening,_plants_&_flowers: 6 deal(s)
  mobile_phones: 5 deal(s)
  garden_buildings_&_storage_solutions: 4 deal(s)
  fans,_heating_&_air_conditioning: 4 deal(s)
  cleaning_&_home_maintenance: 4 deal(s)
  kitchen_appliances: 4 deal(s)
  living_room: 3 deal(s)
  dogs: 3 deal(s)
  camera_&_photo: 3 deal(s)
  headphones_&_earphones: 3 deal(s)
  camping: 3 deal(s)
  smart_watches_&_wearable_tech: 3 deal(s)
  suitcases_&_sets: 3 deal(s)
  chairs,_stools_&_beanbags: 2 deal(s)
  tops: 2 deal(s)
  h

## Display Results


In [26]:
# Persist the successful generations next to the notebook (row index omitted)
winners_df.to_csv(path_or_buf='nationalImages.csv', index=False)

In [27]:

def purge_cache(image_urls, chunk_size=30):
    """Purge Cloudflare cache for generated images.

    The URLs are posted to the Cloudflare ``purge_cache`` endpoint in batches
    and the decoded JSON body of every response is printed.

    Credentials are read from the environment rather than being embedded in
    the notebook (a token committed to a notebook must be treated as leaked
    and rotated):

    * ``CLOUDFLARE_API_TOKEN`` (required) - sent as ``Bearer <token>``.
    * ``CLOUDFLARE_ZONE_ID`` (optional) - zone whose cache is purged; defaults
      to the zone id this function has always targeted.

    Args:
        image_urls: Iterable of URLs to purge.
        chunk_size: Number of URLs sent per request. Defaults to 30, which the
            original code notes is the maximum the purge API accepts.

    Returns:
        None. Each response body is printed as it arrives.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
        RuntimeError: If there are URLs to purge but ``CLOUDFLARE_API_TOKEN``
            is not set.
    """
    import json
    import os

    if chunk_size < 1:
        raise ValueError("chunk_size must be at least 1")

    image_urls = list(image_urls)
    if not image_urls:
        # Nothing to purge: skip credential lookup and network access entirely.
        return

    api_token = os.getenv("CLOUDFLARE_API_TOKEN")
    if not api_token:
        raise RuntimeError(
            "CLOUDFLARE_API_TOKEN is not set; export it or add it to the .env file "
            "before purging the cache"
        )

    # Imported only once a request is really going to be made, so that
    # configuration problems are reported before anything touches the network.
    import requests

    zone_id = os.getenv("CLOUDFLARE_ZONE_ID", "4fec7e02d5c45deb9f67452873708896")
    api_url = f"https://api.cloudflare.com/client/v4/zones/{zone_id}/purge_cache"
    headers = {
        "Authorization": f"Bearer {api_token}",
        "Content-Type": "application/json",
    }

    for start in range(0, len(image_urls), chunk_size):
        payload = {"files": image_urls[start:start + chunk_size]}
        # A timeout keeps a stalled connection from blocking the notebook forever.
        response = requests.post(
            url=api_url,
            data=json.dumps(payload),
            headers=headers,
            timeout=30,
        )
        print(response.json())

# Send every s3_url in winners_df to purge_cache; nothing happens when winners_df is empty
if not winners_df.empty:
    image_urls = winners_df['s3_url'].tolist()
    print(f"Purging cache for {len(image_urls)} images...")
    purge_cache(image_urls)
    

Purging cache for 243 images...
{'success': True, 'errors': [], 'messages': [], 'result': {'id': '4fec7e02d5c45deb9f67452873708896'}}
{'success': True, 'errors': [], 'messages': [], 'result': {'id': '4fec7e02d5c45deb9f67452873708896'}}
{'success': True, 'errors': [], 'messages': [], 'result': {'id': '4fec7e02d5c45deb9f67452873708896'}}
{'success': True, 'errors': [], 'messages': [], 'result': {'id': '4fec7e02d5c45deb9f67452873708896'}}
{'success': True, 'errors': [], 'messages': [], 'result': {'id': '4fec7e02d5c45deb9f67452873708896'}}
{'success': True, 'errors': [], 'messages': [], 'result': {'id': '4fec7e02d5c45deb9f67452873708896'}}
{'success': True, 'errors': [], 'messages': [], 'result': {'id': '4fec7e02d5c45deb9f67452873708896'}}
{'success': True, 'errors': [], 'messages': [], 'result': {'id': '4fec7e02d5c45deb9f67452873708896'}}
{'success': True, 'errors': [], 'messages': [], 'result': {'id': '4fec7e02d5c45deb9f67452873708896'}}


In [29]:
from IPython.display import Image, display_html
import time
import random

# Preview up to 10 randomly chosen rows of winners_df, original image beside the generated one.
# Every URL gets a ?timestamp=<epoch seconds> suffix so the requested address differs between runs.
if not winners_df.empty:
    total_rows = len(winners_df)
    sample_size = min(10, total_rows)

    for position in random.sample(range(total_rows), sample_size):
        variant_src = winners_df['s3_url'].iloc[position]
        fresh_url = f"{variant_src}?timestamp={int(time.time())}"
        original_src = winners_df['image_url_pos_0'].iloc[position]
        original_url = f"{original_src}?timestamp={int(time.time())}"

        # Two-column flex layout: original on the left, variant on the right
        comparison_html = f"""
            <div style="display: flex; justify-content: space-around; margin-bottom: 20px;">
                <div>
                    <p>Original image:</p>
                    <img src="{original_url}" width="300">
                </div>
                <div>
                    <p>Generated variant:</p>
                    <img src="{fresh_url}" width="300">
                </div>
            </div>
            """
        display_html(comparison_html, raw=True)

        deal_id = winners_df['id'].iloc[position]
        print(f"Deal ID: {deal_id}")
        prompt_source = winners_df['prompt_source'].iloc[position]
        print(f"Prompt used: {prompt_source}")
        # Uncomment to also dump the full prompt text for the row:
        # print(f"Prompt details: {winners_df['prompt'].iloc[position]}")


Deal ID: 41009926
Prompt used: camera_&_photo


Deal ID: 27920641
Prompt used: bbq_&_picnic


Deal ID: 31241509
Prompt used: cleaning_&_home_maintenance


Deal ID: 34600959
Prompt used: storage_solutions


Deal ID: 40998659
Prompt used: laptops_&_macbooks


Deal ID: 36995884
Prompt used: bedding


Deal ID: 38154194
Prompt used: cleaning_&_home_maintenance


Deal ID: 33795031
Prompt used: camping


Deal ID: 26742312
Prompt used: garden_furniture


Deal ID: 41087153
Prompt used: beds_&_mattresses


In [None]:
# Write winners_df as CSV text (no index column) to a file named 'national_images'.
# NOTE(review): the target has no .csv extension, unlike the earlier 'nationalImages.csv'
# export of the same frame — confirm whether this second export is intended.
winners_df.to_csv('national_images', index=False)

In [None]:
# Persist winners_df to a CSV whose name carries the current Unix time;
# when the frame is empty only a notice is printed.
if winners_df.empty:
    print("No successful results to save")
else:
    timestamp = int(time.time())
    filename = f'variant_results_{timestamp}.csv'
    winners_df.to_csv(filename, index=False)
    print(f"Results saved to {filename}")


## Generating Prompts

In [None]:
# df = pd.read_csv('subcats.csv')

In [None]:
# import pandas as pd
# import os
# from openai import OpenAI
# from concurrent.futures import ThreadPoolExecutor, as_completed
# import time

# # Initialize OpenAI client
# client = OpenAI(api_key=os.getenv('OPEN_AI_API_KEY'))

# def create_prompt_for_category(row):
#     """Create a prompt for a specific category/sub_category combination"""
#     category = row['category']
#     sub_category = row['sub_category']
    
#     try:
#         response = client.responses.create(
#             model="o3",
#             input=[
#                 {
#                     "role": "developer",
#                     "content": [
#                         {
#                             "type": "input_text",
#                             "text": "You are an expert in creating new prompts based on an existing prompt template. \n\nYou need to turn the following prompt which is used for image generation for product photography in an e-commerce business. In the prompt {email_subject} is a placeholder and you should return the prompt using the same placeholders. \n\nYour task is to return a prompt that is better suited to product photography of goods that belong to the users specified category and subcategory. So you need to consider how we should get a more appropriate scene in the prompt or camera settings. You could adding or alter more camera settings based on unique aspects of certain product categories (e.g., macro mode for jewelry, higher aperture for fashion apparel). You should also change it to choose a suitable environment for goods photography based on the sub category and category.\n\nBelow is the entire default prompt. Return to the user a tailored one based on the sub_category and category. Do not change sections apart from where to make them more appropriate to the specified category. Do not change instructions in the prompt if it is not making it more appropriate to the category. Do not ask any questions just return the tailored prompt: \n\nCreate a high-resolution, photo-realistic promotional image for {email_subject}.\n\nCamera Settings:\n- Model: Canon EOS R5\n- Lens: 50mm f/1.4\n- ISO: 100\n- Aperture: f/4.0\n- Shutter Speed: 1/125s\n- White Balance: auto\n- Style: proffessional quality product photography\n- Lighting: softbox or natural light — even, shadow-minimizing illumination\n- Focus: manual focus on product's front face or key feature\n- Depth of Field: shallow-to-moderate — product in sharp focus, background subtly blurred\n\nProduct Handling:\n- Select one color variant per image; but reflect available options. 
\n- Do not invent new features or alter the product\n- No floating objects unless contextually valid.\n- No unrealistic reflections or surfaces\n- No invented logos, textures, or packaging\n\nScene:\n- Type: realistic and suited to the product\n- Environment must reflect intended product use. \n- Background priority: clean, complementary, secondary to product\n- Product is the primary visual anchor\n- Soft and directional shadows if present, consistent with single light source\n- Natural highlights, never blown out\n\nCallouts (2–4 max):\n- Do not put the title of the product on the image. \n- Do not place callouts in the bottom right of the image. \n- Content: key features or differentiators only\n- Font: smaller, clean sans-serif\n- 4px minimum padding around text boxes\n- Semi-transparent or minimal background\n- Subtle connector lines, avoid clutter. Make sure the lines are connected to an appropriate part of the product. \n- Each callout should be ~ 3 words: \n\nDesign Constraints:\n- Bottom-right corner must be clear of any text or callouts. \n- Text box padding: 4px minimum\n- No pricing\n- No branding unless provided\n- Spelling must be 100% correct\n- No logo invention \n- Keep Product accurate. Do not alter shape of product. "

#                         }
#                     ]
#                 },
#                 {
#                     "role": "user",
#                     "content": [
#                         {
#                             "type": "input_text",
#                             "text": f"Please provide me with a prompt that i can use to enhance images of the goods in my {sub_category} subcategory which belongs in the {category} category"
#                         }
#                     ]
#                 }
#             ],
#             text={
#                 "format": {
#                     "type": "text"
#                 }
#             },
#             reasoning={
#                 "effort": "medium",
#                 "summary": "auto"
#             },
#             tools=[],
#             store=True
#         )
        
#         # Extract the prompt from the response
#         prompt_text = response.output_text
        
#         # Create filename from sub_category (clean it for filesystem)
#         filename = f"{sub_category.replace(' ', '_').replace('/', '_').replace('\\', '_')}.txt"
        
#         # Save to prompts folder
#         os.makedirs('prompts', exist_ok=True)
#         filepath = os.path.join('prompts', filename)
        
#         with open(filepath, 'w', encoding='utf-8') as f:
#             f.write(prompt_text)
        
#         print(f"✓ Created prompt for {sub_category} ({category})")
#         return f"Success: {sub_category}"
        
#     except Exception as e:
#         print(f"✗ Error creating prompt for {sub_category} ({category}): {str(e)}")
#         return f"Error: {sub_category} - {str(e)}"

# def process_dataframe_parallel(df, max_workers=5):
#     """Process the dataframe with parallel API calls"""
    
#     # Create prompts directory if it doesn't exist
#     os.makedirs('prompts', exist_ok=True)
    
#     print(f"Processing {len(df)} category/sub_category combinations...")
    
#     # Use ThreadPoolExecutor for parallel processing
#     with ThreadPoolExecutor(max_workers=max_workers) as executor:
#         # Submit all tasks
#         future_to_row = {
#             executor.submit(create_prompt_for_category, row): row 
#             for _, row in df.iterrows()
#         }
        
#         # Collect results as they complete
#         results = []
#         for future in as_completed(future_to_row):
#             row = future_to_row[future]
#             try:
#                 result = future.result()
#                 results.append(result)
#             except Exception as e:
#                 print(f"Exception for {row['sub_category']}: {str(e)}")
#                 results.append(f"Exception: {row['sub_category']} - {str(e)}")
    
#     print(f"\nCompleted processing {len(results)} items")
#     return results

# # Example usage:
# # Assuming your dataframe is called 'df' and has columns 'category' and 'sub_category'
# df_test = df
# # Process with parallel API calls (adjust max_workers based on your rate limits)
# results = process_dataframe_parallel(df_test, max_workers=25)

# # Print summary
# print("\nSummary:")
# for result in results:
#     print(result)

In [None]:
from openai import OpenAI
import os
from dotenv import load_dotenv

# Load variables from a .env file (if present) into the process environment.
load_dotenv()

# Build the OpenAI client from the OPEN_AI_API_KEY environment variable.
client = OpenAI(api_key=os.getenv('OPEN_AI_API_KEY'))

# Ask gpt-4.1 to rewrite a plain-text image prompt as the XML structure shown in the
# system message, replacing the "Callouts" section with a <headline> section.
# The system message carries one full input/output example; the user message carries
# the prompt to convert.
# NOTE(review): prompt_from_file is not defined anywhere in this cell — it must already
# exist in the kernel (e.g. from an earlier cell), otherwise this call raises NameError.
response = client.responses.create(
  model="gpt-4.1",
  input=[
    {
      "role": "system",
      "content": [
        {
          "type": "input_text",
          "text": "you are an expert prompt editor who is going to be tasked with turning the users input image prompt which is written in text into a more structured xml format. \n\nYou also need to change the section of the prompt where it talks about callouts to talk about creating a headline. \n\nthis is an example input prompt the user would provide, leave the placeholders {email_subject} as is. \n\nStart of input prompt example: \nCreate a high-resolution, photo-realistic promotional image for {email_subject}.\n\nCamera Settings:\n- Model: Canon EOS R5\n- Lens: 35mm f/2.8\n- ISO: 200\n- Aperture: f/8.0\n- Shutter Speed: 1/160s\n- White Balance: Daylight\n- Style: professional quality garden furniture product photography\n- Lighting: diffused natural daylight with softbox fill — even illumination, gentle outdoor-style shadows\n- Focus: manual focus on the front edge of the primary seating surface\n- Depth of Field: moderate — entire furniture set in crisp focus, background softly blurred\n\nProduct Handling:\n- Select one color variant per image; but reflect available options. \n- Do not invent new features or alter the product\n- No floating objects unless contextually valid.\n- No unrealistic reflections or surfaces\n- No invented logos, textures, or packaging\n\nScene:\n- Type: realistic and suited to the product\n- Environment must reflect intended product use: patio, wooden deck, or manicured lawn with understated greenery or flowering plants\n- Background priority: clean, complementary, secondary to product\n- Product is the primary visual anchor\n- Soft and directional shadows if present, consistent with a single mid-morning light source\n- Natural highlights, never blown out\n\nCallouts (2–4 max):\n- Do not put the title of the product on the image. \n- Do not place callouts in the bottom right of the image. 
\n- Content: key features or differentiators only\n- Font: smaller, clean sans-serif\n- 4px minimum padding around text boxes\n- Semi-transparent or minimal background\n- Subtle connector lines, avoid clutter. Make sure the lines are connected to an appropriate part of the product. \n- Each callout should be ~ 3 words: \n\nDesign Constraints:\n- Bottom-right corner must be clear of any text or callouts. \n- Text box padding: 4px minimum\n- No pricing\n- No branding unless provided\n- Spelling must be 100% correct\n- No logo invention \n- Keep Product accurate. Do not alter shape of product.\n\nEnd of Input prompt example\n\nStart of improved prompt example: \n\n<imageRequest>\n  <product>\n    <name>{email_subject}</name>\n    <category>Garden Furniture</category>\n    <referenceImage>\n      <useAsPrimaryGuide>true</useAsPrimaryGuide>\n      <matchStructure>true</matchStructure>\n      <matchMaterial>true</matchMaterial>\n      <matchProportions>true</matchProportions>\n      <matchColor>true</matchColor>\n      <notes>Product shape, dimensions, texture, and color must remain visually consistent with the input image.</notes>\n    </referenceImage>\n    <colorVariants>Only show one per image; must exist on product</colorVariants>\n    <modifications>Do not alter product features or invent new elements</modifications>\n    <integrity>\n      <noFloatingObjects>true</noFloatingObjects>\n      <noUnrealisticReflections>true</noUnrealisticReflections>\n      <noInventedTextures>true</noInventedTextures>\n      <noInventedLogos>true</noInventedLogos>\n    </integrity>\n  </product>\n\n  <cameraSettings>\n    <model>Canon EOS R5</model>\n    <lens>35mm f/2.8</lens>\n    <ISO>200</ISO>\n    <aperture>f/8.0</aperture>\n    <shutterSpeed>1/160s</shutterSpeed>\n    <whiteBalance>Daylight</whiteBalance>\n    <focus>Manual focus on the front edge of the primary seating surface</focus>\n    <depthOfField>Moderate — entire product in crisp focus, background softly 
blurred</depthOfField>\n  </cameraSettings>\n\n  <lighting>\n    <type>Diffused natural daylight</type>\n    <fill>Softbox fill light</fill>\n    <style>Even illumination, outdoor-style shadows</style>\n    <direction>Single, soft directional light (mid-morning)</direction>\n    <highlights>Natural — do not overexpose</highlights>\n  </lighting>\n\n  <environment>\n    <sceneType>Realistic, lifestyle-appropriate outdoor</sceneType>\n    <location>Patio, wooden deck, or manicured lawn</location>\n    <plants>Understated greenery or flowering accents</plants>\n    <backgroundStyle>Secondary to product — clean, unobtrusive</backgroundStyle>\n  </environment>\n\n  <composition>\n    <primaryFocus>Product must be visually centered and undistracted</primaryFocus>\n    <realism>Photorealistic and natural</realism>\n    <scale>True to product dimensions</scale>\n    <shadows>Soft, consistent with light source</shadows>\n  </composition>\n\n  <headline>\n    <text>{email_subject}</text>\n    <font>Modern, clean sans-serif</font>\n    <placement>Top-left or top-center only</placement>\n    <background>Minimalist or semi-transparent</background>\n    <padding>6px minimum</padding>\n    <style>Subtle, should not obscure product or distract from detail</style>\n  </headline>\n\n  <designConstraints>\n    <textRules>\n      <noTextInBottomRight>true</noTextInBottomRight>\n      <noCallouts>true</noCallouts>\n      <noPricing>true</noPricing>\n      <noBrandingUnlessProvided>true</noBrandingUnlessProvided>\n      <padding>4px minimum for any text elements</padding>\n    </textRules>\n    <accuracy>\n      <maintainExactProductShape>true</maintainExactProductShape>\n      <maintainMaterial>true</maintainMaterial>\n      <maintainColor>true</maintainColor>\n      <matchInputImage>true</matchInputImage>\n    </accuracy>\n    <spelling>Must be 100% correct</spelling>\n  </designConstraints>\n</imageRequest>\n\nReturn the xml document. "
        }
      ]
    },
    {
      "role": "user",
      "content": [
        {
          "type": "input_text",
          "text": f"Please reformat and change the part of the prompt about callouts for the following prompt:{prompt_from_file}"
        }
      ]
    }
  ],
  text={
    "format": {
      "type": "text"
    }
  },
  reasoning={},
  tools=[],
  temperature=0.28,
  max_output_tokens=11967,
  top_p=1,
  store=True
)