# Image Variant Generation with S3 Storage

This notebook:
1. Queries a database for deal IDs
2. Generates variant images using OpenAI
3. Stores images in S3
4. Implements async processing for efficiency

In [104]:
import pandas as pd
import os
import psycopg2
from dotenv import load_dotenv
import boto3
import time
import base64
import asyncio
import aiohttp
import tempfile
import sys
from botocore.exceptions import NoCredentialsError
from concurrent.futures import ThreadPoolExecutor
import subprocess
import json
from io import BytesIO
# Load environment variables
load_dotenv()
# Configure AWS credentials
s3_client = boto3.client(
    's3',
    aws_access_key_id=os.getenv('AWS_ACCESS_KEY_ID'),
    aws_secret_access_key=os.getenv('AWS_SECRET_ACCESS_KEY')
)



## Query Database for Deal IDs

Execute SQL to get deal information including:
- deal_voucher_id
- original_image_id
- variant_image_id
- batch_name
- enter_test_ts
- exit_test_ts
- open_ai_prompt

In [111]:
def get_deals_for_processing_visits():
    # Establish connection to Redshift
    conn = psycopg2.connect(
        host=os.environ.get("REDSHIFT_HOST" ),
        port=os.environ.get("REDSHIFT_PORT"),
        dbname=os.environ.get("REDSHIFT_DBNAME"),
        user=os.environ.get("REDSHIFT_USER"),
        password=os.environ.get("REDSHIFT_PASSWORD")
    )
    
    # Example query - modify as needed
    query = """
WITH visitors AS (
    SELECT
        deal_id_evar,
        COUNT(DISTINCT visitor_id) AS visitors
    FROM real.omniture_events
    WHERE trunc(date_time) >= trunc(sysdate) - 7
      AND product = 'wowdtm'
      AND (
            url_evar LIKE '%/deal/%' OR
            url_evar LIKE '%/e/%' OR
            url_evar LIKE '%/email-deals/%'
          )
    GROUP BY deal_id_evar
)
SELECT
    CAST(dv.id AS INTEGER) AS id,
    CASE 
        WHEN dvc.canonical_path_type = 'NATIONAL' THEN dv.deal_product 
        WHEN dvc.canonical_path_type = 'LOCAL' THEN dv.deal_product 
        WHEN dvc.canonical_path_type = 'TRAVEL' THEN dv.deal_product 
    END AS email_subject,
    dvc.name AS category_name,
    dvc.canonical_path_type as vertical,
    dvsc.name AS sub_category_name,
    CAST(COALESCE(v.visitors, 0) AS INTEGER) AS visitors_last_7_days,
    CAST(rank() OVER (ORDER BY COALESCE(v.visitors, 0) DESC) AS INTEGER) AS visitor_rank,
    CAST(dvi.id AS INTEGER) AS image_id_pos_0,
    'https://static.wowcher.co.uk/images/deal/' || dvi.deal_voucher_id || '/' || dvi.id || '.' || dvi.extension AS image_url_pos_0,
    dvi.extension
FROM real.deal_voucher dv
left JOIN real.product p ON p.id = dv.id AND p.status_id = 1
JOIN visitors v ON v.deal_id_evar = dv.id
LEFT JOIN real.deal_voucher_site dvs ON dvs.deal_voucher_id = dv.id
LEFT JOIN real.deal_voucher_image dvi ON dvi.deal_voucher_id = dv.id AND dvi.position = 0
LEFT JOIN real.deal_voucher_category dvc ON dvc.id = dv.category_id
LEFT JOIN real.deal_voucher_sub_category dvsc ON dvsc.id = dv.sub_category_id
LEFT JOIN real.site s ON s.id = dv.deal_location_id AND s.site_name = 'National Deal'
WHERE trunc(dv.closing_date) >= trunc(sysdate) + 21
AND dv.currency = 'GBP'
and dv.business_id != 16456891
AND NOT EXISTS (
    SELECT 1
      FROM temp.opt_image_variants oiv
      WHERE oiv.deal_voucher_id = dv.id
      AND (
      ((batch_name ILIKE '%manual%' AND status IN (1,3))
        or (batch_name = 'OPEN AI Images' AND status IN (1,3))
       OR (batch_name NOT IN ('Manual Opt', 'OPEN AI Images') AND status = 1)
        )
    )
)
and dv.id = 40916854
and dv.email_subject not ilike '%mystery%'
GROUP BY dv.id, dv.email_subject, dvc.name, dvsc.name,dvc.canonical_path_type, dvi.id, dvi.deal_voucher_id, dvi.extension, v.visitors,dv.deal_product
ORDER BY COALESCE(v.visitors, 0) DESC
LIMIT 150;
    """
    df = pd.read_sql(query, conn)
    conn.close()
    return df

def get_deals_for_processing_revenue():
    # Establish connection to Redshift
    conn = psycopg2.connect(
        host=os.environ.get("REDSHIFT_HOST" ),
        port=os.environ.get("REDSHIFT_PORT"),
        dbname=os.environ.get("REDSHIFT_DBNAME"),
        user=os.environ.get("REDSHIFT_USER"),
        password=os.environ.get("REDSHIFT_PASSWORD")
    )
    
    # Query to get deals by revenue instead of visits
    query = """
WITH deal_revenue AS (
    SELECT
        t.deal_id,
        SUM(t.net) AS total_revenue
    FROM real.transactions t
    JOIN real.deal_voucher dv ON t.deal_id = dv.id
    WHERE t.order_date > TRUNC(SYSDATE - 8)
      AND dv.currency = 'GBP'
      AND t.brand_id = 1
      AND t.domain = 'WOWCHER'
    GROUP BY t.deal_id
)
SELECT
    CAST(dv.id AS INTEGER) AS id,
    CASE 
        WHEN dvc.canonical_path_type = 'NATIONAL' THEN dv.deal_product 
        WHEN dvc.canonical_path_type = 'LOCAL' THEN dv.deal_product 
        WHEN dvc.canonical_path_type = 'TRAVEL' THEN dv.deal_product 
    END AS email_subject,
    dvc.name AS category_name,
    dvc.canonical_path_type as vertical,
    dvsc.name AS sub_category_name,
    CAST(COALESCE(dr.total_revenue, 0) AS DECIMAL(10,2)) AS revenue_last_7_days,
    CAST(rank() OVER (ORDER BY COALESCE(dr.total_revenue, 0) DESC) AS INTEGER) AS revenue_rank,
    CAST(dvi.id AS INTEGER) AS image_id_pos_0,
    'https://static.wowcher.co.uk/images/deal/' || dvi.deal_voucher_id || '/' || dvi.id || '.' || dvi.extension AS image_url_pos_0,
    dvi.extension
FROM real.deal_voucher dv
LEFT JOIN real.product p ON p.id = dv.id AND p.status_id = 1
LEFT JOIN deal_revenue dr ON dr.deal_id = dv.id
LEFT JOIN real.deal_voucher_site dvs ON dvs.deal_voucher_id = dv.id
LEFT JOIN real.deal_voucher_image dvi ON dvi.deal_voucher_id = dv.id AND dvi.position = 0
LEFT JOIN real.deal_voucher_category dvc ON dvc.id = dv.category_id
LEFT JOIN real.deal_voucher_sub_category dvsc ON dvsc.id = dv.sub_category_id
LEFT JOIN real.site s ON s.id = dv.deal_location_id AND s.site_name = 'National Deal'
WHERE trunc(dv.closing_date) >= trunc(sysdate) + 21
AND dv.currency = 'GBP'
AND dv.business_id not in (16456891,16512447)
AND NOT EXISTS (
    SELECT 1
      FROM temp.opt_image_variants oiv
      WHERE oiv.deal_voucher_id = dv.id
      AND (
      ((batch_name ILIKE '%manual%' AND status IN (1,3))
        or (batch_name = 'OPEN AI Images' AND status IN (1,3))
       OR (batch_name NOT IN ('Manual Opt', 'OPEN AI Images') AND status = 1)
        )
    )
)
AND dv.email_subject NOT ILIKE '%mystery%'
and dvsc.name = 'Spa'
GROUP BY dv.id, dv.email_subject, dvc.name, dvsc.name, dvc.canonical_path_type, dvi.id, dvi.deal_voucher_id, dvi.extension, dr.total_revenue, dv.deal_product
ORDER BY COALESCE(dr.total_revenue, 0) DESC
LIMIT 500;
    """
    df = pd.read_sql(query, conn)
    conn.close()
    return df

# Get deals to process
deals_df = get_deals_for_processing_revenue()
display(deals_df.head())
print(len(deals_df) ,': Deals Found')


  df = pd.read_sql(query, conn)


Unnamed: 0,id,email_subject,category_name,vertical,sub_category_name,revenue_last_7_days,revenue_rank,image_id_pos_0,image_url_pos_0,extension
0,39394863,"Bannatyne Spa Day for 2 with Treatments, Lunch...",Beauty,LOCAL,Spa,6583.36,1,1555151.0,https://static.wowcher.co.uk/images/deal/39394...,jpg
1,39263347,"Bannatyne Spa Day with Up to 4 Treatments, Spa...",Beauty,LOCAL,Spa,1665.81,2,1550504.0,https://static.wowcher.co.uk/images/deal/39263...,jpg
2,39215726,"Bannatyne Spa Day with Cream Tea: 1 Treatment,...",Beauty,LOCAL,Spa,1100.25,3,1544446.0,https://static.wowcher.co.uk/images/deal/39215...,jpg
3,40810751,5* The Chelsea Harbour Hotel: Spa Day with 2-C...,Beauty,LOCAL,Spa,784.56,4,1664533.0,https://static.wowcher.co.uk/images/deal/40810...,jpg
4,40479853,"4* Westerwood Spa Day with ELEMIS Treatment, 3...",Beauty,LOCAL,Spa,711.12,5,1678335.0,https://static.wowcher.co.uk/images/deal/40479...,jpg


197 : Deals Found


In [112]:
deals_df.columns

Index(['id', 'email_subject', 'category_name', 'vertical', 'sub_category_name',
       'revenue_last_7_days', 'revenue_rank', 'image_id_pos_0',
       'image_url_pos_0', 'extension'],
      dtype='object')

In [113]:
def calc_sum_of_deals(deal_ids):
    # Establish connection to Redshift
    conn = psycopg2.connect(
        host=os.environ.get("REDSHIFT_HOST"),
        port=os.environ.get("REDSHIFT_PORT"),
        dbname=os.environ.get("REDSHIFT_DBNAME"),
        user=os.environ.get("REDSHIFT_USER"),
        password=os.environ.get("REDSHIFT_PASSWORD")
    )
    # Example query - modify as needed
    query = f"""
    WITH txns AS (
        SELECT t.deal_id,
            t.net,
            dvc.canonical_path_type as vertical
        FROM real.transactions t
                JOIN real.deal_voucher dv ON t.deal_id = dv.id
                join real.deal_voucher_category dvc on dvc.id = dv.category_id
        WHERE t.order_date > TRUNC(SYSDATE - 8)
        AND dv.currency = 'GBP'
        AND t.brand_id = 1
        AND t.domain = 'WOWCHER'
    )
    SELECT
        t.vertical,
        ROUND(SUM(CASE WHEN t.deal_id IN ({','.join(map(str, deal_ids))}) THEN t.net ELSE 0 END), 2) AS selected_net,
        ROUND(SUM(t.net), 2) AS total_net,
        ROUND(
            100 * SUM(CASE WHEN t.deal_id IN ({','.join(map(str, deal_ids))}) THEN t.net ELSE 0 END)
            / NULLIF(SUM(t.net), 0), 2
        ) AS selected_net_pct
    FROM txns t
    group by t.vertical
    """
    df = pd.read_sql(query, conn)
    conn.close()
    return df

# Example usage with a list of deal IDs
deal_ids = deals_df['id'].to_list()[:10]  # Replace with actual deal IDs
prop_net = calc_sum_of_deals(deal_ids)
prop_net

  df = pd.read_sql(query, conn)


Unnamed: 0,vertical,selected_net,total_net,selected_net_pct
0,NATIONAL,0.0,229061.28,0.0
1,TRAVEL,0.0,162090.6,0.0
2,LOCAL,12850.91,135443.98,9.49


## S3 Upload Functions

Functions to upload generated images to S3

In [115]:
def upload_to_s3(file_content, bucket_name, s3_key):
    """
    Upload a file to S3
    
    Parameters:
    - file_content: Binary content of the file
    - bucket_name: S3 bucket name
    - s3_key: Path in S3 where file will be stored
    
    Returns:
    - URL of the uploaded file
    """
    try:
        # Determine content type based on file extension
        extension = os.path.splitext(s3_key)[1].lower()
        content_type = 'image/jpeg' if extension in ['.jpg', '.jpeg'] else \
                      'image/png' if extension == '.png' else \
                      'image/webp' if extension == '.webp' else \
                      'application/octet-stream'
                      
        s3_client.put_object(
            Body=file_content,
            Bucket=bucket_name,
            Key=s3_key,
            ContentType=content_type,
            CacheControl='no-cache, no-store, must-revalidate',
            Expires=0
        )
        return f"https://static.wowcher.co.uk/{s3_key}"
    except NoCredentialsError:
        print("Credentials not available")
        return None

# Prompts 

In [116]:
national_prompt_json = """{
  "camera": {
    "model": "Canon EOS R5",
    "lens": "50mm f/1.4",
    "iso": 100,
    "aperture": "f/4.0",
    "shutter_speed": "1/125s",
    "white_balance": "auto",
    "style": "studio-quality product photography",
    "lighting": "softbox or natural window light — even, shadow-minimizing illumination",
    "focus": "manual focus on product's front face or key feature",
    "depth_of_field": "shallow-to-moderate — product in sharp focus, background subtly blurred"
  },
  "product_handling": {
    "email_subject": "{email_subject}",
    "color_consistency": "select one color variant per image; reflect available options in text only",
    "feature_accuracy": "do not invent new features or alter the product",
    "realism_rules": {
      "no floating objects unless contextually valid": true,
      "no unrealistic reflections or surfaces": true,
      "no invented logos, textures, or packaging": true
    }
  },
  "scene": {
    "type": "realistic and aspirational",
    "contextual_relevance": "environment must reflect intended product use (e.g., kitchen for blender, desk for laptop)",
    "background_priority": "clean, complementary, secondary to product",
    "lighting_and_focus": {
      "emphasis": "product is the primary visual anchor",
      "shadows": "soft and directional if present, consistent with single light source",
      "highlights": "natural, never blown out"
    }
  },
  "text_overlay": {
    "headline": {
      "position": "top or top-left of frame",
      "style": {
        "font": "modern sans-serif or minimal serif",
        "contrast": "high enough for readability",
        "padding": "min 4px"
      },
      "rules": {
        "no duplication elsewhere in the image": true,
        "no pricing": true,
        "no spelling errors": true
      }
    },
    "callouts": {
      "count": "2–4 max",
      "placement": "outside bottom-right 20% of image",
      "content": "key features or differentiators only (e.g., 'Wireless Charging', '100% Recyclable', 'Designed in Japan')",
      "style": {
        "font": "smaller, clean sans-serif",
        "padding": "4px min around text boxes",
        "background": "semi-transparent or minimal",
        "connector_lines": "subtle, avoid clutter"
      }
    }
  },
  "design_constraints": {
    "bottom_right_clear": true,
    "text_box_padding_px": 4,
    "no_pricing": true,
    "no_branding_unless_provided": true,
    "spelling_must_be_100_correct": true,
    "no_logo_invention": true
  }
}"""

local_prompt = """ 
Create ONE high-resolution, photo-realistic promotional image advertising {email_subject}. The Image should look like it was taken by a proffessional phographer, well lit and using a proffessional camera. Dont put the whole text from the product name in the image, condense it. 

Final image must contain zero spelling mistakes.

1. Service Representation
Visually communicate the core experience or service in an authentic and aspirational way.
Include people only if appropriate — expressions must look natural, relaxed, and genuinely engaged. The people should be objectively attractive.
Avoid exaggerated or artificial staging.
Focus on one setting or moment that clearly represents the value of the experience. 
Make sure any people providing the service are dressed in a proffesional manner. 
Make sure any people's bodies in the image are positioned in a way that is not physically possible. 
If applicable, include accurate tools, furnishings, or attire that reflect the service.

2. Scene & Background
Place the experience in a realistic, appealing setting suited to the type of service.
Use natural lighting and soft depth of field to keep the service or activity as the clear focal point.
The background should feel aspirational but believable — it must not compete with or overshadow the subject.
Avoid busy or generic backdrops; choose settings that suggest quality, comfort, or excitement.

3. Infographic & Text Elements
Add title text towards the top of the image.
Do not repeat the same text elsewhere in the image.
Ensure all text and stickers avoid the bottom-right 20% of the image.
Choose clear, legible text with 4px padding around all elements.

4. Design Constraints
Keep the bottom-right corner completely clear of any graphics or text.
Do not include any pricing or logos unless specifically provided.
Ensure realism — no invented props, features, or environments.
Colour palettes, uniforms, and tools must accurately reflect the service being portrayed. 
"""

spa_prompt = """
**Create ONE high-resolution, photo-realistic promotional image for {email_subject}.**
• Shoot should evoke authentic, aspirational quality as if taken with a pro-grade DSLR in perfect, natural lighting.
• **Condense the on-image title to ≤ 4 clear words** that capture the offer (do **not** print the full 10-word product name).
• **Zero spelling errors** anywhere in the image.
### 1 — Service Representation
• Show a single, decisive moment that communicates the core benefit of the service. Prioritise the swimming pool faciltiies and subject recieving a specificed treatment.
• Include people only if it adds clarity. They must appear naturally attractive, relaxed, and **in physically possible poses**. Do not cut off any legs.
• People included should be positioned within the centre of the image. Their heads should not be placed in the bottom left hand corner.
• Any staff shown should wear professional, service-appropriate attire.
• Use only accurate tools, furnishings, and attire—no invented props or environments.
### 2 — Scene & Background
• Place the scene in a realistic, appealing setting that reinforces the value of the service. Prioritise Swimming Pools and Treatment rooms
• Employ natural lighting plus a soft depth of field so the service/activity is the unmistakable focal point.
• Scene should primarily focus on the spa treatment aspects of the deal. Do not make food the main component of the image. Where appropriate, subject may be shown relaxing poolside with prosecco **ONLY if "Prosecco" is included in the deal.
• If the deal is for 2 people include 2 subjects. Show them relaxing by facilities if applicable or enjoying their spa experience if applicable.
• Background should feel premium yet believable; keep it uncluttered and secondary.
### 3 — Infographic & Text Elements
• Position the short headline near the top of the frame.
• Do **not** repeat text elsewhere.
• Text should emphasise
• Leave a 4 px padding around every text or sticker element.
• Avoid the lower-right 20 % of the image for any graphics or text.
• Font: "Bold Arial",
• Contrast: "high contrast with background"
• For Spa Day's include the Spa Name and indication of the deal e.g "Spa Pamper Day", "Spa Day for 2", "Mother Daughter Spa Day".
### 4 — Design Constraints
• Bottom-right corner must stay completely clear.
• Exclude pricing, logos, or extra icons unless explicitly supplied.
• Maintain strict realism in color palette, uniforms, props, and setting.
### 6 - Deal Camera and Lighting
• Camera: Canon EOS 5D Mark IV
• Wide Angle Lens: 16-35mm.
• Iso: 100,
• Professional Spa and Hotel style photography. Primary concern is showcasing the facilities and treatments.
• Lighting: Images should be visually brighter and appealing to grab attention. Lower lighting for relaxation purpose is appropriate but avoid the image looking dull or dark.
"""


uk_travel_prompt = """
Create one high-resolution, photo-realistic collage-style image to promote: {email_subject}.
It should look like a polished travel magazine spread—natural light, crisp detail, and elegant blending between photos.

1 | Collage Layout & Content
Use 2–3 softly blended vignettes (vertical or horizontal strips with feathered edges).

Do not make the hotel room the main image—spread focus across:

A scenic teaser (e.g. skyline, hotel exterior, landmark, or nature view).

A travel experience (e.g. welcome drink, rooftop bar, plated meal at a stylish table).

An interior shot (e.g. hotel room or restaurant).

Transitions between images should feel natural—no harsh borders or scrapbook-style effects.

2 | People & Vibe
If people appear, show just 1–3 well-dressed travelers suited to each setting.

Show authentic atmosphere matching the destination and hotel level.

3 | Lighting & Scene
Use soft, realistic lighting that fits each vignette (e.g. golden hour outdoors, cozy indoor light).

Use realistic decor, food, and architecture—no generic or fake-looking stock elements.

4 | Text & Layout
Add a short, clear headline (trip title) in the upper third—not across image seams.

Leave the bottom-right 20% blank (no text or graphics).

Use Arial font with 4 px padding, clean and legible.

5 | Design Rules
No prices, logos, or made-up props.

No surreal effects or CGI.

All colors, uniforms, and furnishings must reflect the real hotel and location.
"""

international_travel_prompt = """ 
{
  "camera": {
    "model": "Canon EOS 5D Mark IV",
    "lens": "85mm f/1.8",
    "iso": 100,
    "aperture": "f/1.8",
    "style": "professional travel influencer photography",
    "lighting": "natural light — golden hour or bright midday sun, or ambient dusk depending on whats appropriate."
  },
  "subject": {subject_placeholder},
  "scene": {
    "location": "{email_subject}",
    "backdrop": "recognisable or breathtaking view — e.g., choose one of the provided images of a landmark, beach, mountains, cityscape, or countryside. Do not merge them",
    "realism": true,
    "elements": [
      "local architecture or nature",
      "environment appropriate to the destination"
    ]
  },
  "image_composition": {
    "framing": "subject in foreground or mid-ground with depth and scenic context",
    "focus": "shallow depth of field with natural background blur",
    "color_palette": "natural and warm",
    "resolution": "high",
    "style_keywords": [
      "aspirational",
      "wanderlust",
      "authentic",
      "lifestyle",
      "editorial travel",
      "quiet luxury"
    ]
  },
  "text_overlay": {
    "position": "top 20% of image only",
    "style": {
      "font": "Bold Arial",
      "contrast": "high contrast with background",
      "padding": "4px minimum around all text"
    },
    "content": {
      "headline": "Condensed, slightly differently worded version of: {email_subject}",
      "rules": {
        "no_full_product_name": true,
        "no_logos": true,
        "no_pricing": true,
        "no_spelling_errors": true
      }
    }
  },
  "generation_constraints": {
    "avoid_text_in_bottom_right": true,
    "no surreal composites": true,
    "no unrealistic lighting": true,
    "no fabricated architecture": true,
    "real_skin_tones_and_shadows": true,
    "no stiff posing": true,
    "avoid_stock_image_look": true
  }
"""

subjects = [
  {
    "include_person": True,
    "subject_type": "solo_female",
    "pose": "natural, candid — walking, gazing, appreciating view",
    "look": "stylish, relaxed, immersed in the environment",
    "attire": "seasonal and appropriate to destination, subtly fashionable",
    "hair": "long, flowing, brunette"
  },
  {
    "include_person": True,
    "subject_type": "solo_female",
    "pose": "natural, candid — walking, gazing, appreciating view",
    "look": "stylish, relaxed, immersed in the environment",
    "attire": "seasonal and appropriate to destination, subtly fashionable",
    "hair": "short, pixie cut, blonde"
  },
  {
    "include_person": True,
    "subject_type": "solo_female",
    "pose": "natural, candid — walking, gazing, appreciating view",
    "look": "stylish, relaxed, immersed in the environment",
    "attire": "seasonal and appropriate to destination, subtly fashionable",
    "hair": "braided, shoulder length, black"
  },
  {
    "include_person": True,
    "subject_type": "solo_female",
    "pose": "natural, candid — walking, gazing, appreciating view",
    "look": "stylish, relaxed, immersed in the environment",
    "attire": "seasonal and appropriate to destination, subtly fashionable",
    "hair": "tied back, ponytail, red"
  },
  {
    "include_person": True,
    "subject_type": "solo_female",
    "pose": "natural, candid — walking, gazing, appreciating view",
    "look": "stylish, relaxed, immersed in the environment",
    "attire": "seasonal and appropriate to destination, subtly fashionable",
    "hair": "wavy, loose, auburn"
  },
  {
    "include_person": True,
    "subject_type": "solo_female",
    "pose": "natural, candid — walking, gazing, appreciating view",
    "look": "stylish, relaxed, immersed in the environment",
    "attire": "seasonal and appropriate to destination, subtly fashionable",
    "hair": "straight, long, dark brown"
  },
  {
    "include_person": True,
    "subject_type": "solo_female",
    "pose": "natural, candid — walking, gazing, appreciating view",
    "look": "stylish, relaxed, immersed in the environment",
    "attire": "seasonal and appropriate to destination, subtly fashionable",
    "hair": "bob cut, chestnut"
  },
  {
    "include_person": True,
    "subject_type": "solo_female",
    "pose": "natural, candid — walking, gazing, appreciating view",
    "look": "stylish, relaxed, immersed in the environment",
    "attire": "seasonal and appropriate to destination, subtly fashionable",
    "hair": "curly, tied up, light brown"
  },
  {
    "include_person": True,
    "subject_type": "solo_female",
    "pose": "natural, candid — walking, gazing, appreciating view",
    "look": "stylish, relaxed, immersed in the environment",
    "attire": "seasonal and appropriate to destination, subtly fashionable",
    "hair": "layered, shoulder length, blonde"
  },
  {
    "include_person": True,
    "subject_type": "solo_female",
    "pose": "natural, candid — walking, gazing, appreciating view",
    "look": "stylish, relaxed, immersed in the environment",
    "attire": "seasonal and appropriate to destination, subtly fashionable",
    "hair": "straight, medium length, black"
  },
  {
    "include_person": True,
    "subject_type": "solo_female",
    "pose": "natural, candid — walking, gazing, appreciating view",
    "look": "stylish, relaxed, immersed in the environment",
    "attire": "seasonal and appropriate to destination, subtly fashionable",
    "hair": "bob cut, red"
  },
  {
    "include_person": True,
    "subject_type": "solo_female",
    "pose": "natural, candid — walking, gazing, appreciating view",
    "look": "stylish, relaxed, immersed in the environment",
    "attire": "seasonal and appropriate to destination, subtly fashionable",
    "hair": "curly, long, dark brown"
  },
  {
    "include_person": True,
    "subject_type": "solo_female",
    "pose": "natural, candid — walking, gazing, appreciating view",
    "look": "stylish, relaxed, immersed in the environment",
    "attire": "seasonal and appropriate to destination, subtly fashionable",
    "hair": "wavy, shoulder length, light brown"
  },
  {
    "include_person": True,
    "subject_type": "solo_male",
    "pose": "natural, candid — walking, gazing, appreciating view",
    "look": "stylish, relaxed, immersed in the environment",
    "attire": "seasonal and appropriate to destination, subtly fashionable",
    "hair": "curly, medium length, dark blonde"
  },
  {
    "include_person": True,
    "subject_type": "solo_male",
    "pose": "natural, candid — walking, gazing, appreciating view",
    "look": "stylish, relaxed, immersed in the environment",
    "attire": "seasonal and appropriate to destination, subtly fashionable",
    "hair": "buzz cut, black"
  },
  {
    "include_person": True,
    "subject_type": "solo_male",
    "pose": "natural, candid — walking, gazing, appreciating view",
    "look": "stylish, relaxed, immersed in the environment",
    "attire": "seasonal and appropriate to destination, subtly fashionable",
    "hair": "wavy, shoulder length, brown"
  },
  {
    "include_person": True,
    "subject_type": "solo_male",
    "pose": "natural, candid — walking, gazing, appreciating view",
    "look": "stylish, relaxed, immersed in the environment",
    "attire": "seasonal and appropriate to destination, subtly fashionable",
    "hair": "short, spiky, blonde"
  },
  {
    "include_person": True,
    "subject_type": "solo_male",
    "pose": "natural, candid — walking, gazing, appreciating view",
    "look": "stylish, relaxed, immersed in the environment",
    "attire": "seasonal and appropriate to destination, subtly fashionable",
    "hair": "curly, tied back, auburn"
  },
  {
    "include_person": True,
    "subject_type": "solo_male",
    "pose": "natural, candid — walking, gazing, appreciating view",
    "look": "stylish, relaxed, immersed in the environment",
    "attire": "seasonal and appropriate to destination, subtly fashionable",
    "hair": "buzz cut, dark brown"
  },
  {
    "include_person": True,
    "subject_type": "solo_male",
    "pose": "natural, candid — walking, gazing, appreciating view",
    "look": "stylish, relaxed, immersed in the environment",
    "attire": "seasonal and appropriate to destination, subtly fashionable",
    "hair": "long, flowing, black"
  },
  {
    "include_person": True,
    "subject_type": "solo_male",
    "pose": "natural, candid — walking, gazing, appreciating view",
    "look": "stylish, relaxed, immersed in the environment",
    "attire": "seasonal and appropriate to destination, subtly fashionable",
    "hair": "medium length, tousled, light brown"
  },
  {
    "include_person": True,
    "subject_type": "solo_male",
    "pose": "natural, candid — walking, gazing, appreciating view",
    "look": "stylish, relaxed, immersed in the environment",
    "attire": "seasonal and appropriate to destination, subtly fashionable",
    "hair": "shaved, dark"
  },
  {
    "include_person": True,
    "subject_type": "solo_male",
    "pose": "natural, candid — walking, gazing, appreciating view",
    "look": "stylish, relaxed, immersed in the environment",
    "attire": "seasonal and appropriate to destination, subtly fashionable",
    "hair": "curly, tied up, chestnut"
  },
  {
    "include_person": True,
    "subject_type": "solo_male",
    "pose": "natural, candid — walking, gazing, appreciating view",
    "look": "stylish, relaxed, immersed in the environment",
    "attire": "seasonal and appropriate to destination, subtly fashionable",
    "hair": "short, neat, grey"
  },
  {
    "include_person": True,
    "subject_type": "solo_male",
    "pose": "natural, candid — walking, gazing, appreciating view",
    "look": "stylish, relaxed, immersed in the environment",
    "attire": "seasonal and appropriate to destination, subtly fashionable",
    "hair": "medium length, slicked back, black"
  }
]

## Image Generation

Function to call the generate_image.py script and process the results

In [117]:
# Cell to replace external script dependency with integrated functionality
import pandas as pd
import os
import psycopg2
from dotenv import load_dotenv
import requests
import base64
from openai import OpenAI
from io import BytesIO
import json
import random
import tempfile
from concurrent.futures import ThreadPoolExecutor
from tqdm.notebook import tqdm
import urllib.request
import logging

# Set up logging to control verbosity
logging.basicConfig(level=logging.WARNING)

# Load environment variables if not already done
if 'client' not in locals():
    load_dotenv()
    client = OpenAI(api_key=os.getenv('OPEN_AI_API_KEY'))
    print(f"OpenAI client initialized with API key: {os.getenv('OPEN_AI_API_KEY')[:5]}...")



def process_json_prompt(json_prompt_template, email_subject, formatted_highlights=""):
    """
    Process a JSON prompt template and convert it to a string prompt for OpenAI
    """
    try:
        # Replace placeholders in the original JSON string BEFORE parsing
        modified_json = json_prompt_template.replace("{email_subject}", email_subject)
        if formatted_highlights:
            modified_json = modified_json.replace("{formatted_highlights}", formatted_highlights)
        
        # Replace subject placeholder with random subject
        if "{subject_placeholder}" in modified_json:
            random_subject = random.choice(subjects)
            subject_json = json.dumps(random_subject, separators=(',', ':'))
            modified_json = modified_json.replace("{subject_placeholder}", subject_json)
        
        # Parse the modified JSON
        prompt_data = json.loads(modified_json)
        
        # Convert JSON structure to a readable prompt using recursive function
        def json_to_prompt(obj, indent=0):
            """Recursively convert JSON object to readable prompt string"""
            lines = []
            indent_str = "  " * indent
            
            if isinstance(obj, dict):
                for key, value in obj.items():
                    if isinstance(value, (dict, list)):
                        readable_key = key.replace('_', ' ').title()
                        lines.append(f"{indent_str}{readable_key}:")
                        lines.extend(json_to_prompt(value, indent + 1))
                    else:
                        readable_key = key.replace('_', ' ').title()
                        lines.append(f"{indent_str}- {readable_key}: {value}")
            elif isinstance(obj, list):
                for item in obj:
                    if isinstance(item, (dict, list)):
                        lines.extend(json_to_prompt(item, indent))
                    else:
                        lines.append(f"{indent_str}- {item}")
            
            return lines
        
        # Convert the entire JSON structure
        prompt_lines = json_to_prompt(prompt_data)
        
        return "\n".join(prompt_lines)
        
    except json.JSONDecodeError as e:
        return json_prompt_template.format(
            email_subject=email_subject,
            formatted_highlights=formatted_highlights
        )

def get_deal_data_for_image(deal_id, vertical, category_name, sub_category_name):
    """Get deal data needed for image generation"""
    # Establish connection to Redshift
    conn = psycopg2.connect(
        host=os.environ.get("REDSHIFT_HOST"),
        port=os.environ.get("REDSHIFT_PORT"),
        dbname=os.environ.get("REDSHIFT_DBNAME"),
        user=os.environ.get("REDSHIFT_USER"),
        password=os.environ.get("REDSHIFT_PASSWORD")
    )

    # Get email subject
    email_subject_query = """
    SELECT 
        CASE 
            WHEN POSITION('PRICE DROP!' IN deal_product) > 0 
            THEN REPLACE(REPLACE(deal_product, 'PRICE DROP!', ''), '24HR', '') 
            ELSE REPLACE(deal_product, '24HR', '') 
        END as email_subject 
    FROM wowdwhprod.real.deal_voucher
    WHERE id = %s
    """
    with conn.cursor() as cur:
        cur.execute(email_subject_query, (deal_id,))
        email_subject_result = cur.fetchone()
        email_subject = email_subject_result[0] if email_subject_result else "Deal"
    
    # Set image limit based on vertical
    image_limit = 10 if vertical == "NATIONAL" else (5 if vertical == "LOCAL" else 3)
    
    # Get image URLs and extract extension information
    image_query = f"""
    SELECT 
        'https://static.wowcher.co.uk/images/deal/' || deal_voucher_id || '/' || id || '.' || extension AS image_url,
        extension
    FROM wowdwhprod.real.deal_voucher_image
    WHERE deal_voucher_id = %s
    ORDER BY position
    LIMIT {image_limit}
    """
    with conn.cursor() as cur:
        cur.execute(image_query, (deal_id,))
        image_results = cur.fetchall()
        image_urls = [row[0] for row in image_results]
        extensions = [row[1] for row in image_results]
        original_extension = extensions[0] if extensions else "png"

    # Get highlights
    highlights_query = """
    SELECT
    SPLIT_PART(highlight, ':', 1) AS highlight
    FROM wowdwhprod.real.deal_voucher_highlight
    WHERE deal_voucher_id = %s
    LIMIT 3;
    """
    with conn.cursor() as cur:
        cur.execute(highlights_query, (deal_id,))
        highlights_results = cur.fetchall()
        highlights = [row[0] for row in highlights_results]

    conn.close()

    # Build the prompt - updated logic
    formatted_highlights = "\n".join([f"• {h}" for h in highlights]) if highlights else ""
    
    if vertical == "NATIONAL":
        prompt = process_json_prompt(national_prompt_json, email_subject)
    elif vertical == "TRAVEL" and category_name not in ['Spas & Country House','UK Seaside','UK City Breaks','Holiday Extras']:
        # Use the new JSON prompt processing for international travel
        prompt = process_json_prompt(international_travel_prompt, email_subject, formatted_highlights)
    elif category_name in ['Spas & Country House','UK Seaside','UK City Breaks','Holiday Extras']:
        prompt = uk_travel_prompt.format(email_subject=email_subject)
    elif sub_category_name == 'Spa':
        prompt = spa_prompt.format(email_subject=email_subject)
    else:
        prompt = local_prompt.format(email_subject=email_subject)

    return {
        'prompt': prompt,
        'image_urls': image_urls,
        'original_extension': original_extension
    }

def download_image_to_file(url, filename):
    """Download an image from URL and save to file"""
    response = requests.get(url)
    if response.status_code == 200:
        with open(filename, 'wb') as f:
            f.write(response.content)
        return filename
    else:
        raise Exception(f"Failed to download image from {url}")
def generate_image_integrated(deal_id, original_id, temp_dir, vertical, category_name, sub_category_name, verbose=False):
    """
    Generate image using OpenAI's API with minimal output
    """
    try:
        if verbose:
            print(f"Processing deal {deal_id}, vertical: {vertical}, category: {category_name}")
        
        # Get data for the deal
        try:
            deal_data = get_deal_data_for_image(deal_id, vertical, category_name)
            if verbose:
                print(f"Deal data retrieved successfully for {deal_id}")
        except Exception as e:
            if verbose:
                print(f"Error getting deal data for {deal_id}: {str(e)}")
            raise e
            
        prompt = deal_data['prompt']
        image_urls = deal_data['image_urls']
        original_extension = deal_data['original_extension']
        
        if verbose:
            print(f"Found {len(image_urls)} images for deal {deal_id}")
            print(f"Prompt length: {len(prompt)} characters")
        
        # Create output filename
        output_filename = os.path.join(temp_dir, f"variant_{deal_id}_{original_id}.{original_extension}")
        
        if not image_urls:
            raise Exception("No images found for this deal")
        
        # Download images silently
        image_files = []
        temp_filenames = []
        for idx, url in enumerate(image_urls[:16]):
            try:
                temp_filename = os.path.join(temp_dir, f"temp_image_{deal_id}_{idx}.png")
                download_image_to_file(url, temp_filename)
                temp_filenames.append(temp_filename)
                image_files.append(open(temp_filename, "rb"))
                if verbose:
                    print(f"Downloaded image {idx+1}/{len(image_urls)} for deal {deal_id}")
            except Exception as e:
                if verbose:
                    print(f"Error downloading image {idx} for deal {deal_id}: {str(e)}")
                raise e
        
        # Call OpenAI API
        try:
            if verbose:
                print(f"Calling OpenAI API for deal {deal_id}...")
            result = client.images.edit(
                model="gpt-image-1",
                image=image_files,
                prompt=prompt,
                size="1536x1024",
                quality="high",
                background="auto",
                n=1
            )
            if verbose:
                print(f"OpenAI API call successful for deal {deal_id}")
        except Exception as e:
            if verbose:
                print(f"OpenAI API error for deal {deal_id}: {str(e)}")
            # Close file handles before raising
            for f in image_files:
                f.close()
            raise e
        
        # Process and save the response
        image_base64 = result.data[0].b64_json
        image_bytes = base64.b64decode(image_base64)
        with open(output_filename, "wb") as f:
            f.write(image_bytes)
            
        if verbose:
            print(f"Image saved successfully for deal {deal_id}")
            
        # Close file handles
        for f in image_files:
            f.close()
            
        # Delete temporary files
        for filename in temp_filenames:
            if os.path.exists(filename):
                try:
                    os.remove(filename)
                except:
                    pass
        
        # Process token usage details silently
        token_info = {}
        if hasattr(result, 'usage'):
            total_tokens = result.usage.total_tokens
            input_tokens = result.usage.input_tokens
            output_tokens = result.usage.output_tokens
            input_text_tokens = result.usage.input_tokens_details.text_tokens
            input_image_tokens = result.usage.input_tokens_details.image_tokens
            
            token_info["Total tokens"] = str(total_tokens)
            token_info["Input tokens"] = str(input_tokens)
            token_info["Output tokens"] = str(output_tokens)
            token_info["Input text tokens"] = str(input_text_tokens)
            token_info["Input image tokens"] = str(input_image_tokens)
            
            # Calculate cost
            cost = (input_text_tokens * 5 + input_image_tokens * 10 + output_tokens * 40) / 1000000
            token_info["Cost"] = f"${cost:.6f}"
            
            if verbose:
                print(f"Cost for deal {deal_id}: ${cost:.6f}")
        
        return output_filename, original_extension, token_info, prompt
        
    except Exception as e:
        if verbose:
            print(f"FINAL ERROR for deal {deal_id}: {str(e)}")
            import traceback
            print(f"Full traceback: {traceback.format_exc()}")
        return None, None, None, None
    
async def process_deals_async(deals_df, max_workers=4, verbose=False):
    """
    Process multiple deals asynchronously with clean, minimal output
    """
    results = []
    
    if verbose:
        print(f"Available columns in dataframe: {list(deals_df.columns)}")
    
    # Create a temporary directory for image files
    with tempfile.TemporaryDirectory() as temp_dir:
        # Use ThreadPoolExecutor for parallelization
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            # Create tasks for all deals
            futures = []
            for idx, row in deals_df.iterrows():
                # Get deal_id, original_id, and vertical from the actual column names
                deal_id = row.get('id')
                original_id = row.get('image_id_pos_0', 'main')
                vertical = row.get('vertical', 'LOCAL')  # Default to LOCAL if not specified
                category_name = row.get('category_name', '')  # Get category_name from row
                sub_category_name = row.get('sub_category_name', '')  # Get sub_category_name from row
                    
                if deal_id is None:
                    if verbose:
                        print(f"Warning: Could not find deal ID in row: {row}")
                    continue
                    
                future = executor.submit(generate_image_integrated, deal_id, original_id, temp_dir, vertical, row.get('category_name', ''), verbose)
                futures.append((future, deal_id, original_id, row))
            # Create just ONE overall progress bar
            print(f"Processing {len(futures)} deals...")
            progress_bar = tqdm(total=len(futures), desc="Overall progress")
            
            # Process results as they complete
            for future, deal_id, original_id, row in futures:
                try:
                    image_path, extension, token_info, prompt = future.result()
                    
                    if image_path:
                        # Read the image file
                        with open(image_path, 'rb') as img_file:
                            img_content = img_file.read()
                        
                        # Upload to S3 with correct extension
                        s3_key = f"images/deal/{deal_id}/{original_id}_variant.{extension}"
                        s3_url = upload_to_s3(img_content, 'static.wowcher.co.uk', s3_key)
                        
                        # Add to results
                        result_row = row.to_dict()
                        result_row.update({
                            'status': 'success',
                            's3_url': s3_url,
                            'token_info': token_info,
                            'extension': extension,
                            'processed_timestamp': pd.Timestamp.now(),
                            'prompt': prompt
                        })
                        results.append(result_row)
                    else:
                        # Add failure to results
                        result_row = row.to_dict() 
                        result_row.update({
                            'status': 'failed',
                            'error': 'Image generation failed',
                            'processed_timestamp': pd.Timestamp.now()
                        })
                        results.append(result_row)
                        
                except Exception as e:
                    if verbose:
                        print(f"Error processing deal {deal_id}: {str(e)}")
                    result_row = row.to_dict()
                    result_row.update({
                        'status': 'failed',
                        'error': str(e),
                        'processed_timestamp': pd.Timestamp.now()
                    })
                    results.append(result_row)
                
                # Update progress bar
                progress_bar.update(1)
            
            # Close progress bar
            progress_bar.close()
    
    return pd.DataFrame(results)

## Run the Process

Execute the async processing and display results

In [118]:
deals_sample = deals_df.head(1)

print(f"Selected {len(deals_sample)} deals")

# Process deals asynchronously WITH VERBOSE LOGGING
results_df = await process_deals_async(deals_sample, max_workers=50, verbose=False)

# Filter to get only successful results (the "winners")
winners_df = results_df[results_df['status'] == 'success'].copy()

print(f"\nSuccessful generations: {len(winners_df)} out of {len(results_df)}")

# Display the winners
display(winners_df)

# Calculate total cost for successful generations
if 'token_info' in winners_df.columns:
    total_cost = 0.0
    for _, row in winners_df.iterrows():
        if 'token_info' in row and row['token_info'] and 'Cost' in row['token_info']:
            cost_str = row['token_info']['Cost'].replace('$', '')
            try:
                total_cost += float(cost_str)
            except:
                pass
    print(f"Total cost for successful generations: ${total_cost:.4f}")

Selected 1 deals
Processing 1 deals...


Overall progress:   0%|          | 0/1 [00:00<?, ?it/s]


Successful generations: 0 out of 1


Unnamed: 0,id,email_subject,category_name,vertical,sub_category_name,revenue_last_7_days,revenue_rank,image_id_pos_0,image_url_pos_0,extension,status,error,processed_timestamp


In [None]:
image_urls = winners_df['s3_url'].to_list()
import requests 
import json
def purge_cache(image_urls):
    chunk_size = 30 # cloudflare purge api takes max 30 urls per request
    chunked_list = [image_urls[i:i+chunk_size] for i in range(0, len(image_urls), chunk_size)]
    api_url = "https://api.cloudflare.com/client/v4/zones/4fec7e02d5c45deb9f67452873708896/purge_cache"
    api_key = "Bearer IT-Lr8A8LOipKdcGKkJU8Q5ciM8jfy3KbtfWkDnK"
    headers = {"Authorization": api_key,
               "Content-Type":"application/json"
               }
    for chunk in chunked_list:
        payload = {"files": []}
        #print("Clearing cache of: ", chunk)
        payload['files'] = chunk
        response = requests.post(url=api_url, data=json.dumps(payload), headers=headers)
        print(response.json())
purge_cache(image_urls)


In [103]:
from IPython.display import Image, display
import time
import random

# Display a random image from the winners_df with a timestamp to ensure freshness
if not winners_df.empty:
    random_index = random.randint(0, len(winners_df) - 1)
    fresh_url = f"{winners_df['s3_url'][random_index]}?timestamp={int(time.time())}"
    original_url = f"{winners_df['image_url_pos_0'][random_index]}?timestamp={int(time.time())}"
    display(Image(url=original_url, width=500))
    display(Image(url=fresh_url, width=500))

In [97]:
winners_df.to_csv('national_top_up.csv', index=False)

In [61]:
winners_df.columns

Index(['id', 'email_subject', 'category_name', 'vertical', 'sub_category_name',
       'revenue_last_7_days', 'revenue_rank', 'image_id_pos_0',
       'image_url_pos_0', 'extension', 'status', 's3_url', 'token_info',
       'processed_timestamp', 'prompt', 'error'],
      dtype='object')