# Image Variant Generation with S3 Storage

This notebook:
1. Queries a database for deal IDs
2. Generates variant images using OpenAI
3. Stores images in S3
4. Implements async processing for efficiency

In [1]:
import pandas as pd
import os
import psycopg2
from dotenv import load_dotenv
import boto3
import time
import base64
import asyncio
import aiohttp
import tempfile
import sys
from botocore.exceptions import NoCredentialsError
from concurrent.futures import ThreadPoolExecutor
import subprocess
import json
from io import BytesIO

# Load environment variables
load_dotenv()

# Configure AWS credentials
s3_client = boto3.client(
    's3',
    aws_access_key_id=os.getenv('AWS_ACCESS_KEY_ID'),
    aws_secret_access_key=os.getenv('AWS_SECRET_ACCESS_KEY')
)

## Query Database for Deal IDs

Execute SQL to get deal information including:
- deal_voucher_id
- original_image_id
- variant_image_id
- batch_name
- enter_test_ts
- exit_test_ts
- open_ai_prompt

In [4]:
def get_deals_for_processing():
    # Establish connection to Redshift
    conn = psycopg2.connect(
        host=os.environ.get("REDSHIFT_HOST", "bi-redshift.intwowcher.co.uk"),
        port=os.environ.get("REDSHIFT_PORT", "5439"),
        dbname=os.environ.get("REDSHIFT_DBNAME", "wowdwhprod"),
        user=os.environ.get("REDSHIFT_USER", "jenkins"),
        password=os.environ.get("REDSHIFT_PASSWORD", "9SDy1ffdfTV7")
    )
    
    # Example query - modify as needed
    query = """
WITH visitors AS (
    SELECT
        deal_id_evar,
        COUNT(DISTINCT visitor_id) AS visitors
    FROM real.omniture_events
    WHERE trunc(date_time) >= trunc(sysdate) - 7
      AND product = 'wowdtm'
      AND (
            url_evar LIKE '%/deal/%' OR
            url_evar LIKE '%/e/%' OR
            url_evar LIKE '%/email-deals/%'
          )
    GROUP BY deal_id_evar
)
SELECT
    CAST(dv.id AS INTEGER) AS id,
    dv.email_subject,
    dvc.name AS category_name,
    dvsc.name AS sub_category_name,
    CAST(COALESCE(v.visitors, 0) AS INTEGER) AS visitors_last_7_days,
    CAST(rank() OVER (ORDER BY COALESCE(v.visitors, 0) DESC) AS INTEGER) AS visitor_rank,
    CAST(dvi.id AS INTEGER) AS image_id_pos_0,
    'https://static.wowcher.co.uk/images/deal/' || dvi.deal_voucher_id || '/' || dvi.id || '.' || dvi.extension AS image_url_pos_0,
    dvi.extension
FROM real.deal_voucher dv
JOIN real.product p ON p.id = dv.id AND p.status_id = 1
LEFT JOIN visitors v ON v.deal_id_evar = dv.id
LEFT JOIN real.deal_voucher_site dvs ON dvs.deal_voucher_id = dv.id
LEFT JOIN real.deal_voucher_image dvi ON dvi.deal_voucher_id = dv.id AND dvi.position = 0
LEFT JOIN real.deal_voucher_category dvc ON dvc.id = dv.category_id
LEFT JOIN real.deal_voucher_sub_category dvsc ON dvsc.id = dv.sub_category_id
LEFT JOIN real.site s ON s.id = dv.deal_location_id AND s.site_name = 'National Deal'
WHERE trunc(dv.closing_date) >= trunc(sysdate)
AND dv.currency = 'GBP'
AND NOT EXISTS (
    SELECT 1
    FROM temp.opt_image_variants oiv
    WHERE oiv.deal_voucher_id = dv.id
    AND (
        ((batch_name ILIKE '%manual%' AND status IN (1,3,5))
        OR (batch_name = 'OPEN AI Images' AND status IN (1,3))
        OR (batch_name NOT IN ('Manual Opt', 'OPEN AI Images') AND status = 1)
        )
    )
)
and dvc.name = 'Garden'
AND dvc.canonical_path_type = 'NATIONAL'
GROUP BY dv.id, dv.email_subject, dvc.name, dvsc.name, dvi.id, dvi.deal_voucher_id, dvi.extension, v.visitors
ORDER BY COALESCE(v.visitors, 0) DESC
LIMIT 2000;
    """
    
    df = pd.read_sql(query, conn)
    conn.close()
    
    return df
# Get deals to process
deals_df = get_deals_for_processing()
deals_df.head()

  df = pd.read_sql(query, conn)


Unnamed: 0,id,email_subject,category_name,sub_category_name,visitors_last_7_days,visitor_rank,image_id_pos_0,image_url_pos_0,extension
0,40499780,Five Tier Metal Storage Shelf,Garden,Garden Buildings & Storage Solutions,8418,1,1642547,https://static.wowcher.co.uk/images/deal/40499...,jpg
1,28071319,5ft Canary Island Date Palm Tree,Garden,"Gardening, Plants & Flowers",4719,2,1602784,https://static.wowcher.co.uk/images/deal/28071...,jpg
2,40479560,Premium Outdoor BBQ Gazebo,Garden,Garden Furniture,4710,3,1627162,https://static.wowcher.co.uk/images/deal/40479...,jpg
3,40064660,Heavy Textoline Zero Gravity Garden Chairs,Garden,Garden Furniture,1867,4,1595029,https://static.wowcher.co.uk/images/deal/40064...,jpg
4,40556411,Berlin Four Seater Corner Sofa Set,Garden,Garden Furniture,1720,5,1631090,https://static.wowcher.co.uk/images/deal/40556...,jpg


In [15]:
df_already_made_deals = pd.read_csv('all_deals.csv')
deals_df = deals_df[~deals_df['id'].isin(df_already_made_deals['id'])]
deals_df = deals_df.head(500) 
deals_df

Unnamed: 0,id,email_subject,category_name,sub_category_name,visitors_last_7_days,visitor_rank,image_id_pos_0,image_url_pos_0,extension
0,40499780,Five Tier Metal Storage Shelf,Garden,Garden Buildings & Storage Solutions,8418,1,1642547,https://static.wowcher.co.uk/images/deal/40499...,jpg
2,40479560,Premium Outdoor BBQ Gazebo,Garden,Garden Furniture,4710,3,1627162,https://static.wowcher.co.uk/images/deal/40479...,jpg
3,40064660,Heavy Textoline Zero Gravity Garden Chairs,Garden,Garden Furniture,1867,4,1595029,https://static.wowcher.co.uk/images/deal/40064...,jpg
4,40556411,Berlin Four Seater Corner Sofa Set,Garden,Garden Furniture,1720,5,1631090,https://static.wowcher.co.uk/images/deal/40556...,jpg
6,36844066,Modern Rattan 4 Seater Garden Set,Garden,Garden Furniture,1699,7,1635712,https://static.wowcher.co.uk/images/deal/36844...,jpg
...,...,...,...,...,...,...,...,...,...
920,28013809,Outsunny Resin Rattan Dining Set,Garden,Garden Furniture,16,920,1094127,https://static.wowcher.co.uk/images/deal/28013...,jpg
921,28813810,Outsunny Gazebo Party Tent,Garden,Garden Games & Leisure,16,920,1143520,https://static.wowcher.co.uk/images/deal/28813...,jpg
923,40500442,Wooden Folding Picnic Wine Table,Garden,BBQ & Picnic,16,920,1620231,https://static.wowcher.co.uk/images/deal/40500...,jpg
925,40351892,Topiary Buxus Ball 1 or 2 Potted Plants,Garden,"Gardening, Plants & Flowers",16,920,1604724,https://static.wowcher.co.uk/images/deal/40351...,jpg


## S3 Upload Functions

Functions to upload generated images to S3

In [11]:
# Split the dataframe into 10 parts
df_splits = [deals_df.iloc[i:i + 1000] for i in range(0, len(deals_df), 1000)]
# Assign each split to a separate variable
df_1000, df_2000, df_3000, df_4000, df_5000 = df_splits

ValueError: not enough values to unpack (expected 5, got 1)

In [12]:
def upload_to_s3(file_content, bucket_name, s3_key):
    """
    Upload a file to S3
    
    Parameters:
    - file_content: Binary content of the file
    - bucket_name: S3 bucket name
    - s3_key: Path in S3 where file will be stored
    
    Returns:
    - URL of the uploaded file
    """
    try:
        # Determine content type based on file extension
        extension = os.path.splitext(s3_key)[1].lower()
        content_type = 'image/jpeg' if extension in ['.jpg', '.jpeg'] else \
                      'image/png' if extension == '.png' else \
                      'image/webp' if extension == '.webp' else \
                      'application/octet-stream'
                      
        s3_client.put_object(
            Body=file_content,
            Bucket=bucket_name,
            Key=s3_key,
            ContentType=content_type
        )
        return f"https://static.wowcher.co.uk/{s3_key}"
    except NoCredentialsError:
        print("Credentials not available")
        return None

## Image Generation

Function to call the generate_image.py script and process the results

In [13]:
# Cell to replace external script dependency with integrated functionality
import pandas as pd
import os
import psycopg2
from dotenv import load_dotenv
import requests
import base64
from openai import OpenAI
from io import BytesIO

# Load environment variables if not already done
if 'client' not in locals():
    load_dotenv()
    client = OpenAI(api_key=os.getenv('OPEN_AI_API_KEY'))
    print(f"OpenAI client initialized with API key: {os.getenv('OPEN_AI_API_KEY')[:5]}...")

def get_deal_data_for_image(deal_id):
    """Get deal data needed for image generation"""
    # Establish connection to Redshift
    conn = psycopg2.connect(
        host=os.environ.get("REDSHIFT_HOST", "bi-redshift.intwowcher.co.uk"),
        port=os.environ.get("REDSHIFT_PORT", "5439"),
        dbname=os.environ.get("REDSHIFT_DBNAME", "wowdwhprod"),
        user=os.environ.get("REDSHIFT_USER", "jenkins"),
        password=os.environ.get("REDSHIFT_PASSWORD", "9SDy1ffdfTV7")
    )

    # Get email subject
    email_subject_query = """
    SELECT email_subject 
    FROM wowdwhprod.real.deal_voucher
    WHERE id = %s
    """
    with conn.cursor() as cur:
        cur.execute(email_subject_query, (deal_id,))
        email_subject_result = cur.fetchone()
        email_subject = email_subject_result[0] if email_subject_result else "Deal"

    # Get image URLs and extract extension information
    image_query = """
    SELECT 
        'https://static.wowcher.co.uk/images/deal/' || deal_voucher_id || '/' || id || '.' || extension AS image_url,
        extension
    FROM wowdwhprod.real.deal_voucher_image
    WHERE deal_voucher_id = %s
    ORDER BY position
    LIMIT 10
    """
    with conn.cursor() as cur:
        cur.execute(image_query, (deal_id,))
        image_results = cur.fetchall()
        image_urls = [row[0] for row in image_results]
        extensions = [row[1] for row in image_results]
        original_extension = extensions[0] if extensions else "png"

    # Get highlights
    highlights_query = """
    SELECT
    SPLIT_PART(highlight, ':', 1) AS highlight
    FROM wowdwhprod.real.deal_voucher_highlight
    WHERE deal_voucher_id = %s
    LIMIT 3;

    """
    with conn.cursor() as cur:
        cur.execute(highlights_query, (deal_id,))
        highlights_results = cur.fetchall()
        highlights = [row[0] for row in highlights_results]

    conn.close()

    # Build the prompt
    formatted_highlights = "\n".join([f"• {h}" for h in highlights]) if highlights else ""
    prompt = f"""
Create ONE high-resolution hero image advertising **{email_subject}**.

Final image must contain **zero spelling mistakes**.  

1. **Source images** – You have multiple angles.  
   • Accurately represent the product; do **not** invent new colours or features.  
   • If colour variants exist, PICK ONE colour and keep it consistent, though you should highlight the colours available in the add or the fact that there are multiple colours.
   - do not put any prices in the image. 

2. **Scene & background**  
   • Place the product in a realistic, aspirational environment that makes sense for its use.  
   • Adjust lighting and depth of field so the product is the clear focal point.  
   • Background must not overpower or obscure the product.

3. **Infographic & text elements**  
    • Do **not** repeat the headline anywhere else in the artwork.  
    • Any additional text or graphics must be limited to the 2-4 call-outs listed below.
    - Try and place the additional text or stickers on the left of the image. 
   • Overlay 2-4 concise call-outs drawn from these highlights:  
     {formatted_highlights}  
   • Position all call-outs **outside** the bottom-right 20% of the frame.

4. **Design constraints**  
   • Keep bottom-right area completely free of any graphics or text. 
   • Maintain 4 px padding around all text boxes.  
   • No brand logos unless provided in the source images.

    """
    return {
        'prompt': prompt,
        'image_urls': image_urls,
        'original_extension': original_extension
    }

def download_image_to_file(url, filename):
    """Download an image from URL and save to file"""
    response = requests.get(url)
    if response.status_code == 200:
        with open(filename, 'wb') as f:
            f.write(response.content)
        return filename
    else:
        raise Exception(f"Failed to download image from {url}")
# Add this to your imports
from tqdm.notebook import tqdm
import urllib.request
import logging

# Set up logging to control verbosity
logging.basicConfig(level=logging.WARNING)  # Set to WARNING to hide INFO and DEBUG messages

def generate_image_integrated(deal_id, original_id, temp_dir, verbose=False):
    """
    Generate image using OpenAI's API with minimal output
    """
    try:
        if verbose:
            print(f"Processing deal {deal_id}")
        
        # Get data for the deal
        deal_data = get_deal_data_for_image(deal_id)
        prompt = deal_data['prompt']
        image_urls = deal_data['image_urls']
        original_extension = deal_data['original_extension']
        
        # Create output filename
        output_filename = os.path.join(temp_dir, f"variant_{deal_id}_{original_id}.{original_extension}")
        
        if not image_urls:
            raise Exception("No images found for this deal")
        
        # Download images silently
        image_files = []
        temp_filenames = []
        for idx, url in enumerate(image_urls[:16]):
            temp_filename = os.path.join(temp_dir, f"temp_image_{deal_id}_{idx}.png")
            download_image_to_file(url, temp_filename)
            temp_filenames.append(temp_filename)
            image_files.append(open(temp_filename, "rb"))
        
        # Call OpenAI API
        result = client.images.edit(
            model="gpt-image-1",
            image=image_files,
            prompt=prompt,
            size="1536x1024",
            quality="high",
            background="auto",
            n=1
        )
        
        # Process and save the response
        image_base64 = result.data[0].b64_json
        image_bytes = base64.b64decode(image_base64)
        with open(output_filename, "wb") as f:
            f.write(image_bytes)
            
        # Close file handles
        for f in image_files:
            f.close()
            
        # Delete temporary files
        for filename in temp_filenames:
            if os.path.exists(filename):
                try:
                    os.remove(filename)
                except:
                    pass
        
        # Process token usage details silently
        token_info = {}
        if hasattr(result, 'usage'):
            total_tokens = result.usage.total_tokens
            input_tokens = result.usage.input_tokens
            output_tokens = result.usage.output_tokens
            input_text_tokens = result.usage.input_tokens_details.text_tokens
            input_image_tokens = result.usage.input_tokens_details.image_tokens
            
            token_info["Total tokens"] = str(total_tokens)
            token_info["Input tokens"] = str(input_tokens)
            token_info["Output tokens"] = str(output_tokens)
            token_info["Input text tokens"] = str(input_text_tokens)
            token_info["Input image tokens"] = str(input_image_tokens)
            
            # Calculate cost
            cost = (input_text_tokens * 5 + input_image_tokens * 10 + output_tokens * 40) / 1000000
            token_info["Cost"] = f"${cost:.6f}"
            
            if verbose:
                print(f"Cost: ${cost:.6f}")
        
        return output_filename, original_extension, token_info
        
    except Exception as e:
        if verbose:
            print(f"Error generating image for deal {deal_id}: {str(e)}")
        return None, None, None

async def process_deals_async(deals_df, max_workers=4, verbose=False):
    """
    Process multiple deals asynchronously with clean, minimal output
    """
    results = []
    
    if verbose:
        print(f"Available columns in dataframe: {list(deals_df.columns)}")
    
    # Create a temporary directory for image files
    with tempfile.TemporaryDirectory() as temp_dir:
        # Use ThreadPoolExecutor for parallelization
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            # Create tasks for all deals
            futures = []
            for idx, row in deals_df.iterrows():
                # Get deal_id and original_id from the actual column names
                deal_id = row.get('id')
                original_id = row.get('image_id_pos_0', 'main')
                    
                if deal_id is None:
                    if verbose:
                        print(f"Warning: Could not find deal ID in row: {row}")
                    continue
                    
                future = executor.submit(generate_image_integrated, deal_id, original_id, temp_dir, verbose)
                futures.append((future, deal_id, original_id, row))
            
            # Create just ONE overall progress bar
            print(f"Processing {len(futures)} deals...")
            progress_bar = tqdm(total=len(futures), desc="Overall progress")
            
            # Process results as they complete
            for future, deal_id, original_id, row in futures:
                try:
                    image_path, extension, token_info = future.result()
                    
                    if image_path:
                        # Read the image file
                        with open(image_path, 'rb') as img_file:
                            img_content = img_file.read()
                        
                        # Upload to S3 with correct extension
                        s3_key = f"images/deal/{deal_id}/{original_id}_variant.{extension}"
                        s3_url = upload_to_s3(img_content, 'static.wowcher.co.uk', s3_key)
                        
                        # Add to results
                        result_row = row.to_dict()
                        result_row.update({
                            'status': 'success',
                            's3_url': s3_url,
                            'token_info': token_info,
                            'extension': extension,
                            'processed_timestamp': pd.Timestamp.now()
                        })
                        results.append(result_row)
                    else:
                        # Add failure to results
                        result_row = row.to_dict()
                        result_row.update({
                            'status': 'failed',
                            'error': 'Image generation failed',
                            'processed_timestamp': pd.Timestamp.now()
                        })
                        results.append(result_row)
                        
                except Exception as e:
                    if verbose:
                        print(f"Error processing deal {deal_id}: {str(e)}")
                    result_row = row.to_dict()
                    result_row.update({
                        'status': 'failed',
                        'error': str(e),
                        'processed_timestamp': pd.Timestamp.now()
                    })
                    results.append(result_row)
                
                # Update progress bar
                progress_bar.update(1)
            
            # Close progress bar
            progress_bar.close()
    
    return pd.DataFrame(results)

OpenAI client initialized with API key: sk-sv...


## Run the Process

Execute the async processing and display results

In [14]:
deals_sample = deals_df

print(f"Selected {len(deals_sample)} deals")

# Process deals asynchronously
results_df = await process_deals_async(deals_sample, max_workers=50, verbose=False)

# Filter to get only successful results (the "winners")
winners_df = results_df[results_df['status'] == 'success'].copy()
print(f"\nSuccessful generations: {len(winners_df)} out of {len(results_df)}")

# Display the winners
display(winners_df)

# Calculate total cost for successful generations
if 'token_info' in winners_df.columns:
    total_cost = 0.0
    for _, row in winners_df.iterrows():
        if 'token_info' in row and row['token_info'] and 'Cost' in row['token_info']:
            cost_str = row['token_info']['Cost'].replace('$', '')
            try:
                total_cost += float(cost_str)
            except:
                pass
    print(f"Total cost for successful generations: ${total_cost:.4f}")
    
winners_df.to_csv('4000.csv')

Selected 500 deals
Processing 500 deals...


Overall progress:   0%|          | 0/500 [00:00<?, ?it/s]


Successful generations: 496 out of 500


Unnamed: 0,id,email_subject,category_name,sub_category_name,visitors_last_7_days,visitor_rank,image_id_pos_0,image_url_pos_0,extension,status,s3_url,token_info,processed_timestamp,error
0,40499780,Five Tier Metal Storage Shelf,Garden,Garden Buildings & Storage Solutions,8418,1,1642547,https://static.wowcher.co.uk/images/deal/40499...,jpg,success,https://static.wowcher.co.uk/images/deal/40499...,"{'Total tokens': '10038', 'Input tokens': '383...",2025-06-02 11:01:55.438828,
1,40479560,Premium Outdoor BBQ Gazebo,Garden,Garden Furniture,4710,3,1627162,https://static.wowcher.co.uk/images/deal/40479...,jpg,success,https://static.wowcher.co.uk/images/deal/40479...,"{'Total tokens': '8996', 'Input tokens': '2788...",2025-06-02 11:01:55.771517,
2,40064660,Heavy Textoline Zero Gravity Garden Chairs,Garden,Garden Furniture,1867,4,1595029,https://static.wowcher.co.uk/images/deal/40064...,jpg,success,https://static.wowcher.co.uk/images/deal/40064...,"{'Total tokens': '10043', 'Input tokens': '383...",2025-06-02 11:01:56.014049,
3,40556411,Berlin Four Seater Corner Sofa Set,Garden,Garden Furniture,1720,5,1631090,https://static.wowcher.co.uk/images/deal/40556...,jpg,success,https://static.wowcher.co.uk/images/deal/40556...,"{'Total tokens': '8999', 'Input tokens': '2791...",2025-06-02 11:02:00.752668,
4,36844066,Modern Rattan 4 Seater Garden Set,Garden,Garden Furniture,1699,7,1635712,https://static.wowcher.co.uk/images/deal/36844...,jpg,success,https://static.wowcher.co.uk/images/deal/36844...,"{'Total tokens': '9699', 'Input tokens': '3491...",2025-06-02 11:02:01.086987,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,28013809,Outsunny Resin Rattan Dining Set,Garden,Garden Furniture,16,920,1094127,https://static.wowcher.co.uk/images/deal/28013...,jpg,success,https://static.wowcher.co.uk/images/deal/28013...,"{'Total tokens': '8306', 'Input tokens': '2098...",2025-06-02 11:13:30.542320,
496,28813810,Outsunny Gazebo Party Tent,Garden,Garden Games & Leisure,16,920,1143520,https://static.wowcher.co.uk/images/deal/28813...,jpg,success,https://static.wowcher.co.uk/images/deal/28813...,"{'Total tokens': '8647', 'Input tokens': '2439...",2025-06-02 11:13:30.755218,
497,40500442,Wooden Folding Picnic Wine Table,Garden,BBQ & Picnic,16,920,1620231,https://static.wowcher.co.uk/images/deal/40500...,jpg,success,https://static.wowcher.co.uk/images/deal/40500...,"{'Total tokens': '8677', 'Input tokens': '2469...",2025-06-02 11:13:41.421320,
498,40351892,Topiary Buxus Ball 1 or 2 Potted Plants,Garden,"Gardening, Plants & Flowers",16,920,1604724,https://static.wowcher.co.uk/images/deal/40351...,jpg,success,https://static.wowcher.co.uk/images/deal/40351...,"{'Total tokens': '8350', 'Input tokens': '2142...",2025-06-02 11:13:41.618241,


Total cost for successful generations: $133.3580


### Moving winners into test


In [7]:
import pandas as pd

df = pd.read_csv('All500Approved.csv', index_col=0)
df = df.head(1)

In [None]:
# Import necessary libraries
import pandas as pd
import boto3
import requests
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm.notebook import tqdm
import psycopg2
from datetime import datetime

# Configuration variables
BATCH_NAME = "AI_Replicate_rattan"
MAX_WORKERS = 10
CSV_OUTPUT_FILE = 'processed_images_with_variants.csv'
DB_RECORDS_FILE = 'db_records.csv'
REDSHIFT_TABLE = 'temp.opt_image_variants'

# Assuming results_df is your dataframe with the processed images
# If you need to load it from CSV instead:
# results_df = pd.read_csv('Rattan_FurnitureALL.csv')

def process_single_image(args):
    index, row, s3_client, bucket_name = args
    try:
        # Get the generated image URL
        generated_url = row['generated_url']
        deal_id = str(row['deal_id'])
        image_id = str(row['image_id'])
        
        # Skip if no generated URL
        if pd.isna(generated_url):
            print(f"Skipping row {index}: No generated URL")
            return index, None, None
            
        # Download the generated image
        response = requests.get(generated_url)
        if response.status_code != 200:
            print(f"Failed to download image for deal {deal_id}")
            return index, None, None
            
        # Create variant image ID (original ID * 100000)
        variant_image_id = int(image_id) * 100000
        
        # Create new key with variant ID
        new_key = f"images/deal/{deal_id}/{variant_image_id}.jpg"
        
        # Upload to S3
        s3_client.put_object(
            Bucket=bucket_name,
            Key=new_key,
            Body=response.content,
            ContentType='image/jpeg',
            CacheControl='no-cache'
        )
        
        # Return the final URL
        final_url = f"https://{bucket_name}/{new_key}"
        return index, final_url, variant_image_id
        
    except Exception as e:
        print(f"Error processing deal {deal_id}: {str(e)}")
        return index, None, None

def copy_generated_images(df):
    # Connect to S3
    s3_client = boto3.client('s3', **AWS_CONFIG)
    bucket_name = S3_CONFIG['bucket_name']
    
    # Add new columns for final URL and variant image ID
    df['final_url'] = None
    df['variant_image_id'] = None

    # Create arguments for each row
    args_list = [(idx, row, s3_client, bucket_name) 
                 for idx, row in df.iterrows()]

    # Process images concurrently using ThreadPoolExecutor
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        futures = [executor.submit(process_single_image, args) 
                  for args in args_list]
        
        with tqdm(total=len(df), desc="Copying Images") as pbar:
            for future in as_completed(futures):
                idx, final_url, variant_id = future.result()
                if final_url:
                    df.loc[idx, 'final_url'] = final_url
                    df.loc[idx, 'variant_image_id'] = variant_id
                pbar.update(1)

    print("Processing complete")
    return df


def copy_s3_to_redshift(s3_url):
    """Copy data from S3 to Redshift"""
    try:
        connection = psycopg2.connect(**REDSHIFT_CONFIG)
        
        cursor = connection.cursor()
        
        s3_path = s3_url.replace(f'https://{S3_CONFIG["bucket_name"]}/', f's3://{S3_CONFIG["bucket_name"]}/')
        
        # Added explicit column mapping
        copy_command = f"""
        COPY {REDSHIFT_TABLE}(
            deal_voucher_id,
            claid_prompt,
            status,
            original_image_id,
            variant_image_id,
            batch_name, 
            enter_test_ts
        )
        FROM '{s3_path}'
        ACCESS_KEY_ID '{AWS_CONFIG["aws_access_key_id"]}'
        SECRET_ACCESS_KEY '{AWS_CONFIG["aws_secret_access_key"]}'
        CSV
        IGNOREHEADER 1
        ACCEPTINVCHARS AS '^'
        MAXERROR 10;
        """
        
        cursor.execute(copy_command)
        connection.commit()
        
        cursor.execute(f"SELECT COUNT(*) FROM {REDSHIFT_TABLE} WHERE batch_name = '{BATCH_NAME}'")
        row_count = cursor.fetchone()[0]
        
        print(f"Successfully copied {row_count} rows to Redshift table")
        
    except Exception as e:
        print(f"Error copying to Redshift: {str(e)}")
        if 'connection' in locals() and connection:
            connection.rollback()
    finally:
        if 'cursor' in locals() and cursor:
            cursor.close()
        if 'connection' in locals() and connection:
            connection.close()

# Use the function with the S3 URL from previous upload
if s3_url:
    copy_s3_to_redshift(s3_url)
    
def update_test_list():
    """Update the test list in the API after adding new variants"""
    try:
        # Initialize connection
        connection = psycopg2.connect(**REDSHIFT_CONFIG)
        cursor = connection.cursor()
        
        # Get all active image IDs
        cursor.execute("""
            SELECT original_image_id 
            FROM temp.opt_image_variants
            WHERE status = 1
            GROUP BY original_image_id
        """)
        
        # Format image IDs for API
        image_ids = [f":{str(row[0])}" for row in cursor.fetchall()]
        
        # Set up API headers
        headers = {
            "x-wowsecret": API_CONFIG['secret'],
            "Content-Type": "application/json",
            'Cookie': 'coreCookie=react; dp=c; landing_page=a; landing_page2=a'
        }
        
        # Update the API with the list
        response = requests.post(
            "https://www.wowcher.co.uk/deal-variant-db/deal/set?dv_id=imgv_list_wow_uk",
            headers=headers,
            json=image_ids
        )
        response.raise_for_status()
        
        print(f"Successfully updated test list with {len(image_ids)} images")
        return True
        
    except Exception as e:
        print(f"Error updating test list: {e}")
        return False
    finally:
        if 'cursor' in locals() and cursor:
            cursor.close()
        if 'connection' in locals() and connection:
            connection.close()

# Use the function with the S3 URL from previous upload
if s3_url:
    print("Updating test list with new variants...")
    update_test_list()
else:
    print("Skipping test list update due to database error")