# Process Approved Variants

This notebook:
1. Loads approved variants from the CSV file
2. Copies variants to S3 with new variant IDs (original ID * 100000)
3. Updates the Redshift database
4. Updates the API image list

In [10]:
# Import the module with all the functions
from process_approved_variants import *

In [23]:
import pandas as pd
df = pd.read_csv('gfurnitureapproved.csv')
df = df.drop(columns=['Unnamed: 0'])
df_rejected = df[df['review_result']!='approved']


## 1. Load and filter approved images

In [24]:
import pandas as pd
import json
import re


# Function to safely parse the token info column
def extract_cost(token_info):
    try:
        # Handle if it's already a dict
        if isinstance(token_info, dict):
            return float(token_info.get('Cost', '0').replace('$', ''))
        
        # Try to parse as JSON if it's a string
        if isinstance(token_info, str):
            # Clean up any potential invalid JSON formatting
            cleaned = token_info.replace("'", '"')
            data = json.loads(cleaned)
            return float(data.get('Cost', '0').replace('$', ''))
    except:
        # If JSON parsing fails, try regex
        try:
            cost_match = re.search(r"'Cost': '\$([\d.]+)'", str(token_info))
            if cost_match:
                return float(cost_match.group(1))
        except:
            pass
    return 0.0

# Extract cost from token info column
df['extracted_cost'] = df['token_info'].apply(extract_cost)

# Calculate total cost and count by review result
cost_summary = df.groupby('review_result').agg(
    total_cost=('extracted_cost', 'sum'),
    count=('extracted_cost', 'count')
).reset_index()

# Format the cost with $ sign
cost_summary['total_cost'] = cost_summary['total_cost'].apply(lambda x: f"${x:.6f}")

print("Cost summary by approval status:")
print(cost_summary)

# Calculate overall total
overall_total_cost = df['extracted_cost'].sum()
overall_count = len(df)

print(f"\nOverall total cost: ${overall_total_cost:.6f}")
print(f"Overall count: {overall_count}")

Cost summary by approval status:
  review_result  total_cost  count
0      approved  $89.378990    333
1      rejected  $10.990325     41

Overall total cost: $100.369315
Overall count: 374


In [25]:
approved_df = df[df['review_result']=='approved']
len(approved_df)

333

## 2. Prepare data for Redshift

In [26]:
# Prepare the data for Redshift
redshift_df = prepare_for_redshift(approved_df)

# Display the first few rows of the prepared data
redshift_df.head()

Preparing data for Redshift...
Prepared 333 rows for Redshift


Unnamed: 0,deal_voucher_id,claid_prompt,status,original_image_id,variant_image_id,batch_name,enter_test_ts,exit_test_ts,list_name,s3_url,original_url
1,23894300,,1,898038,89803800000,OPEN AI Images,2025-06-05 11:27:34,,imgv_list_wow_uk,https://static.wowcher.co.uk/images/deal/23894...,https://static.wowcher.co.uk/images/deal/23894...
2,32597979,,1,1346322,134632200000,OPEN AI Images,2025-06-05 11:27:34,,imgv_list_wow_uk,https://static.wowcher.co.uk/images/deal/32597...,https://static.wowcher.co.uk/images/deal/32597...
3,24108244,,1,906653,90665300000,OPEN AI Images,2025-06-05 11:27:34,,imgv_list_wow_uk,https://static.wowcher.co.uk/images/deal/24108...,https://static.wowcher.co.uk/images/deal/24108...
4,32031360,,1,1319363,131936300000,OPEN AI Images,2025-06-05 11:27:34,,imgv_list_wow_uk,https://static.wowcher.co.uk/images/deal/32031...,https://static.wowcher.co.uk/images/deal/32031...
5,24085071,,1,1390050,139005000000,OPEN AI Images,2025-06-05 11:27:34,,imgv_list_wow_uk,https://static.wowcher.co.uk/images/deal/24085...,https://static.wowcher.co.uk/images/deal/24085...


## 3. Copy variants to S3 with new variant IDs

In [27]:
# Copy the variants to S3
processed_df = copy_approved_variants_to_s3(redshift_df)

# Display a summary of the processed data
success_count = processed_df['processed_status'].sum()
print(f"Successfully processed {success_count} of {len(processed_df)} variants")

Connecting to S3...


Copying Variants to S3: 100%|██████████| 333/333 [01:10<00:00,  4.73it/s]

Successfully copied 333 of 333 variants to S3
Saved processed data to processed_approved_variants.csv
Successfully processed 333 of 333 variants





## 4. Upload prepared data to S3 for Redshift

In [28]:
# Upload the prepared data to S3 for Redshift
s3_url = upload_to_s3_for_redshift(processed_df)

if s3_url:
    print(f"Successfully uploaded data to S3: {s3_url}")
else:
    print("Failed to upload data to S3")

Preparing data for Redshift...
Saved Redshift data to redshift_upload_data.csv
Uploaded data to S3: https://static.wowcher.co.uk/temp/ai_image_variants_20250605112852.csv
Successfully uploaded data to S3: https://static.wowcher.co.uk/temp/ai_image_variants_20250605112852.csv


## 5. Copy data from S3 to Redshift

In [29]:
# Only proceed if we have a valid S3 URL
if s3_url:
    # Copy the data from S3 to Redshift
    success = copy_s3_to_redshift(s3_url)    
    if success:
        print("Successfully copied data to Redshift")
    else:
        print("Failed to copy data to Redshift")
else:
    print("Skipping Redshift update due to missing S3 URL")
    
    

Copying data to Redshift...
Successfully copied 3598 rows to Redshift table
Successfully copied data to Redshift


## 6. Update the image list in the API

In [30]:
# Only proceed if we successfully copied to Redshift
if 'success' in locals() and success:
    # Update the image list in the API
    result = update_image_list()
    if result:
        print("Successfully updated the image list in the API")
    else:
        print("Failed to update the image list in the API")
else:
    print("Skipping API update due to failed Redshift update")

Updating image list in API...
Found 4586 unique images to add to list
Successfully updated test list with 4586 images
Successfully updated the image list in the API


## Processing Complete

All approved variant images have been processed, copied to S3, added to Redshift, and the API has been updated.

# Purge Cache on image urls. Usefull if making images where a variant already existed and now need it to show the new variant


In [32]:
import psycopg2
import pandas as pd

sql = """

select 
'https://static.wowcher.co.uk/images/deal/'||oiv.deal_voucher_id||'/'||oiv.original_image_id||'.'||dvi.extension as url
from temp.opt_image_variants oiv
join real.deal_voucher_image dvi on dvi.id = oiv.original_image_id
where oiv.status = 1
"""

REDSHIFT_CONFIG = {
    'host': 'bi-redshift.intwowcher.co.uk',
    'port': 5439,
    'dbname': 'wowdwhprod',
    'user': 'jenkins',
    'password': '9SDy1ffdfTV7'
}


conn = psycopg2.connect(**REDSHIFT_CONFIG)

# Use pandas to read SQL query
urls =  pd.read_sql_query(sql, conn)['url'].to_list()

# Close connection
conn.close()

  urls =  pd.read_sql_query(sql, conn)['url'].to_list()


In [34]:
import requests 
import json
def purge_cache(image_urls):
    chunk_size = 30 # cloudflare purge api takes max 30 urls per request

    chunked_list = [image_urls[i:i+chunk_size] for i in range(0, len(image_urls), chunk_size)]

    api_url = "https://api.cloudflare.com/client/v4/zones/4fec7e02d5c45deb9f67452873708896/purge_cache"
    api_key = "Bearer IT-Lr8A8LOipKdcGKkJU8Q5ciM8jfy3KbtfWkDnK"
    headers = {"Authorization": api_key,
               "Content-Type":"application/json"
               }
    
    for chunk in chunked_list:
        payload = {"files": []}
        #print("Clearing cache of: ", chunk)
        payload['files'] = chunk
        response = requests.post(url=api_url, data=json.dumps(payload), headers=headers)
        print(response.json())
purge_cache(urls)



{'success': True, 'errors': [], 'messages': [], 'result': {'id': '4fec7e02d5c45deb9f67452873708896'}}
{'success': True, 'errors': [], 'messages': [], 'result': {'id': '4fec7e02d5c45deb9f67452873708896'}}
{'success': True, 'errors': [], 'messages': [], 'result': {'id': '4fec7e02d5c45deb9f67452873708896'}}
{'success': True, 'errors': [], 'messages': [], 'result': {'id': '4fec7e02d5c45deb9f67452873708896'}}
{'success': True, 'errors': [], 'messages': [], 'result': {'id': '4fec7e02d5c45deb9f67452873708896'}}
{'success': True, 'errors': [], 'messages': [], 'result': {'id': '4fec7e02d5c45deb9f67452873708896'}}
{'success': True, 'errors': [], 'messages': [], 'result': {'id': '4fec7e02d5c45deb9f67452873708896'}}
{'success': True, 'errors': [], 'messages': [], 'result': {'id': '4fec7e02d5c45deb9f67452873708896'}}
{'success': True, 'errors': [], 'messages': [], 'result': {'id': '4fec7e02d5c45deb9f67452873708896'}}
{'success': True, 'errors': [], 'messages': [], 'result': {'id': '4fec7e02d5c45deb