# Merge Dalle3 and Flux (schnell) images

Each pipeline produced a CSV file that maps every prompt to the path of the image generated for it (or to the error message when generation failed).

## load csv files

In [4]:
# Paths to the Flux and DALL-E 3 `generated_prompts.csv` files under /content/drive/MyDrive.
# NOTE: `dalle_prompts_paths` holds a single path despite the plural name.
flux_prompts_path = "/content/drive/MyDrive/AI/together_flux/generated_prompts.csv"
dalle_prompts_paths = "/content/drive/MyDrive/AI/dalle3/generated_prompts.csv"

In [5]:
from typing import Optional

import pandas as pd

In [6]:
# Load both CSV files into DataFrames; the previews below show `prompt`, `file` and `error` columns.
flux_df = pd.read_csv(flux_prompts_path)
dalle_df = pd.read_csv(dalle_prompts_paths)

In [7]:
flux_df.head()

Unnamed: 0,prompt,file,error
0,"impressive oil painting, intimate portrait of ...",/content/drive/MyDrive/AI/together_flux/202510...,
1,"powerful oil painting, portrayal of a strong f...",/content/drive/MyDrive/AI/together_flux/202510...,
2,"intriguing oil painting, enigmatic portrait of...",/content/drive/MyDrive/AI/together_flux/202510...,
3,"vibrant oil painting, lively interior scene wi...",/content/drive/MyDrive/AI/together_flux/202510...,
4,"striking woodblock print, dramatic seascape wi...",/content/drive/MyDrive/AI/together_flux/202510...,


In [8]:
dalle_df.head()

Unnamed: 0,prompt,file,error
0,"impressive oil painting, intimate portrait of ...",/content/drive/MyDrive/AI/dalle3/20251019-0038...,
1,"awe-inspiring fresco, depiction of the creatio...",,Error code: 400 - {'error': {'message': 'Your ...
2,"powerful oil painting, portrayal of a strong f...",/content/drive/MyDrive/AI/dalle3/20251019-0040...,
3,"intriguing oil painting, enigmatic portrait of...",/content/drive/MyDrive/AI/dalle3/20251019-0041...,
4,"vibrant oil painting, lively interior scene wi...",/content/drive/MyDrive/AI/dalle3/20251019-0041...,


## Remove rows without an image file from the OpenAI (DALL-E 3) pipeline

In [9]:
# Keep only the rows whose `file` cell is filled in, i.e. drop the failed generations.
dalle_df = dalle_df[dalle_df["file"].notna()]

## merge by prompt

In [14]:
# Drop the `error` column by rebinding rather than `del`, so re-running this
# cell after the column is already gone does not raise KeyError.
dalle_df = dalle_df.drop(columns=["error"], errors="ignore")

In [13]:
# Drop the `error` column by rebinding rather than `del`, so re-running this
# cell after the column is already gone does not raise KeyError.
flux_df = flux_df.drop(columns=["error"], errors="ignore")

In [18]:
# Inner-join the two frames on the prompt text: only prompts present in both
# frames survive, and columns shared by both sides get the `_flux` / `_dalle`
# suffixes.
# NOTE(review): a prompt that occurs more than once on either side yields one
# row per matching pair -- confirm prompts are unique in both CSV files.
flux_by_prompt = flux_df.set_index("prompt")
dalle_by_prompt = dalle_df.set_index("prompt")
full_df = flux_by_prompt.join(
    dalle_by_prompt, how="inner", lsuffix="_flux", rsuffix="_dalle"
)
full_df = full_df.reset_index()


In [19]:
full_df.head()

Unnamed: 0,prompt,file_flux,file_dalle
0,"impressive oil painting, intimate portrait of ...",/content/drive/MyDrive/AI/together_flux/202510...,/content/drive/MyDrive/AI/dalle3/20251019-0038...
1,"powerful oil painting, portrayal of a strong f...",/content/drive/MyDrive/AI/together_flux/202510...,/content/drive/MyDrive/AI/dalle3/20251019-0040...
2,"intriguing oil painting, enigmatic portrait of...",/content/drive/MyDrive/AI/together_flux/202510...,/content/drive/MyDrive/AI/dalle3/20251019-0041...
3,"vibrant oil painting, lively interior scene wi...",/content/drive/MyDrive/AI/together_flux/202510...,/content/drive/MyDrive/AI/dalle3/20251019-0041...
4,"striking woodblock print, dramatic seascape wi...",/content/drive/MyDrive/AI/together_flux/202510...,/content/drive/MyDrive/AI/dalle3/20251019-0042...


## Convert to WebP (and save the output file paths)

In [22]:
# Work on a copy so the columns added in later cells do not modify `full_df`.
df = full_df.copy()

In [23]:
import os
import pandas as pd
from PIL import Image
from tqdm.auto import tqdm

# `df` must already exist (it is created from `full_df` in the previous cell).

# Directory that receives the converted .webp files; created if it is missing.
out_dir = "./webp"
os.makedirs(out_dir, exist_ok=True)

def to_webp(src_path: str, quality: int = 95) -> Optional[str]:
    """Convert one image file to WebP inside ``out_dir``.

    Args:
        src_path: Path of the source image.
        quality: WebP quality setting passed to ``Image.save`` (default 95,
            the value that used to be hard-coded).

    Returns:
        Path of the written ``.webp`` file, or ``None`` when ``src_path`` is
        not a string (e.g. a NaN cell) or does not exist on disk.
    """
    # Non-string values and missing files are skipped instead of raising, so a
    # whole column can be pushed through this function.
    if not isinstance(src_path, str) or not os.path.exists(src_path):
        return None

    # Same file name as the source, with the extension swapped for .webp.
    stem, _ = os.path.splitext(os.path.basename(src_path))
    out_path = os.path.join(out_dir, stem + ".webp")

    # The context manager closes the source file once the copy has been saved.
    with Image.open(src_path) as im:
        im.save(out_path, "WEBP", quality=quality)

    return out_path

# Convert every source image, walking both path columns in lockstep so a single
# progress bar covers the whole table. Results are collected in lists and
# assigned as whole columns; this avoids building a Series per row (`iterrows`)
# and per-cell `df.at` writes, and does not depend on unique index labels.
flux_webp_paths = []
dalle_webp_paths = []
for flux_path, dalle_path in tqdm(
    zip(df["file_flux"], df["file_dalle"]), total=len(df)
):
    flux_webp_paths.append(to_webp(flux_path))
    dalle_webp_paths.append(to_webp(dalle_path))

# Entries are None wherever `to_webp` skipped a missing / non-string source path.
df["file_flux_webp"] = flux_webp_paths
df["file_dalle_webp"] = dalle_webp_paths


  0%|          | 0/186 [00:00<?, ?it/s]

In [25]:
df.head()

Unnamed: 0,prompt,file_flux,file_dalle,file_flux_webp,file_dalle_webp
0,"impressive oil painting, intimate portrait of ...",/content/drive/MyDrive/AI/together_flux/202510...,/content/drive/MyDrive/AI/dalle3/20251019-0038...,./webp/20251019-105250-impressive-oil-painting...,./webp/20251019-003857-impressive-oil-painting...
1,"powerful oil painting, portrayal of a strong f...",/content/drive/MyDrive/AI/together_flux/202510...,/content/drive/MyDrive/AI/dalle3/20251019-0040...,./webp/20251019-105303-powerful-oil-painting-p...,./webp/20251019-004024-powerful-oil-painting-p...
2,"intriguing oil painting, enigmatic portrait of...",/content/drive/MyDrive/AI/together_flux/202510...,/content/drive/MyDrive/AI/dalle3/20251019-0041...,./webp/20251019-105316-intriguing-oil-painting...,./webp/20251019-004112-intriguing-oil-painting...
3,"vibrant oil painting, lively interior scene wi...",/content/drive/MyDrive/AI/together_flux/202510...,/content/drive/MyDrive/AI/dalle3/20251019-0041...,./webp/20251019-105329-vibrant-oil-painting-li...,./webp/20251019-004138-vibrant-oil-painting-li...
4,"striking woodblock print, dramatic seascape wi...",/content/drive/MyDrive/AI/together_flux/202510...,/content/drive/MyDrive/AI/dalle3/20251019-0042...,./webp/20251019-105341-striking-woodblock-prin...,./webp/20251019-004236-striking-woodblock-prin...


In [24]:
# Checkpoint: write the table, including the local WebP paths, to a CSV file without the index column.
df.to_csv("/content/drive/MyDrive/AI/together_flux/full_df_with_webp_paths.csv", index=False)

## save to bucket (gcp)

### Install the Google Cloud Storage client and authenticate

In [27]:
!pip install -q google-cloud-storage google-auth google-auth-oauthlib

from google.colab import auth
auth.authenticate_user()  # Opens a Google OAuth popup. After success, ADC is set.

In [28]:
import os
# Sets the GCLOUD_PROJECT environment variable for this kernel process.
# NOTE(review): presumably read by `storage.Client()` in a later cell to resolve the project -- confirm.
os.environ["GCLOUD_PROJECT"] = "groq-endpoint"

### Define a function to upload one image

In [30]:
import os
from google.cloud import storage
from tqdm.auto import tqdm

BUCKET_NAME = "dalle-flux-comparison"        # target GCS bucket; change to your own
GCS_BASE = f"https://storage.googleapis.com/{BUCKET_NAME}"  # prefix of the URLs returned by upload_one
DEST_PREFIX = "art/"                     # prepended to every blob name (acts as a folder in the bucket)

# Client and bucket handle used by `upload_one` below.
storage_client = storage.Client()
bucket = storage_client.bucket(BUCKET_NAME)

def upload_one(local_path: str, dest_blob: str, make_public: bool = True) -> Optional[str]:
    """Upload a local file to GCS and return its HTTPS URL.

    Args:
        local_path: Path of the file to upload.
        dest_blob: Object name (key) to create inside the bucket.
        make_public: When True, try to make the uploaded object publicly
            readable (the bucket must allow it).

    Returns:
        ``f"{GCS_BASE}/{dest_blob}"`` once the upload has finished, or ``None``
        when ``local_path`` is not a string or does not exist (nothing is
        uploaded in that case).
    """
    import warnings  # local import: only needed on the failure path below

    if not isinstance(local_path, str) or not os.path.exists(local_path):
        return None
    blob = bucket.blob(dest_blob)
    # Cache aggressively; tweak if you expect frequent overwrites.
    blob.cache_control = "public, max-age=31536000, immutable"
    # Every file uploaded here is stored with the WebP content type.
    blob.content_type = "image/webp"
    blob.upload_from_filename(local_path)
    if make_public:
        try:
            blob.make_public()
        except Exception as exc:
            # Still best effort (the upload itself succeeded), but no longer
            # silent: the returned URL is not readable until public access is
            # granted some other way, e.g. through bucket-level IAM.
            warnings.warn(
                f"Could not make {dest_blob!r} public ({exc}); "
                "grant read access at bucket level for the URL to work.",
                RuntimeWarning,
                stacklevel=2,
            )
    return f"{GCS_BASE}/{dest_blob}"


### perform the upload

In [31]:
# Assumes df already has:
#   df["file_flux_webp"]  and  df["file_dalle_webp"]

def gcs_key_for(local_path: str) -> str:
    """Build the blob name for a local file: ``DEST_PREFIX`` followed by the file's base name."""
    _, fname = os.path.split(local_path)
    return "{}{}".format(DEST_PREFIX, fname)

# Collect every distinct local WebP path from both columns so that each file is
# uploaded only once (None / NaN cells are skipped).
webp_paths = {
    p
    for col in ("file_flux_webp", "file_dalle_webp")
    for p in df[col].dropna()
    if isinstance(p, str)
}

# Upload all unique files once
uploaded_map = {}  # local_path -> https_url
for p in tqdm(sorted(webp_paths), desc="Uploading to GCS"):
    uploaded_map[p] = upload_one(p, gcs_key_for(p), make_public=True)

# Map back to row-level URL columns. Column-wise lookups replace the former
# `iterrows` / `df.at` loop; paths without an upload (None / NaN) map to None.
df["file_flux_url"] = [uploaded_map.get(p) for p in df["file_flux_webp"]]
df["file_dalle_url"] = [uploaded_map.get(p) for p in df["file_dalle_webp"]]


Uploading to GCS:   0%|          | 0/371 [00:00<?, ?it/s]

### update the main CSV with URLs

In [32]:
# Optional checkpoint: write the table, now including the two GCS URL columns,
# to a CSV file without the index column.
df.to_csv("/content/drive/MyDrive/AI/together_flux/full_df_with_webp_gcs_urls.csv", index=False)


In [33]:
df.head()

Unnamed: 0,prompt,file_flux,file_dalle,file_flux_webp,file_dalle_webp,file_flux_url,file_dalle_url
0,"impressive oil painting, intimate portrait of ...",/content/drive/MyDrive/AI/together_flux/202510...,/content/drive/MyDrive/AI/dalle3/20251019-0038...,./webp/20251019-105250-impressive-oil-painting...,./webp/20251019-003857-impressive-oil-painting...,https://storage.googleapis.com/dalle-flux-comp...,https://storage.googleapis.com/dalle-flux-comp...
1,"powerful oil painting, portrayal of a strong f...",/content/drive/MyDrive/AI/together_flux/202510...,/content/drive/MyDrive/AI/dalle3/20251019-0040...,./webp/20251019-105303-powerful-oil-painting-p...,./webp/20251019-004024-powerful-oil-painting-p...,https://storage.googleapis.com/dalle-flux-comp...,https://storage.googleapis.com/dalle-flux-comp...
2,"intriguing oil painting, enigmatic portrait of...",/content/drive/MyDrive/AI/together_flux/202510...,/content/drive/MyDrive/AI/dalle3/20251019-0041...,./webp/20251019-105316-intriguing-oil-painting...,./webp/20251019-004112-intriguing-oil-painting...,https://storage.googleapis.com/dalle-flux-comp...,https://storage.googleapis.com/dalle-flux-comp...
3,"vibrant oil painting, lively interior scene wi...",/content/drive/MyDrive/AI/together_flux/202510...,/content/drive/MyDrive/AI/dalle3/20251019-0041...,./webp/20251019-105329-vibrant-oil-painting-li...,./webp/20251019-004138-vibrant-oil-painting-li...,https://storage.googleapis.com/dalle-flux-comp...,https://storage.googleapis.com/dalle-flux-comp...
4,"striking woodblock print, dramatic seascape wi...",/content/drive/MyDrive/AI/together_flux/202510...,/content/drive/MyDrive/AI/dalle3/20251019-0042...,./webp/20251019-105341-striking-woodblock-prin...,./webp/20251019-004236-striking-woodblock-prin...,https://storage.googleapis.com/dalle-flux-comp...,https://storage.googleapis.com/dalle-flux-comp...


## Make sure the images are public in the bucket

In [34]:
!gsutil iam ch allUsers:objectViewer gs://dalle-flux-comparison

## Export only the base 3 columns

In [38]:
# Export every row, restricted to the prompt and the two public URLs. The
# earlier `.head()` call limited the file to the first five rows only.
df[['prompt', 'file_flux_url', 'file_dalle_url']].to_csv('short_export.csv', index=False)