In [None]:
!pip install huggingface_hub
!pip install -U diffusers

In [None]:
!huggingface-cli login

In [None]:
# Importing necessary libraries
# If your API key hits its usage limit, switch to another key, re-run this code, and enter the total number of images already generated by earlier runs at the "rows to skip" input prompt (shown at the very bottom) so those rows are not regenerated.
import os
import requests
import pandas as pd
import time
import shutil
import json
import io
from PIL import Image
from google.colab import files
from tqdm import tqdm

# Configuring parameters
csv_path = "mscoco-blip-vit_captions.csv"  # CSV read by the generation step
model_name = "Flux-Dev-Pregen"             # used as file-name prefix and archive name
save_dir = model_name                      # generated images are written here
os.makedirs(save_dir, exist_ok=True)

# Requesting user input for skipping rows.
# Only Exception subclasses fall back to the default: a bare `except:` would
# also swallow KeyboardInterrupt, so interrupting the prompt would silently
# start generation from row 0 instead of stopping the run.
try:
    skip_rows = int(input("Enter number of rows to skip from start: "))
    if skip_rows < 0:
        # A negative offset would make the later positional slice select the
        # wrong rows, so treat it like any other invalid entry.
        raise ValueError("number of rows to skip cannot be negative")
except Exception:
    skip_rows = 0
    print("Using default: 0 rows skipped")

print(f"Skipping first {skip_rows} rows from CSV")

# Using Hugging Face Inference API
API_URL = "https://api-inference.huggingface.co/models/black-forest-labs/FLUX.1-dev"
# NOTE: replace the placeholder with a personal access token; avoid sharing
# or committing the notebook with a real token in it.
API_TOKEN = "YOUR_API_KEY" # <-------Add your access token from hugging face here
headers = {"Authorization": f"Bearer {API_TOKEN}"}

def query_flux_api(prompt, timeout=120):
    """Request one image for *prompt* from the Hugging Face Inference API.

    Sends a POST to the module-level ``API_URL`` using the module-level
    ``headers`` and returns the raw response body.

    Args:
        prompt: Text sent as the ``inputs`` field of the request payload.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the whole run forever.

    Returns:
        The response body as bytes (expected to be encoded image data; the
        caller is responsible for validating it).

    Raises:
        Exception: If the API answers with a JSON body (treated as an error
            report) or with a non-200 status code.
        requests.exceptions.RequestException: On network failures/timeouts.
    """
    payload = {
        "inputs": prompt,
        "parameters": {
            "height": 512,
            "width": 512,
            "guidance_scale": 3.5,
            "num_inference_steps": 20,
            "max_sequence_length": 512
        }
    }

    response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)

    # A JSON body means an error report instead of an image. Compare only the
    # media type: servers commonly append parameters such as
    # "; charset=utf-8", which an exact string comparison would miss.
    content_type = response.headers.get('content-type') or ''
    media_type = content_type.split(';', 1)[0].strip().lower()
    if media_type == 'application/json':
        try:
            error_data = response.json()
        except ValueError:
            # Body advertised as JSON but could not be decoded.
            error_data = {}
        if isinstance(error_data, dict):
            message = error_data.get('error', 'Unknown error')
        else:
            # JSON that is not an object (e.g. a list): report it verbatim.
            message = error_data
        raise Exception(f"API Error: {message}")

    # Any other non-success status is also a failure.
    if response.status_code != 200:
        raise Exception(f"HTTP Error {response.status_code}: {response.text[:100]}...")

    return response.content

def is_valid_image(image_data):
    """Return True if *image_data* holds bytes that PIL accepts as an image.

    Args:
        image_data: Raw bytes (for example an HTTP response body). Empty or
            ``None`` data is reported as invalid.

    Returns:
        True when PIL can open the data and ``verify()`` finds no problem,
        False otherwise.
    """
    if not image_data:
        # Nothing to inspect; an empty payload can never be an image.
        return False
    try:
        with Image.open(io.BytesIO(image_data)) as image:
            image.verify()  # Verifying it's a valid image file
        return True
    except Exception:
        # Deliberately broad (PIL raises several exception types for broken
        # files) but, unlike a bare `except:`, this lets KeyboardInterrupt
        # and SystemExit propagate.
        return False

# Processing CSV: read the captions, generate one image per row through the
# API, save each image into save_dir and keep success/failure counters.
print("Using Hugging Face API to generate FLUX images...")

MAX_IMAGES_PER_RUN = 200      # upper bound on rows handled in a single run
MAX_CAPTION_CHARS = 200       # captions are truncated to this many characters
REQUEST_DELAY_SECONDS = 1.5   # pause between requests to avoid rate limiting

successful_count = 0
failed_count = 0

try:
    # Reading CSV
    df = pd.read_csv(csv_path)

    # Fail once, up front, if the expected columns are absent instead of
    # discovering it on the first row.
    missing_columns = {'image_name', 'mscoco_caption'} - set(df.columns)
    if missing_columns:
        raise KeyError(f"missing required column(s): {sorted(missing_columns)}")

    # Calculating total rows after skipping
    total_rows = len(df)
    remaining_rows = total_rows - skip_rows

    if skip_rows >= total_rows:
        print(f"Cannot skip {skip_rows} rows from CSV with only {total_rows} rows")
        # Raise SystemExit directly: inside IPython/Colab the `exit()` helper
        # does not stop the running cell, so the code below kept executing.
        # SystemExit is not an Exception subclass, so the handler below does
        # not intercept it.
        raise SystemExit

    print(f"CSV has {total_rows} rows, processing {remaining_rows} rows after skipping")

    max_images = min(MAX_IMAGES_PER_RUN, remaining_rows)

    # Getting the subset of dataframe after skipping rows
    df_subset = df.iloc[skip_rows:skip_rows + max_images]

    # Creating progress bar
    pbar = tqdm(df_subset.iterrows(), total=len(df_subset), desc="Generating images")

    # Number rows by position (1-based, counted from the start of the CSV).
    # The index label from iterrows() already includes the skipped rows, so
    # adding skip_rows to it reported the wrong row number.
    for current_row, (_, row) in enumerate(pbar, start=skip_rows + 1):
        # str() keeps the slicing/formatting below safe for non-string names.
        image_name = str(row['image_name'])

        try:
            pbar.set_postfix_str(f"Row {current_row}: {image_name[:15]}...")

            # Handled inside the per-row try so that a single bad caption
            # fails only this row instead of aborting the whole run.
            caption = row['mscoco_caption']
            if pd.isna(caption):
                raise ValueError("caption is missing")
            caption = str(caption)[:MAX_CAPTION_CHARS]

            # Getting image from API
            image_data = query_flux_api(caption)

            # Validating it's actually an image
            if not is_valid_image(image_data):
                raise Exception("API returned invalid image data")

            # Saving the bytes exactly as returned by the API.
            # NOTE(review): the file gets a .jpeg extension regardless of the
            # returned format - confirm the API always returns JPEG data.
            save_path = os.path.join(save_dir, f"{model_name}-{image_name}.jpeg")
            with open(save_path, "wb") as f:
                f.write(image_data)

            successful_count += 1
            pbar.set_postfix_str(f"Saved: {image_name[:15]}...")

        except Exception as e:
            failed_count += 1
            full_msg = str(e)
            error_msg = full_msg[:50] + "..." if len(full_msg) > 50 else full_msg
            pbar.set_postfix_str(f"Failed: {error_msg}")
            print(f"\nFailed '{image_name}': {e}")

        # Pause after failures as well: a rate-limit error followed by an
        # immediate retry on the next row only makes the throttling worse.
        time.sleep(REQUEST_DELAY_SECONDS)

except Exception as e:
    print(f"CSV error: {e}")

print("Generation completed!")
print(f"Successful: {successful_count}")
print(f"Failed: {failed_count}")
print(f"Started from row: {skip_rows + 1}")
processed_count = successful_count + failed_count
if processed_count:
    print(f"Processed rows: {skip_rows + 1} to {skip_rows + processed_count}")
else:
    # Avoid printing a nonsensical range such as "1 to 0".
    print("Processed rows: none")

# Saving progress information first, so the resume point is recorded even if
# archiving or the browser download below fails.
progress_info = {
    'skip_rows': skip_rows,
    'processed_rows': successful_count + failed_count,
    'successful': successful_count,
    'failed': failed_count,
    'last_processed_row': skip_rows + successful_count + failed_count
}

with open('generation_progress.json', 'w') as f:
    json.dump(progress_info, f)

print(f"Progress saved: {progress_info}")

if successful_count > 0:
    # Downloading results: zip the whole output directory and hand it to Colab.
    print("Creating zip file...")
    shutil.make_archive(model_name, 'zip', save_dir)
    files.download(f"{model_name}.zip")
    print("Download started!")

    # Checking generated files: report size and format for a small sample.
    print("Checking generated files:")
    # sorted() makes the sample deterministic; os.listdir order is arbitrary.
    image_files = sorted(f for f in os.listdir(save_dir) if f.endswith('.jpeg'))[:5]
    for img_file in image_files:
        file_path = os.path.join(save_dir, img_file)
        file_size = os.path.getsize(file_path) / 1024  # KB
        print(f"   {img_file}: {file_size:.1f} KB")

        # Validating image
        try:
            with Image.open(file_path) as img:
                print(f"     Valid {img.format} image, size: {img.size}")
        except Exception:
            # Not a bare `except:` so that KeyboardInterrupt still stops the run.
            print("     Invalid image file")
else:
    print("No images were generated successfully")