In [1]:
import torch
import pandas as pd
import json
import re
from pathlib import Path
import numpy as np
from PIL import Image
import os
import time
import warnings
import gc
from tqdm import tqdm
BATCH_SIZE = 2 
MAX_NEW_TOKENS = 150
USE_FP16 = True

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
BASE_DIR = Path("/kaggle/input/nlp-fashionpedia")
ATTR_CSV = BASE_DIR/"predicted_attributes_final.csv"
IMG_DIR = BASE_DIR/"train_sample/train_sample"
LABELS_JSON = "/kaggle/input/nlp-fashionpedia/label_descriptions.json"
OUTPUT_CSV = "LLaVADescriptions.csv"
CHECKPOINT_FILE = "llava_checkpoint.json"

Using device: cuda


In [4]:
with open(LABELS_JSON) as f:
    label_data = json.load(f)
    attr_id_to_name = {a["id"]: a["name"] for a in label_data["attributes"]}


In [5]:
def build_prompt_llava_v2(attr_vec):
    try:
        if isinstance(attr_vec, str):
            vec = eval(attr_vec, {'np': np, 'int32': np.int32})
        elif isinstance(attr_vec, list):
            vec = attr_vec
        elif isinstance(attr_vec, np.ndarray):
            vec = attr_vec.tolist()
        else:
            vec = list(attr_vec)
        if not isinstance(vec, (list, tuple)):
            raise TypeError(f"Attribute vector is not a list or tuple after processing: {type(vec)} -> {vec}")
    except Exception as e:
        names = ["attribute data error"]
    else:
        names = [attr_id_to_name.get(i) for i, v in enumerate(vec) if v == 1 and attr_id_to_name.get(i)]
        names = [re.sub(r"\s*\(.*?\)", "", n).strip() for n in names if n]
    attr_str = ", ".join(names) if names else "no specific attributes detected"
    return (
      "USER: <image>\n"
      "Describe this clothing item in detail. Do not mention words like lady, woman, man, model, etc. "
      f"It has the following characteristics: {attr_str}. Focus on its visual appearance, including the color, material, any patterns, the style of the garment (e.g., casual, formal), and specific features like the neckline, sleeves, etc., and overall silhouette. "
      "ASSISTANT:"
    )


In [6]:
def clean_up_memory():

    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        torch.cuda.synchronize()

def load_checkpoint():

    if os.path.exists(CHECKPOINT_FILE):
        with open(CHECKPOINT_FILE, 'r') as f:
            return json.load(f)
    return {"processed_ids": [], "descriptions": []}

In [7]:
def save_checkpoint(checkpoint_data):
   
    with open(CHECKPOINT_FILE, 'w') as f:
        json.dump(checkpoint_data, f)

def preprocess_image(img_path, target_size=(336, 336)):
   
    try:
        img = Image.open(img_path).convert("RGB")
        img = img.resize(target_size, Image.LANCZOS)
        return img
    except Exception as e:
        print(f"Error loading image {img_path}: {e}")
        return None

In [8]:
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
secret_value_0 = user_secrets.get_secret("HF_TOKEN")


In [9]:
from transformers import AutoProcessor, AutoModelForVision2Seq

model_id = "llava-hf/llava-1.5-13b-hf"
processor = AutoProcessor.from_pretrained(model_id)
model = AutoModelForVision2Seq.from_pretrained(
    model_id,
    low_cpu_mem_usage=True,
    torch_dtype=torch.float16 if USE_FP16 else torch.float32,
    device_map="auto",
)

model.gradient_checkpointing_enable()

checkpoint = load_checkpoint()
processed_ids = set(checkpoint["processed_ids"])
generated = checkpoint["descriptions"]
df = pd.read_csv(ATTR_CSV)
df = df[~df["image_id"].isin(processed_ids)]

print(f"Found {len(df)} images to process")

num_batches = (len(df) + BATCH_SIZE - 1) // BATCH_SIZE

2025-04-17 13:19:08.474219: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1744895948.716881      31 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1744895948.784705      31 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


processor_config.json:   0%|          | 0.00/173 [00:00<?, ?B/s]

chat_template.json:   0%|          | 0.00/701 [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/505 [00:00<?, ?B/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


tokenizer_config.json:   0%|          | 0.00/1.45k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/3.62M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/41.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/552 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.10k [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/77.2k [00:00<?, ?B/s]

Fetching 6 files:   0%|          | 0/6 [00:00<?, ?it/s]

model-00004-of-00006.safetensors:   0%|          | 0.00/4.93G [00:00<?, ?B/s]

model-00001-of-00006.safetensors:   0%|          | 0.00/4.96G [00:00<?, ?B/s]

model-00002-of-00006.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00003-of-00006.safetensors:   0%|          | 0.00/4.88G [00:00<?, ?B/s]

model-00005-of-00006.safetensors:   0%|          | 0.00/4.93G [00:00<?, ?B/s]

model-00006-of-00006.safetensors:   0%|          | 0.00/2.02G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/6 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/141 [00:00<?, ?B/s]

Found 2000 images to process


In [10]:
try:
    for batch_idx in tqdm(range(num_batches), desc="Generating"):
        
        clean_up_memory()
        
        batch_df = df.iloc[batch_idx*BATCH_SIZE : (batch_idx+1)*BATCH_SIZE]
        images, prompts, ids = [], [], []
        
        for _, row in batch_df.iterrows():
            img_id = row["image_id"]
            img_path = Path(IMG_DIR) / f"{img_id}.jpg"
            
            img = preprocess_image(img_path)
            if img is None:
                continue
            
            images.append(img)
            prompts.append(build_prompt_llava_v2(row["attributes"]))
            ids.append(img_id)
        
        if not images:
            continue
        
        try:
            with torch.no_grad():
                inputs = processor(
                    images=images,
                    text=prompts,
                    return_tensors="pt",
                    padding=True,
                    truncation=True
                ).to(device)
                
                
                with torch.cuda.amp.autocast(enabled=USE_FP16):
                    gen_ids = model.generate(
                        **inputs,
                        max_new_tokens=MAX_NEW_TOKENS,
                        do_sample=False,
                        use_cache=True
                    )
            
            outputs = processor.batch_decode(gen_ids, skip_special_tokens=True)
            

            for img_id, raw_output in zip(ids, outputs):
               
                if "ASSISTANT:" in raw_output:
                    description = raw_output.split("ASSISTANT:", 1)[1].strip()
                else:
                    description = raw_output
                

                sentences = re.split(r'(?<=[.!?])\s+', description)
                if sentences and not re.search(r'[.!?]$', sentences[-1]):
                    
                    if len(sentences) > 1:
                        description = ' '.join(sentences[:-1])
                    else:
                        description = sentences[0] + "."
                
                generated.append({"image_id": img_id, "description": description})
                processed_ids.add(img_id)
            
            del inputs, gen_ids, outputs
            clean_up_memory()
            
            if batch_idx % 5 == 0 or batch_idx == num_batches - 1:
                checkpoint = {
                    "processed_ids": list(processed_ids),
                    "descriptions": generated
                }
                save_checkpoint(checkpoint)
                
                
                temp_df = pd.DataFrame(generated)
                temp_df.to_csv(OUTPUT_CSV, index=False)
                print(f"✓ Checkpoint saved: {len(generated)} descriptions processed")
                
        except RuntimeError as e:
            if "out of memory" in str(e).lower():
                print(f"OOM error in batch {batch_idx}. Saving checkpoint and exiting.")
                checkpoint = {
                    "processed_ids": list(processed_ids),
                    "descriptions": generated
                }
                save_checkpoint(checkpoint)
            
                temp_df = pd.DataFrame(generated)
                temp_df.to_csv(OUTPUT_CSV, index=False)
                print(f"✓ Saved {len(generated)} descriptions before OOM error")
                break

except KeyboardInterrupt:
    checkpoint = {
        "processed_ids": list(processed_ids),
        "descriptions": generated
    }
    save_checkpoint(checkpoint)

finally:
    out_df = pd.DataFrame(generated)
    out_df.to_csv(OUTPUT_CSV, index=False)
    print(f" Wrote {len(out_df)} descriptions to {OUTPUT_CSV}")

Generating:   0%|          | 0/1000 [00:00<?, ?it/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
  with torch.cuda.amp.autocast(enabled=USE_FP16):
Generating:   0%|          | 1/1000 [00:22<6:07:57, 22.10s/it]

✓ Checkpoint saved: 2 descriptions processed


Generating:   1%|          | 6/1000 [01:55<5:01:44, 18.21s/it]

✓ Checkpoint saved: 12 descriptions processed


Generating:   1%|          | 11/1000 [03:28<5:12:58, 18.99s/it]

✓ Checkpoint saved: 22 descriptions processed


Generating:   2%|▏         | 16/1000 [05:00<4:58:33, 18.21s/it]

✓ Checkpoint saved: 32 descriptions processed


Generating:   3%|▎         | 26/1000 [08:12<4:41:44, 17.36s/it]

✓ Checkpoint saved: 52 descriptions processed


Generating:   3%|▎         | 31/1000 [09:53<4:57:46, 18.44s/it]

✓ Checkpoint saved: 62 descriptions processed


Generating:   4%|▎         | 36/1000 [11:24<4:50:47, 18.10s/it]

✓ Checkpoint saved: 72 descriptions processed


Generating:   4%|▍         | 41/1000 [13:04<5:18:27, 19.92s/it]

✓ Checkpoint saved: 82 descriptions processed


Generating:   5%|▍         | 46/1000 [14:32<4:48:09, 18.12s/it]

✓ Checkpoint saved: 92 descriptions processed


Generating:   5%|▌         | 51/1000 [15:59<4:35:50, 17.44s/it]

✓ Checkpoint saved: 102 descriptions processed


Generating:   6%|▌         | 56/1000 [17:27<4:52:38, 18.60s/it]

✓ Checkpoint saved: 112 descriptions processed


Generating:   6%|▌         | 61/1000 [18:59<4:54:13, 18.80s/it]

✓ Checkpoint saved: 122 descriptions processed


Generating:   7%|▋         | 66/1000 [20:31<4:49:57, 18.63s/it]

✓ Checkpoint saved: 132 descriptions processed


Generating:   7%|▋         | 71/1000 [22:04<4:52:04, 18.86s/it]

✓ Checkpoint saved: 142 descriptions processed


Generating:   8%|▊         | 76/1000 [23:33<4:37:19, 18.01s/it]

✓ Checkpoint saved: 152 descriptions processed


Generating:   8%|▊         | 81/1000 [24:57<4:17:47, 16.83s/it]

✓ Checkpoint saved: 162 descriptions processed


Generating:   9%|▊         | 86/1000 [26:32<4:41:47, 18.50s/it]

✓ Checkpoint saved: 172 descriptions processed


Generating:   9%|▉         | 91/1000 [28:11<5:05:55, 20.19s/it]

✓ Checkpoint saved: 182 descriptions processed


Generating:  10%|▉         | 96/1000 [29:54<5:11:56, 20.70s/it]

✓ Checkpoint saved: 192 descriptions processed


Generating:  10%|█         | 101/1000 [31:25<4:40:50, 18.74s/it]

✓ Checkpoint saved: 202 descriptions processed


Generating:  11%|█         | 106/1000 [33:00<4:43:20, 19.02s/it]

✓ Checkpoint saved: 212 descriptions processed


Generating:  11%|█         | 111/1000 [34:37<4:38:09, 18.77s/it]

✓ Checkpoint saved: 222 descriptions processed


Generating:  12%|█▏        | 116/1000 [36:10<4:35:16, 18.68s/it]

✓ Checkpoint saved: 232 descriptions processed


Generating:  12%|█▏        | 121/1000 [37:50<4:49:27, 19.76s/it]

✓ Checkpoint saved: 242 descriptions processed


Generating:  13%|█▎        | 126/1000 [39:22<4:30:52, 18.60s/it]

✓ Checkpoint saved: 252 descriptions processed


Generating:  13%|█▎        | 131/1000 [41:01<4:49:43, 20.00s/it]

✓ Checkpoint saved: 262 descriptions processed


Generating:  14%|█▎        | 136/1000 [42:37<4:44:34, 19.76s/it]

✓ Checkpoint saved: 272 descriptions processed


Generating:  14%|█▍        | 141/1000 [44:09<4:26:31, 18.62s/it]

✓ Checkpoint saved: 282 descriptions processed


Generating:  15%|█▍        | 146/1000 [45:35<4:07:36, 17.40s/it]

✓ Checkpoint saved: 292 descriptions processed


Generating:  15%|█▌        | 151/1000 [47:13<4:37:25, 19.61s/it]

✓ Checkpoint saved: 302 descriptions processed


Generating:  16%|█▌        | 156/1000 [48:48<4:24:24, 18.80s/it]

✓ Checkpoint saved: 312 descriptions processed


Generating:  16%|█▌        | 161/1000 [50:22<4:32:06, 19.46s/it]

✓ Checkpoint saved: 322 descriptions processed


Generating:  17%|█▋        | 166/1000 [51:57<4:20:34, 18.75s/it]

✓ Checkpoint saved: 332 descriptions processed


Generating:  17%|█▋        | 171/1000 [53:29<4:11:40, 18.22s/it]

✓ Checkpoint saved: 342 descriptions processed


Generating:  18%|█▊        | 176/1000 [55:08<4:26:01, 19.37s/it]

✓ Checkpoint saved: 352 descriptions processed


Generating:  18%|█▊        | 181/1000 [56:43<4:28:18, 19.66s/it]

✓ Checkpoint saved: 362 descriptions processed


Generating:  19%|█▊        | 186/1000 [58:25<4:38:07, 20.50s/it]

✓ Checkpoint saved: 372 descriptions processed


Generating:  19%|█▉        | 191/1000 [1:00:09<4:53:51, 21.79s/it]

✓ Checkpoint saved: 382 descriptions processed


Generating:  20%|█▉        | 196/1000 [1:01:45<4:17:50, 19.24s/it]

✓ Checkpoint saved: 392 descriptions processed


Generating:  20%|██        | 201/1000 [1:03:17<4:03:39, 18.30s/it]

✓ Checkpoint saved: 402 descriptions processed


Generating:  21%|██        | 206/1000 [1:04:55<4:22:18, 19.82s/it]

✓ Checkpoint saved: 412 descriptions processed


Generating:  21%|██        | 211/1000 [1:06:29<4:08:44, 18.92s/it]

✓ Checkpoint saved: 422 descriptions processed


Generating:  22%|██▏       | 216/1000 [1:08:07<4:20:09, 19.91s/it]

✓ Checkpoint saved: 432 descriptions processed


Generating:  22%|██▏       | 221/1000 [1:09:43<4:03:59, 18.79s/it]

✓ Checkpoint saved: 442 descriptions processed


Generating:  23%|██▎       | 226/1000 [1:11:17<4:10:00, 19.38s/it]

✓ Checkpoint saved: 452 descriptions processed


Generating:  23%|██▎       | 231/1000 [1:12:46<3:47:28, 17.75s/it]

✓ Checkpoint saved: 462 descriptions processed


Generating:  24%|██▎       | 236/1000 [1:14:19<3:48:43, 17.96s/it]

✓ Checkpoint saved: 472 descriptions processed


Generating:  24%|██▍       | 241/1000 [1:15:58<4:11:24, 19.87s/it]

✓ Checkpoint saved: 482 descriptions processed


Generating:  25%|██▍       | 246/1000 [1:17:26<3:44:25, 17.86s/it]

✓ Checkpoint saved: 492 descriptions processed


Generating:  25%|██▌       | 251/1000 [1:19:01<3:56:33, 18.95s/it]

✓ Checkpoint saved: 502 descriptions processed


Generating:  26%|██▌       | 256/1000 [1:20:36<3:53:01, 18.79s/it]

✓ Checkpoint saved: 512 descriptions processed


Generating:  26%|██▌       | 261/1000 [1:22:10<3:54:18, 19.02s/it]

✓ Checkpoint saved: 522 descriptions processed


Generating:  27%|██▋       | 266/1000 [1:23:42<3:40:54, 18.06s/it]

✓ Checkpoint saved: 532 descriptions processed


Generating:  27%|██▋       | 271/1000 [1:25:29<4:05:32, 20.21s/it]

✓ Checkpoint saved: 542 descriptions processed


Generating:  28%|██▊       | 276/1000 [1:26:59<3:42:02, 18.40s/it]

✓ Checkpoint saved: 552 descriptions processed


Generating:  28%|██▊       | 281/1000 [1:28:39<3:50:38, 19.25s/it]

✓ Checkpoint saved: 562 descriptions processed


Generating:  29%|██▊       | 286/1000 [1:30:17<3:53:10, 19.59s/it]

✓ Checkpoint saved: 572 descriptions processed


Generating:  29%|██▉       | 291/1000 [1:31:56<3:49:05, 19.39s/it]

✓ Checkpoint saved: 582 descriptions processed


Generating:  30%|██▉       | 296/1000 [1:33:24<3:30:12, 17.91s/it]

✓ Checkpoint saved: 592 descriptions processed


Generating:  30%|███       | 301/1000 [1:35:03<3:43:25, 19.18s/it]

✓ Checkpoint saved: 602 descriptions processed


Generating:  31%|███       | 306/1000 [1:36:36<3:38:20, 18.88s/it]

✓ Checkpoint saved: 612 descriptions processed


Generating:  31%|███       | 311/1000 [1:38:10<3:40:16, 19.18s/it]

✓ Checkpoint saved: 622 descriptions processed


Generating:  32%|███▏      | 316/1000 [1:39:43<3:32:36, 18.65s/it]

✓ Checkpoint saved: 632 descriptions processed


Generating:  32%|███▏      | 321/1000 [1:41:10<3:20:10, 17.69s/it]

✓ Checkpoint saved: 642 descriptions processed


Generating:  33%|███▎      | 326/1000 [1:42:44<3:37:01, 19.32s/it]

✓ Checkpoint saved: 652 descriptions processed


Generating:  33%|███▎      | 331/1000 [1:44:24<3:39:21, 19.67s/it]

✓ Checkpoint saved: 662 descriptions processed


Generating:  34%|███▎      | 336/1000 [1:46:03<3:41:16, 20.00s/it]

✓ Checkpoint saved: 672 descriptions processed


Generating:  34%|███▍      | 341/1000 [1:47:39<3:40:15, 20.05s/it]

✓ Checkpoint saved: 682 descriptions processed


Generating:  35%|███▍      | 346/1000 [1:49:23<3:46:30, 20.78s/it]

✓ Checkpoint saved: 692 descriptions processed


Generating:  35%|███▌      | 351/1000 [1:50:57<3:32:07, 19.61s/it]

✓ Checkpoint saved: 702 descriptions processed


Generating:  36%|███▌      | 356/1000 [1:52:27<3:13:17, 18.01s/it]

✓ Checkpoint saved: 712 descriptions processed


Generating:  36%|███▌      | 361/1000 [1:53:57<3:13:48, 18.20s/it]

✓ Checkpoint saved: 722 descriptions processed


Generating:  37%|███▋      | 366/1000 [1:55:36<3:28:25, 19.72s/it]

✓ Checkpoint saved: 732 descriptions processed


Generating:  37%|███▋      | 371/1000 [1:57:14<3:24:00, 19.46s/it]

✓ Checkpoint saved: 742 descriptions processed


Generating:  38%|███▊      | 376/1000 [1:58:55<3:25:58, 19.81s/it]

✓ Checkpoint saved: 752 descriptions processed


Generating:  38%|███▊      | 381/1000 [2:00:34<3:25:17, 19.90s/it]

✓ Checkpoint saved: 762 descriptions processed


Generating:  39%|███▊      | 386/1000 [2:02:03<3:08:15, 18.40s/it]

✓ Checkpoint saved: 772 descriptions processed


Generating:  39%|███▉      | 391/1000 [2:03:56<3:33:45, 21.06s/it]

✓ Checkpoint saved: 782 descriptions processed


Generating:  40%|███▉      | 396/1000 [2:05:26<3:09:52, 18.86s/it]

✓ Checkpoint saved: 792 descriptions processed


Generating:  40%|████      | 401/1000 [2:06:54<2:52:16, 17.26s/it]

✓ Checkpoint saved: 802 descriptions processed


Generating:  41%|████      | 406/1000 [2:08:24<2:54:17, 17.61s/it]

✓ Checkpoint saved: 812 descriptions processed


Generating:  41%|████      | 411/1000 [2:10:02<3:09:51, 19.34s/it]

✓ Checkpoint saved: 822 descriptions processed


Generating:  42%|████▏     | 416/1000 [2:11:41<3:19:25, 20.49s/it]

✓ Checkpoint saved: 832 descriptions processed


Generating:  42%|████▏     | 421/1000 [2:13:19<3:09:23, 19.63s/it]

✓ Checkpoint saved: 842 descriptions processed


Generating:  43%|████▎     | 426/1000 [2:14:50<2:54:18, 18.22s/it]

✓ Checkpoint saved: 852 descriptions processed


Generating:  43%|████▎     | 431/1000 [2:16:31<3:09:45, 20.01s/it]

✓ Checkpoint saved: 862 descriptions processed


Generating:  44%|████▎     | 436/1000 [2:18:02<2:54:40, 18.58s/it]

✓ Checkpoint saved: 872 descriptions processed


Generating:  44%|████▍     | 441/1000 [2:19:34<2:53:25, 18.62s/it]

✓ Checkpoint saved: 882 descriptions processed


Generating:  45%|████▍     | 446/1000 [2:21:01<2:44:35, 17.83s/it]

✓ Checkpoint saved: 892 descriptions processed


Generating:  45%|████▌     | 451/1000 [2:22:23<2:30:46, 16.48s/it]

✓ Checkpoint saved: 902 descriptions processed


Generating:  46%|████▌     | 456/1000 [2:24:06<3:01:33, 20.02s/it]

✓ Checkpoint saved: 912 descriptions processed


Generating:  46%|████▌     | 461/1000 [2:25:45<2:59:03, 19.93s/it]

✓ Checkpoint saved: 922 descriptions processed


Generating:  47%|████▋     | 466/1000 [2:27:24<2:52:22, 19.37s/it]

✓ Checkpoint saved: 932 descriptions processed


Generating:  47%|████▋     | 471/1000 [2:28:56<2:39:25, 18.08s/it]

✓ Checkpoint saved: 942 descriptions processed


Generating:  48%|████▊     | 476/1000 [2:30:28<2:35:12, 17.77s/it]

✓ Checkpoint saved: 952 descriptions processed


Generating:  48%|████▊     | 481/1000 [2:32:06<2:48:39, 19.50s/it]

✓ Checkpoint saved: 962 descriptions processed


Generating:  49%|████▊     | 486/1000 [2:33:37<2:33:34, 17.93s/it]

✓ Checkpoint saved: 972 descriptions processed


Generating:  49%|████▉     | 491/1000 [2:35:10<2:32:31, 17.98s/it]

✓ Checkpoint saved: 982 descriptions processed


Generating:  50%|████▉     | 496/1000 [2:36:54<2:54:36, 20.79s/it]

✓ Checkpoint saved: 992 descriptions processed


Generating:  50%|█████     | 501/1000 [2:38:31<2:44:15, 19.75s/it]

✓ Checkpoint saved: 1002 descriptions processed


Generating:  51%|█████     | 506/1000 [2:40:05<2:39:31, 19.38s/it]

✓ Checkpoint saved: 1012 descriptions processed


Generating:  51%|█████     | 511/1000 [2:41:50<2:42:58, 20.00s/it]

✓ Checkpoint saved: 1022 descriptions processed


Generating:  52%|█████▏    | 516/1000 [2:43:24<2:29:50, 18.58s/it]

✓ Checkpoint saved: 1032 descriptions processed


Generating:  52%|█████▏    | 521/1000 [2:44:57<2:28:27, 18.60s/it]

✓ Checkpoint saved: 1042 descriptions processed


Generating:  53%|█████▎    | 526/1000 [2:46:21<2:15:18, 17.13s/it]

✓ Checkpoint saved: 1052 descriptions processed


Generating:  53%|█████▎    | 531/1000 [2:47:58<2:28:50, 19.04s/it]

✓ Checkpoint saved: 1062 descriptions processed


Generating:  54%|█████▎    | 536/1000 [2:49:39<2:31:51, 19.64s/it]

✓ Checkpoint saved: 1072 descriptions processed


Generating:  54%|█████▍    | 541/1000 [2:51:07<2:12:28, 17.32s/it]

✓ Checkpoint saved: 1082 descriptions processed


Generating:  55%|█████▍    | 546/1000 [2:52:47<2:34:47, 20.46s/it]

✓ Checkpoint saved: 1092 descriptions processed


Generating:  55%|█████▌    | 551/1000 [2:54:23<2:24:25, 19.30s/it]

✓ Checkpoint saved: 1102 descriptions processed


Generating:  56%|█████▌    | 556/1000 [2:55:48<2:07:45, 17.27s/it]

✓ Checkpoint saved: 1112 descriptions processed


Generating:  56%|█████▌    | 561/1000 [2:57:18<2:08:24, 17.55s/it]

✓ Checkpoint saved: 1122 descriptions processed


Generating:  57%|█████▋    | 566/1000 [2:58:47<2:07:23, 17.61s/it]

✓ Checkpoint saved: 1132 descriptions processed


Generating:  57%|█████▋    | 571/1000 [3:00:21<2:13:06, 18.62s/it]

✓ Checkpoint saved: 1142 descriptions processed


Generating:  58%|█████▊    | 576/1000 [3:02:01<2:16:46, 19.36s/it]

✓ Checkpoint saved: 1152 descriptions processed


Generating:  58%|█████▊    | 581/1000 [3:03:25<2:00:46, 17.29s/it]

✓ Checkpoint saved: 1162 descriptions processed


Generating:  59%|█████▊    | 586/1000 [3:05:04<2:16:42, 19.81s/it]

✓ Checkpoint saved: 1172 descriptions processed


Generating:  59%|█████▉    | 591/1000 [3:06:36<2:08:07, 18.80s/it]

✓ Checkpoint saved: 1182 descriptions processed


Generating:  60%|█████▉    | 596/1000 [3:08:06<2:00:56, 17.96s/it]

✓ Checkpoint saved: 1192 descriptions processed


Generating:  60%|██████    | 601/1000 [3:09:42<2:09:03, 19.41s/it]

✓ Checkpoint saved: 1202 descriptions processed


Generating:  61%|██████    | 606/1000 [3:11:22<2:13:45, 20.37s/it]

✓ Checkpoint saved: 1212 descriptions processed


Generating:  61%|██████    | 611/1000 [3:12:50<1:57:18, 18.09s/it]

✓ Checkpoint saved: 1222 descriptions processed


Generating:  62%|██████▏   | 616/1000 [3:14:28<2:02:37, 19.16s/it]

✓ Checkpoint saved: 1232 descriptions processed


Generating:  62%|██████▏   | 621/1000 [3:16:05<1:58:37, 18.78s/it]

✓ Checkpoint saved: 1242 descriptions processed


Generating:  63%|██████▎   | 626/1000 [3:17:37<1:55:19, 18.50s/it]

✓ Checkpoint saved: 1252 descriptions processed


Generating:  63%|██████▎   | 631/1000 [3:19:17<2:03:54, 20.15s/it]

✓ Checkpoint saved: 1262 descriptions processed


Generating:  64%|██████▎   | 636/1000 [3:20:51<1:57:23, 19.35s/it]

✓ Checkpoint saved: 1272 descriptions processed


Generating:  64%|██████▍   | 641/1000 [3:22:29<1:57:57, 19.71s/it]

✓ Checkpoint saved: 1282 descriptions processed


Generating:  65%|██████▍   | 646/1000 [3:24:02<1:51:58, 18.98s/it]

✓ Checkpoint saved: 1292 descriptions processed


Generating:  65%|██████▌   | 651/1000 [3:25:31<1:41:44, 17.49s/it]

✓ Checkpoint saved: 1302 descriptions processed


Generating:  66%|██████▌   | 656/1000 [3:27:08<1:50:02, 19.19s/it]

✓ Checkpoint saved: 1312 descriptions processed


Generating:  66%|██████▌   | 661/1000 [3:28:53<1:55:43, 20.48s/it]

✓ Checkpoint saved: 1322 descriptions processed


Generating:  67%|██████▋   | 666/1000 [3:30:30<1:51:08, 19.97s/it]

✓ Checkpoint saved: 1332 descriptions processed


Generating:  67%|██████▋   | 671/1000 [3:32:02<1:42:32, 18.70s/it]

✓ Checkpoint saved: 1342 descriptions processed


Generating:  68%|██████▊   | 676/1000 [3:33:34<1:40:18, 18.58s/it]

✓ Checkpoint saved: 1352 descriptions processed


Generating:  68%|██████▊   | 681/1000 [3:35:08<1:36:26, 18.14s/it]

✓ Checkpoint saved: 1362 descriptions processed


Generating:  69%|██████▊   | 686/1000 [3:36:45<1:38:25, 18.81s/it]

✓ Checkpoint saved: 1372 descriptions processed


Generating:  69%|██████▉   | 691/1000 [3:38:22<1:34:40, 18.38s/it]

✓ Checkpoint saved: 1382 descriptions processed


Generating:  70%|██████▉   | 696/1000 [3:39:57<1:36:55, 19.13s/it]

✓ Checkpoint saved: 1392 descriptions processed


Generating:  70%|███████   | 701/1000 [3:41:27<1:33:22, 18.74s/it]

✓ Checkpoint saved: 1402 descriptions processed


Generating:  71%|███████   | 706/1000 [3:42:55<1:26:40, 17.69s/it]

✓ Checkpoint saved: 1412 descriptions processed


Generating:  71%|███████   | 711/1000 [3:44:35<1:28:44, 18.42s/it]

✓ Checkpoint saved: 1422 descriptions processed


Generating:  72%|███████▏  | 716/1000 [3:46:16<1:38:07, 20.73s/it]

✓ Checkpoint saved: 1432 descriptions processed


Generating:  72%|███████▏  | 721/1000 [3:47:51<1:26:11, 18.54s/it]

✓ Checkpoint saved: 1442 descriptions processed


Generating:  73%|███████▎  | 726/1000 [3:49:28<1:30:19, 19.78s/it]

✓ Checkpoint saved: 1452 descriptions processed


Generating:  73%|███████▎  | 731/1000 [3:51:07<1:26:27, 19.28s/it]

✓ Checkpoint saved: 1462 descriptions processed


Generating:  74%|███████▎  | 736/1000 [3:52:46<1:25:40, 19.47s/it]

✓ Checkpoint saved: 1472 descriptions processed


Generating:  74%|███████▍  | 741/1000 [3:54:26<1:27:16, 20.22s/it]

✓ Checkpoint saved: 1482 descriptions processed


Generating:  75%|███████▍  | 746/1000 [3:56:03<1:21:23, 19.23s/it]

✓ Checkpoint saved: 1492 descriptions processed


Generating:  75%|███████▌  | 751/1000 [3:57:34<1:18:15, 18.86s/it]

✓ Checkpoint saved: 1502 descriptions processed


Generating:  76%|███████▌  | 756/1000 [3:59:02<1:14:02, 18.21s/it]

✓ Checkpoint saved: 1512 descriptions processed


Generating:  76%|███████▌  | 761/1000 [4:00:36<1:12:53, 18.30s/it]

✓ Checkpoint saved: 1522 descriptions processed


Generating:  77%|███████▋  | 766/1000 [4:02:13<1:12:50, 18.68s/it]

✓ Checkpoint saved: 1532 descriptions processed


Generating:  77%|███████▋  | 771/1000 [4:03:47<1:09:17, 18.16s/it]

✓ Checkpoint saved: 1542 descriptions processed


Generating:  78%|███████▊  | 776/1000 [4:05:30<1:13:45, 19.76s/it]

✓ Checkpoint saved: 1552 descriptions processed


Generating:  78%|███████▊  | 781/1000 [4:07:01<1:07:14, 18.42s/it]

✓ Checkpoint saved: 1562 descriptions processed


Generating:  79%|███████▊  | 786/1000 [4:08:34<1:05:44, 18.43s/it]

✓ Checkpoint saved: 1572 descriptions processed


Generating:  79%|███████▉  | 791/1000 [4:10:09<1:03:04, 18.11s/it]

✓ Checkpoint saved: 1582 descriptions processed


Generating:  80%|███████▉  | 796/1000 [4:11:44<1:06:53, 19.67s/it]

✓ Checkpoint saved: 1592 descriptions processed


Generating:  80%|████████  | 801/1000 [4:13:18<1:02:10, 18.74s/it]

✓ Checkpoint saved: 1602 descriptions processed


Generating:  81%|████████  | 806/1000 [4:14:56<1:05:43, 20.33s/it]

✓ Checkpoint saved: 1612 descriptions processed


Generating:  81%|████████  | 811/1000 [4:16:34<1:03:13, 20.07s/it]

✓ Checkpoint saved: 1622 descriptions processed


Generating:  82%|████████▏ | 816/1000 [4:18:13<1:01:49, 20.16s/it]

✓ Checkpoint saved: 1632 descriptions processed


Generating:  82%|████████▏ | 821/1000 [4:19:50<57:27, 19.26s/it]  

✓ Checkpoint saved: 1642 descriptions processed


Generating:  83%|████████▎ | 826/1000 [4:21:16<51:46, 17.85s/it]

✓ Checkpoint saved: 1652 descriptions processed


Generating:  83%|████████▎ | 831/1000 [4:22:47<50:35, 17.96s/it]

✓ Checkpoint saved: 1662 descriptions processed


Generating:  84%|████████▎ | 836/1000 [4:24:15<49:27, 18.09s/it]

✓ Checkpoint saved: 1672 descriptions processed


Generating:  84%|████████▍ | 841/1000 [4:25:44<44:39, 16.85s/it]

✓ Checkpoint saved: 1682 descriptions processed


Generating:  85%|████████▍ | 846/1000 [4:27:21<49:14, 19.19s/it]

✓ Checkpoint saved: 1692 descriptions processed


Generating:  85%|████████▌ | 851/1000 [4:28:53<45:57, 18.50s/it]

✓ Checkpoint saved: 1702 descriptions processed


Generating:  86%|████████▌ | 856/1000 [4:30:32<46:50, 19.52s/it]

✓ Checkpoint saved: 1712 descriptions processed


Generating:  86%|████████▌ | 861/1000 [4:32:01<41:54, 18.09s/it]

✓ Checkpoint saved: 1722 descriptions processed


Generating:  87%|████████▋ | 866/1000 [4:33:37<42:02, 18.83s/it]

✓ Checkpoint saved: 1732 descriptions processed


Generating:  87%|████████▋ | 871/1000 [4:35:15<42:11, 19.62s/it]

✓ Checkpoint saved: 1742 descriptions processed


Generating:  88%|████████▊ | 876/1000 [4:36:52<40:49, 19.75s/it]

✓ Checkpoint saved: 1752 descriptions processed


Generating:  88%|████████▊ | 881/1000 [4:38:30<39:00, 19.67s/it]

✓ Checkpoint saved: 1762 descriptions processed


Generating:  89%|████████▊ | 886/1000 [4:40:08<37:02, 19.50s/it]

✓ Checkpoint saved: 1772 descriptions processed


Generating:  89%|████████▉ | 891/1000 [4:41:45<35:56, 19.78s/it]

✓ Checkpoint saved: 1782 descriptions processed


Generating:  90%|████████▉ | 896/1000 [4:43:20<32:31, 18.76s/it]

✓ Checkpoint saved: 1792 descriptions processed


Generating:  90%|█████████ | 901/1000 [4:44:58<32:20, 19.60s/it]

✓ Checkpoint saved: 1802 descriptions processed


Generating:  91%|█████████ | 906/1000 [4:46:38<31:53, 20.36s/it]

✓ Checkpoint saved: 1812 descriptions processed


Generating:  91%|█████████ | 911/1000 [4:48:12<29:39, 20.00s/it]

✓ Checkpoint saved: 1822 descriptions processed


Generating:  92%|█████████▏| 916/1000 [4:49:42<26:08, 18.68s/it]

✓ Checkpoint saved: 1832 descriptions processed


Generating:  92%|█████████▏| 921/1000 [4:51:26<26:11, 19.89s/it]

✓ Checkpoint saved: 1842 descriptions processed


Generating:  93%|█████████▎| 926/1000 [4:53:01<23:48, 19.31s/it]

✓ Checkpoint saved: 1852 descriptions processed


Generating:  93%|█████████▎| 931/1000 [4:54:33<21:03, 18.31s/it]

✓ Checkpoint saved: 1862 descriptions processed


Generating:  94%|█████████▎| 936/1000 [4:56:12<20:39, 19.36s/it]

✓ Checkpoint saved: 1872 descriptions processed


Generating:  94%|█████████▍| 941/1000 [4:57:53<19:03, 19.38s/it]

✓ Checkpoint saved: 1882 descriptions processed


Generating:  95%|█████████▍| 946/1000 [4:59:27<17:29, 19.43s/it]

✓ Checkpoint saved: 1892 descriptions processed


Generating:  95%|█████████▌| 951/1000 [5:01:07<15:52, 19.44s/it]

✓ Checkpoint saved: 1902 descriptions processed


Generating:  96%|█████████▌| 956/1000 [5:02:43<14:50, 20.24s/it]

✓ Checkpoint saved: 1912 descriptions processed


Generating:  96%|█████████▌| 961/1000 [5:04:06<11:05, 17.07s/it]

✓ Checkpoint saved: 1922 descriptions processed


Generating:  97%|█████████▋| 966/1000 [5:05:44<10:31, 18.59s/it]

✓ Checkpoint saved: 1932 descriptions processed


Generating:  97%|█████████▋| 971/1000 [5:07:20<08:37, 17.85s/it]

✓ Checkpoint saved: 1942 descriptions processed


Generating:  98%|█████████▊| 976/1000 [5:08:50<07:14, 18.11s/it]

✓ Checkpoint saved: 1952 descriptions processed


Generating:  98%|█████████▊| 981/1000 [5:10:18<05:34, 17.63s/it]

✓ Checkpoint saved: 1962 descriptions processed


Generating:  99%|█████████▊| 986/1000 [5:11:53<04:13, 18.13s/it]

✓ Checkpoint saved: 1972 descriptions processed


Generating:  99%|█████████▉| 991/1000 [5:13:25<02:46, 18.49s/it]

✓ Checkpoint saved: 1982 descriptions processed


Generating: 100%|█████████▉| 996/1000 [5:15:02<01:16, 19.11s/it]

✓ Checkpoint saved: 1992 descriptions processed


Generating: 100%|██████████| 1000/1000 [5:16:16<00:00, 18.98s/it]

✓ Checkpoint saved: 2000 descriptions processed
✔️  Wrote 2000 descriptions to LLaVADescriptions.csv



