# Applying image-to-text model on our images

In Google Colab, with T4 GPU.

In [15]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [2]:
drive_path_root = "/content/drive/MyDrive/SOME_PATH_IN_GOOGLE_DRIVE/datasets_generated_ready_models"

In [4]:
from transformers import pipeline

captioner = pipeline("image-to-text",model="Salesforce/blip-image-captioning-base", device=0)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/4.56k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/990M [00:00<?, ?B/s]

  return self.fget.__get__(instance, owner)()


tokenizer_config.json:   0%|          | 0.00/506 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/287 [00:00<?, ?B/s]

In [3]:
from PIL import Image
import os
import zipfile
from tqdm import tqdm
import shutil

def do_processing(zip_name, drive_path_root=drive_path_root):
    path_to_zip = drive_path_root + zip_name
    path_extracted = "/content" + zip_name

    print('ZIP source:', path_to_zip)
    print("Extracting to", path_extracted)

    os.makedirs(path_extracted, exist_ok=True)

    with zipfile.ZipFile(path_to_zip, 'r') as zip_ref:
        zip_ref.extractall(path_extracted)

    images = sorted([path_extracted+'/'+p for p in os.listdir(path_extracted) if p.endswith('.png')])
    print(images)

    images = sorted([path_extracted+'/'+p for p in os.listdir(path_extracted) if p.endswith('.png')])

    print("computing captions...")
    image_captions = []
    for image_path in tqdm(images):
        raw_image = Image.open(image_path).convert("RGB")
        caption = captioner(raw_image)
        image_captions.append(caption)
    print("FINISHED computing captions, saving...")


    # save image captions to a file
    output_file = f"image_captions_{zip_name.split('/')[-1].split('.zip')[0]}.txt"
    print('Saved to:', output_file)

    with open(f"image_captions_{zip_name.split('/')[-1].split('.zip')[0]}.txt", 'w') as f:
        for caption in image_captions:
            f.write(caption[0]['generated_text'] + '\n')


    # copy to google drive
    shutil.copyfile(output_file, drive_path_root + '/' + zip_name.split('/')[1] + '/' + output_file)
    print("Finished, saved to", drive_path_root + '/' + zip_name.split('/')[1] + '/' + output_file)

In [6]:
zip_name = "/SD_XL/brecahad_zipped.zip"
do_processing(zip_name)

['/content/SD_XL/brecahad_zipped.zip/brecahad_img_0.png', '/content/SD_XL/brecahad_zipped.zip/brecahad_img_1.png', '/content/SD_XL/brecahad_zipped.zip/brecahad_img_2.png', '/content/SD_XL/brecahad_zipped.zip/brecahad_img_3.png', '/content/SD_XL/brecahad_zipped.zip/brecahad_img_4.png', '/content/SD_XL/brecahad_zipped.zip/brecahad_img_5.png', '/content/SD_XL/brecahad_zipped.zip/brecahad_img_6.png', '/content/SD_XL/brecahad_zipped.zip/brecahad_img_7.png', '/content/SD_XL/brecahad_zipped.zip/brecahad_img_8.png', '/content/SD_XL/brecahad_zipped.zip/brecahad_img_9.png']
computing captions...


100%|██████████| 10/10 [00:06<00:00,  1.56it/s]


FINISHED computing captions, saving...
Saved to: image_captions_brecahad_zipped.txt
Finished, saved to /content/drive/MyDrive/--studies_rest/sem9 heidelberg mine/Generative Neural Networks for the Sciences /datasets_generated_ready_models/SD_XL/image_captions_brecahad_zipped.txt


In [7]:
zip_name = "/SD_XL/cars_zipped.zip"
do_processing(zip_name)

['/content/SD_XL/cars_zipped.zip/cars_img_0.png', '/content/SD_XL/cars_zipped.zip/cars_img_1.png', '/content/SD_XL/cars_zipped.zip/cars_img_10.png', '/content/SD_XL/cars_zipped.zip/cars_img_11.png', '/content/SD_XL/cars_zipped.zip/cars_img_12.png', '/content/SD_XL/cars_zipped.zip/cars_img_13.png', '/content/SD_XL/cars_zipped.zip/cars_img_14.png', '/content/SD_XL/cars_zipped.zip/cars_img_15.png', '/content/SD_XL/cars_zipped.zip/cars_img_16.png', '/content/SD_XL/cars_zipped.zip/cars_img_17.png', '/content/SD_XL/cars_zipped.zip/cars_img_18.png', '/content/SD_XL/cars_zipped.zip/cars_img_19.png', '/content/SD_XL/cars_zipped.zip/cars_img_2.png', '/content/SD_XL/cars_zipped.zip/cars_img_20.png', '/content/SD_XL/cars_zipped.zip/cars_img_21.png', '/content/SD_XL/cars_zipped.zip/cars_img_22.png', '/content/SD_XL/cars_zipped.zip/cars_img_23.png', '/content/SD_XL/cars_zipped.zip/cars_img_24.png', '/content/SD_XL/cars_zipped.zip/cars_img_25.png', '/content/SD_XL/cars_zipped.zip/cars_img_26.png', '/

100%|██████████| 50/50 [00:12<00:00,  4.07it/s]

FINISHED computing captions, saving...
Saved to: image_captions_cars_zipped.txt
Finished, saved to /content/drive/MyDrive/--studies_rest/sem9 heidelberg mine/Generative Neural Networks for the Sciences /datasets_generated_ready_models/SD_XL/image_captions_cars_zipped.txt





In [8]:
zip_name = "/SD_XL/wildlife_zipped.zip"
do_processing(zip_name)

['/content/SD_XL/wildlife_zipped.zip/wildlife_img_0.png', '/content/SD_XL/wildlife_zipped.zip/wildlife_img_1.png', '/content/SD_XL/wildlife_zipped.zip/wildlife_img_10.png', '/content/SD_XL/wildlife_zipped.zip/wildlife_img_11.png', '/content/SD_XL/wildlife_zipped.zip/wildlife_img_12.png', '/content/SD_XL/wildlife_zipped.zip/wildlife_img_13.png', '/content/SD_XL/wildlife_zipped.zip/wildlife_img_14.png', '/content/SD_XL/wildlife_zipped.zip/wildlife_img_15.png', '/content/SD_XL/wildlife_zipped.zip/wildlife_img_16.png', '/content/SD_XL/wildlife_zipped.zip/wildlife_img_17.png', '/content/SD_XL/wildlife_zipped.zip/wildlife_img_18.png', '/content/SD_XL/wildlife_zipped.zip/wildlife_img_19.png', '/content/SD_XL/wildlife_zipped.zip/wildlife_img_2.png', '/content/SD_XL/wildlife_zipped.zip/wildlife_img_20.png', '/content/SD_XL/wildlife_zipped.zip/wildlife_img_21.png', '/content/SD_XL/wildlife_zipped.zip/wildlife_img_22.png', '/content/SD_XL/wildlife_zipped.zip/wildlife_img_23.png', '/content/SD_XL/

100%|██████████| 50/50 [00:18<00:00,  2.68it/s]

FINISHED computing captions, saving...
Saved to: image_captions_wildlife_zipped.txt
Finished, saved to /content/drive/MyDrive/--studies_rest/sem9 heidelberg mine/Generative Neural Networks for the Sciences /datasets_generated_ready_models/SD_XL/image_captions_wildlife_zipped.txt





In [28]:
zip_name = "/DALLE3/dalle_wildlife_zipped.zip"
do_processing(zip_name)

['/content/DALLE3/dalle_wildlife_zipped.zip/wildlife_img_0.png', '/content/DALLE3/dalle_wildlife_zipped.zip/wildlife_img_1.png', '/content/DALLE3/dalle_wildlife_zipped.zip/wildlife_img_10.png', '/content/DALLE3/dalle_wildlife_zipped.zip/wildlife_img_11.png', '/content/DALLE3/dalle_wildlife_zipped.zip/wildlife_img_12.png', '/content/DALLE3/dalle_wildlife_zipped.zip/wildlife_img_13.png', '/content/DALLE3/dalle_wildlife_zipped.zip/wildlife_img_14.png', '/content/DALLE3/dalle_wildlife_zipped.zip/wildlife_img_15.png', '/content/DALLE3/dalle_wildlife_zipped.zip/wildlife_img_16.png', '/content/DALLE3/dalle_wildlife_zipped.zip/wildlife_img_17.png', '/content/DALLE3/dalle_wildlife_zipped.zip/wildlife_img_18.png', '/content/DALLE3/dalle_wildlife_zipped.zip/wildlife_img_19.png', '/content/DALLE3/dalle_wildlife_zipped.zip/wildlife_img_2.png', '/content/DALLE3/dalle_wildlife_zipped.zip/wildlife_img_20.png', '/content/DALLE3/dalle_wildlife_zipped.zip/wildlife_img_21.png', '/content/DALLE3/dalle_wild

100%|██████████| 50/50 [00:17<00:00,  2.83it/s]

FINISHED computing captions, saving...
Saved to: image_captions_dalle_wildlife_zipped.txt
Finished, saved to /content/drive/MyDrive/--studies_rest/sem9 heidelberg mine/Generative Neural Networks for the Sciences /datasets_generated_ready_models/DALLE3/image_captions_dalle_wildlife_zipped.txt





In [29]:
zip_name = "/DALLE3/dalle_cars_zipped.zip"
do_processing(zip_name)

['/content/DALLE3/dalle_cars_zipped.zip/car_img0.png', '/content/DALLE3/dalle_cars_zipped.zip/car_img1.png', '/content/DALLE3/dalle_cars_zipped.zip/car_img10.png', '/content/DALLE3/dalle_cars_zipped.zip/car_img11.png', '/content/DALLE3/dalle_cars_zipped.zip/car_img12.png', '/content/DALLE3/dalle_cars_zipped.zip/car_img13.png', '/content/DALLE3/dalle_cars_zipped.zip/car_img14.png', '/content/DALLE3/dalle_cars_zipped.zip/car_img15.png', '/content/DALLE3/dalle_cars_zipped.zip/car_img16.png', '/content/DALLE3/dalle_cars_zipped.zip/car_img17.png', '/content/DALLE3/dalle_cars_zipped.zip/car_img18.png', '/content/DALLE3/dalle_cars_zipped.zip/car_img19.png', '/content/DALLE3/dalle_cars_zipped.zip/car_img2.png', '/content/DALLE3/dalle_cars_zipped.zip/car_img20.png', '/content/DALLE3/dalle_cars_zipped.zip/car_img21.png', '/content/DALLE3/dalle_cars_zipped.zip/car_img22.png', '/content/DALLE3/dalle_cars_zipped.zip/car_img23.png', '/content/DALLE3/dalle_cars_zipped.zip/car_img24.png', '/content/DA

100%|██████████| 50/50 [00:14<00:00,  3.43it/s]

FINISHED computing captions, saving...
Saved to: image_captions_dalle_cars_zipped.txt
Finished, saved to /content/drive/MyDrive/--studies_rest/sem9 heidelberg mine/Generative Neural Networks for the Sciences /datasets_generated_ready_models/DALLE3/image_captions_dalle_cars_zipped.txt





In [30]:
zip_name = "/DALLE3/dalle_brecahad_zipped.zip"
do_processing(zip_name)

['/content/DALLE3/dalle_brecahad_zipped.zip/brecahad_img_0.png', '/content/DALLE3/dalle_brecahad_zipped.zip/brecahad_img_1.png', '/content/DALLE3/dalle_brecahad_zipped.zip/brecahad_img_2.png', '/content/DALLE3/dalle_brecahad_zipped.zip/brecahad_img_3.png', '/content/DALLE3/dalle_brecahad_zipped.zip/brecahad_img_4.png', '/content/DALLE3/dalle_brecahad_zipped.zip/brecahad_img_5.png', '/content/DALLE3/dalle_brecahad_zipped.zip/brecahad_img_6.png', '/content/DALLE3/dalle_brecahad_zipped.zip/brecahad_img_7.png', '/content/DALLE3/dalle_brecahad_zipped.zip/brecahad_img_8.png']
computing captions...


100%|██████████| 9/9 [00:02<00:00,  3.41it/s]

FINISHED computing captions, saving...
Saved to: image_captions_dalle_brecahad_zipped.txt
Finished, saved to /content/drive/MyDrive/--studies_rest/sem9 heidelberg mine/Generative Neural Networks for the Sciences /datasets_generated_ready_models/DALLE3/image_captions_dalle_brecahad_zipped.txt



