In [None]:
!pip install -q transformers torch torchvision Pillow tqdm

SET UP S3 ACCESS (BOTO3, AWS CLI, CREDENTIALS)

In [None]:
!pip install -q boto3

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/139.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━[0m [32m133.1/139.2 kB[0m [31m7.3 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m139.2/139.2 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m103.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m82.6/82.6 kB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
!pip install -q --upgrade awscli

In [None]:
# Interactive: the AWS CLI prompts for an access key, secret key, region and output format.
# Type the values at the prompt only; never hardcode credentials in a notebook cell,
# and clear this cell's output before sharing the notebook.
!aws configure

LOAD IN PRETRAINED BLIP MODEL

In [None]:
!pip install -q transformers

In [None]:
from transformers import pipeline
import torch

# Select CUDA device 0 when torch can see a GPU; otherwise pass -1 (no GPU)
if torch.cuda.is_available():
    device = 0
else:
    device = -1

# Build the image-captioning pipeline on the selected device
pipe = pipeline(
    "image-to-text",
    device=device,
    model="Gurveer05/blip-image-captioning-base-rscid-finetuned",
)

INFERENCE ON IMAGES

In [None]:
import boto3
import pandas as pd
from botocore.exceptions import ClientError
from PIL import Image
from io import BytesIO
from tqdm import tqdm

def _list_tif_keys(s3_client, bucket_name, subfolder):
    """Return every object key under ``subfolder`` in ``bucket_name`` ending in '.tif'.

    Iterates the client's ``list_objects_v2`` paginator, so continuation tokens
    are handled by the client. Pages without a 'Contents' entry contribute nothing.
    """
    paginator = s3_client.get_paginator('list_objects_v2')
    keys = []
    for page in paginator.paginate(Bucket=bucket_name, Prefix=subfolder):
        keys.extend(
            obj['Key'] for obj in page.get('Contents', []) if obj['Key'].endswith('.tif')
        )
    return keys


def _caption_s3_image(pipe, s3_client, bucket_name, key):
    """Download one S3 object, open it as an image and return the pipeline's caption."""
    image_data = s3_client.get_object(Bucket=bucket_name, Key=key)['Body'].read()
    with Image.open(BytesIO(image_data)) as image:
        # Keep the converted image under its own name so the `with` target is
        # not rebound while its context is still open.
        rgb_image = image.convert("RGB")
        return pipe(rgb_image)[0]['generated_text']


def run_inference(pipe, bucket_name, subfolder, output_csv, s3_client=None):
    """
    Downloads .tif images from an S3 bucket, runs inference,
    and writes the descriptions to a CSV.

    Images that fail to download, decode or caption are reported and skipped;
    they do not appear in the CSV. The CSV is written in a ``finally`` clause,
    so captions produced before an interruption (e.g. KeyboardInterrupt) are
    still saved. The file always has the header ``image_name,description``,
    even when no image was captioned.

    Args:
        pipe (pipeline): The pre-loaded Hugging Face pipeline for image captioning.
            Called as ``pipe(image)``; the caption is read from
            ``[0]['generated_text']`` of the result.
        bucket_name (str): The name of the S3 bucket.
        subfolder (str): The subfolder (key prefix) within the S3 bucket.
        output_csv (str): The path to the CSV file where results will be saved.
        s3_client (optional): An S3 client to use instead of creating one with
            ``boto3.client('s3')``. Lets callers supply a specific
            session/region, or a stub in tests.

    Returns:
        None. If listing the bucket raises ``ClientError``, the error is
        printed and the function returns without writing the CSV.
    """
    if s3_client is None:
        s3_client = boto3.client('s3')

    # Step 1: List all .tif files (pagination handled by the paginator)
    print(f"Fetching list of .tif files from '{bucket_name}/{subfolder}'...")
    try:
        all_keys = _list_tif_keys(s3_client, bucket_name, subfolder)
    except ClientError as e:
        print(f"Error accessing bucket: {e}")
        return

    if not all_keys:
        print("No images found in the specified subfolder.")
    print(f"Total .tif images found: {len(all_keys)}")

    # Step 2: Download, convert, run inference, and store results
    results = []
    failed_keys = []
    try:
        for key in tqdm(all_keys, desc="Processing images", unit="image"):
            try:
                caption = _caption_s3_image(pipe, s3_client, bucket_name, key)
            except Exception as e:
                failed_keys.append(key)
                # tqdm.write keeps the progress bar intact; the exception type is
                # included because some decoder errors carry only a bare code.
                tqdm.write(f"Error processing {key}: {type(e).__name__}: {e}")
            else:
                results.append({'image_name': key, 'description': caption})
    finally:
        # Step 3: Write whatever was captioned, even if the loop was interrupted.
        # Explicit columns guarantee a header row when `results` is empty.
        df = pd.DataFrame(results, columns=['image_name', 'description'])
        df.to_csv(output_csv, index=False)
        print(f"Descriptions saved to {output_csv}")
        if failed_keys:
            print(f"Skipped {len(failed_keys)} image(s) that could not be processed.")


In [None]:
run_inference(
    pipe,                               # pipe: captioning pipeline loaded above
    '_',                                # bucket_name: placeholder, set the real bucket
    '_',                                # subfolder: placeholder, set the real key prefix
    '/content/image_descriptions.csv',  # output_csv
)

Fetching list of .tif files from 'ibm-rsc/tif_patches'...
Total .tif images found: 12006


Processing images:   0%|          | 10/12006 [00:06<1:30:07,  2.22image/s]You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
Processing images:  73%|███████▎  | 8732/12006 [54:22<13:39,  4.00image/s]

Error processing tif_patches/N-33-60-D-c-4-2_4096_1024.tif: -2
Error processing tif_patches/N-33-60-D-c-4-2_4096_1536.tif: -2


Processing images: 100%|██████████| 12006/12006 [1:13:45<00:00,  2.71image/s]

Descriptions saved to /content/image_descriptions.csv



