In [3]:
import pandas as pd
import requests
from PIL import Image
from io import BytesIO
from transformers import pipeline
from concurrent.futures import ThreadPoolExecutor, as_completed
import torch
from tqdm import tqdm

# Prefer the GPU when PyTorch reports one; otherwise fall back to the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

# Build both pipelines a single time at module level so every row reuses them
image_to_text = pipeline(
    "image-to-text",
    model="microsoft/git-base-textcaps",
    device=device,
)
ner = pipeline(
    "ner",
    model="dbmdz/bert-large-cased-finetuned-conll03-english",
    device=device,
)

def load_dataset(file_path):
    """Read the CSV at ``file_path`` and return it as a pandas DataFrame."""
    frame = pd.read_csv(file_path)
    return frame

def download_image(url):
    """Fetch ``url`` and open the response body as a PIL image.

    The request uses a 10 second timeout. Any ``requests`` failure (including
    a non-2xx status) is printed and reported to the caller as ``None``.
    Errors raised while opening the image itself are not handled here.
    """
    try:
        reply = requests.get(url, timeout=10)
        reply.raise_for_status()
        payload = reply.content
    except requests.exceptions.RequestException as e:
        print(f"Error downloading image from {url}: {str(e)}")
        return None
    else:
        # Wrap the raw bytes in a file-like object, as Image.open expects
        return Image.open(BytesIO(payload))

def extract_text_and_entity(image, entity_name):
    """Caption ``image`` and pick the quantity closest to ``entity_name``.

    The caption comes from the module-level ``image_to_text`` pipeline. The
    candidate values are gathered in two steps:

    1. NER entities whose label (with any ``B-``/``I-`` prefix removed) is
       ``QUANTITY``, ``CARDINAL`` or ``PERCENT``.
    2. If NER yields none, numeric spans found by a regular expression (a
       number, optionally followed by ``%`` or a unit-like word). This
       fallback is needed because a CoNLL-2003 NER model only emits
       PER/ORG/LOC/MISC tags, so step 1 alone would always come back empty.

    Args:
        image: Image accepted by the captioning pipeline, or ``None``.
        entity_name: Attribute name to anchor on (e.g. ``"item_weight"``).

    Returns:
        The text of the chosen candidate, or ``None`` when ``image`` is
        ``None`` or the caption contains no candidate.
    """
    import re  # local import: keeps this block independent of file-level imports

    if image is None:
        return None

    # Get the caption (extracted text) from the image
    caption = image_to_text(image)[0]['generated_text']
    caption_lower = caption.lower()

    # Each candidate is (character offset in caption, candidate text)
    wanted_labels = {'QUANTITY', 'CARDINAL', 'PERCENT'}
    candidates = []
    for entity in ner(caption):
        label = entity['entity'].split('-', 1)[-1]
        if label not in wanted_labels:
            continue
        # Prefer the offset reported by the pipeline; searching for the word
        # again would always land on its first occurrence in the caption.
        start = entity.get('start')
        if start is None:
            start = caption_lower.find(entity['word'].lower())
        candidates.append((start, entity['word']))

    if not candidates:
        number_pattern = re.compile(r'\d+(?:[.,]\d+)?(?:\s*(?:%|[A-Za-z]+))?')
        candidates = [
            (match.start(), match.group())
            for match in number_pattern.finditer(caption)
        ]

    if not candidates:
        return None

    # Locate the attribute mention; also try the name with underscores
    # replaced by spaces, since captions are natural-language text.
    name_lower = entity_name.lower()
    anchor = -1
    for term in (name_lower, name_lower.replace('_', ' ')):
        anchor = caption_lower.find(term)
        if anchor != -1:
            break

    if anchor == -1:
        # No mention to measure from: take the earliest candidate rather than
        # computing distances against the -1 "not found" sentinel.
        return min(candidates, key=lambda item: item[0])[1]

    # min() keeps the first candidate on ties, matching a strict "<" scan
    return min(candidates, key=lambda item: abs(item[0] - anchor))[1]

def process_image(row):
    """Download the image for one dataset row and extract its entity value.

    ``row`` must provide ``'image_link'`` and ``'entity_name'``. Any exception
    raised during download or extraction is printed and turned into ``None``
    so a single bad row does not abort the whole run.
    """
    try:
        return extract_text_and_entity(
            download_image(row['image_link']),
            row['entity_name'],
        )
    except Exception as e:
        print(f"Error processing {row['image_link']}: {str(e)}")
        return None

def process_batch(batch):
    """Run ``process_image`` on every row of ``batch`` and return the results.

    The returned list has one entry per row, in the order ``batch.iterrows()``
    yields them.
    """
    extracted = []
    for _label, record in batch.iterrows():
        extracted.append(process_image(record))
    return extracted

def main(file_path, batch_size=16, max_workers=4):
    """Extract a value for every row of the CSV at ``file_path``.

    The dataset is split into consecutive slices of ``batch_size`` rows and
    each slice is handed to ``process_batch`` on a thread pool.

    Args:
        file_path: Path of the CSV to load via ``load_dataset``.
        batch_size: Number of rows per submitted batch.
        max_workers: Number of worker threads in the pool.

    Returns:
        The loaded DataFrame with an added ``'extracted_value'`` column whose
        entries line up with the rows they were computed from.
    """
    df = load_dataset(file_path)

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Remember where each batch sits in the frame: as_completed() yields
        # futures in completion order, which is not the submission order.
        future_to_position = {
            executor.submit(process_batch, df.iloc[start:start + batch_size]): position
            for position, start in enumerate(range(0, len(df), batch_size))
        }

        batch_results = [None] * len(future_to_position)
        for future in tqdm(as_completed(future_to_position), total=len(future_to_position), desc="Processing batches"):
            batch_results[future_to_position[future]] = future.result()

    # Flatten the batches back into one list in original row order
    df['extracted_value'] = [value for batch in batch_results for value in batch]
    return df

# Example usage: run the pipeline on the sample CSV and show inputs next to the extracted values
file_path = '/kaggle/input/sagnik-sarangi2/sample.csv'  # Update this to the actual path of your CSV file
result_df = main(file_path)
print(result_df.loc[:, ['image_link', 'entity_name', 'entity_value', 'extracted_value']])

Using device: cuda


Some weights of the model checkpoint at dbmdz/bert-large-cased-finetuned-conll03-english were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Processing batches: 100%|██████████| 13/13 [04:06<00:00, 18.94s/it]

                                            image_link  \
0    https://m.media-amazon.com/images/I/61I9XdN6OF...   
1    https://m.media-amazon.com/images/I/71gSRbyXmo...   
2    https://m.media-amazon.com/images/I/61BZ4zrjZX...   
3    https://m.media-amazon.com/images/I/612mrlqiI4...   
4    https://m.media-amazon.com/images/I/617Tl40LOX...   
..                                                 ...   
194  https://m.media-amazon.com/images/I/718M5ODio0...   
195  https://m.media-amazon.com/images/I/61lpwH0qHb...   
196  https://m.media-amazon.com/images/I/712sZRVe98...   
197  https://m.media-amazon.com/images/I/71d+dz7ogk...   
198  https://m.media-amazon.com/images/I/51WSQa1ygM...   

                       entity_name    entity_value extracted_value  
0                      item_weight      500.0 gram            None  
1                      item_volume         1.0 cup            None  
2                      item_weight      0.709 gram            None  
3                      item


