## Image Segmentation


In [1]:
import cv2
import numpy as np
import torch
from torchvision import models, transforms

def segment_image(image_path):
    """Run a pre-trained Mask R-CNN over a single image file.

    Args:
        image_path: Path of the image to load with OpenCV.

    Returns:
        A ``(predictions, image)`` tuple. ``predictions`` is the model output
        for a one-image batch; ``image`` is the array returned by
        ``cv2.imread`` (treated as BGR here, i.e. before the RGB conversion
        that is applied only to the model input).

    Raises:
        FileNotFoundError: If OpenCV cannot read ``image_path``.
    """
    # cv2.imread reports failure by returning None rather than raising. Check
    # up front so the error names the offending path and we do not pay for
    # loading the model weights when there is nothing to segment.
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")

    # Load the pre-trained Mask R-CNN model and switch it to inference mode.
    # NOTE(review): newer torchvision releases prefer the `weights=` argument
    # over `pretrained=True`; kept as-is because the installed version is unknown.
    model = models.detection.maskrcnn_resnet50_fpn(pretrained=True)
    model.eval()

    # Image preprocessing: the model input is built from the RGB version of
    # the image, converted to a tensor with a leading batch dimension.
    transform = transforms.Compose([
        transforms.ToTensor(),
    ])
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image_tensor = transform(image_rgb).unsqueeze(0)

    # Perform segmentation without tracking gradients.
    with torch.no_grad():
        predictions = model(image_tensor)

    return predictions, image

ModuleNotFoundError: No module named 'cv2'

In [None]:
!pip install torch torchvision torchaudio transformers pytesseract opencv-python matplotlib pillow



## Object Extraction & Storage

In [None]:
import os
import cv2

def extract_and_save_objects(predictions, original_image, output_dir, mask_threshold=0.5):
    """Cut each predicted object out of the image and save it as a PNG.

    Args:
        predictions: Model output; ``predictions[0]['masks']`` is iterated and
            the first channel of every mask is used.
            (Presumably soft masks in [0, 1] of shape (1, H, W), as produced
            by torchvision's Mask R-CNN - confirm against the caller.)
        original_image: Image array the masks are applied to with
            ``cv2.bitwise_and``; it must match the masks' height and width.
        output_dir: Directory that receives ``object_<i>.png`` files; created
            if it does not exist.
        mask_threshold: Soft-mask values greater than or equal to this are
            treated as belonging to the object. Defaults to 0.5.

    Returns:
        A list of ``{'id': i, 'path': object_path}`` dicts, one per object
        that was written successfully. ``i`` is the index of the mask in the
        prediction, so ids may have gaps when a mask is skipped.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_dir, exist_ok=True)

    objects_info = []
    for i, mask in enumerate(predictions[0]['masks']):
        # Binarise the soft mask. Without a threshold every pixel with a
        # non-zero score would be kept by bitwise_and, not just the object.
        mask_np = (mask[0] >= mask_threshold).byte().mul(255).cpu().numpy()
        if not mask_np.any():  # Ensure mask is not empty
            continue

        object_image = cv2.bitwise_and(original_image, original_image, mask=mask_np)
        object_path = os.path.join(output_dir, f'object_{i}.png')

        # cv2.imwrite returns False on failure instead of raising; do not
        # advertise a file that was never written.
        if not cv2.imwrite(object_path, object_image):
            print(f"Error writing object image {object_path}")
            continue

        objects_info.append({
            'id': i,
            'path': object_path
        })

    return objects_info

## Object Identification

In [None]:
from transformers import pipeline

def identify_objects(object_images):
    """Attach a classifier label to every extracted object image.

    Args:
        object_images: Iterable of dicts providing ``'id'`` and ``'path'``.

    Returns:
        A list of ``{'id': ..., 'description': ...}`` dicts in input order.
        The description is the label of the first classifier result,
        ``"Unknown"`` when the classifier returns nothing, or
        ``"Error in processing"`` when classification raised.
    """
    classify = pipeline("image-classification")
    labelled = []

    for entry in object_images:
        try:
            ranked = classify(entry['path'])
            record = {
                'id': entry['id'],
                'description': ranked[0]['label'] if ranked else "Unknown",
            }
        except Exception as err:
            # Best effort: report the failure and keep a placeholder so the
            # output still has one record per input object.
            print(f"Error processing image {entry['path']}: {str(err)}")
            record = {
                'id': entry['id'],
                'description': "Error in processing",
            }
        labelled.append(record)

    return labelled

In [None]:
!pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu113


## Text/Data Extraction from Objects

In [None]:
import cv2
import pytesseract
from PIL import Image

def extract_text_from_image(image_path):
    """OCR an image file and return the recognised text.

    Args:
        image_path: Path of the image to open with PIL.

    Returns:
        The text returned by ``pytesseract.image_to_string`` with surrounding
        whitespace stripped, or ``""`` if opening or OCR raised (the error is
        printed, not propagated).
    """
    try:
        # The context manager closes the file handle PIL opens; this function
        # is called once per extracted object, so unclosed handles add up.
        with Image.open(image_path) as image:
            text = pytesseract.image_to_string(image)
        return text.strip()
    except Exception as e:
        print(f"Error extracting text from {image_path}: {str(e)}")
        return ""

## Summarize Object Attributes

In [None]:
   from transformers import pipeline

def summarize_attributes(descriptions):
    """Produce a short summary for every object description.

    Args:
        descriptions: Iterable of dicts providing ``'id'`` and
            ``'description'``.

    Returns:
        A list of ``{'id': ..., 'summary': ...}`` dicts in input order. The
        summary is the ``'summary_text'`` of the first summarizer result, or
        ``"Error in summarization"`` when summarization raised.
    """
    summarize = pipeline("summarization")
    results = []

    for item in descriptions:
        try:
            # Only the first 1024 characters are sent to the summarizer.
            clipped = item['description'][:1024]
            generated = summarize(clipped, max_length=50, min_length=10, do_sample=False)
            record = {
                'id': item['id'],
                'summary': generated[0]['summary_text'],
            }
        except Exception as err:
            # Best effort: report the failure and keep a placeholder record.
            print(f"Error summarizing description: {str(err)}")
            record = {
                'id': item['id'],
                'summary': "Error in summarization",
            }
        results.append(record)

    return results

## Data Mapping

In [None]:
import json

def _first_by_id(records):
    """Index records by their 'id', keeping the first record seen per id."""
    index = {}
    for record in records:
        index.setdefault(record['id'], record)
    return index


def map_data(objects_info, descriptions, extracted_texts, summaries):
    """Join the per-object results into one record per object and save them.

    Args:
        objects_info: Iterable of dicts with ``'id'`` and ``'path'``.
        descriptions: Iterable of dicts with ``'id'`` and ``'description'``.
        extracted_texts: Iterable of dicts with ``'id'`` and, optionally,
            ``'text'``.
        summaries: Iterable of dicts with ``'id'`` and ``'summary'``.

    Returns:
        A list with one dict per entry of ``objects_info`` holding ``id``,
        ``path``, ``description``, ``extracted_text`` and ``summary``. A field
        is ``""`` when no record with the object's id exists. When several
        records share an id, the first one wins.

    Side effects:
        Writes the result to ``mapped_data.json`` in the current working
        directory; a write failure is printed and otherwise ignored.
    """
    # Build each lookup once instead of scanning all three lists per object.
    descriptions_by_id = _first_by_id(descriptions)
    texts_by_id = _first_by_id(extracted_texts)
    summaries_by_id = _first_by_id(summaries)

    mapped_data = []
    for obj in objects_info:
        obj_id = obj['id']
        description = descriptions_by_id.get(obj_id)
        summary = summaries_by_id.get(obj_id)
        mapped_data.append({
            "id": obj_id,
            "path": obj['path'],
            "description": description['description'] if description is not None else "",
            "extracted_text": texts_by_id.get(obj_id, {}).get('text', ""),
            "summary": summary['summary'] if summary is not None else "",
        })

    try:
        with open('mapped_data.json', 'w') as f:
            json.dump(mapped_data, f, indent=2)
    except Exception as e:
        print(f"Error writing mapped data to file: {str(e)}")

    return mapped_data

## Output Generation

In [None]:
import cv2
import matplotlib.pyplot as plt

def generate_output_image(original_image_path, mapped_data):
    """Save a copy of the image annotated with each object's id and description.

    Args:
        original_image_path: Path of the image to read with OpenCV.
        mapped_data: Iterable of dicts providing ``'id'`` and
            ``'description'``.

    Side effects:
        Writes ``output_with_annotations.png`` to the current working
        directory and prints a success or error message. Errors are reported
        via ``print`` and not propagated.
    """
    figure = None
    try:
        original_image = cv2.imread(original_image_path)
        # cv2.imread returns None on failure; fail with a message that names
        # the path instead of an opaque cvtColor error.
        if original_image is None:
            raise FileNotFoundError(f"Could not read image: {original_image_path}")
        original_image_rgb = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)

        figure = plt.figure(figsize=(12, 8))
        plt.imshow(original_image_rgb)

        # Start position is set once, outside the loop, so the per-object
        # offset below accumulates and labels stack instead of overlapping.
        x, y = 10, 10  # You may need to adjust this based on actual object positions
        for obj in mapped_data:
            plt.text(x, y, f"ID: {obj['id']}\n{obj['description'][:20]}...",
                     color='red', fontsize=8, bbox=dict(facecolor='white', alpha=0.7))
            y += 40  # Move text position for next object

        plt.title("Annotated Image with Object Descriptions")
        plt.axis('off')
        plt.tight_layout()
        plt.savefig('output_with_annotations.png', dpi=300, bbox_inches='tight')

        print("Output image generated successfully.")
    except Exception as e:
        print(f"Error generating output image: {str(e)}")
    finally:
        # Release the figure even when drawing or saving failed.
        if figure is not None:
            plt.close(figure)


## Main Script: Integrating the AI Pipeline

In [None]:
import os

# The pipeline functions used below are defined in the cells above; run those cells first.

def run_pipeline(image_path):
    """Run the full pipeline on one image: segment, extract, describe, report.

    This entry point was previously also called ``segment_image``, which
    replaced the real segmentation function and made Step 1 call itself
    without end. It has its own name so ``segment_image`` keeps referring to
    the segmentation step.

    Args:
        image_path: Path of the image to process.

    Side effects:
        Those of the individual steps it calls (object crops written under
        ``segmented_objects``, plus whatever ``map_data`` and
        ``generate_output_image`` write), and a completion message on stdout.
    """
    # Step 1: Image Segmentation
    predictions, original_image = segment_image(image_path)

    # Step 2: Object Extraction and Storage
    output_dir = 'segmented_objects'
    objects_info = extract_and_save_objects(predictions, original_image, output_dir)

    # Step 3: Object Identification
    descriptions = identify_objects(objects_info)

    # Step 4: Text/Data Extraction from Objects
    extracted_texts = [{'id': obj['id'], 'text': extract_text_from_image(obj['path'])} for obj in objects_info]

    # Step 5: Summarize Object Attributes
    summaries = summarize_attributes(descriptions)

    # Step 6: Data Mapping
    mapped_data = map_data(objects_info, descriptions, extracted_texts, summaries)

    # Step 7: Output Generation
    generate_output_image(image_path, mapped_data)

    print("Pipeline completed successfully.")

if __name__ == "__main__":
    image_path = "path/to/your/image.jpg"
    run_pipeline(image_path)

In [None]:
import shutil
import subprocess

# `nvcc --version` is a shell command; typed bare into a Python cell it is
# evaluated as an arithmetic expression and fails with a NameError. Run it as
# a subprocess instead, and say so plainly when the CUDA toolkit is absent.
nvcc_path = shutil.which("nvcc")
if nvcc_path is None:
    print("nvcc not found on PATH")
else:
    nvcc_report = subprocess.run([nvcc_path, "--version"], capture_output=True, text=True)
    print(nvcc_report.stdout or nvcc_report.stderr)


In [None]:
import torch

In [None]:
!pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

In [None]:
import torch
# Show the CUDA version recorded in torch.version, then whether
# torch reports CUDA as available.
cuda_build_version = torch.version.cuda
print(cuda_build_version)
cuda_is_usable = torch.cuda.is_available()
print(cuda_is_usable)