In [18]:
from datasets import load_dataset

# Load the Fashionpedia detection dataset by its Hugging Face Hub identifier.
# NOTE(review): `ds` is not referenced by any other cell visible in this notebook --
# confirm it is still needed before paying for the load on every run.
ds = load_dataset("detection-datasets/fashionpedia")

In [19]:
# !pip install 'git+https://github.com/facebookresearch/detectron2.git'

In [20]:
# from PIL import Image

# # Replace 'path/to/your/image.jpg' with the actual path to your image
# image_path = 'test2.png'
# image = Image.open(image_path)

In [21]:
from transformers import AutoImageProcessor, AutoModelForObjectDetection

# One checkpoint identifier feeds both the image processor and the detection model,
# so the two can never drift apart.
MODEL_CHECKPOINT = "valentinafeve/yolos-fashionpedia"

processor = AutoImageProcessor.from_pretrained(MODEL_CHECKPOINT)
model = AutoModelForObjectDetection.from_pretrained(MODEL_CHECKPOINT)

In [22]:
from PIL import Image
import numpy as np

# Path of the image to analyse; replace 'test2.png' with the path to your own image
image_path = 'test2.png'

# Open the image and normalise it to three-channel RGB
image = Image.open(image_path).convert('RGB')

# Convert the PIL Image to a NumPy array; this array is what gets passed to the processor
image_array = np.array(image)

# Sanity check on the array layout before preprocessing
print('Image shape:', image_array.shape)  # Should be (height, width, 3)

# Preprocess the array into model inputs, requesting PyTorch tensors ("pt")
inputs = processor(images=image_array, return_tensors="pt")

Image shape: (1592, 896, 3)


In [23]:
import torch

# Run inference. Gradients are never used in this notebook, so disable autograd
# for the forward pass to avoid building a graph and holding extra memory.
with torch.no_grad():
    outputs = model(**inputs)

In [24]:
# Turn the raw model outputs into thresholded detections expressed in the
# original image's pixel coordinates. PIL reports size as (width, height),
# while the target size is passed here as (height, width).
image_width, image_height = image.size
results = processor.post_process_object_detection(
    outputs,
    threshold=0.5,
    target_sizes=[(image_height, image_width)],
)

# A single image was processed, so its detections are the first (only) entry
result = results[0]

In [25]:
import json

# Location of the Fashionpedia category description file.
# NOTE(review): this is an absolute, machine-specific path; the notebook will not
# run elsewhere until it is replaced by a path relative to the project.
CATEGORY_JSON_PATH = '/Users/joshstrupp/Documents/Working/Educational/MSDV/ms1-final/fashionpedia-api/data/demo/category_attributes_descriptions.json'

with open(CATEGORY_JSON_PATH, 'r') as f:
    category_data = json.load(f)

categories = category_data['categories']

# Build all three lookups in a single pass over the category records:
#   category ID   -> name
#   category ID   -> supercategory
#   category name -> supercategory
category_id_to_name = {}
category_id_to_supercategory = {}
name_to_supercategory = {}
for category in categories:
    category_id_to_name[category['id']] = category['name']
    category_id_to_supercategory[category['id']] = category['supercategory']
    name_to_supercategory[category['name']] = category['supercategory']

# The model's own mapping from predicted label IDs to category names
label_mappings = model.config.id2label

In [26]:
# Column-oriented container for the detections: one list per field
objects = {
    key: []
    for key in ('bbox_id', 'category', 'bbox', 'area', 'supercategory', 'name')
}

# Reverse lookup from category name to dataset category ID. setdefault keeps the
# first ID seen for a name, which matches a front-to-back scan of category_id_to_name.
name_to_category_id = {}
for known_id, known_name in category_id_to_name.items():
    name_to_category_id.setdefault(known_name, known_id)

detections = zip(result['scores'], result['labels'], result['boxes'])
for idx, (score_tensor, label_tensor, box_tensor) in enumerate(detections):
    score = score_tensor.item()
    label_id = label_tensor.item()
    box = box_tensor.tolist()  # [xmin, ymin, xmax, ymax]

    # Box area in square pixels
    x_min, y_min, x_max, y_max = box
    area = (x_max - x_min) * (y_max - y_min)

    # Resolve the model label to a name, then to its supercategory
    name = label_mappings.get(label_id, 'Unknown')
    supercategory = name_to_supercategory.get(name, 'Unknown')

    # Prefer the dataset's category ID for this name; fall back to the model's label ID
    category_id = name_to_category_id.get(name, label_id)

    # Record this detection, one value per column
    for key, value in (
        ('bbox_id', idx),
        ('category', category_id),
        ('bbox', box),
        ('area', area),
        ('name', name),
        ('supercategory', supercategory),
    ):
        objects[key].append(value)

# PIL reports size as (width, height)
width, height = image.size

# Bundle the image, its dimensions and the detections into one record
output = dict(
    image_id=0,  # Assign an ID to your image
    image=image,
    width=width,
    height=height,
    objects=objects,
)

# Report the analysis
print("Image ID:", output['image_id'])
print("Width:", output['width'])
print("Height:", output['height'])
print("Objects Detected:")
for i, _ in enumerate(objects['bbox_id']):
    print(f"  Object {i+1}:")
    print(f"    Bounding Box ID: {objects['bbox_id'][i]}")
    print(f"    Category ID: {objects['category'][i]}")
    print(f"    Name: {objects['name'][i]}")
    print(f"    Supercategory: {objects['supercategory'][i]}")
    print(f"    Bounding Box: {objects['bbox'][i]}")
    print(f"    Area: {objects['area'][i]}")

Image ID: 0
Width: 896
Height: 1592
Objects Detected:
  Object 1:
    Bounding Box ID: 0
    Category ID: 33
    Name: neckline
    Supercategory: garment parts
    Bounding Box: [376.6819152832031, 381.1187744140625, 561.2196044921875, 485.8564453125]
    Area: 19328.047760728747
  Object 2:
    Bounding Box ID: 1
    Category ID: 15
    Name: headband, head covering, hair accessory
    Supercategory: head
    Bounding Box: [283.3788757324219, 126.98322296142578, 531.8199462890625, 313.0384216308594]
    Area: 46223.75274006254
  Object 3:
    Bounding Box ID: 2
    Category ID: 10
    Name: dress
    Supercategory: wholebody
    Bounding Box: [14.879779815673828, 390.08843994140625, 736.210205078125, 1500.7442626953125]
    Area: 801149.8369472928
  Object 4:
    Bounding Box ID: 3
    Category ID: 31
    Name: sleeve
    Supercategory: garment parts
    Bounding Box: [552.6659545898438, 431.0205078125, 715.664794921875, 667.8580322265625]
    Area: 38604.24182660133


In [35]:
def analyze_image(image_path, category_json_path='category_attributes_descriptions.json'):
    """
    Detect fashion items in an image and return one DataFrame row per detection.

    The image is run through the "valentinafeve/yolos-fashionpedia" detector with a
    score threshold of 0.8. Each detection's label is resolved to a category name via
    the model's ``id2label`` mapping and then enriched with the category ID and
    supercategory found in the category JSON file.

    Parameters:
    - image_path (str): The path to the image file.
    - category_json_path (str): The path to the category attributes JSON file. It must
      contain a top-level 'categories' list whose items have 'id', 'name' and
      'supercategory' keys.

    Returns:
    - df (pandas.DataFrame): One row per detection with the columns
      'bbox_id', 'category_id', 'name', 'supercategory', 'bbox', 'area', 'score'.
      'bbox' is [x_min, y_min, x_max, y_max] in pixels of the input image. When
      nothing is detected the frame is empty but still has these columns.

    Raises:
    - OSError: If the image or the category JSON file cannot be opened.
    - KeyError: If the category JSON lacks one of the expected keys.
    """
    from PIL import Image
    from transformers import AutoImageProcessor, AutoModelForObjectDetection
    import pandas as pd
    import torch
    import json
    import numpy as np

    model_id = "valentinafeve/yolos-fashionpedia"
    columns = ['bbox_id', 'category_id', 'name', 'supercategory', 'bbox', 'area', 'score']

    # 1. Load the image as RGB. convert() returns an in-memory copy, so the file
    #    handle can be released as soon as the block exits.
    with Image.open(image_path) as source_image:
        image = source_image.convert('RGB')
    image_array = np.array(image)

    # 2. Load the category metadata before the model, so a bad path fails fast
    #    instead of after an expensive download and forward pass.
    with open(category_json_path, 'r', encoding='utf-8') as f:
        categories = json.load(f)['categories']
    category_id_to_name = {category['id']: category['name'] for category in categories}
    name_to_supercategory = {category['name']: category['supercategory'] for category in categories}

    # Reverse lookup name -> category ID. setdefault keeps the first ID for a name,
    # matching a front-to-back scan of category_id_to_name, without rescanning the
    # whole mapping for every detection.
    name_to_category_id = {}
    for known_id, known_name in category_id_to_name.items():
        name_to_category_id.setdefault(known_name, known_id)

    # 3. Load the processor and model
    processor = AutoImageProcessor.from_pretrained(model_id)
    model = AutoModelForObjectDetection.from_pretrained(model_id)
    label_mappings = model.config.id2label

    # 4. Prepare the image and run inference. No gradients are needed, so autograd
    #    is disabled to avoid building a graph during the forward pass.
    inputs = processor(images=image_array, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)

    # 5. Convert raw outputs into thresholded boxes in original pixel coordinates.
    #    image.size is (width, height); it is reversed to (height, width) here.
    results = processor.post_process_object_detection(
        outputs, threshold=0.8, target_sizes=[image.size[::-1]]
    )
    result = results[0]

    # 6. Build one record per detection
    data = []
    detections = zip(result['scores'], result['labels'], result['boxes'])
    for idx, (score_tensor, label_tensor, box_tensor) in enumerate(detections):
        label_id = label_tensor.item()
        box = box_tensor.tolist()
        x_min, y_min, x_max, y_max = box

        name = label_mappings.get(label_id, 'Unknown')

        data.append({
            'bbox_id': idx,
            # Fall back to the model's label ID when the name is not in the JSON file
            'category_id': name_to_category_id.get(name, label_id),
            'name': name,
            'supercategory': name_to_supercategory.get(name, 'Unknown'),
            'bbox': box,
            'area': (x_max - x_min) * (y_max - y_min),
            'score': score_tensor.item()
        })

    # Passing the columns explicitly keeps the schema stable even with zero detections
    df = pd.DataFrame(data, columns=columns)

    return df

# Image to analyse; change 'test2.png' to the path of your own image
image_path = 'test2.png'

# Location of the category attributes JSON file (overrides the function's default)
category_json_path = '../category_attributes_descriptions.json'

# Run detection and collect the results in a DataFrame
df = analyze_image(image_path, category_json_path=category_json_path)

In [36]:
import os

def save_cropped_images(image_path, df, output_dir='cropped_images'):
    """
    Crops the original image according to the bounding boxes and saves the cropped images.

    Each crop is written as a JPEG named
    "<image file stem>_<label with spaces replaced by underscores>_<row index>.jpg".
    Because the name is derived only from the image, the label and the row index,
    running this again for the same image overwrites the earlier crops.

    Parameters:
    - image_path (str): Path to the original image.
    - df (pandas.DataFrame): DataFrame containing the detection results. Each row must
      provide 'bbox' as [x_min, y_min, x_max, y_max] and 'name' as the label string.
      The boxes must have been computed on the image at image_path.
    - output_dir (str): Directory where the cropped images will be saved. It is
      created if it does not exist.

    Returns:
    - None. Progress, skipped boxes and save failures are reported with print().
    """
    from PIL import Image
    import os

    # Decode inside a context manager: convert() returns an in-memory RGB copy,
    # so the underlying file handle is released before any cropping starts.
    with Image.open(image_path) as source_image:
        image = source_image.convert('RGB')

    # Ensure the output directory exists
    os.makedirs(output_dir, exist_ok=True)

    width, height = image.size

    # The file stem is the same for every crop, so compute it once
    image_name = os.path.splitext(os.path.basename(image_path))[0]

    for idx, row in df.iterrows():
        name = row['name']

        # Truncate the box to whole pixels, then clamp it to the image bounds
        left, top, right, bottom = map(int, row['bbox'])
        left, right = (max(0, min(width, x)) for x in (left, right))
        top, bottom = (max(0, min(height, y)) for y in (top, bottom))

        # A box with no area after clamping cannot be cropped
        if right <= left or bottom <= top:
            print(f"Skipping invalid crop for detection {idx}")
            continue

        # Cropping an RGB image yields an RGB image, so it can be saved as JPEG directly
        cropped_image = image.crop((left, top, right, bottom))

        # Path separators in a label would redirect or break the save; neutralise them
        label = name.replace(' ', '_').replace('/', '_').replace('\\', '_')
        filename = f"{image_name}_{label}_{idx}.jpg"
        output_path = os.path.join(output_dir, filename)

        try:
            cropped_image.save(output_path, format='JPEG')
            print(f"Saved cropped image: {output_path}")
        except Exception as e:
            # Best effort: one failed crop must not stop the remaining ones
            print(f"Failed to save {output_path}: {e}")

# Image whose detections are cropped below
image_path = 'Angola Costume.png'  # Replace with your image path

# Detect on this same image. Bounding boxes are pixel coordinates of the image they
# were computed on, so reusing a DataFrame produced from a different file would cut
# out the wrong regions.
df = analyze_image(image_path, category_json_path)

# save_cropped_images creates the output directory itself. File names are built from
# the image name, the label and the row index, so re-running this cell overwrites the
# crops previously written for the same image.
save_cropped_images(image_path, df, output_dir='cropped_images')

Saved cropped image: cropped_images/Angola Costume_neckline_0.jpg
Saved cropped image: cropped_images/Angola Costume_dress_1.jpg
Saved cropped image: cropped_images/Angola Costume_sleeve_2.jpg
