# Vehicles Segmentation 

In this notebook we will use the SAM model in order to enrich the the pNeuma Vision Dataset with masks and prepare a dataset for machine learning training tasks. 

In [41]:
from datasets import load_dataset
# Load the dataset in streaming mode
dataset = load_dataset("katospiegel/pneuma-vision-parquet", streaming=True)

Downloading readme:   0%|          | 0.00/595 [00:00<?, ?B/s]

In [68]:
from datasets import Features, Value, Image, Dataset
from PIL import Image as PILImage
import io
import numpy as np
import pandas as pd

import torch
from transformers import SamModel, SamProcessor


def crop_image(img, x, y, box_size=80):
    half_box_size = box_size // 2
    left = max(x - half_box_size, 0)
    upper = max(y - half_box_size, 0)
    right = left + box_size
    lower = upper + box_size
    cropped_img = img.crop((left, upper, right, lower))
    return cropped_img


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SamModel.from_pretrained("facebook/sam-vit-huge").to(device)
processor = SamProcessor.from_pretrained("facebook/sam-vit-huge")


data_list = [] 

for data in dataset['train'].take(2):

    annotation_json = data["Annotation_json"]
    annotation_df = pd.read_json(annotation_json, orient='columns')

    for index, row in annotation_df.iterrows():
        x_img = row['x_img [px]']
        y_img = row['y_img [px]']

        # Crop Image
        box_size = 80 # Choose pair number
        raw_image = data["Image"] #PILImage.open(data["image"]).convert("RGB")

        cropped_image = crop_image(raw_image, x_img, y_img, box_size=80)

        # Image prediction
        inputs = processor(cropped_image, return_tensors="pt").to(device)
        image_embeddings = model.get_image_embeddings(inputs["pixel_values"])


        input_points = [[[box_size/2, box_size/2]]]
        ## Here is where we provide the input points
        inputs = processor(cropped_image, input_points=input_points, return_tensors="pt").to(device)
        # pop the pixel_values as they are not neded
        inputs.pop("pixel_values", None)
        inputs.update({"image_embeddings": image_embeddings})

        with torch.no_grad():
            outputs = model(**inputs)

        masks = processor.image_processor.post_process_masks(outputs.pred_masks.cpu(), inputs["original_sizes"].cpu(), inputs["reshaped_input_sizes"].cpu())
        scores = outputs.iou_scores

        highest_score_mask = get_mask_with_highest_score(masks[0], scores)
        highest_score_mask_pil = get_mask_with_highest_score_as_pil(masks[0], scores)

        # Store the mask bytes in the new data dictionary
        feature = Image()
        new_data = {}
        new_data['Time [s]'] = row['Time [s]']
        new_data['id'] = row['ID']
        new_data['Type'] = row['Type']
        new_data['x_img [px]'] = row['x_img [px]']
        new_data['y_img [px]'] = row['y_img [px]']
        new_data['Angle_img [rad]'] = row['Angle_img [rad]']
        new_data['Frame'] = data['Frame']    
        new_data['Image'] = feature.encode_example(cropped_image)
        new_data['Mask'] = feature.encode_example(highest_score_mask_pil)
        data_list.append(new_data)

  annotation_df = pd.read_json(annotation_json, orient='columns')
  annotation_df = pd.read_json(annotation_json, orient='columns')


## Uploading of dataset to Hugging Face

In [69]:
features = Features({
    'Time [s]': Value(dtype='float32'),
    'id': Value(dtype='int32'),
    'Type': Value(dtype='string'),
    'x_img [px]': Value(dtype='int32'),
    'y_img [px]': Value(dtype='int32'),
    'Angle_img [rad]': Value(dtype='float32'),
    'Frame': Value(dtype='string'),
    'Image': Image(decode=True),
    'Mask': Image(decode=True)
})

data_dict = {key: [dic[key] for dic in data_list] for key in data_list[0]}

# Once all data points are prepared, create the new dataset from the list
new_dataset = Dataset.from_dict(data_dict, features=features)

In [71]:
new_dataset.push_to_hub("katospiegel/ordfts-hackathon-pneuma-vehicles-segmentation")

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Map:   0%|          | 0/273 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/3 [00:00<?, ?ba/s]



README.md:   0%|          | 0.00/30.0 [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/datasets/katospiegel/ordfts-hackathon-pneuma-vehicles-segmentation/commit/ae21818ef53037f331ed069f6b7a99cad96cd996', commit_message='Upload dataset', commit_description='', oid='ae21818ef53037f331ed069f6b7a99cad96cd996', pr_url=None, pr_revision=None, pr_num=None)

## Previsualization of dataset

In [73]:
%%html

<iframe
  src="https://huggingface.co/datasets/katospiegel/ordfts-hackathon-pneuma-vehicles-segmentation/embed/viewer/default/train"
  frameborder="0"
  width="100%"
  height="560px"
></iframe>