# Requirements

In [2]:
!pip install fiftyone
!pip install roboflow
import fiftyone as fo
from roboflow import Roboflow

Collecting fiftyone
  Downloading fiftyone-1.3.2-py3-none-any.whl.metadata (24 kB)
Collecting aiofiles (from fiftyone)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting argcomplete (from fiftyone)
  Downloading argcomplete-3.6.0-py3-none-any.whl.metadata (16 kB)
Collecting boto3 (from fiftyone)
  Downloading boto3-1.37.13-py3-none-any.whl.metadata (6.7 kB)
Collecting dacite<1.8.0,>=1.6.0 (from fiftyone)
  Downloading dacite-1.7.0-py3-none-any.whl.metadata (14 kB)
Collecting ftfy (from fiftyone)
  Downloading ftfy-6.3.1-py3-none-any.whl.metadata (7.3 kB)
Collecting hypercorn>=0.13.2 (from fiftyone)
  Downloading hypercorn-0.17.3-py3-none-any.whl.metadata (5.4 kB)
Collecting kaleido!=0.2.1.post1 (from fiftyone)
  Downloading kaleido-0.2.1-py2.py3-none-manylinux1_x86_64.whl.metadata (15 kB)
Collecting mongoengine~=0.29.1 (from fiftyone)
  Downloading mongoengine-0.29.1-py3-none-any.whl.metadata (6.7 kB)
Collecting motor~=3.6.0 (from fiftyone)
  Downloading motor-3

# Import & Load the Data

In [None]:
# Credentials must not live in the notebook source: anyone who can read the
# file could use them. The key is taken from the ROBOFLOW_API_KEY environment
# variable, falling back to an interactive prompt that does not echo the value.
# NOTE(review): any key that was ever committed in this cell should be treated
# as exposed and rotated in the Roboflow account settings.
import os
from getpass import getpass

roboflow_api_key = os.environ.get("ROBOFLOW_API_KEY") or getpass("Roboflow API key: ")

# Download version 9 of the project in COCO format.
rf = Roboflow(api_key=roboflow_api_key)
project = rf.workspace("safewalkbd").project("safewalkbd-l8jbn")
version = project.version(9)
dataset = version.download("coco")

loading Roboflow workspace...
loading Roboflow project...


In [None]:
import os
import shutil

dataset_dir = "/content/SafeWalkBD-9"

# Image extensions to relocate, matched case-insensitively so that files such
# as `photo.JPG` or `photo.jpeg` are not left behind next to the annotations.
IMAGE_EXTENSIONS = (".png", ".jpg", ".jpeg", ".bmp")

# Move every image of each split into a `data/` subfolder, leaving the
# annotation file where it is (this is the layout the loading cell relies on:
# images under `<split>/data`, labels passed explicitly via `labels_path`).
for split in ["train", "valid", "test"]:
    split_path = os.path.join(dataset_dir, split)
    data_path = os.path.join(split_path, "data")

    # exist_ok keeps the cell safe to re-run after the folder was created
    os.makedirs(data_path, exist_ok=True)

    # Move all image files into 'data/' subfolder
    for file in os.listdir(split_path):
        if file.lower().endswith(IMAGE_EXTENSIONS):
            shutil.move(os.path.join(split_path, file), os.path.join(data_path, file))

print("Folder structure updated successfully.")

Folder structure updated successfully.


In [None]:
def _load_coco_split(split):
    """Import one split of the dataset as a FiftyOne COCO detection dataset.

    Args:
        split (str): Name of the split folder under `dataset_dir`.

    Returns:
        The dataset returned by `fo.Dataset.from_dir` for that split.
    """
    split_dir = f"{dataset_dir}/{split}"
    return fo.Dataset.from_dir(
        dataset_dir=split_dir,
        dataset_type=fo.types.COCODetectionDataset,
        labels_path=f"{split_dir}/_annotations.coco.json",
    )


# Training, validation and test splits, loaded in that order.
train_dataset = _load_coco_split("train")
valid_dataset = _load_coco_split("valid")
test_dataset = _load_coco_split("test")

 100% |███████████████| 7193/7193 [58.8s elapsed, 0s remaining, 115.8 samples/s]      


INFO:eta.core.utils: 100% |███████████████| 7193/7193 [58.8s elapsed, 0s remaining, 115.8 samples/s]      


 100% |███████████████| 1989/1989 [14.8s elapsed, 0s remaining, 162.5 samples/s]      


INFO:eta.core.utils: 100% |███████████████| 1989/1989 [14.8s elapsed, 0s remaining, 162.5 samples/s]      


  92% |█████████████\-|  973/1059 [9.3s elapsed, 840.7ms remaining, 102.1 samples/s] 

In [None]:
# Launch the FiftyOne App on the training dataset for visual inspection.
# The value returned by `launch_app` is kept in `session` so later cells can refer to it.
session = fo.launch_app(train_dataset)

# Text cleaning for irrelevant images

In [None]:
import cv2
import numpy as np
import os
from PIL import Image
import shutil

def is_street_view(image_path, road_threshold=0.2, sidewalk_threshold=0.1):
    """
    Heuristically decides whether an image looks like a street view.

    The image is converted to HSV and two fixed colour ranges are measured:
    a "road" range and a "sidewalk" range. The image counts as a street view
    when the share of pixels inside either range exceeds its threshold.

    Args:
        image_path (str): Path to the image file.
        road_threshold (float): Share of road-range pixels that must be exceeded.
        sidewalk_threshold (float): Share of sidewalk-range pixels that must be exceeded.

    Returns:
        bool: True if either share exceeds its threshold. False otherwise, and
        also when the image cannot be read or any error occurs while
        processing it (the error is printed, not raised).
    """
    # HSV bounds as (lower, upper) pairs for each surface type.
    road_bounds = (np.array([0, 0, 0]), np.array([180, 50, 100]))
    sidewalk_bounds = (np.array([0, 0, 50]), np.array([180, 30, 200]))

    try:
        bgr = cv2.imread(image_path)
        if bgr is None:
            # imread signals an unreadable file by returning None
            return False

        hsv = cv2.cvtColor(bgr, cv2.COLOR_BGR2HSV)
        height, width = bgr.shape[0], bgr.shape[1]
        pixel_count = height * width

        # Pixels inside a range are non-zero in the mask returned by inRange.
        road_share = np.count_nonzero(cv2.inRange(hsv, *road_bounds)) / pixel_count
        sidewalk_share = np.count_nonzero(cv2.inRange(hsv, *sidewalk_bounds)) / pixel_count

        return road_share > road_threshold or sidewalk_share > sidewalk_threshold

    except Exception as e:
        print(f"Error processing {image_path}: {e}")
        return False

def filter_street_view_dataset(input_root_dir, output_root_dir):
    """
    Copies the images classified as street views into a parallel directory tree.

    For each of `train/data`, `test/data` and `valid/data` under
    `input_root_dir`, every image accepted by `is_street_view` is copied to the
    same relative location under `output_root_dir`. Files are copied
    byte-for-byte, so images are not re-encoded and lose no quality. Only
    files with an image extension are considered; nothing else in the input
    tree is carried over.

    Args:
        input_root_dir (str): Root directory containing train, test, valid subdirectories.
        output_root_dir (str): Root directory to store filtered street view images.

    Returns:
        None. Progress, missing input subdirectories and copy failures are
        reported with `print`; a failed copy does not stop the run.
    """
    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp')

    for subdir in ["train/data", "test/data", "valid/data"]:
        input_subdir = os.path.join(input_root_dir, subdir)
        output_subdir = os.path.join(output_root_dir, subdir)

        if not os.path.exists(input_subdir):
            print(f"Warning: Subdirectory '{input_subdir}' not found.")
            continue

        # exist_ok keeps repeated runs against the same output root working
        os.makedirs(output_subdir, exist_ok=True)

        for filename in os.listdir(input_subdir):
            if not filename.lower().endswith(image_extensions):
                continue

            image_path = os.path.join(input_subdir, filename)
            if not is_street_view(image_path):
                continue

            output_path = os.path.join(output_subdir, filename)
            try:
                # A plain file copy: decoding and re-saving the image would
                # recompress it and leave the source file handle open.
                shutil.copy2(image_path, output_path)
                print(f"Copied {filename} from {subdir} to {output_subdir}")
            except OSError as e:
                print(f"Error copying {filename}: {e}")
def clean_non_street_view(input_root_dir):
    """
    Removes, in place, every image that `is_street_view` rejects.

    Looks at `train/data`, `test/data` and `valid/data` under
    `input_root_dir`. Files without an image extension are left untouched.
    Missing subdirectories and failed deletions are reported with `print`.

    Args:
        input_root_dir (str): Root directory containing train, test, valid subdirectories.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg', '.bmp')

    for subdir in ["train/data", "test/data", "valid/data"]:
        folder = os.path.join(input_root_dir, subdir)

        if not os.path.exists(folder):
            print(f"Warning: Subdirectory '{folder}' not found.")
            continue

        for filename in os.listdir(folder):
            # Skip anything that is not an image file.
            if not filename.lower().endswith(image_suffixes):
                continue

            candidate = os.path.join(folder, filename)
            # Street-view images are the ones to keep.
            if is_street_view(candidate):
                continue

            try:
                os.remove(candidate)
                print(f"Deleted {filename} from {subdir}")
            except Exception as e:
                print(f"Error deleting {filename}: {e}")



In [None]:
# Run the street-view filter over the downloaded dataset.
input_root_directory = dataset_dir  # root that holds the train/, valid/ and test/ splits
output_root_directory = "/content/filtered_dataset/" # root that receives the filtered images

# Non-destructive option: write the images classified as street views to the output root.
filter_street_view_dataset(input_root_directory, output_root_directory)

# Destructive alternative (disabled): delete the rejected images from the input root in place.
#clean_non_street_view(input_root_directory)

Copied 336_png_jpg.rf.3ec2792295d97be37e1b489285808bfc.jpg from train to /content/filtered_dataset/train
Copied images180_jpg.rf.df158d2ae784d3eb3bb603690f8da689.jpg from train to /content/filtered_dataset/train
Copied 461_png_jpg.rf.3e4e58e34144c7c3c238fb78f4cde012.jpg from train to /content/filtered_dataset/train
Copied a2_156_jpg.rf.2663a49db1a3888000cb4e4c9ebdbbac.jpg from train to /content/filtered_dataset/train
Copied T111076_jpg.rf.9d567bc819ae9b97df18f4c317feecca.jpg from train to /content/filtered_dataset/train
Copied 533_png_jpg.rf.7c01b63feb647c58382981f5b848706d.jpg from train to /content/filtered_dataset/train
Copied Autorickshaw-1-_jpg.rf.f16001a595fcd8b27b0150936495e0de.jpg from train to /content/filtered_dataset/train
Copied T112435_jpg.rf.64ffd0ac7423385bdde31e535520ce7e.jpg from train to /content/filtered_dataset/train
Copied 2023-Bangladeshi-Rail-crossing-syestem-Bangladesh-railway-High-speed-train-in-Bangladesh_129_jpg.rf.9065cd17f4b8fed118bb8f5b58a6eec9.jpg from tr

In [1]:
import json

# Load the COCO annotation file of the training split.
# JSON text is UTF-8 by specification, so the encoding is pinned instead of
# depending on the platform default.
with open(f"{dataset_dir}/train/_annotations.coco.json", "r", encoding="utf-8") as f:
    data = json.load(f)

# Print top-level keys
print(data.keys())

# Category entry left out of the class list.
# NOTE(review): presumably the dataset-wide umbrella category rather than a
# real object class - confirm against the annotation file.
EXCLUDED_CATEGORY = 'car-vehicle-dog-animal-curb-wall'

# Extract category names, in the order they appear in the annotation file
category_names = [
    category["name"]
    for category in data["categories"]
    if category["name"] != EXCLUDED_CATEGORY
]

# Print the list of category names
print(category_names)

NameError: name 'dataset_dir' is not defined

In [None]:
import fiftyone.brain as fob
import fiftyone.zoo as foz

In [None]:
# Load the "zero-shot-detection-transformer-torch" entry of the FiftyOne model
# zoo, pointing it at the "google/owlvit-base-patch32" checkpoint and passing the
# category names extracted from the training annotations as its classes.
model = foz.load_zoo_model(
    "zero-shot-detection-transformer-torch",
    name_or_path="google/owlvit-base-patch32",
    classes=category_names,
)