Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow relative pathnames for importing/exporting annotations #487

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
7 changes: 7 additions & 0 deletions README.imports
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Importing COCO annotations from other sources can be very cumbersome. Relative pathnames are often encouraged (apparently in the coco-annotator too); however, importing or exporting removes that relationship.
A new attribute, `relpath`, has been added to the ImageModel to capture the relative path of an image at import time as well as during scanning. If there are no sub-directories, this attribute will be equal to the file_name; however, the search logic will use the relative pathname to discover the image.
When exporting, the relative path will replace the 'file_name' entry in the 'image' dictionary. This behaviour can be switched off by setting the 'EXPORT_RELPATH' environment variable to false (in the compose file for the workers).
If the annotation only contains a bbox and no valid segmentation, the segmentation is derived from the bbox. Previously, this annotation would have been ignored.

Uwe Rosebrock July 2021

2 changes: 2 additions & 0 deletions backend/config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ class Config:
WORKER_CONNECTIONS = 1000

TESTING = os.getenv("TESTING", False)

# os.getenv returns a string whenever the variable is set, and every non-empty
# string (including "false") is truthy. Parse the value explicitly so that
# EXPORT_RELPATH=false really disables relative-path export. Unset -> True.
EXPORT_RELPATH = os.getenv("EXPORT_RELPATH", "true").strip().lower() not in (
    "", "0", "false", "no", "off")

### Workers
CELERY_BROKER_URL = os.getenv("CELERY_BROKER_URL", "amqp://user:password@messageq:5672//")
Expand Down
16 changes: 11 additions & 5 deletions backend/database/images.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,22 @@
import os
import imantics as im


from pathlib import PurePath
from PIL import Image, ImageFile
from mongoengine import *

from .events import Event, SessionEvent
from .datasets import DatasetModel
from .annotations import AnnotationModel

from config import Config

ImageFile.LOAD_TRUNCATED_IMAGES = True


class ImageModel(DynamicDocument):

COCO_PROPERTIES = ["id", "width", "height", "file_name", "path", "license",\
"flickr_url", "coco_url", "date_captured", "dataset_id"]
"flickr_url", "coco_url", "date_captured", "dataset_id","relpath","num_annotations"]

# -- Constants
THUMBNAIL_DIRECTORY = '.thumbnail'
Expand All @@ -35,6 +35,7 @@ class ImageModel(DynamicDocument):

# Absolute path to image file
path = StringField(required=True, unique=True)
relpath = StringField(required=True, unique=True)
width = IntField(required=True)
height = IntField(required=True)
file_name = StringField()
Expand Down Expand Up @@ -74,16 +75,21 @@ def create_from_path(cls, path, dataset_id=None):

if dataset_id is not None:
image.dataset_id = dataset_id
dataset = DatasetModel.objects.get(id=dataset_id)
directory = os.path.join(Config.DATASET_DIRECTORY, dataset.name)
else:
# Get dataset name from path
folders = path.split('/')

i = folders.index("datasets")
dataset_name = folders[i+1]

directory = os.path.join(*folders[:i+2])
dataset = DatasetModel.objects(name=dataset_name).first()
if dataset is not None:
image.dataset_id = dataset.id


# UR added relpath
image.relpath = str(PurePath(image.path).relative_to(directory))
pil_image.close()

return image
Expand Down
4 changes: 4 additions & 0 deletions backend/database/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@ class TaskModel(DynamicDocument):
_update_every = 10
_progress_update = 0


def debug(self, string):
    """Forward ``string`` to ``self._log`` tagged with the DEBUG level."""
    severity = "DEBUG"
    self._log(string, level=severity)

def error(self, string):
    """Forward ``string`` to ``self._log`` tagged with the ERROR level."""
    severity = "ERROR"
    self._log(string, level=severity)

Expand Down
43 changes: 32 additions & 11 deletions backend/workers/tasks/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,14 @@
from celery import shared_task
from ..socket import create_socket
from mongoengine import Q
from config import Config
from pathlib import PurePath


def bbox2seg(bbox):
    """Turn a bounding box into a flat rectangular polygon.

    The first four entries of ``bbox`` are treated as
    ``(x, y, width, height)``. The result is a flat list of the four
    corner coordinates in the order (x, y), (x + w, y), (x + w, y + h),
    (x, y + h).
    """
    left, top = bbox[0], bbox[1]
    right = left + bbox[2]
    bottom = top + bbox[3]
    return [left, top, right, top, right, bottom, left, bottom]


@shared_task
def export_annotations(task_id, dataset_id, categories):

Expand Down Expand Up @@ -74,7 +80,10 @@ def export_annotations(task_id, dataset_id, categories):
total_images = db_images.count()
for image in db_images:
image = fix_ids(image)


if Config.EXPORT_RELPATH and 'relpath' in image:
image['file_name'] = image['relpath']

progress += 1
task.set_progress((progress / total_items) * 100, socket=socket)

Expand Down Expand Up @@ -103,7 +112,11 @@ def export_annotations(task_id, dataset_id, categories):

num_annotations += 1
coco.get('annotations').append(annotation)

'''
if num_annotations > 0:
image["num_annotations"]=num_annotations
image["annotated"]=True
'''
task.info(
f"Exporting {num_annotations} annotations for image {image.get('id')}")
coco.get('images').append(image)
Expand Down Expand Up @@ -135,7 +148,8 @@ def import_annotations(task_id, dataset_id, coco_json):

task = TaskModel.objects.get(id=task_id)
dataset = DatasetModel.objects.get(id=dataset_id)

# UR added relpath
directory = os.path.join(Config.DATASET_DIRECTORY, dataset.name)
task.update(status="PROGRESS")
socket = create_socket()

Expand Down Expand Up @@ -203,12 +217,12 @@ def import_annotations(task_id, dataset_id, coco_json):
for image in coco_images:
image_id = image.get('id')
image_filename = image.get('file_name')

# update progress
progress += 1
task.set_progress((progress / total_items) * 100, socket=socket)

image_model = images.filter(file_name__exact=image_filename).all()
# UR added relpath
image_model = images.filter(relpath=image_filename).all()

if len(image_model) == 0:
task.warning(f"Could not find image {image_filename}")
Expand Down Expand Up @@ -239,11 +253,11 @@ def import_annotations(task_id, dataset_id, coco_json):
progress += 1
task.set_progress((progress / total_items) * 100, socket=socket)

has_segmentation = len(segmentation) > 0
has_segmentation = (len(segmentation) > 0 or isbbox) and sum(bbox) > 1
has_keypoints = len(keypoints) > 0
if not has_segmentation and not has_keypoints:
task.warning(
f"Annotation {annotation.get('id')} has no segmentation or keypoints")
f"Annotation {annotation.get('id')} has no segmentation, bbox or keypoints")
continue

try:
Expand All @@ -259,7 +273,8 @@ def import_annotations(task_id, dataset_id, coco_json):
image_id=image_model.id,
category_id=category_model_id,
segmentation=segmentation,
keypoints=keypoints
keypoints=keypoints,
bbox = bbox
).first()

if annotation_model is None:
Expand All @@ -272,9 +287,14 @@ def import_annotations(task_id, dataset_id, coco_json):
annotation_model.metadata = annotation.get('metadata', {})

if has_segmentation:
if len(segmentation) < 1 or len(segmentation[0]) < 1: ## we have an empty segment with a bbox
task.info(f"Creating segment from bbox {bbox}")
segmentation = [bbox2seg(bbox)]
isbbox = True

annotation_model.segmentation = segmentation
annotation_model.area = area
annotation_model.bbox = bbox
annotation_model.area = area
annotation_model.bbox = bbox

if has_keypoints:
annotation_model.keypoints = keypoints
Expand All @@ -283,6 +303,7 @@ def import_annotations(task_id, dataset_id, coco_json):
annotation_model.save()

image_categories.append(category_id)

else:
annotation_model.update(deleted=False, isbbox=isbbox)
task.info(
Expand Down
28 changes: 17 additions & 11 deletions backend/workers/tasks/scan.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from celery import shared_task
from ..socket import create_socket
from .thumbnails import thumbnail_generate_single_image

from pathlib import PurePath
import os


Expand All @@ -22,36 +22,42 @@ def scan_dataset(task_id, dataset_id):

directory = dataset.directory
toplevel = list(os.listdir(directory))
task.info(f"Scanning {directory}")
task.info(f"Scanning {directory} ")

count = 0
for root, dirs, files in os.walk(directory):

task.info(f"Scanning {directory} at {root}")
try:
youarehere = toplevel.index(root.split('/')[-1])
progress = int(((youarehere)/len(toplevel))*100)
if root in toplevel:
youarehere = toplevel.index(root.split('/')[-1])
progress = int(((youarehere)/len(toplevel))*100)
else:
progress = len(toplevel)/100
youarehere = root
task.set_progress(progress, socket=socket)
except:
pass
except Exception as ee:
task.warning(f"Could not set progress {youarehere} because of {ee}")

if root.split('/')[-1].startswith('.'):
task.debug(f"Ignoring hidden root: {root}")
continue

for file in files:
path = os.path.join(root, file)

relpath = str(PurePath(path).relative_to(directory))
if path.endswith(ImageModel.PATTERN):
db_image = ImageModel.objects(path=path).first()
db_image = ImageModel.objects(relpath=relpath).first()

if db_image is not None:
task.debug(f"File already exists: {relpath}")
continue

try:
ImageModel.create_from_path(path, dataset.id).save()
count += 1
task.info(f"New file found: {path}")
except:
task.warning(f"Could not read {path}")
except Exception as e:
task.warning(f"Could not read {path} because of {e}")

[thumbnail_generate_single_image.delay(image.id) for image in ImageModel.objects(regenerate_thumbnail=True).all()]

Expand Down
16 changes: 16 additions & 0 deletions build_local.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/bin/bash
# Build the three coco-annotator images (python-env base, web front end,
# workers).
#
# Usage: ./build_local.sh [regi...]
#   With a first argument starting with "regi" the images are tagged for
#   docker-registry.it.csiro.au and pushed there; otherwise they are only
#   built locally under the "uwer/" prefix.

# Stop at the first failing command so that a broken build is never followed
# by a push of a stale image.
set -e

if [[ "${1:-}" == regi* ]]; then
    registry="docker-registry.it.csiro.au/trike/uwer"

    docker build -t "${registry}/coco-annotator:python-env" -f ./backend/Dockerfile .
    docker build -t "${registry}/annotator_webclient" -f Dockerfile .
    docker build -t "${registry}/annotator_workers" -f ./backend/workers/Dockerfile .

    docker push "${registry}/coco-annotator:python-env"
    docker push "${registry}/annotator_webclient"
    docker push "${registry}/annotator_workers"
else
    docker build -f ./backend/Dockerfile . -t uwer/coco-annotator:python-env
    docker build . -t uwer/annotator_webserver
    docker build -f ./backend/workers/Dockerfile . -t uwer/annotator_workers
fi
61 changes: 61 additions & 0 deletions docker-compose-local.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# Local compose stack for the coco-annotator: web server, celery workers,
# RabbitMQ message queue and MongoDB.
version: "3"
volumes:
  mongodb_data_uwer:
    external: false

networks:
  cocoannotator_uwer:
    external: false

services:
  webserver:
    #image: uwer/coco-annotator
    image: uwer/annotator_webserver
    #:webserver-stable
    container_name: annotator_webserver_uwer
    restart: always
    ports:
      - "5050:5000"
    environment:
      - SECRET_KEY=RandomSecretKeyHere
      - FILE_WATCHER=true
    volumes:
      - "./datasets:/datasets"
      - "./models:/models"
    depends_on:
      - database
      - workers
    networks:
      - cocoannotator_uwer
  workers:
    container_name: annotator_workers_uwer
    image: uwer/annotator_workers
    #:workers-stable
    environment:
      # Read by the export task in the workers; it has no effect on the
      # message queue container, so it is set here.
      - EXPORT_RELPATH=true
    volumes:
      - "./datasets:/datasets"
    depends_on:
      - messageq
      - database
    networks:
      - cocoannotator_uwer
  messageq:
    image: rabbitmq:3
    container_name: annotator_message_q_uwer
    environment:
      - RABBITMQ_DEFAULT_USER=user
      - RABBITMQ_DEFAULT_PASS=password
    networks:
      - cocoannotator_uwer
  database:
    image: mongo:4.0
    container_name: annotator_mongodb_uwer
    restart: always
    environment:
      - MONGO_DATA_DIR=/data/db
      - MONGO_LOG_DIR=/dev/null
    volumes:
      - "mongodb_data_uwer:/data/db"
    command: "mongod --smallfiles --logpath=/dev/null"
    networks:
      - cocoannotator_uwer