From 2339d22a95f500cd453ff59bbc3c4df01893c765 Mon Sep 17 00:00:00 2001
From: Chris Rapson <>
Date: Fri, 7 Jul 2023 09:04:28 +1200
Subject: [PATCH 1/5] Add capability to convert keypoints from COCO to YOLOv5
 format

---
 pylabel/exporter.py | 78 ++++++++++++++++++++++++++++++++++++++-------
 pylabel/shared.py   |  1 +
 2 files changed, 68 insertions(+), 11 deletions(-)

diff --git a/pylabel/exporter.py b/pylabel/exporter.py
index 1c838da..b3d4a04 100644
--- a/pylabel/exporter.py
+++ b/pylabel/exporter.py
@@ -403,6 +403,41 @@ def voc_xml_file_creation(
 
         return output_file_paths
 
+    @staticmethod
+    def _df_to_csv(
+        df: pd.DataFrame,
+        file_path: str,
+        sep: str = " ",
+        float_format: str = "%0.4f",
+        columns: List[str] = None
+    ):
+        """
+        Write df to file_path as sep-delimited text, flattening list cells (e.g. the keypoints) into separate fields.
+        The pandas to_csv method would need a quote or escape character for such lists; this custom function avoids both.
+        """
+        if columns is None:
+            columns = df.columns
+
+        def _format_float(fl: float, fl_format: str):
+            if np.isnan(fl):
+                return ""
+            else:
+                return fl_format % fl
+
+        with open(file_path, "w") as f:
+            for _, row in df[columns].iterrows():
+                formatted_row = []
+                for x in row:
+                    if isinstance(x, float):
+                        formatted_row.append(_format_float(x, float_format))
+                    elif isinstance(x, list):
+                        formatted_row.extend(
+                            [_format_float(y, float_format) if isinstance(y, float) else str(y) for y in x]
+                        )
+                    else:
+                        formatted_row.append(str(x))
+                f.write(sep.join(formatted_row) + '\n')
+
     def ExportToYoloV5(
         self,
         output_path="training/labels",
@@ -528,6 +563,25 @@ def ExportToYoloV5(
                 yolo_dataset["ann_bbox_height"] / yolo_dataset["img_height"]
             )
 
+            keypoints_yolo = [[] for _ in range(len(yolo_dataset.index))]
+            for img_ix, row in yolo_dataset.iterrows():
+                img_width = row["img_width"]
+                img_height = row["img_height"]
+                keypoints_coco = row["ann_keypoints"]
+                if keypoints_coco:
+                    for bbox_ix, kp in enumerate(keypoints_coco):
+                        if bbox_ix % 3 == 0:
+                            # x coordinate
+                            keypoints_yolo[img_ix].append(kp / img_width)
+                        elif bbox_ix % 3 == 1:
+                            # y coordinate
+                            keypoints_yolo[img_ix].append(kp / img_height)
+                        else:
+                            # visibility
+                            keypoints_yolo[img_ix].append(kp)
+            yolo_dataset["keypoints_scaled_as_string"] = keypoints_yolo
+
+
         # Create folders to store annotations
         if output_path == None:
             dest_folder = PurePath(
@@ -562,19 +616,21 @@ def ExportToYoloV5(
 
             # If segmentation = false then output bounding boxes
             if segmentation == False:
-                df_single_img_annots.to_csv(
-                    destination,
-                    index=False,
-                    header=False,
+                columns = [
+                    "cat_id",
+                    "center_x_scaled",
+                    "center_y_scaled",
+                    "width_scaled",
+                    "height_scaled",
+                    "keypoints_scaled_as_string"
+                ]
+
+                self._df_to_csv(
+                    df=df_single_img_annots,
+                    file_path=destination,
                     sep=" ",
                     float_format="%.4f",
-                    columns=[
-                        "cat_id",
-                        "center_x_scaled",
-                        "center_y_scaled",
-                        "width_scaled",
-                        "height_scaled",
-                    ],
+                    columns=columns
                 )
 
             # If segmentation = true then output the segmentation mask
diff --git a/pylabel/shared.py b/pylabel/shared.py
index 04a1c5c..cabcb0e 100644
--- a/pylabel/shared.py
+++ b/pylabel/shared.py
@@ -24,6 +24,7 @@
     "ann_area",
     "ann_segmentation",
     "ann_iscrowd",
+    "ann_keypoints",
     "ann_pose",
     "ann_truncated",
     "ann_difficult",

From 4a12a6c765fbee5fa593228c34c1c0484a5f6397 Mon Sep 17 00:00:00 2001
From: Chris Rapson <>
Date: Wed, 12 Jul 2023 14:42:02 +1200
Subject: [PATCH 2/5] Guard keypoints export to YOLO with a flag, and add
 documentation

---
 pylabel/exporter.py | 55 ++++++++++++++++++++++++++++-----------------
 1 file changed, 34 insertions(+), 21 deletions(-)

diff --git a/pylabel/exporter.py b/pylabel/exporter.py
index b3d4a04..34a6103 100644
--- a/pylabel/exporter.py
+++ b/pylabel/exporter.py
@@ -446,6 +446,7 @@ def ExportToYoloV5(
         use_splits=False,
         cat_id_index=None,
         segmentation=False,
+        keypoints=False,
     ):
         """Writes annotation files to disk in YOLOv5 format and returns the paths to files.
 
@@ -470,13 +471,22 @@ def ExportToYoloV5(
                 /train. If a YAML file is specificied then the YAML file will use the splits to specify the folders user for the
                 train, val, and test datasets.
             cat_id_index (int):
-                Reindex the cat_id values so that that they start from an int (usually 0 or 1) and
+                Reindex the cat_id values so that they start from an int (usually 0 or 1) and
                 then increment the cat_ids to index + number of categories continuously.
                 It's useful if the cat_ids are not continuous in the original dataset.
                 Yolo requires the set of annotations to start at 0 when training a model.
             segmentation (boolean):
                 If true, then segmentation annotations will be exported instead of bounding box annotations.
                 If there are no segmentation annotations, then no annotations will be empty.
+            keypoints (boolean):
+                If true, then keypoint annotations will be exported as well as bounding box annotations.
+                It is not possible to export both segmentation and keypoint annotations at the same time in YOLO format.
+                Each bounding box within a dataset should have the same number of keypoints defined e.g. 17 for COCO.
+                Each keypoint is a triplet of (x, y, visibility), see e.g. https://cocodataset.org/#format-data
+                If some images have no keypoint annotations, then the bounding boxes will be followed by a series of
+                delimiting spaces.
+                If some bounding boxes within an image have no keypoint annotations, those keypoints will be a series of
+                zeroes, denoting x=0, y=0, visibility=0.
 
         Returns:
             A list with 1 or more paths (strings) to annotations files. If a YAML file is created
@@ -490,6 +500,8 @@ def ExportToYoloV5(
         """
         ds = self.dataset
 
+        assert not (segmentation and keypoints), "Only one of segmentation and keypoints can be exported in YOLO format"
+
         # Inspired by https://github.com/aws-samples/groundtruth-object-detection/blob/master/create_annot.py
         yolo_dataset = ds.df.copy(deep=True)
         # Convert nan values in the split column from nan to '' because those are easier to work with with when building paths
@@ -563,24 +575,24 @@ def ExportToYoloV5(
                 yolo_dataset["ann_bbox_height"] / yolo_dataset["img_height"]
             )
 
-            keypoints_yolo = [[] for _ in range(len(yolo_dataset.index))]
-            for img_ix, row in yolo_dataset.iterrows():
-                img_width = row["img_width"]
-                img_height = row["img_height"]
-                keypoints_coco = row["ann_keypoints"]
-                if keypoints_coco:
-                    for bbox_ix, kp in enumerate(keypoints_coco):
-                        if bbox_ix % 3 == 0:
-                            # x coordinate
-                            keypoints_yolo[img_ix].append(kp / img_width)
-                        elif bbox_ix % 3 == 1:
-                            # y coordinate
-                            keypoints_yolo[img_ix].append(kp / img_height)
-                        else:
-                            # visibility
-                            keypoints_yolo[img_ix].append(kp)
-            yolo_dataset["keypoints_scaled_as_string"] = keypoints_yolo
-
+            if keypoints:
+                keypoints_yolo = [[] for _ in range(len(yolo_dataset.index))]
+                for img_ix, row in yolo_dataset.iterrows():
+                    img_width = row["img_width"]
+                    img_height = row["img_height"]
+                    keypoints_coco = row["ann_keypoints"]
+                    if keypoints_coco:
+                        for bbox_ix, kp in enumerate(keypoints_coco):
+                            if bbox_ix % 3 == 0:
+                                # x coordinate
+                                keypoints_yolo[img_ix].append(kp / img_width)
+                            elif bbox_ix % 3 == 1:
+                                # y coordinate
+                                keypoints_yolo[img_ix].append(kp / img_height)
+                            else:
+                                # visibility
+                                keypoints_yolo[img_ix].append(kp)
+                yolo_dataset["keypoints_scaled"] = keypoints_yolo
 
         # Create folders to store annotations
         if output_path == None:
@@ -622,8 +634,9 @@ def ExportToYoloV5(
                     "center_y_scaled",
                     "width_scaled",
                     "height_scaled",
-                    "keypoints_scaled_as_string"
                 ]
+                if keypoints:
+                    columns.append("keypoints_scaled")
 
                 self._df_to_csv(
                     df=df_single_img_annots,
@@ -734,7 +747,7 @@ def ExportToCoco(self, output_path=None, cat_id_index=None):
                 This is where the annotation files will be written. If not-specified then the path will be derived from the path_to_annotations and
                 name properties of the dataset object.
             cat_id_index (int):
-                Reindex the cat_id values so that that they start from an int (usually 0 or 1) and
+                Reindex the cat_id values so that they start from an int (usually 0 or 1) and
                 then increment the cat_ids to index + number of categories continuously.
                 It's useful if the cat_ids are not continuous in the original dataset.
                 Some models like Yolo require starting from 0 and others like Detectron require starting from 1.

From 22c7d14cecf7cb799386c984b1c4f30148c7d37b Mon Sep 17 00:00:00 2001
From: Chris Rapson <4982550+chrisrapson@users.noreply.github.com>
Date: Thu, 13 Jul 2023 13:50:09 +1200
Subject: [PATCH 3/5] Export keypoints to COCO format, if available

---
 pylabel/exporter.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/pylabel/exporter.py b/pylabel/exporter.py
index 34a6103..15551c7 100644
--- a/pylabel/exporter.py
+++ b/pylabel/exporter.py
@@ -816,6 +816,12 @@ def ExportToCoco(self, output_path=None, cat_id_index=None):
                     }
                 ]
 
+                # include keypoints, if available
+                if "ann_keypoints" in df.keys():
+                    n_keypoints = int(len(df["ann_keypoints"][i]) / 3)  # 3 numbers per keypoint: x,y,visibility
+                    annotations[0]["num_keypoints"] = n_keypoints
+                    annotations[0]["keypoints"] = df["ann_keypoints"][i]
+
                 categories = [
                     {
                         "id": int(df["cat_id"][i]),

From 86af20f632c19c398fab7bc3bfae42b82148215b Mon Sep 17 00:00:00 2001
From: Alex Heaton <aheaton@gmail.com>
Date: Sun, 16 Jul 2023 19:03:15 +0900
Subject: [PATCH 4/5] Bump v# to 52

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 8194959..da2abc2 100644
--- a/setup.py
+++ b/setup.py
@@ -8,7 +8,7 @@
 setup(
     name="pylabel",
     packages=["pylabel"],
-    version="0.1.51",
+    version="0.1.52",
     description="Transform, analyze, and visualize computer vision annotations.",
     long_description=long_description,
     long_description_content_type="text/markdown",

From badfabf1e8e3b9d81a5c18f2a78e710b0be20cbf Mon Sep 17 00:00:00 2001
From: Chris Rapson <4982550+chrisrapson@users.noreply.github.com>
Date: Mon, 17 Jul 2023 18:21:10 +1200
Subject: [PATCH 5/5] Make COCO keypoints exporter work with NaNs (i.e.
 datasets and images without keypoint labels)

---
 pylabel/exporter.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/pylabel/exporter.py b/pylabel/exporter.py
index 15551c7..2afa4d8 100644
--- a/pylabel/exporter.py
+++ b/pylabel/exporter.py
@@ -817,10 +817,18 @@ def ExportToCoco(self, output_path=None, cat_id_index=None):
                 ]
 
                 # include keypoints, if available
-                if "ann_keypoints" in df.keys():
-                    n_keypoints = int(len(df["ann_keypoints"][i]) / 3)  # 3 numbers per keypoint: x,y,visibility
+                if "ann_keypoints" in df.keys() and (not np.isnan(df["ann_keypoints"][i]).all()):
+                    keypoints = df["ann_keypoints"][i]
+                    if isinstance(keypoints, list):
+                        n_keypoints = int(len(keypoints) / 3)  # 3 numbers per keypoint: x,y,visibility
+                    elif isinstance(keypoints, np.ndarray):
+                        n_keypoints = int(keypoints.size / 3)  # 3 numbers per keypoint: x,y,visibility
+                    else:
+                        raise TypeError('The keypoints array is expected to be either a list or a numpy array')
                     annotations[0]["num_keypoints"] = n_keypoints
-                    annotations[0]["keypoints"] = df["ann_keypoints"][i]
+                    annotations[0]["keypoints"] = keypoints
+                else:
+                    pass
 
                 categories = [
                     {