In [34]:
import torch
import json
import cv2
import numpy as np
import xml.etree.ElementTree as ET
import os
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rcParams
from PIL import Image
from ibug.face_detection import RetinaFacePredictor
from ibug.face_parsing import FaceParser as RTNetPredictor
from ibug.face_parsing.utils import label_colormap
%matplotlib inline

In [2]:
threshold = 0.8 # default = 0.8
weights = None # r"C:\mahmoud_dev\machine learning\segmentation\face_parsing\ibug\face_parsing\rtnet\weights\rtnet101-fcn-14.torch" # default = None
num_classes = 14 # default = 11
max_num_faces = 50 # default = 50

parser_encoder = 'rtnet50'
parser_decoder = 'fcn'

root = r"D:\_Xchng\Mahmoud\segmenation\dataset\images"
save_root = r"D:\_Xchng\Mahmoud\segmenation\dataset\annotations"

rotate_image = True


In [3]:
def get_coco_annotations():
    # Label ids of the dataset
    category_ids = {
    "background": 0,
    "skin": 1,
    "left_eyebrow": 2,
    "right_eyebrow": 3,
    "left_eye": 4,
    "right_eye": 5,
    "nose": 6,
    "upper_lip": 7,
    "inner_mouth": 8,
    "lower_lip": 9,
    "hair": 10,
    "left_ear": 11,
    "right_ear": 12,
    "glasses": 13
    }

    coco_annotations = {
        "info": {},
        "licenses": [],
        "categories": [],
        "images": [],
        "annotations": []
    }

    for category_name, category_id in category_ids.items():
        # Add category information to the COCO annotations dictionary
        category_info = {
            "id": category_id,
            "name": category_name
        }
        coco_annotations["categories"].append(category_info)
    
    return coco_annotations["categories"]
get_coco_annotations()

[{'id': 0, 'name': 'background'},
 {'id': 1, 'name': 'skin'},
 {'id': 2, 'name': 'left_eyebrow'},
 {'id': 3, 'name': 'right_eyebrow'},
 {'id': 4, 'name': 'left_eye'},
 {'id': 5, 'name': 'right_eye'},
 {'id': 6, 'name': 'nose'},
 {'id': 7, 'name': 'upper_lip'},
 {'id': 8, 'name': 'inner_mouth'},
 {'id': 9, 'name': 'lower_lip'},
 {'id': 10, 'name': 'hair'},
 {'id': 11, 'name': 'left_ear'},
 {'id': 12, 'name': 'right_ear'},
 {'id': 13, 'name': 'glasses'}]

In [4]:
def get_image_pred(img, face_detector, face_parser):
    if rotate_image:
        img = cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE)

    faces = face_detector(img, rgb=False)
    masks = face_parser.predict_img(img, faces, rgb=False)
    
    return img, faces, masks


def render_image(img_path, face_detector, face_parser):
    colormap = label_colormap(num_classes)
    img, faces, masks = get_image_pred(img_path, face_detector, face_parser)
    alphas = np.linspace(0.75, 0.25, num=max_num_faces)

    for i, (face, mask) in enumerate(zip(faces, masks)):
        bbox = face[:4].astype(int)
        cv2.rectangle(img, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color=(0, 0, 255), thickness=2)
        alpha = alphas[i]
        index = mask > 0
        res = colormap[mask]
        img[index] = (1 - alpha) * img[index].astype(float) + \
            alpha * res[index].astype(float)
    img = np.clip(img.round(), 0, 255).astype(np.uint8)
    
    return img

def segment_images():
    if torch.cuda.is_available():
        device = 'cuda:0'
    face_detector = RetinaFacePredictor(threshold=threshold, device=device, model=(RetinaFacePredictor.get_model('mobilenet0.25')))
    face_parser = RTNetPredictor(device=device, ckpt=weights, encoder=parser_encoder, decoder=parser_decoder, num_classes=num_classes)

    for i, image_name in enumerate(os.listdir(root)):
        image_path = os.path.join(root, image_name)

        filename = image_name.split(".")
        save_path = os.path.join(save_root, f"{filename[0]}.json")
        
        save_segmentation_result(i, filename[0], image_path, face_detector, face_parser, save_path)

# segment_images()



In [39]:
if torch.cuda.is_available():
    device = 'cuda:0'
face_detector = RetinaFacePredictor(threshold=threshold, device=device, model=(RetinaFacePredictor.get_model('mobilenet0.25')))
face_parser = RTNetPredictor(device=device, ckpt=weights, encoder=parser_encoder, decoder=parser_decoder, num_classes=num_classes)
# specify the directory where the segmented images are stored
segmented_images_dir = r'D:\_Xchng\Mahmoud\segmenation\dataset\images'
output_path = r'D:\_Xchng\Mahmoud\segmenation\dataset\images\annotations.json'
# create a dictionary to map RGB values to label IDs for each label
color_to_label = {
    "(0, 0, 0)": 0, # background
    "(128, 0, 0)": 1, # skin
    "(0, 128, 0)": 2, # left_eyebrow
    "(128, 128, 0)": 3, # right_eyebrow
    "(0, 0, 128)": 4, # left_eye
    "(128, 0, 128)": 5, # right_eye
    "(0, 128, 128)": 6, # nose
    "(128, 128, 128)": 7, # upper_lip
    "(64, 0, 0)": 8, # inner_mouth
    "(192, 0, 0)": 9, # lower_lip
    "(64, 128, 0)": 10, # hair
    "(192, 128, 0)": 11, # left_ear
    "(64, 0, 128)": 12, # right_ear
    "(192, 0, 128)": 13 # glasses
}



Hybrid stages [True, True, True]


In [43]:
# create a list to store the annotations for each image
annotations = []

# loop over each image file in the directory
for image_file in os.listdir(segmented_images_dir):
    if image_file.endswith(".png"):
        # open the image using PIL
        image = cv2.imread(os.path.join(segmented_images_dir, image_file))
        # get the image width and height
        width, height = image.shape[0], image.shape[1]
        # create a dictionary for the image annotation
        image_annotation = {
            'file_name': image_file,
            'height': height,
            'width': width,
            'annotations': []
        }
        image, faces, masks = get_image_pred(image, face_detector, face_parser)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = Image.fromarray(image)
        # loop over each pixel in the image
        print(color_to_label[masks[0]])
        for y in range(height):
            for x in range(width):
                # get the color of the pixel
                r, g, b = masks[0].getpixel((x, y))
                # check if the pixel is part of a segmented object
                if (r, g, b) in color_to_label:
                    print("found it")
                    # get the label ID corresponding to the color of the pixel
                    label_id = color_to_label[(r, g, b)]
                    # create a dictionary for the segmentation annotation
                    segmentation_annotation = {
                        'segmentation': [[x, y, x+1, y, x+1, y+1, x, y+1]],
                        'category_id': label_id,
                        'iscrowd': 0
                    }
                    # append the segmentation annotation to the image annotation
                    image_annotation['annotations'].append(segmentation_annotation)
        # append the image annotation to the list of annotations
        annotations.append(image_annotation)

# create a dictionary for the COCO dataset
dataset = {
    'images': [],
    'annotations': [],
    'categories': get_coco_annotations()}
for i, image_annotation in enumerate(annotations):
    # set the image ID
    image_id = i + 1
    # update the image annotation with the ID
    image_annotation['id'] = image_id
    # add the image annotation to the COCO dataset
    dataset['images'].append(image_annotation)
    # loop over the segmentation annotations in the image annotation and add them to the COCO dataset
    for segmentation_annotation in image_annotation['annotations']:
        # set the segmentation ID
        segmentation_id = len(dataset['annotations']) + 1
        # update the segmentation annotation with the image ID and ID
        segmentation_annotation['image_id'] = image_id
        segmentation_annotation['id'] = segmentation_id
        # add the segmentation annotation to the COCO dataset
        dataset['annotations'].append(segmentation_annotation)

# save the COCO dataset dictionary to a JSON file
with open(output_path, 'w') as f:
    json.dump(dataset, f, indent=4)


TypeError: unhashable type: 'numpy.ndarray'