diff --git a/CHANGELOG.md b/CHANGELOG.md
index 73f98c4..8046f51 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+* Fix fomm model download by @Ghassen-Chaabouni in https://github.com/sensity-ai/dot/pull/160
 * Add video and image swap to the GUI by @Ghassen-Chaabouni in https://github.com/sensity-ai/dot/pull/116
 
 ## [1.3.0] - 2024-02-19
diff --git a/README.md b/README.md
index 10bb434..4b10ef3 100644
--- a/README.md
+++ b/README.md
@@ -50,7 +50,7 @@ Supported methods:
 Download and run the dot executable for your OS:
 
 - Windows (Tested on Windows 10 and 11):
-  - Download `dot.zip` from [here](https://drive.google.com/file/d/1IgMaaKzFw4lBKa8MWnsH7nKwBQtrtLGJ/view), unzip it and then run `dot.exe`
+  - Download `dot.zip` from [here](https://drive.google.com/file/d/1_duaEs2SAUGfAvr5oC4V3XR-ZzBtWQXo/view), unzip it and then run `dot.exe`
 - Ubuntu:
   - ToDo
 - Mac (Tested on Apple M2 Sonoma 14.0):
@@ -147,7 +147,7 @@ pip install -e .
 
 ##### Download Models
 
-- Download dot model checkpoints from [here](https://drive.google.com/file/d/1nL3WkntTrVBZVQvOF2i7clY8eFRqSr8L/view)
+- Download dot model checkpoints from [here](https://drive.google.com/file/d/1Y_11R66DL4N1WY8cNlXVNR3RkHnGDGWX/view)
 - Unzip the downloaded file in the root of this project
 
 #### CLI Usage
@@ -303,10 +303,6 @@ Instructions vary depending on your operating system.
 
 - Install [OBS Studio](https://obsproject.com/).
 
-- Install [VirtualCam plugin](https://obsproject.com/forum/resources/obs-virtualcam.539/).
-
-Choose `Install and register only 1 virtual camera`.
-
 - Run OBS Studio.
 
 - In the Sources section, press on Add button ("+" sign),
diff --git a/envs/environment-gpu.yaml b/envs/environment-gpu.yaml
index 0dd4c4d..586ea6f 100644
--- a/envs/environment-gpu.yaml
+++ b/envs/environment-gpu.yaml
@@ -7,5 +7,5 @@ dependencies:
   - python=3.8
   - pip=21.3
   - pip:
-    - onnxruntime-gpu==1.9.0
+    - onnxruntime-gpu==1.18.0
   - -r ../requirements.txt
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 1db3256..7463b94 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -42,8 +42,12 @@ colorama==0.4.6
     #   ipython
     #   pytest
     #   tqdm
+coloredlogs==15.0.1
+    # via onnxruntime-gpu
 coverage==6.4.2
-    # via pytest-cov
+    # via
+    #   coverage
+    #   pytest-cov
 customtkinter==5.2.0
     # via dot (setup.cfg)
 cycler==0.11.0
@@ -60,7 +64,7 @@ dlib==19.19.0
     # via dot (setup.cfg)
 executing==0.8.3
     # via stack-data
-face-alignment==1.3.3
+face-alignment==1.4.1
     # via dot (setup.cfg)
 filelock==3.7.1
     # via
@@ -71,9 +75,11 @@ flake8==3.9.2
 flatbuffers==2.0
     # via
     #   mediapipe
-    #   onnxruntime
+    #   onnxruntime-gpu
 fonttools==4.43.0
     # via matplotlib
+humanfriendly==10.0
+    # via coloredlogs
 identify==2.5.1
     # via pre-commit
 idna==2.10
@@ -130,7 +136,7 @@ numpy==1.22.0
     #   matplotlib
     #   mediapipe
     #   numba
-    #   onnxruntime
+    #   onnxruntime-gpu
     #   opencv-contrib-python
     #   opencv-python
     #   pywavelets
@@ -138,7 +144,7 @@ numpy==1.22.0
     #   scipy
     #   tifffile
     #   torchvision
-onnxruntime==1.9.0
+onnxruntime-gpu==1.18.0
     # via dot (setup.cfg)
 opencv-contrib-python==4.5.5.62
     # via
@@ -152,6 +158,7 @@ packaging==21.3
     # via
     #   kornia
     #   matplotlib
+    #   onnxruntime-gpu
     #   pytest
     #   scikit-image
 parso==0.8.3
@@ -183,7 +190,7 @@ protobuf==3.20.2
     # via
     #   dot (setup.cfg)
     #   mediapipe
-    #   onnxruntime
+    #   onnxruntime-gpu
 pure-eval==0.2.2
     # via stack-data
 py==1.11.0
@@ -204,6 +211,8 @@ pyparsing==3.0.9
     # via
     #   matplotlib
     #   packaging
+pyreadline3==3.4.1
+    # via humanfriendly
 pytest==7.1.2
     # via
     #   dot (setup.cfg)
@@ -243,7 +252,9 @@ sounddevice==0.4.6
 stack-data==0.3.0
     # via ipython
 sympy==1.12
-    # via torch
+    # via
+    #   onnxruntime-gpu
+    #   torch
 tifffile==2022.5.4
     # via scikit-image
 toml==0.10.2
diff --git a/requirements.txt b/requirements.txt
index cdb637a..d637d27 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -21,6 +21,8 @@ colorama==0.4.6
     #   click
     #   pytest
     #   tqdm
+coloredlogs==15.0.1
+    # via onnxruntime-gpu
 customtkinter==5.2.0
     # via dot (setup.cfg)
 cycler==0.11.0
@@ -31,16 +33,18 @@ dlib==19.19.0
     # via dot (setup.cfg)
 exceptiongroup==1.1.2
     # via pytest
-face-alignment==1.3.3
+face-alignment==1.4.1
     # via dot (setup.cfg)
 filelock==3.12.2
     # via torch
 flatbuffers==2.0
     # via
     #   mediapipe
-    #   onnxruntime
+    #   onnxruntime-gpu
 fonttools==4.43.0
     # via matplotlib
+humanfriendly==10.0
+    # via coloredlogs
 idna==2.10
     # via requests
 imageio==2.19.3
@@ -77,7 +81,7 @@ numpy==1.22.0
     #   matplotlib
     #   mediapipe
     #   numba
-    #   onnxruntime
+    #   onnxruntime-gpu
     #   opencv-contrib-python
     #   opencv-python
     #   pywavelets
@@ -85,7 +89,7 @@ numpy==1.22.0
     #   scipy
     #   tifffile
     #   torchvision
-onnxruntime==1.9.0
+onnxruntime-gpu==1.18.0
     # via dot (setup.cfg)
 opencv-contrib-python==4.5.5.62
     # via
@@ -99,6 +103,7 @@ packaging==21.3
     # via
     #   kornia
     #   matplotlib
+    #   onnxruntime-gpu
     #   pytest
     #   scikit-image
 pillow==10.0.1
@@ -114,13 +119,15 @@ protobuf==3.20.2
     # via
     #   dot (setup.cfg)
     #   mediapipe
-    #   onnxruntime
+    #   onnxruntime-gpu
 pycparser==2.21
     # via cffi
 pyparsing==3.0.9
     # via
     #   matplotlib
     #   packaging
+pyreadline3==3.4.1
+    # via humanfriendly
 pytest==7.4.0
     # via dot (setup.cfg)
 python-dateutil==2.8.2
@@ -147,7 +154,9 @@ six==1.16.0
 sounddevice==0.4.6
     # via mediapipe
 sympy==1.12
-    # via torch
+    # via
+    #   onnxruntime-gpu
+    #   torch
 tifffile==2022.5.4
     # via scikit-image
 tomli==2.0.1
diff --git a/setup.cfg b/setup.cfg
index a5982e5..83a9f12 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.3.0
+current_version = 1.4.0
 commit = True
 tag = False
 parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)?
@@ -30,11 +30,11 @@ python_requires = >=3.8,<3.9
 install_requires =
     click
     dlib
-    face_alignment
+    face_alignment==1.4.1
     kornia
     mediapipe
    numpy
-    onnxruntime
+    onnxruntime-gpu==1.18.0
     opencv-contrib-python
     opencv_python
     Pillow
diff --git a/src/dot/__init__.py b/src/dot/__init__.py
index a4e851a..d09266d 100644
--- a/src/dot/__init__.py
+++ b/src/dot/__init__.py
@@ -6,7 +6,7 @@
 
 from .dot import DOT
 
-__version__ = "1.1.0"
+__version__ = "1.4.0"
 __author__ = "Sensity"
 __url__ = "https://github.com/sensity-ai/dot/tree/main/dot"
 __docs__ = "Deepfake offensive toolkit"
diff --git a/src/dot/commons/utils.py b/src/dot/commons/utils.py
index 257420f..65cab6b 100644
--- a/src/dot/commons/utils.py
+++ b/src/dot/commons/utils.py
@@ -15,9 +15,9 @@
 np.random.seed(SEED)
 
 
-def log(*args, file=sys.stderr, **kwargs):
+def log(*args, **kwargs):
     time_str = f"{time.time():.6f}"
-    print(f"[{time_str}]", *args, file=file, **kwargs)
+    print(f"[{time_str}]", *args, **kwargs)
 
 
 def info(*args, file=sys.stdout, **kwargs):
diff --git a/src/dot/fomm/face_alignment.py b/src/dot/fomm/face_alignment.py
new file mode 100644
index 0000000..206ebf6
--- /dev/null
+++ b/src/dot/fomm/face_alignment.py
@@ -0,0 +1,364 @@
+import warnings
+from enum import IntEnum
+
+import numpy as np
+import torch
+from face_alignment.folder_data import FolderData
+from face_alignment.utils import crop, draw_gaussian, flip, get_image, get_preds_fromhm
+from packaging import version
+from tqdm import tqdm
+
+
+class LandmarksType(IntEnum):
+    """Enum class defining the type of landmarks to detect.
+
+    ``TWO_D`` - the detected points ``(x,y)`` are detected in a 2D space and follow the visible contour of the face
+    ``TWO_HALF_D`` - these points represent the projection of the 3D points onto a 2D plane
+    ``THREE_D`` - detect the points ``(x,y,z)`` in a 3D space
+
+    """
+
+    TWO_D = 1
+    TWO_HALF_D = 2
+    THREE_D = 3
+
+
+class NetworkSize(IntEnum):
+    # TINY = 1
+    # SMALL = 2
+    # MEDIUM = 3
+    LARGE = 4
+
+
+default_model_urls = {
+    "2DFAN-4": "saved_models/face_alignment/2DFAN4-cd938726ad.zip",
+    "3DFAN-4": "saved_models/face_alignment/3DFAN4-4a694010b9.zip",
+    "depth": "saved_models/face_alignment/depth-6c4283c0e0.zip",
+}
+
+models_urls = {
+    "1.6": {
+        "2DFAN-4": "saved_models/face_alignment/2DFAN4_1.6-c827573f02.zip",
+        "3DFAN-4": "saved_models/face_alignment/3DFAN4_1.6-ec5cf40a1d.zip",
+        "depth": "saved_models/face_alignment/depth_1.6-2aa3f18772.zip",
+    },
+    "1.5": {
+        "2DFAN-4": "saved_models/face_alignment/2DFAN4_1.5-a60332318a.zip",
+        "3DFAN-4": "saved_models/face_alignment/3DFAN4_1.5-176570af4d.zip",
+        "depth": "saved_models/face_alignment/depth_1.5-bc10f98e39.zip",
+    },
+}
+
+
+class FaceAlignment:
+    def __init__(
+        self,
+        landmarks_type,
+        network_size=NetworkSize.LARGE,
+        device="cuda",
+        dtype=torch.float32,
+        flip_input=False,
+        face_detector="sfd",
+        face_detector_kwargs=None,
+        verbose=False,
+    ):
+        self.device = device
+        self.flip_input = flip_input
+        self.landmarks_type = landmarks_type
+        self.verbose = verbose
+        self.dtype = dtype
+
+        if version.parse(torch.__version__) < version.parse("1.5.0"):
+            raise ImportError(
+                "Unsupported pytorch version detected. "
+                "Minimum supported version of pytorch: 1.5.0. "
+                "Either upgrade (recommended) your pytorch setup, or downgrade to face-alignment 1.2.0"
+            )
+
+        network_size = int(network_size)
+        pytorch_version = torch.__version__
+        if "dev" in pytorch_version:
+            pytorch_version = pytorch_version.rsplit(".", 2)[0]
+        else:
+            pytorch_version = pytorch_version.rsplit(".", 1)[0]
+
+        if "cuda" in device:
+            torch.backends.cudnn.benchmark = True
+
+        # Get the face detector
+        face_detector_module = __import__(
+            "face_alignment.detection." + face_detector,
+            globals(),
+            locals(),
+            [face_detector],
+            0,
+        )
+        face_detector_kwargs = face_detector_kwargs or {}
+        self.face_detector = face_detector_module.FaceDetector(
+            device=device, verbose=verbose, **face_detector_kwargs
+        )
+
+        # Initialise the face alignment networks
+        if landmarks_type == LandmarksType.TWO_D:
+            network_name = "2DFAN-" + str(network_size)
+        else:
+            network_name = "3DFAN-" + str(network_size)
+        self.face_alignment_net = torch.jit.load(
+            models_urls.get(pytorch_version, default_model_urls)[network_name]
+        )
+
+        self.face_alignment_net.to(device, dtype=dtype)
+        self.face_alignment_net.eval()
+
+        # Initialise the depth prediction network
+        if landmarks_type == LandmarksType.THREE_D:
+            self.depth_prediction_net = torch.jit.load(
+                models_urls.get(pytorch_version, default_model_urls)["depth"]
+            )
+
+            self.depth_prediction_net.to(device, dtype=dtype)
+            self.depth_prediction_net.eval()
+
+    def get_landmarks(
+        self,
+        image_or_path,
+        detected_faces=None,
+        return_bboxes=False,
+        return_landmark_score=False,
+    ):
+        """Deprecated, please use get_landmarks_from_image
+
+        Arguments:
+            image_or_path {string or numpy.array or torch.tensor} -- The input image or path to it
+
+        Keyword Arguments:
+            detected_faces {list of numpy.array} -- list of bounding boxes, one for each face found
+            in the image (default: {None})
+            return_bboxes {boolean} -- If True, return the face bounding boxes in addition to the keypoints.
+            return_landmark_score {boolean} -- If True, return the keypoint scores along with the keypoints.
+        """
+        return self.get_landmarks_from_image(
+            image_or_path, detected_faces, return_bboxes, return_landmark_score
+        )
+
+    @torch.no_grad()
+    def get_landmarks_from_image(
+        self,
+        image_or_path,
+        detected_faces=None,
+        return_bboxes=False,
+        return_landmark_score=False,
+    ):
+        """Predict the landmarks for each face present in the image.
+
+        This function predicts a set of 68 2D or 3D landmark points, one set for each face present.
+        If detected_faces is None the method will also run a face detector.
+
+        Arguments:
+            image_or_path {string or numpy.array or torch.tensor} -- The input image or path to it.
+
+        Keyword Arguments:
+            detected_faces {list of numpy.array} -- list of bounding boxes, one for each face found
+            in the image (default: {None})
+            return_bboxes {boolean} -- If True, return the face bounding boxes in addition to the keypoints.
+            return_landmark_score {boolean} -- If True, return the keypoint scores along with the keypoints.
+
+        Return:
+            result:
+                1. if both return_bboxes and return_landmark_score are False, result will be:
+                    landmark
+                2. Otherwise, result will be one of the following, depending on the actual value of return_* arguments.
+                    (landmark, landmark_score, detected_face)
+                    (landmark, None,           detected_face)
+                    (landmark, landmark_score, None         )
+        """
+        image = get_image(image_or_path)  # noqa
+
+        if detected_faces is None:
+            detected_faces = self.face_detector.detect_from_image(image.copy())
+
+        if len(detected_faces) == 0:
+            warnings.warn("No faces were detected.")
+            if return_bboxes or return_landmark_score:
+                return None, None, None
+            else:
+                return None
+
+        landmarks = []
+        landmarks_scores = []
+        for i, d in enumerate(detected_faces):
+            center = torch.tensor(
+                [d[2] - (d[2] - d[0]) / 2.0, d[3] - (d[3] - d[1]) / 2.0]
+            )
+            center[1] = center[1] - (d[3] - d[1]) * 0.12
+            scale = (d[2] - d[0] + d[3] - d[1]) / self.face_detector.reference_scale
+
+            inp = crop(image, center, scale)  # noqa
+            inp = torch.from_numpy(inp.transpose((2, 0, 1))).float()
+
+            inp = inp.to(self.device, dtype=self.dtype)
+            inp.div_(255.0).unsqueeze_(0)
+
+            out = self.face_alignment_net(inp).detach()
+            if self.flip_input:
+                out += flip(
+                    self.face_alignment_net(flip(inp)).detach(), is_label=True
+                )  # noqa
+            out = out.to(device="cpu", dtype=torch.float32).numpy()
+
+            pts, pts_img, scores = get_preds_fromhm(out, center.numpy(), scale)  # noqa
+            pts, pts_img = torch.from_numpy(pts), torch.from_numpy(pts_img)
+            pts, pts_img = pts.view(68, 2) * 4, pts_img.view(68, 2)
+            scores = scores.squeeze(0)
+
+            if self.landmarks_type == LandmarksType.THREE_D:
+                heatmaps = np.zeros((68, 256, 256), dtype=np.float32)
+                for i in range(68):
+                    if pts[i, 0] > 0 and pts[i, 1] > 0:
+                        heatmaps[i] = draw_gaussian(heatmaps[i], pts[i], 2)  # noqa
+                heatmaps = torch.from_numpy(heatmaps).unsqueeze_(0)
+
+                heatmaps = heatmaps.to(self.device, dtype=self.dtype)
+                depth_pred = (
+                    self.depth_prediction_net(torch.cat((inp, heatmaps), 1))
+                    .data.cpu()
+                    .view(68, 1)
+                    .to(dtype=torch.float32)
+                )
+                pts_img = torch.cat(
+                    (pts_img, depth_pred * (1.0 / (256.0 / (200.0 * scale)))), 1
+                )
+
+            landmarks.append(pts_img.numpy())
+            landmarks_scores.append(scores)
+
+        if not return_bboxes:
+            detected_faces = None
+        if not return_landmark_score:
+            landmarks_scores = None
+        if return_bboxes or return_landmark_score:
+            return landmarks, landmarks_scores, detected_faces
+        else:
+            return landmarks
+
+    @torch.no_grad()
+    def get_landmarks_from_batch(
+        self,
+        image_batch,
+        detected_faces=None,
+        return_bboxes=False,
+        return_landmark_score=False,
+    ):
+        """Predict the landmarks for each face present in each image of the batch.
+
+        This function predicts a set of 68 2D or 3D landmark points for each face found in each
+        image of the batch, in parallel.
+        If detected_faces is None the method will also run a face detector.
+
+        Arguments:
+            image_batch {torch.tensor} -- The input images batch
+
+        Keyword Arguments:
+            detected_faces {list of numpy.array} -- list of bounding boxes, one for each face found
+            in the image (default: {None})
+            return_bboxes {boolean} -- If True, return the face bounding boxes in addition to the keypoints.
+            return_landmark_score {boolean} -- If True, return the keypoint scores along with the keypoints.
+
+        Return:
+            result:
+                1. if both return_bboxes and return_landmark_score are False, result will be:
+                    landmarks
+                2. Otherwise, result will be one of the following, depending on the actual value of return_* arguments.
+                    (landmark, landmark_score, detected_face)
+                    (landmark, None,           detected_face)
+                    (landmark, landmark_score, None         )
+        """
+
+        if detected_faces is None:
+            detected_faces = self.face_detector.detect_from_batch(image_batch)
+
+        if len(detected_faces) == 0:
+            warnings.warn("No faces were detected.")
+            if return_bboxes or return_landmark_score:
+                return None, None, None
+            else:
+                return None
+
+        landmarks = []
+        landmarks_scores_list = []
+        # A batch for each frame
+        for i, faces in enumerate(detected_faces):
+            res = self.get_landmarks_from_image(
+                image_batch[i].cpu().numpy().transpose(1, 2, 0),
+                detected_faces=faces,
+                return_landmark_score=return_landmark_score,
+            )
+            if return_landmark_score:
+                landmark_set, landmarks_scores, _ = res
+                landmarks_scores_list.append(landmarks_scores)
+            else:
+                landmark_set = res
+            # Backward compatibility
+            if landmark_set is not None:
+                landmark_set = np.concatenate(landmark_set, axis=0)
+            else:
+                landmark_set = []
+            landmarks.append(landmark_set)
+
+        if not return_bboxes:
+            detected_faces = None
+        if not return_landmark_score:
+            landmarks_scores_list = None
+        if return_bboxes or return_landmark_score:
+            return landmarks, landmarks_scores_list, detected_faces
+        else:
+            return landmarks
+
+    def get_landmarks_from_directory(
+        self,
+        path,
+        extensions=[".jpg", ".png"],
+        recursive=True,
+        show_progress_bar=True,
+        return_bboxes=False,
+        return_landmark_score=False,
+    ):
+        """Scan a directory for images with a given extension type(s) and predict the landmarks for each
+        face present in the images found.
+
+        Arguments:
+            path {str} -- path to the target directory containing the images
+
+        Keyword Arguments:
+            extensions {list of str} -- list containing the image extensions considered (default: ['.jpg', '.png'])
+            recursive {boolean} -- If True, scans for images recursively (default: True)
+            show_progress_bar {boolean} -- If True displays a progress bar (default: True)
+            return_bboxes {boolean} -- If True, return the face bounding boxes in addition to the keypoints.
+            return_landmark_score {boolean} -- If True, return the keypoint scores along with the keypoints.
+        """
+        dataset = FolderData(
+            path,
+            self.face_detector.tensor_or_path_to_ndarray,
+            extensions,
+            recursive,
+            self.verbose,
+        )
+        dataloader = torch.utils.data.DataLoader(
+            dataset, batch_size=1, shuffle=False, num_workers=2, prefetch_factor=4
+        )
+
+        predictions = {}
+        for (image_path, image) in tqdm(dataloader, disable=not show_progress_bar):
+            image_path, image = image_path[0], image[0]
+            bounding_boxes = self.face_detector.detect_from_image(image)
+            if return_bboxes or return_landmark_score:
+                preds, bbox, score = self.get_landmarks_from_image(
+                    image,
+                    bounding_boxes,
+                    return_bboxes=return_bboxes,
+                    return_landmark_score=return_landmark_score,
+                )
+                predictions[image_path] = (preds, bbox, score)
+            else:
+                preds = self.get_landmarks_from_image(image, bounding_boxes)
+                predictions[image_path] = preds
+
+        return predictions
diff --git a/src/dot/fomm/predictor_local.py b/src/dot/fomm/predictor_local.py
index c7803d8..9518def 100644
--- a/src/dot/fomm/predictor_local.py
+++ b/src/dot/fomm/predictor_local.py
@@ -1,11 +1,11 @@
 #!/usr/bin/env python3
 
-import face_alignment
 import numpy as np
 import torch
 import yaml
 from scipy.spatial import ConvexHull
 
+from . import face_alignment
 from .modules.generator_optim import OcclusionAwareGenerator
 from .modules.keypoint_detector import KPDetector
 
@@ -69,7 +69,12 @@ def __init__(
         self.checkpoint_path = checkpoint_path
         self.generator, self.kp_detector = self.load_checkpoints()
         self.fa = face_alignment.FaceAlignment(
-            face_alignment.LandmarksType._2D, flip_input=True, device=self.device
+            face_alignment.LandmarksType.TWO_D,
+            flip_input=True,
+            device=self.device,
+            face_detector_kwargs={
+                "path_to_detector": "saved_models/face_alignment/s3fd-619a316812.pth"
+            },
         )
         self.source = None
         self.kp_source = None
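
Usage sketch (not part of the patch itself): the final hunk shows how predictor_local.py constructs the vendored FaceAlignment, which now loads its TorchScript landmark models and s3fd detector weights from local paths instead of downloading them. The following minimal sketch mirrors that call; it assumes the dot model checkpoints have been unzipped at the repository root (so saved_models/face_alignment/ exists), that the package is importable as dot.fomm, and the image filename is a placeholder.

import torch

from dot.fomm import face_alignment

# Mirrors the constructor call in predictor_local.py; the s3fd path below is
# the detector checkpoint referenced by the patch.
fa = face_alignment.FaceAlignment(
    face_alignment.LandmarksType.TWO_D,
    flip_input=True,
    device="cuda" if torch.cuda.is_available() else "cpu",
    face_detector_kwargs={
        "path_to_detector": "saved_models/face_alignment/s3fd-619a316812.pth"
    },
)

# Returns a list of (68, 2) landmark arrays, one per detected face,
# or None when no face is found. "example_frame.jpg" is a placeholder.
landmarks = fa.get_landmarks_from_image("example_frame.jpg")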