From a2571001fdb5c8ea070fc07624090c3022eebf7b Mon Sep 17 00:00:00 2001 From: cleong110 <122366389+cleong110@users.noreply.github.com> Date: Wed, 27 Nov 2024 14:28:25 -0500 Subject: [PATCH 1/8] add --save_crops option --- sign_language_segmentation/bin.py | 37 ++++++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/sign_language_segmentation/bin.py b/sign_language_segmentation/bin.py index c28d795..c330ec1 100644 --- a/sign_language_segmentation/bin.py +++ b/sign_language_segmentation/bin.py @@ -9,6 +9,7 @@ from pose_format.utils.generic import pose_normalization_info, pose_hide_legs, normalize_hands_3d from sign_language_segmentation.src.utils.probs_to_segments import probs_to_segments +from pathlib import Path def add_optical_flow(pose: Pose): @@ -59,8 +60,12 @@ def predict(model, pose: Pose): def get_args(): parser = argparse.ArgumentParser() - parser.add_argument('--pose', required=True, type=str, help='path to input pose file') + parser.add_argument('--pose', required=True, type=Path, help='path to input pose file') parser.add_argument('--elan', required=True, type=str, help='path to output elan file') + parser.add_argument('--save_crops', + type=str, + choices=["SENTENCE","SIGN"], + help='whether to save cropped sentence .pose files') parser.add_argument('--video', default=None, required=False, type=str, help='path to video file') parser.add_argument('--subtitles', default=None, required=False, type=str, help='path to subtitle file') parser.add_argument('--model', default='model_E1s-1.pth', required=False, type=str, help='path to model file') @@ -68,6 +73,23 @@ def get_args(): return parser.parse_args() +def save_pose_segments(tiers, tier_id, input_file_path): + # reload it without any of the processing, so we get all the original points and such. + with input_file_path.open("rb") as f: + pose = Pose.read(f.read()) + + for i, segment in enumerate(tiers[tier_id]): + out_path = input_file_path.parent / f"{input_file_path.stem}_{tier_id}_{i}.pose" + start_frame = int(segment["start"]) + end_frame = int(segment["end"]) + cropped_pose = Pose( + header=pose.header, + body=pose.body[start_frame:end_frame] + ) + + print(f"saving cropped pose with start {start_frame} and end {end_frame} to {out_path}") + with out_path.open("wb") as f: + cropped_pose.write(f) def main(): args = get_args() @@ -80,6 +102,7 @@ def main(): else: pose = process_pose(pose) + print(pose) print('Loading model ...') install_dir = str(os.path.dirname(os.path.abspath(__file__))) model = load_model(os.path.join(install_dir, "dist", args.model)) @@ -109,9 +132,17 @@ def main(): eaf.add_linked_file(args.pose, mimetype="application/pose") for tier_id, segments in tiers.items(): + # print(f"TIER: {tier_id}")s eaf.add_tier(tier_id) for segment in segments: - eaf.add_annotation(tier_id, int(segment["start"] / fps * 1000), int(segment["end"] / fps * 1000)) + start_frame = int(segment["start"] / fps * 1000) + end_frame = int(segment["end"] / fps * 1000) + eaf.add_annotation(tier_id, start_frame, end_frame) + + if args.save_crops: + print(f"Saving {args.save_crops} cropped .pose files") + save_pose_segments(tiers, tier_id=args.save_crops, input_file_path=args.pose) + if args.subtitles and os.path.exists(args.subtitles): import srt @@ -122,7 +153,7 @@ def main(): end = subtitle.end.total_seconds() eaf.add_annotation("SUBTITLE", int(start * 1000), int(end * 1000), subtitle.content) - print('Saving to disk ...') + print('Saving .eaf to disk ...') eaf.to_file(args.elan) From 143dd7e32b19ef53099ed3afb87865f4eb4181e6 Mon Sep 17 00:00:00 2001 From: cleong110 <122366389+cleong110@users.noreply.github.com> Date: Wed, 27 Nov 2024 14:30:53 -0500 Subject: [PATCH 2/8] Minor cleanup --- sign_language_segmentation/bin.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sign_language_segmentation/bin.py b/sign_language_segmentation/bin.py index c330ec1..05889bf 100644 --- a/sign_language_segmentation/bin.py +++ b/sign_language_segmentation/bin.py @@ -65,7 +65,7 @@ def get_args(): parser.add_argument('--save_crops', type=str, choices=["SENTENCE","SIGN"], - help='whether to save cropped sentence .pose files') + help='whether to save cropped .pose files') parser.add_argument('--video', default=None, required=False, type=str, help='path to video file') parser.add_argument('--subtitles', default=None, required=False, type=str, help='path to subtitle file') parser.add_argument('--model', default='model_E1s-1.pth', required=False, type=str, help='path to model file') @@ -102,7 +102,6 @@ def main(): else: pose = process_pose(pose) - print(pose) print('Loading model ...') install_dir = str(os.path.dirname(os.path.abspath(__file__))) model = load_model(os.path.join(install_dir, "dist", args.model)) From 49f05d8dbf04561cbc5beba8dbdb6cfbe7e713f7 Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Tue, 3 Dec 2024 16:36:14 -0500 Subject: [PATCH 3/8] CDL: some requested changes for PR --- sign_language_segmentation/bin.py | 34 +++++++++++++++---------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/sign_language_segmentation/bin.py b/sign_language_segmentation/bin.py index 877aa9e..08fd593 100644 --- a/sign_language_segmentation/bin.py +++ b/sign_language_segmentation/bin.py @@ -1,4 +1,5 @@ #!/usr/bin/env python +from pathlib import Path import argparse import os @@ -9,7 +10,7 @@ from pose_format.utils.generic import pose_normalization_info, pose_hide_legs, normalize_hands_3d from sign_language_segmentation.src.utils.probs_to_segments import probs_to_segments -from pathlib import Path + def add_optical_flow(pose: Pose): @@ -62,7 +63,7 @@ def get_args(): parser = argparse.ArgumentParser() parser.add_argument('--pose', required=True, type=Path, help='path to input pose file') parser.add_argument('--elan', required=True, type=str, help='path to output elan file') - parser.add_argument('--save_crops', + parser.add_argument('--save-segments', type=str, choices=["SENTENCE","SIGN"], help='whether to save cropped .pose files') @@ -79,17 +80,17 @@ def save_pose_segments(tiers, tier_id, input_file_path): pose = Pose.read(f.read()) for i, segment in enumerate(tiers[tier_id]): - out_path = input_file_path.parent / f"{input_file_path.stem}_{tier_id}_{i}.pose" - start_frame = int(segment["start"]) - end_frame = int(segment["end"]) - cropped_pose = Pose( - header=pose.header, - body=pose.body[start_frame:end_frame] - ) - - print(f"saving cropped pose with start {start_frame} and end {end_frame} to {out_path}") - with out_path.open("wb") as f: - cropped_pose.write(f) + out_path = input_file_path.parent / f"{input_file_path.stem}_{tier_id}_{i}.pose" + start_frame = int(segment["start"]) + end_frame = int(segment["end"]) + cropped_pose = Pose( + header=pose.header, + body=pose.body[start_frame:end_frame] + ) + + print(f"saving cropped pose with start {start_frame} and end {end_frame} to {out_path}") + with out_path.open("wb") as f: + cropped_pose.write(f) def main(): args = get_args() @@ -131,16 +132,15 @@ def main(): eaf.add_linked_file(args.pose, mimetype="application/pose") for tier_id, segments in tiers.items(): - # print(f"TIER: {tier_id}")s eaf.add_tier(tier_id) for segment in segments: start_frame = int(segment["start"] / fps * 1000) end_frame = int(segment["end"] / fps * 1000) eaf.add_annotation(tier_id, start_frame, end_frame) - if args.save_crops: - print(f"Saving {args.save_crops} cropped .pose files") - save_pose_segments(tiers, tier_id=args.save_crops, input_file_path=args.pose) + if args.save_segments: + print(f"Saving {args.save_segments} cropped .pose files") + save_pose_segments(tiers, tier_id=args.save_segments, input_file_path=args.pose) if args.subtitles and os.path.exists(args.subtitles): From 64e2e86c1e492ce4a6641d7c42bc52373890fc0c Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Wed, 4 Dec 2024 09:04:56 -0500 Subject: [PATCH 4/8] Some style changes for PR and Pylint. Rearranging functions, fixing lines/whitespace, and opening with a specific encoding --- sign_language_segmentation/bin.py | 66 +++++++++++++++---------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/sign_language_segmentation/bin.py b/sign_language_segmentation/bin.py index 08fd593..13fd0e1 100644 --- a/sign_language_segmentation/bin.py +++ b/sign_language_segmentation/bin.py @@ -12,7 +12,6 @@ from sign_language_segmentation.src.utils.probs_to_segments import probs_to_segments - def add_optical_flow(pose: Pose): from pose_format.numpy.representation.distance import DistanceRepresentation from pose_format.utils.optical_flow import OpticalFlowCalculator @@ -59,61 +58,59 @@ def predict(model, pose: Pose): return model(pose_data) -def get_args(): - parser = argparse.ArgumentParser() - parser.add_argument('--pose', required=True, type=Path, help='path to input pose file') - parser.add_argument('--elan', required=True, type=str, help='path to output elan file') - parser.add_argument('--save-segments', - type=str, - choices=["SENTENCE","SIGN"], - help='whether to save cropped .pose files') - parser.add_argument('--video', default=None, required=False, type=str, help='path to video file') - parser.add_argument('--subtitles', default=None, required=False, type=str, help='path to subtitle file') - parser.add_argument('--model', default='model_E1s-1.pth', required=False, type=str, help='path to model file') - parser.add_argument('--no-pose-link', action='store_true', help='whether to link the pose file') - - return parser.parse_args() - def save_pose_segments(tiers, tier_id, input_file_path): - # reload it without any of the processing, so we get all the original points and such. + # reload it without any of the processing, so we get all the original points and such. with input_file_path.open("rb") as f: pose = Pose.read(f.read()) - + for i, segment in enumerate(tiers[tier_id]): out_path = input_file_path.parent / f"{input_file_path.stem}_{tier_id}_{i}.pose" start_frame = int(segment["start"]) end_frame = int(segment["end"]) - cropped_pose = Pose( - header=pose.header, - body=pose.body[start_frame:end_frame] - ) - + cropped_pose = Pose(header=pose.header, body=pose.body[start_frame:end_frame]) + print(f"saving cropped pose with start {start_frame} and end {end_frame} to {out_path}") with out_path.open("wb") as f: cropped_pose.write(f) + +def get_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--pose", required=True, type=Path, help="path to input pose file") + parser.add_argument("--elan", required=True, type=str, help="path to output elan file") + parser.add_argument( + "--save-segments", type=str, choices=["SENTENCE", "SIGN"], help="whether to save cropped .pose files" + ) + parser.add_argument("--video", default=None, required=False, type=str, help="path to video file") + parser.add_argument("--subtitles", default=None, required=False, type=str, help="path to subtitle file") + parser.add_argument("--model", default="model_E1s-1.pth", required=False, type=str, help="path to model file") + parser.add_argument("--no-pose-link", action="store_true", help="whether to link the pose file") + + return parser.parse_args() + + def main(): args = get_args() - print('Loading pose ...') + print("Loading pose ...") with open(args.pose, "rb") as f: pose = Pose.read(f.read()) - if 'E4' in args.model: + if "E4" in args.model: pose = process_pose(pose, optical_flow=True, hand_normalization=True) else: pose = process_pose(pose) - print('Loading model ...') + print("Loading model ...") install_dir = str(os.path.dirname(os.path.abspath(__file__))) model = load_model(os.path.join(install_dir, "dist", args.model)) - print('Estimating segments ...') + print("Estimating segments ...") probs = predict(model, pose) sign_segments = probs_to_segments(probs["sign"], 60, 50) sentence_segments = probs_to_segments(probs["sentence"], 90, 90) - print('Building ELAN file ...') + print("Building ELAN file ...") tiers = { "SIGN": sign_segments, "SENTENCE": sentence_segments, @@ -137,24 +134,27 @@ def main(): start_frame = int(segment["start"] / fps * 1000) end_frame = int(segment["end"] / fps * 1000) eaf.add_annotation(tier_id, start_frame, end_frame) - + if args.save_segments: print(f"Saving {args.save_segments} cropped .pose files") save_pose_segments(tiers, tier_id=args.save_segments, input_file_path=args.pose) - if args.subtitles and os.path.exists(args.subtitles): import srt + eaf.add_tier("SUBTITLE") - with open(args.subtitles, "r") as infile: + # open with explicit encoding, + # as directed in https://github.com/cdown/srt/blob/master/srt_tools/utils.py#L155-L160 + # see also https://github.com/cdown/srt/issues/67, https://github.com/cdown/srt/issues/36 + with open(args.subtitles, "r", encoding="utf-8-sig") as infile: for subtitle in srt.parse(infile): start = subtitle.start.total_seconds() end = subtitle.end.total_seconds() eaf.add_annotation("SUBTITLE", int(start * 1000), int(end * 1000), subtitle.content) - print('Saving .eaf to disk ...') + print("Saving .eaf to disk ...") eaf.to_file(args.elan) -if __name__ == '__main__': +if __name__ == "__main__": main() From be7aafe6c574b405acde3b36d42784648b9c355f Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Wed, 4 Dec 2024 09:11:06 -0500 Subject: [PATCH 5/8] Trying to clarify frame number to ms timestamp conversion --- sign_language_segmentation/bin.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sign_language_segmentation/bin.py b/sign_language_segmentation/bin.py index 13fd0e1..bc43b86 100644 --- a/sign_language_segmentation/bin.py +++ b/sign_language_segmentation/bin.py @@ -130,10 +130,12 @@ def main(): for tier_id, segments in tiers.items(): eaf.add_tier(tier_id) + frames_per_millisecond = fps*1000 for segment in segments: - start_frame = int(segment["start"] / fps * 1000) - end_frame = int(segment["end"] / fps * 1000) - eaf.add_annotation(tier_id, start_frame, end_frame) + # convert frame numbers to millisecond timestamps, for Elan + start_frame_time = int(segment["start"] / fps * frames_per_millisecond) + end_frame_time = int(segment["end"] / fps * frames_per_millisecond) + eaf.add_annotation(tier_id, start_frame_time, end_frame_time) if args.save_segments: print(f"Saving {args.save_segments} cropped .pose files") From 34bd1ee1914876449f9ea52b6721cc1290cc4a15 Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Wed, 4 Dec 2024 09:18:23 -0500 Subject: [PATCH 6/8] Type annotations for save_pose_segments --- sign_language_segmentation/bin.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sign_language_segmentation/bin.py b/sign_language_segmentation/bin.py index bc43b86..486b944 100644 --- a/sign_language_segmentation/bin.py +++ b/sign_language_segmentation/bin.py @@ -58,7 +58,7 @@ def predict(model, pose: Pose): return model(pose_data) -def save_pose_segments(tiers, tier_id, input_file_path): +def save_pose_segments(tiers:dict, tier_id:str, input_file_path:Path): # reload it without any of the processing, so we get all the original points and such. with input_file_path.open("rb") as f: pose = Pose.read(f.read()) @@ -69,7 +69,7 @@ def save_pose_segments(tiers, tier_id, input_file_path): end_frame = int(segment["end"]) cropped_pose = Pose(header=pose.header, body=pose.body[start_frame:end_frame]) - print(f"saving cropped pose with start {start_frame} and end {end_frame} to {out_path}") + print(f"Saving cropped pose with start {start_frame} and end {end_frame} to {out_path}") with out_path.open("wb") as f: cropped_pose.write(f) From 0fbc47165d6e61d25bc4918430bf20e6fdce36f1 Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Wed, 4 Dec 2024 09:21:51 -0500 Subject: [PATCH 7/8] Adding output type annotations for the ones I feel sure of --- sign_language_segmentation/bin.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sign_language_segmentation/bin.py b/sign_language_segmentation/bin.py index 486b944..91eb310 100644 --- a/sign_language_segmentation/bin.py +++ b/sign_language_segmentation/bin.py @@ -12,7 +12,7 @@ from sign_language_segmentation.src.utils.probs_to_segments import probs_to_segments -def add_optical_flow(pose: Pose): +def add_optical_flow(pose: Pose)->None: from pose_format.numpy.representation.distance import DistanceRepresentation from pose_format.utils.optical_flow import OpticalFlowCalculator @@ -26,7 +26,7 @@ def add_optical_flow(pose: Pose): pose.body.data = np.concatenate([pose.body.data, flow], axis=-1).astype(np.float32) -def process_pose(pose: Pose, optical_flow=False, hand_normalization=False): +def process_pose(pose: Pose, optical_flow=False, hand_normalization=False) -> Pose: pose = pose.get_components(["POSE_LANDMARKS", "LEFT_HAND_LANDMARKS", "RIGHT_HAND_LANDMARKS"]) normalization_info = pose_normalization_info(pose.header) @@ -58,7 +58,7 @@ def predict(model, pose: Pose): return model(pose_data) -def save_pose_segments(tiers:dict, tier_id:str, input_file_path:Path): +def save_pose_segments(tiers:dict, tier_id:str, input_file_path:Path)->None: # reload it without any of the processing, so we get all the original points and such. with input_file_path.open("rb") as f: pose = Pose.read(f.read()) From f4110c18d6f61c09bf39c29252f22d918a4451f3 Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Wed, 4 Dec 2024 16:00:24 -0500 Subject: [PATCH 8/8] Fix my own mistake in ms time conversion for Elan --- sign_language_segmentation/bin.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/sign_language_segmentation/bin.py b/sign_language_segmentation/bin.py index 91eb310..25e79aa 100644 --- a/sign_language_segmentation/bin.py +++ b/sign_language_segmentation/bin.py @@ -130,12 +130,11 @@ def main(): for tier_id, segments in tiers.items(): eaf.add_tier(tier_id) - frames_per_millisecond = fps*1000 for segment in segments: # convert frame numbers to millisecond timestamps, for Elan - start_frame_time = int(segment["start"] / fps * frames_per_millisecond) - end_frame_time = int(segment["end"] / fps * frames_per_millisecond) - eaf.add_annotation(tier_id, start_frame_time, end_frame_time) + start_time_ms = int(segment["start"] / fps * 1000) + end_time_ms = int(segment["end"] / fps * 1000) + eaf.add_annotation(tier_id, start_time_ms, end_time_ms) if args.save_segments: print(f"Saving {args.save_segments} cropped .pose files")