From a2571001fdb5c8ea070fc07624090c3022eebf7b Mon Sep 17 00:00:00 2001
From: cleong110 <122366389+cleong110@users.noreply.github.com>
Date: Wed, 27 Nov 2024 14:28:25 -0500
Subject: [PATCH 1/8] add --save_crops option

---
 sign_language_segmentation/bin.py | 37 ++++++++++++++++++++++++++++---
 1 file changed, 34 insertions(+), 3 deletions(-)

diff --git a/sign_language_segmentation/bin.py b/sign_language_segmentation/bin.py
index c28d795..c330ec1 100644
--- a/sign_language_segmentation/bin.py
+++ b/sign_language_segmentation/bin.py
@@ -9,6 +9,7 @@
 from pose_format.utils.generic import pose_normalization_info, pose_hide_legs, normalize_hands_3d
 
 from sign_language_segmentation.src.utils.probs_to_segments import probs_to_segments
+from pathlib import Path
 
 
 def add_optical_flow(pose: Pose):
@@ -59,8 +60,12 @@ def predict(model, pose: Pose):
 
 def get_args():
     parser = argparse.ArgumentParser()
-    parser.add_argument('--pose', required=True, type=str, help='path to input pose file')
+    parser.add_argument('--pose', required=True, type=Path, help='path to input pose file')
     parser.add_argument('--elan', required=True, type=str, help='path to output elan file')
+    parser.add_argument('--save_crops', 
+                        type=str, 
+                        choices=["SENTENCE","SIGN"],
+                        help='whether to save cropped sentence .pose files')
     parser.add_argument('--video', default=None, required=False, type=str, help='path to video file')
     parser.add_argument('--subtitles', default=None, required=False, type=str, help='path to subtitle file')
     parser.add_argument('--model', default='model_E1s-1.pth', required=False, type=str, help='path to model file')
@@ -68,6 +73,23 @@ def get_args():
 
     return parser.parse_args()
 
+def save_pose_segments(tiers, tier_id, input_file_path):
+    # reload it without any of the processing, so we get all the original points and such. 
+    with input_file_path.open("rb") as f:
+        pose = Pose.read(f.read())
+        
+    for i, segment in enumerate(tiers[tier_id]):
+                out_path = input_file_path.parent / f"{input_file_path.stem}_{tier_id}_{i}.pose"
+                start_frame = int(segment["start"])
+                end_frame = int(segment["end"])
+                cropped_pose = Pose(
+                    header=pose.header,
+                    body=pose.body[start_frame:end_frame]
+                )
+                
+                print(f"saving cropped pose with start {start_frame} and end {end_frame} to {out_path}")
+                with out_path.open("wb") as f:
+                    cropped_pose.write(f)
 
 def main():
     args = get_args()
@@ -80,6 +102,7 @@ def main():
         else:
             pose = process_pose(pose)
 
+    print(pose)
     print('Loading model ...')
     install_dir = str(os.path.dirname(os.path.abspath(__file__)))
     model = load_model(os.path.join(install_dir, "dist", args.model))
@@ -109,9 +132,17 @@ def main():
         eaf.add_linked_file(args.pose, mimetype="application/pose")
 
     for tier_id, segments in tiers.items():
+        # print(f"TIER: {tier_id}")s
         eaf.add_tier(tier_id)
         for segment in segments:
-            eaf.add_annotation(tier_id, int(segment["start"] / fps * 1000), int(segment["end"] / fps * 1000))
+            start_frame = int(segment["start"] / fps * 1000)
+            end_frame = int(segment["end"] / fps * 1000)
+            eaf.add_annotation(tier_id, start_frame, end_frame)
+            
+    if args.save_crops:
+        print(f"Saving {args.save_crops} cropped .pose files")
+        save_pose_segments(tiers, tier_id=args.save_crops, input_file_path=args.pose)
+            
 
     if args.subtitles and os.path.exists(args.subtitles):
         import srt
@@ -122,7 +153,7 @@ def main():
                 end = subtitle.end.total_seconds()
                 eaf.add_annotation("SUBTITLE", int(start * 1000), int(end * 1000), subtitle.content)
 
-    print('Saving to disk ...')
+    print('Saving .eaf to disk ...')
     eaf.to_file(args.elan)
 
 

From 143dd7e32b19ef53099ed3afb87865f4eb4181e6 Mon Sep 17 00:00:00 2001
From: cleong110 <122366389+cleong110@users.noreply.github.com>
Date: Wed, 27 Nov 2024 14:30:53 -0500
Subject: [PATCH 2/8] Minor cleanup: generalize --save_crops help text,
 remove debug print(pose)

---
 sign_language_segmentation/bin.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/sign_language_segmentation/bin.py b/sign_language_segmentation/bin.py
index c330ec1..05889bf 100644
--- a/sign_language_segmentation/bin.py
+++ b/sign_language_segmentation/bin.py
@@ -65,7 +65,7 @@ def get_args():
     parser.add_argument('--save_crops', 
                         type=str, 
                         choices=["SENTENCE","SIGN"],
-                        help='whether to save cropped sentence .pose files')
+                        help='whether to save cropped .pose files')
     parser.add_argument('--video', default=None, required=False, type=str, help='path to video file')
     parser.add_argument('--subtitles', default=None, required=False, type=str, help='path to subtitle file')
     parser.add_argument('--model', default='model_E1s-1.pth', required=False, type=str, help='path to model file')
@@ -102,7 +102,6 @@ def main():
         else:
             pose = process_pose(pose)
 
-    print(pose)
     print('Loading model ...')
     install_dir = str(os.path.dirname(os.path.abspath(__file__)))
     model = load_model(os.path.join(install_dir, "dist", args.model))

From 49f05d8dbf04561cbc5beba8dbdb6cfbe7e713f7 Mon Sep 17 00:00:00 2001
From: Colin Leong <122366389+cleong110@users.noreply.github.com>
Date: Tue, 3 Dec 2024 16:36:14 -0500
Subject: [PATCH 3/8] CDL: PR review changes: rename --save_crops to
 --save-segments, fix loop indentation, move pathlib import to top

---
 sign_language_segmentation/bin.py | 34 +++++++++++++++----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/sign_language_segmentation/bin.py b/sign_language_segmentation/bin.py
index 877aa9e..08fd593 100644
--- a/sign_language_segmentation/bin.py
+++ b/sign_language_segmentation/bin.py
@@ -1,4 +1,5 @@
 #!/usr/bin/env python
+from pathlib import Path
 import argparse
 import os
 
@@ -9,7 +10,7 @@
 from pose_format.utils.generic import pose_normalization_info, pose_hide_legs, normalize_hands_3d
 
 from sign_language_segmentation.src.utils.probs_to_segments import probs_to_segments
-from pathlib import Path
+
 
 
 def add_optical_flow(pose: Pose):
@@ -62,7 +63,7 @@ def get_args():
     parser = argparse.ArgumentParser()
     parser.add_argument('--pose', required=True, type=Path, help='path to input pose file')
     parser.add_argument('--elan', required=True, type=str, help='path to output elan file')
-    parser.add_argument('--save_crops', 
+    parser.add_argument('--save-segments', 
                         type=str, 
                         choices=["SENTENCE","SIGN"],
                         help='whether to save cropped .pose files')
@@ -79,17 +80,17 @@ def save_pose_segments(tiers, tier_id, input_file_path):
         pose = Pose.read(f.read())
         
     for i, segment in enumerate(tiers[tier_id]):
-                out_path = input_file_path.parent / f"{input_file_path.stem}_{tier_id}_{i}.pose"
-                start_frame = int(segment["start"])
-                end_frame = int(segment["end"])
-                cropped_pose = Pose(
-                    header=pose.header,
-                    body=pose.body[start_frame:end_frame]
-                )
-                
-                print(f"saving cropped pose with start {start_frame} and end {end_frame} to {out_path}")
-                with out_path.open("wb") as f:
-                    cropped_pose.write(f)
+        out_path = input_file_path.parent / f"{input_file_path.stem}_{tier_id}_{i}.pose"
+        start_frame = int(segment["start"])
+        end_frame = int(segment["end"])
+        cropped_pose = Pose(
+            header=pose.header,
+            body=pose.body[start_frame:end_frame]
+        )
+        
+        print(f"saving cropped pose with start {start_frame} and end {end_frame} to {out_path}")
+        with out_path.open("wb") as f:
+            cropped_pose.write(f)
 
 def main():
     args = get_args()
@@ -131,16 +132,15 @@ def main():
         eaf.add_linked_file(args.pose, mimetype="application/pose")
 
     for tier_id, segments in tiers.items():
-        # print(f"TIER: {tier_id}")s
         eaf.add_tier(tier_id)
         for segment in segments:
             start_frame = int(segment["start"] / fps * 1000)
             end_frame = int(segment["end"] / fps * 1000)
             eaf.add_annotation(tier_id, start_frame, end_frame)
             
-    if args.save_crops:
-        print(f"Saving {args.save_crops} cropped .pose files")
-        save_pose_segments(tiers, tier_id=args.save_crops, input_file_path=args.pose)
+    if args.save_segments:
+        print(f"Saving {args.save_segments} cropped .pose files")
+        save_pose_segments(tiers, tier_id=args.save_segments, input_file_path=args.pose)
             
 
     if args.subtitles and os.path.exists(args.subtitles):

From 64e2e86c1e492ce4a6641d7c42bc52373890fc0c Mon Sep 17 00:00:00 2001
From: Colin Leong <122366389+cleong110@users.noreply.github.com>
Date: Wed, 4 Dec 2024 09:04:56 -0500
Subject: [PATCH 4/8] Some style changes for PR and Pylint. Rearranging
 functions, fixing lines/whitespace, and opening with a specific encoding

---
 sign_language_segmentation/bin.py | 66 +++++++++++++++----------------
 1 file changed, 33 insertions(+), 33 deletions(-)

diff --git a/sign_language_segmentation/bin.py b/sign_language_segmentation/bin.py
index 08fd593..13fd0e1 100644
--- a/sign_language_segmentation/bin.py
+++ b/sign_language_segmentation/bin.py
@@ -12,7 +12,6 @@
 from sign_language_segmentation.src.utils.probs_to_segments import probs_to_segments
 
 
-
 def add_optical_flow(pose: Pose):
     from pose_format.numpy.representation.distance import DistanceRepresentation
     from pose_format.utils.optical_flow import OpticalFlowCalculator
@@ -59,61 +58,59 @@ def predict(model, pose: Pose):
         return model(pose_data)
 
 
-def get_args():
-    parser = argparse.ArgumentParser()
-    parser.add_argument('--pose', required=True, type=Path, help='path to input pose file')
-    parser.add_argument('--elan', required=True, type=str, help='path to output elan file')
-    parser.add_argument('--save-segments', 
-                        type=str, 
-                        choices=["SENTENCE","SIGN"],
-                        help='whether to save cropped .pose files')
-    parser.add_argument('--video', default=None, required=False, type=str, help='path to video file')
-    parser.add_argument('--subtitles', default=None, required=False, type=str, help='path to subtitle file')
-    parser.add_argument('--model', default='model_E1s-1.pth', required=False, type=str, help='path to model file')
-    parser.add_argument('--no-pose-link', action='store_true', help='whether to link the pose file')
-
-    return parser.parse_args()
-
 def save_pose_segments(tiers, tier_id, input_file_path):
-    # reload it without any of the processing, so we get all the original points and such. 
+    # reload it without any of the processing, so we get all the original points and such.
     with input_file_path.open("rb") as f:
         pose = Pose.read(f.read())
-        
+
     for i, segment in enumerate(tiers[tier_id]):
         out_path = input_file_path.parent / f"{input_file_path.stem}_{tier_id}_{i}.pose"
         start_frame = int(segment["start"])
         end_frame = int(segment["end"])
-        cropped_pose = Pose(
-            header=pose.header,
-            body=pose.body[start_frame:end_frame]
-        )
-        
+        cropped_pose = Pose(header=pose.header, body=pose.body[start_frame:end_frame])
+
         print(f"saving cropped pose with start {start_frame} and end {end_frame} to {out_path}")
         with out_path.open("wb") as f:
             cropped_pose.write(f)
 
+
+def get_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--pose", required=True, type=Path, help="path to input pose file")
+    parser.add_argument("--elan", required=True, type=str, help="path to output elan file")
+    parser.add_argument(
+        "--save-segments", type=str, choices=["SENTENCE", "SIGN"], help="whether to save cropped .pose files"
+    )
+    parser.add_argument("--video", default=None, required=False, type=str, help="path to video file")
+    parser.add_argument("--subtitles", default=None, required=False, type=str, help="path to subtitle file")
+    parser.add_argument("--model", default="model_E1s-1.pth", required=False, type=str, help="path to model file")
+    parser.add_argument("--no-pose-link", action="store_true", help="whether to link the pose file")
+
+    return parser.parse_args()
+
+
 def main():
     args = get_args()
 
-    print('Loading pose ...')
+    print("Loading pose ...")
     with open(args.pose, "rb") as f:
         pose = Pose.read(f.read())
-        if 'E4' in args.model:
+        if "E4" in args.model:
             pose = process_pose(pose, optical_flow=True, hand_normalization=True)
         else:
             pose = process_pose(pose)
 
-    print('Loading model ...')
+    print("Loading model ...")
     install_dir = str(os.path.dirname(os.path.abspath(__file__)))
     model = load_model(os.path.join(install_dir, "dist", args.model))
 
-    print('Estimating segments ...')
+    print("Estimating segments ...")
     probs = predict(model, pose)
 
     sign_segments = probs_to_segments(probs["sign"], 60, 50)
     sentence_segments = probs_to_segments(probs["sentence"], 90, 90)
 
-    print('Building ELAN file ...')
+    print("Building ELAN file ...")
     tiers = {
         "SIGN": sign_segments,
         "SENTENCE": sentence_segments,
@@ -137,24 +134,27 @@ def main():
             start_frame = int(segment["start"] / fps * 1000)
             end_frame = int(segment["end"] / fps * 1000)
             eaf.add_annotation(tier_id, start_frame, end_frame)
-            
+
     if args.save_segments:
         print(f"Saving {args.save_segments} cropped .pose files")
         save_pose_segments(tiers, tier_id=args.save_segments, input_file_path=args.pose)
-            
 
     if args.subtitles and os.path.exists(args.subtitles):
         import srt
+
         eaf.add_tier("SUBTITLE")
-        with open(args.subtitles, "r") as infile:
+        # open with explicit encoding,
+        # as directed in https://github.com/cdown/srt/blob/master/srt_tools/utils.py#L155-L160
+        # see also https://github.com/cdown/srt/issues/67, https://github.com/cdown/srt/issues/36
+        with open(args.subtitles, "r", encoding="utf-8-sig") as infile:
             for subtitle in srt.parse(infile):
                 start = subtitle.start.total_seconds()
                 end = subtitle.end.total_seconds()
                 eaf.add_annotation("SUBTITLE", int(start * 1000), int(end * 1000), subtitle.content)
 
-    print('Saving .eaf to disk ...')
+    print("Saving .eaf to disk ...")
     eaf.to_file(args.elan)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()

From be7aafe6c574b405acde3b36d42784648b9c355f Mon Sep 17 00:00:00 2001
From: Colin Leong <122366389+cleong110@users.noreply.github.com>
Date: Wed, 4 Dec 2024 09:11:06 -0500
Subject: [PATCH 5/8] Trying to clarify frame number to ms timestamp conversion

---
 sign_language_segmentation/bin.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/sign_language_segmentation/bin.py b/sign_language_segmentation/bin.py
index 13fd0e1..bc43b86 100644
--- a/sign_language_segmentation/bin.py
+++ b/sign_language_segmentation/bin.py
@@ -130,10 +130,12 @@ def main():
 
     for tier_id, segments in tiers.items():
         eaf.add_tier(tier_id)
+        frames_per_millisecond = fps*1000
         for segment in segments:
-            start_frame = int(segment["start"] / fps * 1000)
-            end_frame = int(segment["end"] / fps * 1000)
-            eaf.add_annotation(tier_id, start_frame, end_frame)
+            # convert frame numbers to millisecond timestamps, for Elan
+            start_frame_time = int(segment["start"] / fps * frames_per_millisecond)
+            end_frame_time = int(segment["end"] / fps * frames_per_millisecond)
+            eaf.add_annotation(tier_id, start_frame_time, end_frame_time)
 
     if args.save_segments:
         print(f"Saving {args.save_segments} cropped .pose files")

From 34bd1ee1914876449f9ea52b6721cc1290cc4a15 Mon Sep 17 00:00:00 2001
From: Colin Leong <122366389+cleong110@users.noreply.github.com>
Date: Wed, 4 Dec 2024 09:18:23 -0500
Subject: [PATCH 6/8] Type annotations for save_pose_segments

---
 sign_language_segmentation/bin.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sign_language_segmentation/bin.py b/sign_language_segmentation/bin.py
index bc43b86..486b944 100644
--- a/sign_language_segmentation/bin.py
+++ b/sign_language_segmentation/bin.py
@@ -58,7 +58,7 @@ def predict(model, pose: Pose):
         return model(pose_data)
 
 
-def save_pose_segments(tiers, tier_id, input_file_path):
+def save_pose_segments(tiers:dict, tier_id:str, input_file_path:Path):
     # reload it without any of the processing, so we get all the original points and such.
     with input_file_path.open("rb") as f:
         pose = Pose.read(f.read())
@@ -69,7 +69,7 @@ def save_pose_segments(tiers, tier_id, input_file_path):
         end_frame = int(segment["end"])
         cropped_pose = Pose(header=pose.header, body=pose.body[start_frame:end_frame])
 
-        print(f"saving cropped pose with start {start_frame} and end {end_frame} to {out_path}")
+        print(f"Saving cropped pose with start {start_frame} and end {end_frame} to {out_path}")
         with out_path.open("wb") as f:
             cropped_pose.write(f)
 

From 0fbc47165d6e61d25bc4918430bf20e6fdce36f1 Mon Sep 17 00:00:00 2001
From: Colin Leong <122366389+cleong110@users.noreply.github.com>
Date: Wed, 4 Dec 2024 09:21:51 -0500
Subject: [PATCH 7/8] Add return type annotations where the return type
 is certain

---
 sign_language_segmentation/bin.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sign_language_segmentation/bin.py b/sign_language_segmentation/bin.py
index 486b944..91eb310 100644
--- a/sign_language_segmentation/bin.py
+++ b/sign_language_segmentation/bin.py
@@ -12,7 +12,7 @@
 from sign_language_segmentation.src.utils.probs_to_segments import probs_to_segments
 
 
-def add_optical_flow(pose: Pose):
+def add_optical_flow(pose: Pose)->None:
     from pose_format.numpy.representation.distance import DistanceRepresentation
     from pose_format.utils.optical_flow import OpticalFlowCalculator
 
@@ -26,7 +26,7 @@ def add_optical_flow(pose: Pose):
     pose.body.data = np.concatenate([pose.body.data, flow], axis=-1).astype(np.float32)
 
 
-def process_pose(pose: Pose, optical_flow=False, hand_normalization=False):
+def process_pose(pose: Pose, optical_flow=False, hand_normalization=False) -> Pose:
     pose = pose.get_components(["POSE_LANDMARKS", "LEFT_HAND_LANDMARKS", "RIGHT_HAND_LANDMARKS"])
 
     normalization_info = pose_normalization_info(pose.header)
@@ -58,7 +58,7 @@ def predict(model, pose: Pose):
         return model(pose_data)
 
 
-def save_pose_segments(tiers:dict, tier_id:str, input_file_path:Path):
+def save_pose_segments(tiers:dict, tier_id:str, input_file_path:Path)->None:
     # reload it without any of the processing, so we get all the original points and such.
     with input_file_path.open("rb") as f:
         pose = Pose.read(f.read())

From f4110c18d6f61c09bf39c29252f22d918a4451f3 Mon Sep 17 00:00:00 2001
From: Colin Leong <122366389+cleong110@users.noreply.github.com>
Date: Wed, 4 Dec 2024 16:00:24 -0500
Subject: [PATCH 8/8] Fix my own mistake in ms time conversion for Elan

---
 sign_language_segmentation/bin.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/sign_language_segmentation/bin.py b/sign_language_segmentation/bin.py
index 91eb310..25e79aa 100644
--- a/sign_language_segmentation/bin.py
+++ b/sign_language_segmentation/bin.py
@@ -130,12 +130,11 @@ def main():
 
     for tier_id, segments in tiers.items():
         eaf.add_tier(tier_id)
-        frames_per_millisecond = fps*1000
         for segment in segments:
             # convert frame numbers to millisecond timestamps, for Elan
-            start_frame_time = int(segment["start"] / fps * frames_per_millisecond)
-            end_frame_time = int(segment["end"] / fps * frames_per_millisecond)
-            eaf.add_annotation(tier_id, start_frame_time, end_frame_time)
+            start_time_ms = int(segment["start"] / fps * 1000)
+            end_time_ms = int(segment["end"] / fps * 1000)
+            eaf.add_annotation(tier_id, start_time_ms, end_time_ms)
 
     if args.save_segments:
         print(f"Saving {args.save_segments} cropped .pose files")