/
video.py
1457 lines (1155 loc) · 45.6 KB
/
video.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
""" Video reading and writing interfaces for different formats. """
import os
import shutil
import h5py as h5
import cv2
import imgstore
import numpy as np
import attr
import cattr
import logging
import multiprocessing
from typing import Iterable, List, Optional, Tuple, Union
from sleap.util import json_loads, json_dumps
logger = logging.getLogger(__name__)
@attr.s(auto_attribs=True, cmp=False)
class DummyVideo:
    """
    Fake video backend that returns all-zero frames.

    This can be useful when you want to look at labels for a dataset but
    don't have access to the real video.
    """

    filename: str = ""
    height: int = 2000
    width: int = 2000
    frames: int = 10000
    channels: int = 1
    dummy: bool = True

    @property
    def test_frame(self):
        """A representative frame (frame 0)."""
        return self.get_frame(0)

    def get_frame(self, idx) -> np.ndarray:
        """Return an all-zero frame of shape (height, width, channels)."""
        frame_shape = (self.height, self.width, self.channels)
        return np.zeros(frame_shape)
@attr.s(auto_attribs=True, cmp=False)
class HDF5Video:
    """
    Video data stored as 4D datasets in HDF5 files.

    Args:
        filename: The name of the HDF5 file where the dataset with video data
            is stored (an open `h5.File` object may also be passed in).
        dataset: The name of the HDF5 dataset where the video data is stored
            (an `h5.Dataset` object may also be passed in).
        input_format: A string value equal to either "channels_last" or
            "channels_first".
            This specifies whether the underlying video data is stored as:

            * "channels_first": shape = (frames, channels, height, width)
            * "channels_last": shape = (frames, height, width, channels)
        convert_range: Whether we should convert data to [0, 255]-range.
    """

    filename: str = attr.ib(default=None)
    dataset: str = attr.ib(default=None)
    input_format: str = attr.ib(default="channels_last")
    convert_range: bool = attr.ib(default=True)

    def __attrs_post_init__(self):
        """Called by attrs after __init__()."""
        self.enable_source_video = True
        self._test_frame_ = None
        self.__original_to_current_frame_idx = dict()
        self.__dataset_h5 = None
        self.__tried_to_load = False

    @input_format.validator
    def check(self, attribute, value):
        """Called by attrs to validate the input format."""
        if value not in ["channels_first", "channels_last"]:
            raise ValueError(f"HDF5Video input_format={value} invalid.")

        # Cache the axis indices implied by the storage layout.
        if value == "channels_first":
            self.__channel_idx = 1
            self.__width_idx = 2
            self.__height_idx = 3
        else:
            self.__channel_idx = 3
            self.__width_idx = 2
            self.__height_idx = 1

    def _load(self):
        """Lazily open the HDF5 file and dataset (attempted at most once)."""
        if self.__tried_to_load:
            return
        self.__tried_to_load = True

        # Handle cases where the user feeds in h5.File objects instead of filename.
        if isinstance(self.filename, h5.File):
            self.__file_h5 = self.filename
            self.filename = self.__file_h5.filename
        elif isinstance(self.filename, str):
            try:
                self.__file_h5 = h5.File(self.filename, "r")
            except OSError as ex:
                raise FileNotFoundError(
                    f"Could not find HDF5 file {self.filename}"
                ) from ex
        else:
            self.__file_h5 = None

        # Handle the case when h5.Dataset is passed in.
        if isinstance(self.dataset, h5.Dataset):
            self.__dataset_h5 = self.dataset
            self.__file_h5 = self.__dataset_h5.file
            self.dataset = self.__dataset_h5.name

        # File loaded and dataset name given, so load dataset.
        elif isinstance(self.dataset, str) and (self.__file_h5 is not None):
            self.__dataset_h5 = self.__file_h5[self.dataset]

            # Check for frame_numbers dataset corresponding to video.
            base_dataset_path = "/".join(self.dataset.split("/")[:-1])
            framenum_dataset = f"{base_dataset_path}/frame_numbers"
            if framenum_dataset in self.__file_h5:
                original_idx_lists = self.__file_h5[framenum_dataset]
                # Create map from idx in original video to idx in current.
                for current_idx in range(len(original_idx_lists)):
                    original_idx = original_idx_lists[current_idx]
                    self.__original_to_current_frame_idx[original_idx] = current_idx

            # If the source video metadata was stored alongside the dataset,
            # deserialize it so we can fall back to it for missing frames.
            source_video_group = f"{base_dataset_path}/source_video"
            if source_video_group in self.__file_h5:
                d = json_loads(
                    self.__file_h5.require_group(source_video_group).attrs["json"]
                )
                self._source_video_ = Video.cattr().structure(d, Video)

    @property
    def __dataset_h5(self) -> h5.Dataset:
        # Trigger lazy loading on first access.
        if self.__loaded_dataset is None and not self.__tried_to_load:
            self._load()
        return self.__loaded_dataset

    @__dataset_h5.setter
    def __dataset_h5(self, val):
        self.__loaded_dataset = val

    @property
    def test_frame(self):
        """A cached sample frame used to infer dtype and shape."""
        # Load if not already loaded
        if self._test_frame_ is None:
            # Lets grab a test frame to help us figure things out about the video
            self._test_frame_ = self.get_frame(self.last_frame_idx)

        # Return stored test frame
        return self._test_frame_

    @property
    def enable_source_video(self):
        """Whether frames may be read from the source video as a fallback."""
        return self._enable_source_video

    @enable_source_video.setter
    def enable_source_video(self, val):
        self._enable_source_video = val

    def matches(self, other: "HDF5Video") -> bool:
        """
        Check if attributes match those of another video.

        Args:
            other: The other video to compare with.

        Returns:
            True if attributes match, False otherwise.
        """
        return (
            self.filename == other.filename
            and self.dataset == other.dataset
            and self.convert_range == other.convert_range
            and self.input_format == other.input_format
        )

    def close(self):
        """Closes the HDF5 file object (if it's open)."""
        try:
            self.__file_h5.close()
        except Exception:
            # File may never have been opened, or is already closed.
            pass
        self.__file_h5 = None

    def __del__(self):
        """Releases file object."""
        self.close()

    def _try_frame_from_source_video(self, idx) -> np.ndarray:
        """Fall back to the source video for a frame not stored in this file."""
        try:
            return self._source_video.get_frame(idx)
        except Exception:
            raise ValueError(f"Frame index {idx} not in original index.")

    @property
    def _source_video(self) -> "Video":
        """The deserialized source video, or None if unavailable/disabled."""
        if self.enable_source_video:
            if hasattr(self, "_source_video_") and self._source_video_:
                return self._source_video_
        return None

    # The properties and methods below complete our contract with the
    # higher level Video interface.

    @property
    def frames(self):
        """See :class:`Video`."""
        return self.__dataset_h5.shape[0]

    @property
    def channels(self):
        """See :class:`Video`."""
        if "channels" in self.__dataset_h5.attrs:
            return int(self.__dataset_h5.attrs["channels"])
        return self.__dataset_h5.shape[self.__channel_idx]

    @property
    def width(self):
        """See :class:`Video`."""
        if "width" in self.__dataset_h5.attrs:
            return int(self.__dataset_h5.attrs["width"])
        return self.__dataset_h5.shape[self.__width_idx]

    @property
    def height(self):
        """See :class:`Video`."""
        if "height" in self.__dataset_h5.attrs:
            return int(self.__dataset_h5.attrs["height"])
        return self.__dataset_h5.shape[self.__height_idx]

    @property
    def dtype(self):
        """See :class:`Video`."""
        return self.test_frame.dtype

    @property
    def last_frame_idx(self) -> int:
        """
        The idx number of the last frame.

        Overrides method of base :class:`Video` class for videos with
        select frames indexed by number from original video, since the last
        frame index here will not match the number of frames in video.
        """
        # Ensure that video is loaded since we'll need data from loading
        self._load()
        if self.__original_to_current_frame_idx:
            last_key = sorted(self.__original_to_current_frame_idx.keys())[-1]
            return last_key
        return self.frames - 1

    def reset(self):
        """Reloads the video."""
        # TODO
        pass

    def get_frame(self, idx) -> np.ndarray:
        """
        Get a frame from the underlying HDF5 video data.

        Args:
            idx: The index of the frame to get.

        Returns:
            The numpy.ndarray representing the video frame data.
        """
        # Ensure that video is loaded since we'll need data from loading
        self._load()

        # If we only saved some frames from a video, map to idx in dataset.
        if self.__original_to_current_frame_idx:
            if idx in self.__original_to_current_frame_idx:
                idx = self.__original_to_current_frame_idx[idx]
            else:
                return self._try_frame_from_source_video(idx)

        frame = self.__dataset_h5[idx]

        if self.__dataset_h5.attrs.get("format", ""):
            frame = cv2.imdecode(frame, cv2.IMREAD_UNCHANGED)

            # Add dimension for single channel (dropped by opencv).
            if frame.ndim == 2:
                frame = frame[..., np.newaxis]

        if self.input_format == "channels_first":
            frame = np.transpose(frame, (2, 1, 0))

        if self.convert_range and np.max(frame) <= 1.0:
            # Fix: cast to uint8, not Python int (int64), so the returned
            # dtype matches the [0, 255] image convention and stays compact.
            frame = (frame * 255).astype(np.uint8)

        return frame
@attr.s(auto_attribs=True, cmp=False)
class MediaVideo:
    """
    Video data stored in traditional media formats readable by FFMPEG.

    This class provides bare minimum read only interface on top of
    OpenCV's VideoCapture class.

    Args:
        filename: The name of the file (.mp4, .avi, etc)
        grayscale: Whether the video is grayscale or not. If unspecified,
            this is auto-detected from the first frame.
        bgr: Whether color channels ordered as (blue, green, red).
    """

    filename: str = attr.ib()
    grayscale: bool = attr.ib()
    bgr: bool = attr.ib(default=True)

    # Unused attributes still here so we don't break deserialization
    dataset: str = attr.ib(default="")
    input_format: str = attr.ib(default="")

    _detect_grayscale = False
    _reader_ = None
    _test_frame_ = None

    @property
    def __lock(self):
        # Created lazily so deserialized/unpickled instances get a lock too.
        if not hasattr(self, "_lock"):
            self._lock = multiprocessing.RLock()
        return self._lock

    @grayscale.default
    def __grayscale_default__(self):
        # When the user does not specify grayscale, remember to auto-detect
        # it from the first frame once the reader is opened.
        self._detect_grayscale = True
        return False

    @property
    def __reader(self):
        # Load if not already loaded
        if self._reader_ is None:
            if not os.path.isfile(self.filename):
                raise FileNotFoundError(
                    # Fix: original message had garbled duplicated words.
                    f"Could not find video file named {self.filename}"
                )

            # Try and open the file either locally in current directory or with full path
            self._reader_ = cv2.VideoCapture(self.filename)

            # If the user specified None for grayscale bool, figure it out based on
            # the first frame of data.
            if self._detect_grayscale is True:
                # Grayscale if first and last channels are identical everywhere.
                # Fix: np.alltrue was removed in NumPy 2.0; np.all is equivalent.
                self.grayscale = bool(
                    np.all(self.test_frame[..., 0] == self.test_frame[..., -1])
                )

        # Return cached reader
        return self._reader_

    @property
    def __frames_float(self):
        # Raw frame count as reported by OpenCV (a float).
        return self.__reader.get(cv2.CAP_PROP_FRAME_COUNT)

    @property
    def test_frame(self):
        """A cached sample frame (frame 0, full color) used to infer shape/dtype."""
        # Load if not already loaded
        if self._test_frame_ is None:
            # Lets grab a test frame to help us figure things out about the video
            self._test_frame_ = self.get_frame(0, grayscale=False)

        # Return stored test frame
        return self._test_frame_

    def matches(self, other: "MediaVideo") -> bool:
        """
        Check if attributes match those of another video.

        Args:
            other: The other video to compare with.

        Returns:
            True if attributes match, False otherwise.
        """
        return (
            self.filename == other.filename
            and self.grayscale == other.grayscale
            and self.bgr == other.bgr
        )

    @property
    def fps(self) -> float:
        """Returns frames per second of video."""
        return self.__reader.get(cv2.CAP_PROP_FPS)

    # The properties and methods below complete our contract with the
    # higher level Video interface.

    @property
    def frames(self):
        """See :class:`Video`."""
        return int(self.__frames_float)

    @property
    def channels(self):
        """See :class:`Video`."""
        if self.grayscale:
            return 1
        else:
            return self.test_frame.shape[2]

    @property
    def width(self):
        """See :class:`Video`."""
        return self.test_frame.shape[1]

    @property
    def height(self):
        """See :class:`Video`."""
        return self.test_frame.shape[0]

    @property
    def dtype(self):
        """See :class:`Video`."""
        return self.test_frame.dtype

    def reset(self):
        """Reloads the video."""
        self._reader_ = None

    def get_frame(self, idx: int, grayscale: Optional[bool] = None) -> np.ndarray:
        """
        Read a single frame.

        Args:
            idx: The index of the frame to read.
            grayscale: Override the video-level grayscale setting for this
                read; None uses ``self.grayscale``.

        Returns:
            The frame as (height, width, channels), RGB if ``bgr`` is set.

        Raises:
            KeyError: If the frame could not be decoded.
        """
        with self.__lock:
            # Only seek when necessary; sequential reads avoid a costly seek.
            if self.__reader.get(cv2.CAP_PROP_POS_FRAMES) != idx:
                self.__reader.set(cv2.CAP_PROP_POS_FRAMES, idx)
            success, frame = self.__reader.read()

        if not success or frame is None:
            raise KeyError(f"Unable to load frame {idx} from {self}.")

        if grayscale is None:
            grayscale = self.grayscale

        if grayscale:
            # Keep a singleton channel axis.
            frame = frame[..., 0][..., None]

        if self.bgr:
            # OpenCV decodes BGR; flip channel order to RGB.
            frame = frame[..., ::-1]

        return frame
@attr.s(auto_attribs=True, cmp=False)
class NumpyVideo:
    """
    Video data stored as Numpy array.

    Args:
        filename: Either a file to load or a numpy array of the data.

            * numpy data shape: (frames, height, width, channels)
    """

    filename: Union[str, np.ndarray] = attr.ib()

    def __attrs_post_init__(self):
        # Axis indices for the (frames, height, width, channels) layout.
        self.__frame_idx = 0
        self.__height_idx = 1
        self.__width_idx = 2
        self.__channel_idx = 3

        # Handle cases where the user feeds in np.array instead of filename
        if isinstance(self.filename, np.ndarray):
            self.__data = self.filename
            self.filename = "Raw Video Data"
        elif isinstance(self.filename, str):
            try:
                self.__data = np.load(self.filename)
            except OSError as ex:
                raise FileNotFoundError(
                    f"Could not find filename {self.filename}"
                ) from ex
        else:
            self.__data = None

    def set_video_ndarray(self, data: np.ndarray):
        """Replace the underlying video data array."""
        self.__data = data

    # The properties and methods below complete our contract with the
    # higher level Video interface.

    @property
    def test_frame(self):
        """A sample frame (frame 0)."""
        return self.get_frame(0)

    def matches(self, other: "NumpyVideo") -> bool:
        """
        Check if data matches that of another video.

        Args:
            other: The other video to compare with.

        Returns:
            True if the underlying arrays are equal, False otherwise.
        """
        # Fix: array_equal returns a plain bool and handles mismatched
        # shapes cleanly, unlike elementwise `==` under np.all.
        return np.array_equal(self.__data, other.__data)

    @property
    def frames(self):
        """See :class:`Video`."""
        return self.__data.shape[self.__frame_idx]

    @property
    def channels(self):
        """See :class:`Video`."""
        return self.__data.shape[self.__channel_idx]

    @property
    def width(self):
        """See :class:`Video`."""
        return self.__data.shape[self.__width_idx]

    @property
    def height(self):
        """See :class:`Video`."""
        return self.__data.shape[self.__height_idx]

    @property
    def dtype(self):
        """See :class:`Video`."""
        return self.__data.dtype

    def reset(self):
        """Reloads the video."""
        # TODO
        pass

    def get_frame(self, idx):
        """See :class:`Video`."""
        return self.__data[idx]
@attr.s(auto_attribs=True, cmp=False)
class ImgStoreVideo:
    """
    Video data stored as an ImgStore dataset.

    See: https://github.com/loopbio/imgstore

    This class is just a lightweight wrapper for reading such datasets as
    video sources for SLEAP.

    Args:
        filename: The name of the file or directory to the imgstore.
        index_by_original: ImgStores are great for storing a collection of
            selected frames from a larger video. If index_by_original is
            set to True then the get_frame function will accept the original
            frame numbers from the original video. If False, then it will
            accept the frame index from the store directly.
            Default to True so that we can use an ImgStoreVideo in a dataset
            to replace another video without having to update all the frame
            indices on :class:`LabeledFrame` objects in the dataset.
    """

    filename: str = attr.ib(default=None)
    index_by_original: bool = attr.ib(default=True)
    _store_ = None
    _img_ = None

    def __attrs_post_init__(self):
        # If the filename does not contain metadata.yaml, append it to the filename
        # assuming that this is a directory that contains the imgstore.
        if "metadata.yaml" not in self.filename:
            # Use "/" since this works on Windows and posix
            self.filename = self.filename + "/metadata.yaml"

        # Make relative path into absolute, ImgStores don't work properly it seems
        # without full paths if we change working directories. Video.fixup_path will
        # fix this later when loading these datasets.
        self.filename = os.path.abspath(self.filename)

        self.__store = None

    # The properties and methods below complete our contract with the
    # higher level Video interface.

    def matches(self, other):
        """
        Check if attributes match.

        Args:
            other: The instance to compare with.

        Returns:
            True if attributes match, False otherwise
        """
        return (
            self.filename == other.filename
            and self.index_by_original == other.index_by_original
        )

    @property
    def __store(self):
        # Lazily open the store on first access.
        if self._store_ is None:
            self.open()
        return self._store_

    @__store.setter
    def __store(self, val):
        self._store_ = val

    @property
    def __img(self):
        # Sample image used to infer shape/dtype; opening the store loads it.
        if self._img_ is None:
            self.open()
        return self._img_

    @property
    def frames(self):
        """See :class:`Video`."""
        return self.__store.frame_count

    @property
    def channels(self):
        """See :class:`Video`."""
        if len(self.__img.shape) < 3:
            return 1
        else:
            return self.__img.shape[2]

    @property
    def width(self):
        """See :class:`Video`."""
        return self.__img.shape[1]

    @property
    def height(self):
        """See :class:`Video`."""
        return self.__img.shape[0]

    @property
    def dtype(self):
        """See :class:`Video`."""
        return self.__img.dtype

    @property
    def last_frame_idx(self) -> int:
        """
        The idx number of the last frame.

        Overrides method of base :class:`Video` class for videos with
        select frames indexed by number from original video, since the last
        frame index here will not match the number of frames in video.
        """
        if self.index_by_original:
            return self.__store.frame_max
        return self.frames - 1

    def reset(self):
        """Reloads the video."""
        # TODO
        pass

    def get_frame(self, frame_number: int) -> np.ndarray:
        """
        Get a frame from the underlying ImgStore video data.

        Args:
            frame_number: The number of the frame to get. If
                index_by_original is True this is the frame number from the
                original video; if False, it is the frame index within the
                imgstore itself. That is, if there are 4 frames in the
                imgstore, this number should be from 0 to 3.

        Returns:
            The numpy.ndarray representing the video frame data.
        """
        # Check if we need to open the imgstore and do it if needed
        if not self._store_:
            self.open()

        if self.index_by_original:
            img, (frame_number, frame_timestamp) = self.__store.get_image(frame_number)
        else:
            img, (frame_number, frame_timestamp) = self.__store.get_image(
                frame_number=None, frame_index=frame_number
            )

        # If the frame has one channel, add a singleton channel as it seems other
        # video implementations do this.
        if img.ndim == 2:
            img = img[:, :, None]

        return img

    @property
    def imgstore(self):
        """
        Get the underlying ImgStore object for this Video.

        Returns:
            The imgstore that is backing this video object.
        """
        return self.__store

    def open(self):
        """
        Open the image store if it isn't already open.

        Returns:
            None
        """
        if not self._store_:
            # Open the imgstore
            self._store_ = imgstore.new_for_filename(self.filename)

            # Read a frame so we can compute shape and such
            self._img_, (frame_number, frame_timestamp) = self._store_.get_next_image()

    def close(self):
        """
        Close the imgstore if it isn't already closed.

        Returns:
            None
        """
        if self.imgstore:
            # Close the imgstore and drop the reference
            self.__store.close()
            self.__store = None
@attr.s(auto_attribs=True, cmp=False)
class SingleImageVideo:
    """
    Video wrapper for individual image files.

    Args:
        filename: Representative file (defaults to the first of filenames).
        filenames: Files to load as video.
    """

    filename: Optional[str] = attr.ib(default=None)
    filenames: Optional[List[str]] = attr.ib(factory=list)
    height_: Optional[int] = attr.ib(default=None)
    width_: Optional[int] = attr.ib(default=None)
    channels_: Optional[int] = attr.ib(default=None)

    def __attrs_post_init__(self):
        # Keep filename and filenames consistent with each other.
        if not self.filename and self.filenames:
            self.filename = self.filenames[0]
        elif self.filename and not self.filenames:
            self.filenames = [self.filename]

        self.__data = dict()  # cache of loaded frames, keyed by frame index
        self.test_frame_ = None

    def _load_idx(self, idx):
        """Read image `idx` from disk, converting to RGB channel order."""
        img = cv2.imread(self._get_filename(idx))
        if img.shape[2] == 3:
            # OpenCV channels are in BGR order, so we should convert to RGB
            img = img[:, :, ::-1]
        return img

    def _get_filename(self, idx: int) -> str:
        """Resolve a readable path for image `idx` or raise FileNotFoundError."""
        f = self.filenames[idx]
        if os.path.exists(f):
            return f

        # Try the directory from the "video" file (this works if all the images
        # are in the same directory with distinctive filenames).
        f = os.path.join(os.path.dirname(self.filename), os.path.basename(f))
        if os.path.exists(f):
            return f

        raise FileNotFoundError(f"Unable to locate file {idx}: {self.filenames[idx]}")

    def _load_test_frame(self):
        """Load frame 0 once and cache its shape information."""
        if self.test_frame_ is None:
            self.test_frame_ = self._load_idx(0)

            if self.height_ is None:
                self.height_ = self.test_frame.shape[0]
            if self.width_ is None:
                self.width_ = self.test_frame.shape[1]
            if self.channels_ is None:
                self.channels_ = self.test_frame.shape[2]

    def get_idx_from_filename(self, filename: str) -> Optional[int]:
        """Return the frame index for a filename, or None if not found."""
        try:
            return self.filenames.index(filename)
        except ValueError:
            # Fix: list.index raises ValueError (not IndexError) on a miss.
            return None

    # The properties and methods below complete our contract with the
    # higher level Video interface.

    @property
    def test_frame(self) -> np.ndarray:
        """A cached sample frame (frame 0)."""
        self._load_test_frame()
        return self.test_frame_

    def matches(self, other: "SingleImageVideo") -> bool:
        """
        Check if attributes match those of another video.

        Args:
            other: The other video to compare with.

        Returns:
            True if attributes match, False otherwise.
        """
        return self.filenames == other.filenames

    @property
    def frames(self):
        """See :class:`Video`."""
        return len(self.filenames)

    @property
    def channels(self):
        """See :class:`Video`."""
        if self.channels_ is None:
            self._load_test_frame()
        return self.channels_

    @property
    def width(self):
        """See :class:`Video`."""
        if self.width_ is None:
            self._load_test_frame()
        return self.width_

    @width.setter
    def width(self, val):
        self.width_ = val

    @property
    def height(self):
        """See :class:`Video`."""
        if self.height_ is None:
            self._load_test_frame()
        return self.height_

    @height.setter
    def height(self, val):
        self.height_ = val

    @property
    def dtype(self):
        """See :class:`Video`."""
        # Fix: self.__data is a dict cache with no .dtype; use a real frame.
        return self.test_frame.dtype

    def reset(self):
        """Reloads the video."""
        # TODO
        pass

    def get_frame(self, idx):
        """See :class:`Video`."""
        if idx not in self.__data:
            self.__data[idx] = self._load_idx(idx)
        return self.__data[idx]
@attr.s(auto_attribs=True, cmp=False)
class Video:
"""
The top-level interface to any Video data used by SLEAP.
This class provides a common interface for various supported video data
backends. It provides the bare minimum of properties and methods that
any video data needs to support in order to function with other SLEAP
components. This interface currently only supports reading of video
data, there is no write support. Unless one is creating a new video
backend, this class should be instantiated from its various class methods
for different formats. For example:
>>> video = Video.from_hdf5(filename="test.h5", dataset="box")
>>> video = Video.from_media(filename="test.mp4")
Or we can use auto-detection based on filename:
>>> video = Video.from_filename(filename="test.mp4")
Args:
backend: A backend is an object that implements the following basic
required methods and properties
* Properties
* :code:`frames`: The number of frames in the video
* :code:`channels`: The number of channels in the video
(e.g. 1 for grayscale, 3 for RGB)
* :code:`width`: The width of each frame in pixels
* :code:`height`: The height of each frame in pixels
* Methods
* :code:`get_frame(frame_index: int) -> np.ndarray`:
Get a single frame from the underlying video data with
output shape=(height, width, channels).
"""
backend: Union[
HDF5Video, NumpyVideo, MediaVideo, ImgStoreVideo, SingleImageVideo, DummyVideo
] = attr.ib()
def __getattr__(self, item):
    # Delegate any attribute not found on Video itself to the backend
    # (e.g. frames, width, height, channels, dtype).
    return getattr(self.backend, item)
@property
def num_frames(self) -> int:
    """Alias for the ``frames`` property: the number of frames in the video."""
    return self.frames
@property
def last_frame_idx(self) -> int:
    """
    The idx number of the last frame. Usually `numframes - 1`.

    Backends that index frames by their original video frame numbers
    (e.g. HDF5Video, ImgStoreVideo) supply their own ``last_frame_idx``,
    which is preferred over the ``frames - 1`` default.
    """
    if hasattr(self.backend, "last_frame_idx"):
        return self.backend.last_frame_idx
    return self.frames - 1
@property
def shape(self) -> Tuple[int, int, int, int]:
    """Tuple of (frame count, height, width, channels)."""
    return self.frames, self.height, self.width, self.channels
def __str__(self):
    """Informal string representation (for print or format)."""
    n_frames, height, width, n_channels = self.shape
    return f"{type(self).__name__} ([{n_frames} x {height} x {width} x {n_channels}])"
def __len__(self):
    """Return the length of the video: its number of frames."""
    return self.frames
def get_frame(self, idx: int) -> np.ndarray:
    """
    Return a single frame of video from the underlying video data.

    Args:
        idx: The index of the video frame.

    Returns:
        The video frame with shape (height, width, channels).
    """
    return self.backend.get_frame(idx)
def get_frames(self, idxs: Union[int, Iterable[int]]) -> np.ndarray:
    """
    Return a collection of video frames from the underlying video data.

    Args:
        idxs: A single frame index or an iterable of frame indices.

    Returns:
        The requested video frames stacked with shape
        (len(idxs), height, width, channels).
    """
    frame_idxs = [idxs] if np.isscalar(idxs) else idxs
    frames = [self.get_frame(i) for i in frame_idxs]
    return np.stack(frames, axis=0)
def get_frames_safely(self, idxs: Iterable[int]) -> Tuple[List[int], np.ndarray]:
"""
Returns list of frame indices and frames which were successfully loaded.
idxs: An iterable object that contains the indices of frames.
Returns: A tuple of (frame indices, frames), where
* frame indices is a subset of the specified idxs, and
* frames has shape (len(frame indices), height, width, channels).
If zero frames were loaded successfully, then frames is None.
"""
frames = []