# Libs

In [1]:
from tqdm import tqdm

from mbp.preprocessing.loader import Loader
from mbp.preprocessing.video_repair import VideoRepair
from mbp.utils.media_metadata import VideoMetadataExtractor

from scripts.datasets import sit_datasets
from scripts.check_video_quality import analyse_metadata
from scripts.utils import extract_id_clip

In [32]:
# Inspect the metadata of the raw (not yet repaired) videos of every dataset
# registered in `sit_datasets`.
for dataset in sit_datasets.values():
    loader = Loader(dataset.raw, dataset.video_extension)
    videos = loader.load()

    # Build the labelled-id lookup once per dataset. Previously
    # `dataset.labels['id'].to_list()` was re-evaluated for every file inside
    # the comprehension and then searched linearly.
    # NOTE(review): assumes the ids are hashable (str/int) -- confirm.
    labelled_ids = set(dataset.labels['id'].to_list())

    # Keep only the files whose id (first element returned by
    # `extract_id_clip` for the file stem) appears in the labels table.
    data = [video for video in videos if extract_id_clip(video.stem)[0] in labelled_ids]

    extractor = VideoMetadataExtractor(data)
    metadata = extractor.run()

    # Presumably prints the per-clip summary shown in the cell output -- see
    # scripts.check_video_quality for details.
    analyse_metadata(metadata)

10-Jun-24 14:29:53 - INFO - Initializing data loader...
10-Jun-24 14:29:53 - INFO - Loaded 115 files...
10-Jun-24 14:29:53 - INFO - Initializing video metadata extractor...
100%|██████████| 70/70 [00:12<00:00,  5.61it/s]
10-Jun-24 14:30:06 - INFO - Initializing data loader...
10-Jun-24 14:30:06 - INFO - Loaded 288 files...
10-Jun-24 14:30:06 - INFO - Initializing video metadata extractor...


Information for videos with clip 2:
       duration        fps       frames                          mtime
count      70.0  70.000000    70.000000                             70
mean        0.0  23.374286  5457.371429  2021-07-27 02:20:54.312457472
min         0.0   0.000000  3052.000000            2021-05-04 23:48:38
25%         0.0  30.300000  5457.000000            2021-06-26 06:33:23
50%         0.0  30.300000  5677.500000     2021-07-05 17:51:45.500000
75%         0.0  30.300000  5708.000000  2021-10-02 10:15:23.750000128
max         0.0  30.300000  5767.000000     2021-10-02 12:49:07.468000
std         0.0  12.815216   538.550878                            NaN
Videos with the lowest duration: 
    duration   fps  frames               mtime
0          0  30.3    5713 2021-07-14 15:37:44
37         0  30.3    5767 2021-05-20 16:46:06
38         0   0.0    5764 2021-10-02 10:17:33
39         0  30.3    5675 2021-06-24 15:28:48
40         0  30.3    5678 2021-07-03 12:41:15
41       

100%|██████████| 122/122 [00:37<00:00,  3.28it/s]
10-Jun-24 14:30:43 - INFO - Initializing data loader...
10-Jun-24 14:30:43 - INFO - Loaded 159 files...
10-Jun-24 14:30:43 - INFO - Initializing video metadata extractor...


Information for videos with clip 1:
       duration        fps       frames                          mtime
count      59.0  59.000000    59.000000                             59
mean        0.0  10.784746  5390.423729  2022-01-08 19:32:13.389830400
min         0.0   0.000000  5164.000000            2021-07-07 20:50:51
25%         0.0   0.000000  5303.000000            2021-09-01 11:51:10
50%         0.0   0.000000  5357.000000            2021-11-01 16:11:48
75%         0.0  30.300000  5403.500000     2022-05-25 16:51:49.500000
max         0.0  30.300000  5683.000000            2022-09-14 16:01:09
std         0.0  14.632013   142.964693                            NaN
Videos with the lowest duration: 
    duration   fps  frames               mtime
0          0  30.3    5336 2022-09-01 13:48:20
64         0   0.0    5683 2021-07-07 20:50:59
66         0   0.0    5357 2021-09-01 11:52:24
68         0   0.0    5669 2021-09-01 11:52:07
72         0   0.0    5368 2021-09-01 11:52:01
74       

100%|██████████| 74/74 [00:13<00:00,  5.60it/s]

Information for videos with clip 1:
       duration   fps       frames                          mtime
count      35.0  35.0    35.000000                             35
mean        0.0  30.3  5585.542857  2022-01-22 09:21:21.200000256
min         0.0  30.3  4653.000000            2022-01-13 16:46:37
25%         0.0  30.3  5617.000000            2022-01-13 17:00:25
50%         0.0  30.3  5618.000000            2022-01-13 17:07:45
75%         0.0  30.3  5618.000000     2022-01-13 17:14:36.500000
max         0.0  30.3  5620.000000            2022-09-07 15:13:13
std         0.0   0.0   162.675488                            NaN
Videos with the lowest duration: 
    duration   fps  frames               mtime
0          0  30.3    5617 2022-01-13 16:55:44
40         0  30.3    5617 2022-01-13 17:10:59
43         0  30.3    5617 2022-01-13 17:12:01
45         0  30.3    5618 2022-01-13 17:12:45
47         0  30.3    5618 2022-01-13 17:13:30
49         0  30.3    5619 2022-01-13 17:14:14
51     




In [33]:
def repair(data, output_path):
    """Run ``VideoRepair`` over every item of ``data``, showing a progress bar.

    Args:
        data: Iterable of videos to process; each item is handed unchanged to
            ``VideoRepair.run``.
        output_path: Passed as the only argument to the ``VideoRepair``
            constructor (presumably the directory that receives the repaired
            files -- confirm against ``mbp.preprocessing.video_repair``).

    Returns:
        None.
    """
    fixer = VideoRepair(output_path)

    progress = tqdm(data)
    for clip in progress:
        fixer.run(clip)

In [34]:
# Pass every raw file of each dataset in `sit_datasets` through `repair`,
# targeting that dataset's `corrected` location.
for dataset in sit_datasets.values():
    print(f'Extracting dataset from: {dataset.raw}...')
    data = Loader(dataset.raw, dataset.video_extension).load()
    repair(data, output_path=dataset.corrected)

10-Jun-24 14:36:51 - INFO - Initializing data loader...
10-Jun-24 14:36:51 - INFO - Loaded 115 files...


Extracting dataset from: /data/sit_21_hu_video_mix_home/raw...


100%|██████████| 115/115 [00:00<00:00, 2064.51it/s]
10-Jun-24 14:36:51 - INFO - Initializing data loader...


Extracting dataset from: /data/sit_22_hu_video_mix_lab/raw...


10-Jun-24 14:36:52 - INFO - Loaded 288 files...
100%|██████████| 288/288 [00:00<00:00, 1903.60it/s]
10-Jun-24 14:36:52 - INFO - Initializing data loader...
10-Jun-24 14:36:52 - INFO - Loaded 159 files...


Extracting dataset from: /data/sit_22_alu_video_mix_lab/raw...


100%|██████████| 159/159 [00:00<00:00, 1926.38it/s]


In [35]:
# Re-run the metadata inspection, this time on the files found under each
# dataset's `corrected` location, so the result can be compared with the raw run.
for dataset in sit_datasets.values():
    loader = Loader(dataset.corrected, dataset.video_extension)
    videos = loader.load()

    # Build the labelled-id lookup once per dataset. Previously
    # `dataset.labels['id'].to_list()` was re-evaluated for every file inside
    # the comprehension and then searched linearly.
    # NOTE(review): assumes the ids are hashable (str/int) -- confirm.
    labelled_ids = set(dataset.labels['id'].to_list())

    # Keep only the files whose id (first element returned by
    # `extract_id_clip` for the file stem) appears in the labels table.
    data = [video for video in videos if extract_id_clip(video.stem)[0] in labelled_ids]

    extractor = VideoMetadataExtractor(data)
    metadata = extractor.run()

    # Presumably prints the per-clip summary shown in the cell output -- see
    # scripts.check_video_quality for details.
    analyse_metadata(metadata)

10-Jun-24 14:37:39 - INFO - Initializing data loader...
10-Jun-24 14:37:39 - INFO - Loaded 115 files...
10-Jun-24 14:37:39 - INFO - Initializing video metadata extractor...
100%|██████████| 70/70 [00:31<00:00,  2.20it/s]
10-Jun-24 14:38:11 - INFO - Initializing data loader...
10-Jun-24 14:38:11 - INFO - Loaded 289 files...
10-Jun-24 14:38:11 - INFO - Initializing video metadata extractor...


Information for videos with clip 2:
         duration        fps       frames                          mtime
count   70.000000  70.000000    70.000000                             70
mean   188.987143  23.348571  5599.942857  2023-01-31 16:14:38.061893632
min    170.400000   0.000000  4729.000000     2023-01-28 12:03:46.064637
25%    189.180000  30.000000  5644.750000  2023-01-28 16:15:38.308029952
50%    189.240000  30.300000  5678.000000  2023-02-01 07:07:04.321109504
75%    189.307500  30.300000  5708.000000  2023-02-01 07:07:53.396816640
max    189.720000  30.300000  5767.000000     2023-02-08 01:02:45.601963
std      2.256532  12.801390   221.029206                            NaN
Videos with the lowest duration: 
    duration   fps  frames                      mtime
62    170.40  30.3    5140 2023-02-01 07:06:33.701659
54    189.12  30.3    5744 2023-02-01 07:07:11.764004
57    189.12  30.3    5659 2023-02-01 07:07:03.677205
35    189.12  30.3    4729 2023-02-01 07:08:07.179769
13 

100%|██████████| 122/122 [01:21<00:00,  1.50it/s]
10-Jun-24 14:39:32 - INFO - Initializing data loader...
10-Jun-24 14:39:32 - INFO - Loaded 159 files...
10-Jun-24 14:39:32 - INFO - Initializing video metadata extractor...


Information for videos with clip 1:
         duration        fps       frames                          mtime
count   59.000000  59.000000    59.000000                             59
mean   186.150678  10.784746  5390.423729  2023-01-28 13:32:39.910702080
min    186.000000   0.000000  5164.000000     2023-01-28 13:28:20.021773
25%    186.120000   0.000000  5303.000000  2023-01-28 13:30:28.930592512
50%    186.120000   0.000000  5357.000000  2023-01-28 13:32:30.444514048
75%    186.180000  30.300000  5403.500000  2023-01-28 13:34:41.634995456
max    186.650000  30.300000  5683.000000     2023-01-28 13:36:43.814847
std      0.091668  14.632013   142.964693                            NaN
Videos with the lowest duration: 
     duration   fps  frames                      mtime
114    186.00  30.3    5233 2023-01-28 13:29:51.948095
120    186.06  30.3    5349 2023-01-28 13:30:41.004797
31     186.06  30.3    5164 2023-01-28 13:32:15.558729
106    186.06  30.3    5396 2023-01-28 13:35:46.28738

100%|██████████| 74/74 [00:50<00:00,  1.46it/s]

Information for videos with clip 1:
         duration   fps       frames                          mtime
count   35.000000  35.0    35.000000                             35
mean   186.180571  30.3  5585.542857  2023-01-28 16:07:46.484566272
min    186.120000  30.3  4653.000000     2023-01-28 13:37:01.112280
25%    186.180000  30.3  5617.000000  2023-01-28 16:37:14.954213376
50%    186.180000  30.3  5618.000000  2023-01-28 16:38:20.014540032
75%    186.180000  30.3  5618.000000  2023-01-28 16:39:41.724389888
max    186.240000  30.3  5620.000000     2023-01-28 16:41:08.841435
std      0.024003   0.0   162.675488                            NaN
Videos with the lowest duration: 
    duration   fps  frames                      mtime
72    186.12  30.3    4653 2023-01-28 16:41:08.841435
1     186.13  30.3    5617 2023-01-28 16:37:57.229928
2     186.14  30.3    5617 2023-01-28 16:41:00.674650
36    186.15  30.3    5617 2023-01-28 16:39:01.288403
5     186.18  30.3    5618 2023-01-28 16:37:43.6


