# Data exploration of input videos

In [8]:
# Print ffprobe's container and stream summary for one sample input (version banner hidden).
!ffprobe -hide_banner input_files/Cosmos_War_of_the_Planets.mp4

Input #0, mov,mp4,m4a,3gp,3g2,mj2, from 'input_files/Cosmos_War_of_the_Planets.mp4':
  Metadata:
    major_brand     : mp42
    minor_version   : 0
    compatible_brands: mp42mp41
    creation_time   : 2021-08-02T19:15:48.000000Z
  Duration: 00:00:20.05, start: 0.000000, bitrate: 3309 kb/s
  Stream #0:0[0x1](eng): Video: h264 (Main) (avc1 / 0x31637661), yuv420p(progressive), 628x354 [SAR 1:1 DAR 314:177], 2989 kb/s, 29.97 fps, 29.97 tbr, 30k tbn (default)
    Metadata:
      creation_time   : 2021-08-02T19:15:48.000000Z
      handler_name    : ?Mainconcept Video Media Handler
      vendor_id       : [0][0][0][0]
      encoder         : AVC Coding
  Stream #0:1[0x2](eng): Audio: aac (LC) (mp4a / 0x6134706D), 48000 Hz, stereo, fltp, 317 kb/s (default)
    Metadata:
      creation_time   : 2021-08-02T19:15:48.000000Z
      handler_name    : #Mainconcept MP4 Sound Media Handler
      vendor_id       : [0][0][0][0]


In [9]:
# Build an IPython Video display object for the sample clip; as the last
# expression of the cell it is rendered as the cell's output.
from IPython.display import Video
Video("input_files/Cosmos_War_of_the_Planets.mp4")

# Video analysis and report generation 

In [157]:
# Target profile that a probed video is compared against by compare_attrs.
# The bitrate bounds are presumably in Mbit/s (video) and Kbit/s (audio), as
# the `_mbs` / `_kbs` key suffixes suggest -- confirm with the spec owner.
EXPECTED_FORMAT = dict(
    # Container and codecs
    container="mp42",
    video_codec="h264",
    audio_codec="aac",
    # Picture properties
    frame_rate=25,
    aspect_ratio="16:9",
    resolution_width=640,
    resolution_height=360,
    # Bitrate limits
    video_br_min_mbs=2,
    video_br_max_mbs=5,
    audio_br_max_kbs=256,
    # Audio layout
    audio_channels=2,
)

In [166]:
# Sample inputs available for analysis. Exactly one is active at a time and is
# selected explicitly below, instead of relying on whichever assignment ran last.
INPUT_VIDEOS = (
    "input_files/Cosmos_War_of_the_Planets.mp4",
    "input_files/Last_man_on_earth_1964.mov",
    "input_files/The_Gun_and_the_Pulpit.avi",
    "input_files/The_Hill_Gang_Rides_Again.mp4",
    "input_files/Voyage_to_the_Planet_of_Prehistoric_Women.mp4",
)

# File analysed by the cells below; change the index to probe a different sample.
VIDEO_FILENAME = INPUT_VIDEOS[-1]

The `probe_file` function below runs the `ffprobe` command from Python and returns its output parsed as JSON. Inspired by: https://stackoverflow.com/a/9896732

In [167]:
import os, sys, subprocess, shlex, re
import json
from subprocess import call
from fractions import Fraction

def probe_file(filename):
    """Run ffprobe on ``filename`` and return its report as parsed JSON.

    ffprobe is asked for stream and format information in JSON form. The
    ``-pretty`` flag makes numeric fields carry units, e.g. a ``bit_rate`` of
    ``"87.438878 Mbit/s"``.

    Args:
        filename: Path of the media file to inspect.

    Returns:
        The decoded JSON document that ffprobe wrote to stdout.

    Raises:
        Exception: If ffprobe exits with a non-zero status or writes anything
            to stderr.
    """
    cmnd = ['ffprobe', "-print_format", "json", "-show_streams", '-show_format', '-pretty', '-loglevel', 'quiet', filename]
    p = subprocess.Popen(cmnd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, err = p.communicate()
    # `-loglevel quiet` suppresses ffprobe's diagnostics, so stderr is usually
    # empty even when probing fails; the exit status is the reliable signal.
    if p.returncode != 0 or err:
        message = err or f"ffprobe exited with status {p.returncode} for {filename!r}"
        print("========= error ========")
        print(message)
        raise Exception(message)
    return json.loads(out)

def calc_ar(w, h):
    """Return the aspect ratio of ``w`` by ``h`` reduced to lowest terms.

    The result is formatted as ``"<numerator>:<denominator>"``; for example a
    640x360 frame yields ``"16:9"``.
    """
    ratio = Fraction(w, h)
    return "{}:{}".format(ratio.numerator, ratio.denominator)

def parse_probe(probe_r):
    """Flatten an ffprobe report into the attributes that are validated.

    Args:
        probe_r: Parsed ffprobe JSON containing a ``"format"`` section and a
            ``"streams"`` list (the value returned by ``probe_file``).

    Returns:
        A dict with the keys ``container``, ``video_codec``, ``audio_codec``,
        ``frame_rate``, ``aspect_ratio``, ``resolution_width``,
        ``resolution_height``, ``video_br_mbs``, ``audio_br_kbs`` and
        ``audio_channels``. The two bitrate entries hold the ``bit_rate``
        values exactly as reported by ffprobe; they are not converted here.

    Raises:
        ValueError: If the report has no video stream or no audio stream.
        KeyError: If a stream lacks one of the fields read below (for example
            ``bit_rate``).
    """
    # Read from the argument, not from a notebook-level variable, so the
    # function works for any report that is passed in.
    video_info = extract_stream_info(probe_r, "video")
    audio_info = extract_stream_info(probe_r, "audio")
    for kind, info in (("video", video_info), ("audio", audio_info)):
        if info is None:
            raise ValueError(f"ffprobe report contains no {kind} stream")

    r = {}

    # Prefer the `major_brand` tag; fall back to ffprobe's format name when the
    # format has no tags at all or has tags without a major brand.
    format_info = probe_r["format"]
    format_tags = format_info.get("tags", {})
    if "major_brand" in format_tags:
        r["container"] = format_tags["major_brand"]
    else:
        r["container"] = format_info["format_name"]

    r["video_codec"] = video_info["codec_name"]
    r["audio_codec"] = audio_info["codec_name"]

    # avg_frame_rate is a rational such as "30000/1001" or "25/1".
    if "/" in video_info["avg_frame_rate"]:
        num, denom = video_info["avg_frame_rate"].split("/")
        fr = int(num) / int(denom)
    else:
        fr = int(video_info["avg_frame_rate"])
    r["frame_rate"] = fr

    # Use the aspect ratio ffprobe reports when present; otherwise derive it
    # from the pixel dimensions.
    if "display_aspect_ratio" in video_info:
        r["aspect_ratio"] = video_info["display_aspect_ratio"]
    else:
        r["aspect_ratio"] = calc_ar(video_info["width"], video_info["height"])
    r["resolution_width"] = video_info["width"]
    r["resolution_height"] = video_info["height"]
    r["video_br_mbs"] = video_info["bit_rate"]
    r["audio_br_kbs"] = audio_info["bit_rate"]

    r["audio_channels"] = audio_info["channels"]
    return r

# Multipliers for the decimal prefixes that can precede "bit/s" in a bitrate string.
_BITRATE_SCALE = {"": 1, "K": 10**3, "M": 10**6, "G": 10**9}

# Fields that must equal the expected value exactly, in the order they are reported.
_EXACT_MATCH_FIELDS = (
    "container",
    "video_codec",
    "audio_codec",
    "frame_rate",
    "aspect_ratio",
    "resolution_width",
    "resolution_height",
)


def _bitrate_in(raw, target_prefix):
    """Convert a bitrate string such as "800 Kbit/s" to the unit named by ``target_prefix``.

    A value without a recognised ``<prefix>bit/s`` unit is returned as its bare
    number, i.e. it is assumed to already be in the target unit.
    """
    tokens = str(raw).split()
    value = float(tokens[0])
    if len(tokens) < 2 or not tokens[1].endswith("bit/s"):
        return value
    source = _BITRATE_SCALE.get(tokens[1][:-len("bit/s")])
    target = _BITRATE_SCALE[target_prefix]
    if source is None or source == target:
        # Same unit (or unknown prefix): keep the number untouched so no
        # floating-point rounding is introduced.
        return value
    return value * source / target


def compare_attrs(video_format, expected_format):
    """List the attributes of ``video_format`` that violate ``expected_format``.

    Args:
        video_format: Dict of probed attributes (as built by ``parse_probe``).
            ``video_br_mbs`` and ``audio_br_kbs`` are bitrate strings such as
            ``"8.038857 Mbit/s"`` or ``"320 Kbit/s"``.
        expected_format: Dict with the required values and the bounds
            ``video_br_min_mbs``, ``video_br_max_mbs`` and ``audio_br_max_kbs``.

    Returns:
        A list of ``(field, actual_value)`` tuples, one per failing check.
        Bitrates are reported as floats in Mbit/s (video) and Kbit/s (audio).
    """
    problematic_fields = []

    # FIXME: video_format["container"] may not identify the container itself;
    # it is whatever parse_probe stored under that key.
    for field in _EXACT_MATCH_FIELDS:
        if video_format[field] != expected_format[field]:
            problematic_fields.append((field, video_format[field]))

    # The unit in the bitrate string is honoured, so "800 Kbit/s" is 0.8 Mbit/s
    # rather than 800.
    video_br_mbs = _bitrate_in(video_format["video_br_mbs"], "M")
    too_low = video_br_mbs < expected_format["video_br_min_mbs"]
    too_high = video_br_mbs > expected_format["video_br_max_mbs"]
    if too_low or too_high:
        problematic_fields.append(("video_br_mbs", video_br_mbs))

    audio_br_kbs = _bitrate_in(video_format["audio_br_kbs"], "K")
    if audio_br_kbs > expected_format["audio_br_max_kbs"]:
        problematic_fields.append(("audio_br_kbs", audio_br_kbs))

    if video_format["audio_channels"] != expected_format["audio_channels"]:
        problematic_fields.append(("audio_channels", video_format["audio_channels"]))
    return problematic_fields
    
def extract_stream_info(result, codec_type):
    """Return the first entry of ``result["streams"]`` whose ``codec_type`` matches.

    Returns ``None`` when no stream has the requested ``codec_type``.
    """
    matching = (entry for entry in result["streams"] if entry["codec_type"] == codec_type)
    return next(matching, None)
    
    
# Probe the selected file, flatten the report, and collect every attribute that
# deviates from EXPECTED_FORMAT as (field, actual_value) pairs. The final bare
# expression displays that list as the cell output.
result = probe_file(VIDEO_FILENAME)
video_format = parse_probe(result)
misssing_attrs = compare_attrs(video_format, EXPECTED_FORMAT)
misssing_attrs

[('video_codec', 'hevc'),
 ('audio_codec', 'mp3'),
 ('frame_rate', 29.97002997002997),
 ('video_br_mbs', 8.038857),
 ('audio_br_kbs', 320.0)]

In [139]:
# Display the raw ffprobe report currently held in `result`.
# NOTE(review): this cell's execution count is lower than that of the cell that
# assigns `result`, so the saved output may come from an earlier run -- confirm.
result

{'programs': [],
 'streams': [{'index': 0,
   'codec_name': 'rawvideo',
   'codec_long_name': 'raw video',
   'codec_type': 'video',
   'codec_tag_string': 'IYUV',
   'codec_tag': '0x56555949',
   'width': 720,
   'height': 404,
   'coded_width': 720,
   'coded_height': 404,
   'closed_captions': 0,
   'film_grain': 0,
   'has_b_frames': 0,
   'pix_fmt': 'yuv420p',
   'level': -99,
   'refs': 1,
   'r_frame_rate': '25/1',
   'avg_frame_rate': '25/1',
   'time_base': '1/25',
   'start_pts': 0,
   'start_time': '0:00:00.000000',
   'duration_ts': 500,
   'duration': '0:00:20.000000',
   'bit_rate': '87.438878 Mbit/s',
   'nb_frames': '500',
   'disposition': {'default': 0,
    'dub': 0,
    'original': 0,
    'comment': 0,
    'lyrics': 0,
    'karaoke': 0,
    'forced': 0,
    'hearing_impaired': 0,
    'visual_impaired': 0,
    'clean_effects': 0,
    'attached_pic': 0,
    'timed_thumbnails': 0,
    'captions': 0,
    'descriptions': 0,
    'metadata': 0,
    'dependent': 0,
    'stil