# Video analysis and report generation 

In [3]:
EXPECTED_FORMAT = {
    "container": "mp42",
    "container_short_name": "mp4",
    "video_codec": "h264",
    "audio_codec": "aac",
    "frame_rate": 25,
    "aspect_ratio": "16:9",
    "resolution_width": 640,
    "resolution_height": 360, 
    "video_br_min_mbs": 2,
    "video_br_max_mbs": 5, 
    "audio_br_max_kbs": 256,
    "audio_channels": 2
}

In [4]:
INPUT_FILE = "Cosmos_War_of_the_Planets.mp4"
IN_FOLDER = "input_files"
OUT_FOLDER = "output_files"

VIDEO_FILENAME = "input_files/Cosmos_War_of_the_Planets.mp4"
VIDEO_FILENAME = "output_files/Voyage_to_the_Planet_of_Prehistoric_Women.mp4"
#VIDEO_FILENAME = "input_files/The_Gun_and_the_Pulpit.avi"
#VIDEO_FILENAME = "input_files/The_Hill_Gang_Rides_Again.mp4"
#VIDEO_FILENAME = "input_files/Voyage_to_the_Planet_of_Prehistoric_Women.mp4"

Function that runs ffprobe command inside python and returns its output inspired by: https://stackoverflow.com/a/9896732

In [5]:
import os, sys, subprocess, shlex, re
import json
from subprocess import call
from fractions import Fraction

def probe_file(filename):
    cmd = ['ffprobe', "-print_format", "json", "-show_streams", '-show_format', '-pretty', '-loglevel', 'quiet', filename]
    print(" ".join(cmd)) 
    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, err =  p.communicate()
    if err:
        print ("========= error ========")
        print(err)
        raise Exception(err)
    return json.loads(out)

def calc_ar(w, h):
    fr = Fraction(w,h)
    num = fr.numerator
    denom = fr.denominator
    return f"{num}:{denom}"

def parse_probe(probe_r):
    # EXTRACT NEEDED INFO FROM FFPROBE OUTPUT HERE
    r = {}
    video_info = extract_stream_info(probe_r, "video")
    audio_info = extract_stream_info(probe_r, "audio")
    r["container"] = probe_r["format"]["tags"]["major_brand"] if "tags" in probe_r["format"] else probe_r["format"]["format_name"]
    r["video_codec"] = video_info["codec_name"]
    r["audio_codec"] = audio_info["codec_name"]
    if "/" in video_info["avg_frame_rate"]:
        num, denom = video_info["avg_frame_rate"].split("/")
        fr = int(num) / int(denom)
    else:
        fr = int(video_info["avg_frame_rate"])
    r["frame_rate"] = fr
    
    r["aspect_ratio"] = video_info["display_aspect_ratio"] if "display_aspect_ratio" in video_info else calc_ar(video_info["width"], video_info["height"])
    r["resolution_width"] = video_info["width"]
    r["resolution_height"] = video_info["height"]
    r["video_br_mbs"] = video_info["bit_rate"]
    r["audio_br_kbs"] = audio_info["bit_rate"]
    
    r["audio_channels"]= audio_info["channels"]
    return r

def compare_attrs(video_format, expected_format):
    problematic_fields = []
    #FIXME video_format["container"] is the codec not the container. how do I get the container?
    if(video_format["container"] != expected_format["container"]):
        problematic_fields.append(("container", video_format["container"]))
    if(video_format["video_codec"] != expected_format["video_codec"]):
        problematic_fields.append(("video_codec", video_format["video_codec"]))
    if(video_format["audio_codec"] != expected_format["audio_codec"]):
        problematic_fields.append(("audio_codec", video_format["audio_codec"]))
    if(video_format["frame_rate"] != expected_format["frame_rate"]):
        problematic_fields.append(("frame_rate", video_format["frame_rate"]))
    if(video_format["aspect_ratio"] != expected_format["aspect_ratio"]):
        problematic_fields.append(("aspect_ratio", video_format["aspect_ratio"]))
    if(video_format["resolution_width"] != expected_format["resolution_width"]):
        problematic_fields.append(("resolution_width", video_format["resolution_width"]))
    if(video_format["resolution_height"] != expected_format["resolution_height"]):
        problematic_fields.append(("resolution_height", video_format["resolution_height"]))
    video_br_min_mbs = expected_format["video_br_min_mbs"]
    video_br_max_mbs = expected_format["video_br_max_mbs"]
    video_br_mbs = float(video_format["video_br_mbs"].split()[0])
    if(video_br_mbs < video_br_min_mbs):
        problematic_fields.append(("video_br_min_mbs", video_br_mbs))
    if(video_br_mbs > video_br_max_mbs):
        problematic_fields.append(("video_br_max_mbs", video_br_mbs))
    audio_br_kbs = float(video_format["audio_br_kbs"].split()[0])
    if(audio_br_kbs > expected_format["audio_br_max_kbs"]):
        problematic_fields.append(("audio_br_max_kbs", audio_br_kbs))
    if(video_format["audio_channels"] != expected_format["audio_channels"]):
        problematic_fields.append(("audio_channels", video_format["audio_channels"]))
    return problematic_fields
    
def extract_stream_info(result, codec_type):
    for stream in result["streams"]:
        if(stream["codec_type"] == codec_type):
            return stream
    return None
    
    

In [6]:

def prettify_field_name(field_name):
    FIELD_CONVERSION_MAP = {
        "video_br_min_mbs": "min video bitrate",
        "video_br_max_mbs": "max video bitrate", 
        "audio_br_max_kbs": "max audio bitrate",
    }
    if field_name in FIELD_CONVERSION_MAP:
        return FIELD_CONVERSION_MAP[field_name]
    return field_name

def write_report_lines(lines, misssing_attrs, filename):
    issues = []
    for attr_type, value in misssing_attrs:
        issues.append(f"{prettify_field_name(attr_type)} is {value} but expected value is {EXPECTED_FORMAT[attr_type]}")
    if(issues):
        line = f"filename: {filename} - {', '.join(issues)}"
        lines.append(line)

In [7]:
filenames = [f"{IN_FOLDER}/Cosmos_War_of_the_Planets.mp4",
        f"{IN_FOLDER}/Last_man_on_earth_1964.mov",
        f"{IN_FOLDER}/The_Gun_and_the_Pulpit.avi",
        f"{IN_FOLDER}/The_Hill_Gang_Rides_Again.mp4",
        f"{IN_FOLDER}/Voyage_to_the_Planet_of_Prehistoric_Women.mp4"]

In [8]:
def generate_report(filenames):
    lines = []
    changes_per_file = []
    for file_path in filenames:
        result = probe_file(file_path)
        video_format = parse_probe(result)
        misssing_attrs = compare_attrs(video_format, EXPECTED_FORMAT)
        changes_per_file.append((file_path, misssing_attrs))
        write_report_lines(lines, misssing_attrs, file_path)
    if(not lines):
        lines.append("All video files have expected format")
    return lines, changes_per_file

In [9]:
report_lines, changes_per_file = generate_report(filenames)

ffprobe -print_format json -show_streams -show_format -pretty -loglevel quiet input_files/Cosmos_War_of_the_Planets.mp4
ffprobe -print_format json -show_streams -show_format -pretty -loglevel quiet input_files/Last_man_on_earth_1964.mov
ffprobe -print_format json -show_streams -show_format -pretty -loglevel quiet input_files/The_Gun_and_the_Pulpit.avi
ffprobe -print_format json -show_streams -show_format -pretty -loglevel quiet input_files/The_Hill_Gang_Rides_Again.mp4
ffprobe -print_format json -show_streams -show_format -pretty -loglevel quiet input_files/Voyage_to_the_Planet_of_Prehistoric_Women.mp4


In [10]:
report_lines

['filename: input_files/Cosmos_War_of_the_Planets.mp4 - frame_rate is 29.97002997002997 but expected value is 25, aspect_ratio is 314:177 but expected value is 16:9, resolution_width is 628 but expected value is 640, resolution_height is 354 but expected value is 360, max audio bitrate is 317.103 but expected value is 256',
 'filename: input_files/Last_man_on_earth_1964.mov - container is qt   but expected value is mp42, video_codec is prores but expected value is h264, audio_codec is pcm_s16le but expected value is aac, frame_rate is 23.976023976023978 but expected value is 25, max video bitrate is 9.285191 but expected value is 5',
 'filename: input_files/The_Gun_and_the_Pulpit.avi - container is avi but expected value is mp42, video_codec is rawvideo but expected value is h264, audio_codec is pcm_s16le but expected value is aac, aspect_ratio is 180:101 but expected value is 16:9, resolution_width is 720 but expected value is 640, resolution_height is 404 but expected value is 360, m

In [11]:
changes_per_file

[('input_files/Cosmos_War_of_the_Planets.mp4',
  [('frame_rate', 29.97002997002997),
   ('aspect_ratio', '314:177'),
   ('resolution_width', 628),
   ('resolution_height', 354),
   ('audio_br_max_kbs', 317.103)]),
 ('input_files/Last_man_on_earth_1964.mov',
  [('container', 'qt  '),
   ('video_codec', 'prores'),
   ('audio_codec', 'pcm_s16le'),
   ('frame_rate', 23.976023976023978),
   ('video_br_max_mbs', 9.285191)]),
 ('input_files/The_Gun_and_the_Pulpit.avi',
  [('container', 'avi'),
   ('video_codec', 'rawvideo'),
   ('audio_codec', 'pcm_s16le'),
   ('aspect_ratio', '180:101'),
   ('resolution_width', 720),
   ('resolution_height', 404),
   ('video_br_max_mbs', 87.438878)]),
 ('input_files/The_Hill_Gang_Rides_Again.mp4',
  [('video_br_max_mbs', 7.53773)]),
 ('input_files/Voyage_to_the_Planet_of_Prehistoric_Women.mp4',
  [('video_codec', 'hevc'),
   ('audio_codec', 'mp3'),
   ('frame_rate', 29.97002997002997),
   ('video_br_max_mbs', 8.038857),
   ('audio_br_max_kbs', 320.0)])]

# Write report to txt file

In [12]:
report_file_path = f"{OUT_FOLDER}/report.txt"

with open(report_file_path, 'w') as f:
     for line in report_lines:
        f.write(f"{line}\n\n\n")

# convert videos to expected format

-pix_fmt yuv420p:
https://trac.ffmpeg.org/wiki/Encode/H.264#Encodingfordumbplayers

In [13]:
def ffmpeg_file(filename, transformations, output_folder):
    changes = []
    current_extension = filename.split(".")[-1]
    output_filename = filename.replace(current_extension, EXPECTED_FORMAT["container_short_name"])
    output_filename = output_filename.split("/")[-1]
    output_filename = f"{output_folder}/{output_filename}"
    for key, value in transformations:
        if(key=="frame_rate"):
            changes.append("-r")
            changes.append(f"{EXPECTED_FORMAT[key]}")
        if(key=="video_codec"):
            changes.append("-c:v")
            changes.append(EXPECTED_FORMAT[key])
        if(key=="audio_codec"):
            changes.append("-c:a")
            changes.append(EXPECTED_FORMAT[key])
        if(key in ["resolution_width", "resolution_height"]):
            expected_size = f"{EXPECTED_FORMAT['resolution_width']}x{EXPECTED_FORMAT['resolution_height']}"
            changes.append("-vf")
            changes.append(f"scale={expected_size},setdar=16/9")
        if(key == "audio_br_max_kbs"):
            changes.append("-b:a")
            changes.append(f"{EXPECTED_FORMAT[key]}k")
        if(key in ["video_br_max_mbs", "video_br_min_mbs"]):
            changes.append("-b:v")
            changes.append(f"{EXPECTED_FORMAT[key]}M")
        if(key == "audio_channels"):
            changes.append("-ac")
            changes.append(f"{EXPECTED_FORMAT[key]}")
    cmd = ['ffmpeg', "-i", filename]
    cmd.extend(changes)
    br_med = EXPECTED_FORMAT["video_br_min_mbs"] + ((EXPECTED_FORMAT["video_br_max_mbs"] - EXPECTED_FORMAT["video_br_min_mbs"])/2)
    cmd.extend(["-y", "-pix_fmt", "yuv420p", "-brand","mp42", "-b:v", f"{br_med}M", output_filename])
    print(" ".join(cmd)) 
    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, err = p.communicate()
    return output_filename

In [14]:
output_filenames = []
for filename, changes in changes_per_file:
    output_filename = ffmpeg_file(filename, changes, OUT_FOLDER)
    output_filenames.append(output_filename)

ffmpeg -i input_files/Cosmos_War_of_the_Planets.mp4 -r 25 -vf scale=640x360,setdar=16/9 -vf scale=640x360,setdar=16/9 -b:a 256k -y -pix_fmt yuv420p -brand mp42 -b:v 3.5M output_files/Cosmos_War_of_the_Planets.mp4
ffmpeg -i input_files/Last_man_on_earth_1964.mov -c:v h264 -c:a aac -r 25 -b:v 5M -y -pix_fmt yuv420p -brand mp42 -b:v 3.5M output_files/Last_man_on_earth_1964.mp4
ffmpeg -i input_files/The_Gun_and_the_Pulpit.avi -c:v h264 -c:a aac -vf scale=640x360,setdar=16/9 -vf scale=640x360,setdar=16/9 -b:v 5M -y -pix_fmt yuv420p -brand mp42 -b:v 3.5M output_files/The_Gun_and_the_Pulpit.mp4
ffmpeg -i input_files/The_Hill_Gang_Rides_Again.mp4 -b:v 5M -y -pix_fmt yuv420p -brand mp42 -b:v 3.5M output_files/The_Hill_Gang_Rides_Again.mp4
ffmpeg -i input_files/Voyage_to_the_Planet_of_Prehistoric_Women.mp4 -c:v h264 -c:a aac -r 25 -b:v 5M -b:a 256k -y -pix_fmt yuv420p -brand mp42 -b:v 3.5M output_files/Voyage_to_the_Planet_of_Prehistoric_Women.mp4


In [15]:
changes_per_file

[('input_files/Cosmos_War_of_the_Planets.mp4',
  [('frame_rate', 29.97002997002997),
   ('aspect_ratio', '314:177'),
   ('resolution_width', 628),
   ('resolution_height', 354),
   ('audio_br_max_kbs', 317.103)]),
 ('input_files/Last_man_on_earth_1964.mov',
  [('container', 'qt  '),
   ('video_codec', 'prores'),
   ('audio_codec', 'pcm_s16le'),
   ('frame_rate', 23.976023976023978),
   ('video_br_max_mbs', 9.285191)]),
 ('input_files/The_Gun_and_the_Pulpit.avi',
  [('container', 'avi'),
   ('video_codec', 'rawvideo'),
   ('audio_codec', 'pcm_s16le'),
   ('aspect_ratio', '180:101'),
   ('resolution_width', 720),
   ('resolution_height', 404),
   ('video_br_max_mbs', 87.438878)]),
 ('input_files/The_Hill_Gang_Rides_Again.mp4',
  [('video_br_max_mbs', 7.53773)]),
 ('input_files/Voyage_to_the_Planet_of_Prehistoric_Women.mp4',
  [('video_codec', 'hevc'),
   ('audio_codec', 'mp3'),
   ('frame_rate', 29.97002997002997),
   ('video_br_max_mbs', 8.038857),
   ('audio_br_max_kbs', 320.0)])]

# Run report again on output files to validate all generated videos have required format

In [16]:
report_lines2, changes_per_file2 = generate_report(output_filenames)
report_lines2

ffprobe -print_format json -show_streams -show_format -pretty -loglevel quiet output_files/Cosmos_War_of_the_Planets.mp4
ffprobe -print_format json -show_streams -show_format -pretty -loglevel quiet output_files/Last_man_on_earth_1964.mp4
ffprobe -print_format json -show_streams -show_format -pretty -loglevel quiet output_files/The_Gun_and_the_Pulpit.mp4
ffprobe -print_format json -show_streams -show_format -pretty -loglevel quiet output_files/The_Hill_Gang_Rides_Again.mp4
ffprobe -print_format json -show_streams -show_format -pretty -loglevel quiet output_files/Voyage_to_the_Planet_of_Prehistoric_Women.mp4


['All video files have expected format']