# VidFormatGuardian Prototype

This program automatically checks video files and converts them so that they meet a fixed set of criteria. It verifies each file's format (container, video and audio codecs, audio channels, frame rate, etc.) and prints a validation report for the file; if the file is invalid, it is re-encoded into the valid format.

Check whether ffmpeg is installed; if it is not, install it using [Homebrew](https://brew.sh/) on macOS or apt-get on Linux, or follow the steps in the PDF document for Windows.

In [11]:
exist = !which ffmpeg
if not exist:
# Verify the operating system type.
    import platform
    if platform.system() == 'Darwin':
# Install ffmpeg for macOS.
        !brew install ffmpeg
    elif platform.system() == 'Linux':
# Install ffmpeg for Linux-based systems.
        !apt-get install ffmpeg
    else:
        print('If you have Windows, Check the pdf for the steps to follow.')

Import libraries:

In [12]:
import json
import subprocess
from pathlib import Path
from math import gcd
import glob


## Helper functions

In [13]:
def aspect_ratio_calculation(video_width:int, video_height:int) -> str:
    """Return the aspect ratio of a frame as a reduced "W:H" string.

    Both dimensions are divided by their greatest common divisor, so
    1920x1080 and 640x360 both yield "16:9".
    """
    divisor = gcd(video_width, video_height)

    # Reduce each side by the shared divisor, then join them as "W:H".
    reduced_sides = (side // divisor for side in (video_width, video_height))
    return ":".join(str(side) for side in reduced_sides)

In [14]:
def bitrate_in_kbps(bit_rate:int) -> str:
    """Format a bit rate given in bits per second as a "<value> kb/s" string."""
    # One kilobit is 1000 bits; the quotient is rendered with default float formatting.
    return "{} kb/s".format(bit_rate / 1000)

In [15]:
def bitrate_in_mbps(bit_rate:int) -> str:
    """Format a bit rate given in bits per second as a "<value> Mb/s" string."""
    # One megabit is 1,000,000 bits; the quotient is rendered with default float formatting.
    return "{} Mb/s".format(bit_rate / 1000000)

In [16]:
def ffprobe(file_location):
    """Run ffprobe on a media file and return its report as parsed JSON.

    ffprobe is asked for the container description (-show_format) and the
    per-stream descriptions (-show_streams) in JSON form. Its standard error
    output is captured and discarded, and its exit status is not inspected.
    """
    probe_args = ['ffprobe', '-show_format', '-show_streams', '-of', 'json', file_location]
    completed = subprocess.run(probe_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    # The JSON report is written to standard output as UTF-8 text.
    report_text = completed.stdout.decode('utf-8')
    return json.loads(report_text)

## Process files

In [17]:
def validate_file(file_location:str) -> bool:
    """Check if the file is compliant with the requirements.

    Probes the file with ffprobe, prints one line for every requirement that
    is violated, then prints a VALID/INVALID verdict.

    Requirements checked, in this order: MP4-family container, h264 video
    codec, AAC audio codec, 25 fps frame rate, 16:9 aspect ratio, 640x360
    resolution, video bitrate between 2 and 5 Mb/s, audio bitrate of at most
    256 kb/s, and two audio channels.

    Args:
        file_location: Path of the media file to inspect.

    Returns:
        True when every requirement is met, False otherwise. A file that has
        no video stream or no audio stream, or whose stream bitrate is absent
        from the ffprobe report, is reported as invalid rather than raising.
    """
    valid = True

    probe_data = ffprobe(file_location)
    print("file: " + file_location)

    # Only the first stream of each type is inspected.
    streams = probe_data.get('streams', [])
    video_stream_details = next((stream for stream in streams if stream.get('codec_type') == 'video'), None)
    audio_stream_details = next((stream for stream in streams if stream.get('codec_type') == 'audio'), None)
    has_video = video_stream_details is not None
    has_audio = audio_stream_details is not None

    # Validate if the media container format is MP4 (ffprobe reports the whole demuxer family).
    container_format = probe_data.get('format', {}).get('format_name', 'unknown')
    if container_format != "mov,mp4,m4a,3gp,3g2,mj2":
        print("Wrong media_container format: " + container_format)
        valid = False

    if not has_video:
        print("No video stream found.")
        valid = False

    if not has_audio:
        print("No audio stream found.")
        valid = False

    # Confirm the video is encoded with the h264 codec.
    # The message reports the codec of the stream that was actually checked,
    # not whichever stream happens to be first in the file.
    if has_video and video_stream_details.get('codec_name') != "h264":
        print("Wrong video codec: " + str(video_stream_details.get('codec_name')))
        valid = False

    # Ensure the audio stream uses the AAC codec.
    if has_audio and audio_stream_details.get('codec_name') != "aac":
        print("Wrong audio codec: " + str(audio_stream_details.get('codec_name')))
        valid = False

    if has_video:
        # Verify the media is encoded at a frame rate of 25 FPS.
        if video_stream_details['r_frame_rate'] != "25/1":
            print("Wrong frame rate: " + video_stream_details['r_frame_rate'])
            valid = False

        video_width = int(video_stream_details['width'])
        video_height = int(video_stream_details['height'])

        # Check the video maintains a 16:9 aspect ratio. Prefer the display
        # aspect ratio reported by ffprobe; fall back to the pixel dimensions
        # when the report does not carry one.
        aspect_ratio = video_stream_details.get('display_aspect_ratio')
        if not aspect_ratio:
            aspect_ratio = aspect_ratio_calculation(video_width, video_height)

        if aspect_ratio != "16:9":
            print("Wrong aspect ratio: " + aspect_ratio)
            valid = False

        # Ascertain the video resolution is 640x360 pixels.
        if video_width != 640 or video_height != 360:
            print(f"Wrong video_resolution: {video_stream_details['width']}x{video_stream_details['height']}")
            valid = False

        # Ensure the video bitrate falls within the 2 to 5 Mb/s range.
        video_bitrate = video_stream_details.get('bit_rate')
        if video_bitrate is None:
            print("Wrong video bitrate: not reported by ffprobe")
            valid = False
        elif not 2000000 <= int(video_bitrate) <= 5000000:
            print("Wrong video bitrate: " + bitrate_in_mbps(int(video_bitrate)))
            valid = False

    if has_audio:
        # Confirm the audio bitrate does not exceed 256 kb/s.
        audio_bitrate = audio_stream_details.get('bit_rate')
        if audio_bitrate is None:
            print("Wrong audio bitrate: not reported by ffprobe")
            valid = False
        elif int(audio_bitrate) > 256000:
            print("Wrong audio bitrate: " + bitrate_in_kbps(int(audio_bitrate)))
            valid = False

        # Validate the audio track is in stereo format.
        if audio_stream_details.get('channels') != 2:
            print("Audio channel is not stereo, a number of channels: " + str(audio_stream_details.get('channels')))
            valid = False

    if valid:
        print("File is VALID.\n")
    else:
        print("File is INVALID.\n")

    return valid

In [18]:
# Files from the Films folder that fail validation; filled by the loop below.
wrong_format_files = []

# Validate every entry in the Films folder, skipping past the compliant ones.
for file_location in glob.glob("Films/*"):
    if validate_file(file_location):
        continue
    wrong_format_files.append(file_location)



file: Films\Cosmos_War_of_the_Planets.mp4
Wrong frame rate: 30000/1001
Wrong aspect ratio: 314:177
Wrong video_resolution: 628x354
Wrong audio bitrate: 317.103 kb/s
File is INVALID.

file: Films\Last_man_on_earth_1964.mov
Wrong frame rate: 24000/1001
Wrong video bitrate: 9.156232 Mb/s
File is INVALID.

file: Films\The_Gun_and_the_Pulpit.avi
Wrong media_container format: avi
Wrong audio codec: mp3
Wrong aspect ratio: 180:101
Wrong video_resolution: 720x404
Wrong video bitrate: 9.275521 Mb/s
File is INVALID.

file: Films\The_Hill_Gang_Rides_Again.mp4
Wrong video bitrate: 7.53773 Mb/s
File is INVALID.

file: Films\Voyage_to_the_Planet_of_Prehistoric_Women.mp4
Wrong video codec: hevc
Wrong audio codec: mp3
Wrong frame rate: 30000/1001
Wrong video bitrate: 8.038857 Mb/s
Wrong audio bitrate: 320.0 kb/s
File is INVALID.



ALL THE RE-ENCODED FILES GO TO THE FOLDER valid_format_videos

In [19]:
# Create the output folder once, before any encoding starts.
output_dir = Path("valid_format_videos")
output_dir.mkdir(parents=True, exist_ok=True)

for file_location in wrong_format_files:
    print("Reencoding file: " + file_location)

    # NOTE: the output name is derived from the input's stem only, so two
    # inputs that differ only by extension map to the same output file and
    # the later one overwrites the earlier one (-y).
    output_file = output_dir / (Path(file_location).stem + "_formatOK.mp4")

    # Build the ffmpeg command as an argument list (no shell is involved).
    command = [
        'ffmpeg',
        '-i', file_location, # input file

        # Apply a complex filter for video scaling to 640x360 and setting frame rate to 25.
        '-filter_complex', '[0:v]scale=height=360:width=640[s0];[s0]fps=fps=25:round=up[s1]',

        # Specify audio processing options.
        '-map', '0:a', # use the audio stream
        '-acodec', 'aac', # set audio codec
        '-ab', '256k', # set audio bitrate
        '-ac', '2', # set audio channel (stereo)

        # Define video processing parameters.
        '-map', '[s1]', # use the scaled video stream
        '-aspect', '16:9', # set video aspect ratio

        # Set the video codec to be used in encoding.
        '-vcodec', 'h264',

        # Establish a video bitrate range of 2 to 5 Mb/s.
        '-maxrate', '5M', # set max video bitrate
        '-minrate', '2M', # set min video bitrate
        '-bufsize', '5M', # set buffer size, because maxrate doesn't work without this
        # The option value must not contain quote characters: arguments in a
        # list are passed to ffmpeg verbatim, so embedded quotes would become
        # part of the option name and the setting would not be recognised.
        '-x264-params', 'nal-hrd=cbr', # requested because minrate doesn't work without this
        '-b:v', '3M', # set average video bitrate

        '-y', # overwrite output file if it exists
        str(output_file) # output file
    ]

    # check=True raises subprocess.CalledProcessError when ffmpeg exits with a
    # non-zero status; unlike an assert, this check is not removed under -O.
    subprocess.run(command, check=True)

print("\n")

Reencoding file: Films\Cosmos_War_of_the_Planets.mp4
Reencoding file: Films\Last_man_on_earth_1964.mov
Reencoding file: Films\The_Gun_and_the_Pulpit.avi
Reencoding file: Films\The_Hill_Gang_Rides_Again.mp4
Reencoding file: Films\Voyage_to_the_Planet_of_Prehistoric_Women.mp4




Check reencoded files:

In [20]:
# Run the same compliance report over every file in the output folder.
reencoded_files = glob.glob("valid_format_videos/*")
for file_location in reencoded_files:
    validate_file(file_location)

file: valid_format_videos\Cosmos_War_of_the_Planets_formatOK.mp4
File is VALID.

file: valid_format_videos\Last_man_on_earth_1964_formatOK.mp4
File is VALID.

file: valid_format_videos\The_Gun_and_the_Pulpit_formatOK.mp4
File is VALID.

file: valid_format_videos\The_Hill_Gang_Rides_Again_formatOK.mp4
File is VALID.

file: valid_format_videos\Voyage_to_the_Planet_of_Prehistoric_Women_formatOK.mp4
File is VALID.

