In [None]:
import os
import subprocess
import csv
import re
import ffmpeg
import json
import shutil
from time import sleep
from showenv import source_directory, destination_directory

In [None]:
def convert_colons_to_seconds(timething):
    """Convert an ``HH:MM:SS:FF`` timecode string into seconds.

    Args:
        timething: Timecode with exactly four colon-separated integer
            fields: hours, minutes, seconds and frames.

    Returns:
        float: Offset in seconds, including the hours component.

    Raises:
        ValueError: If the string does not have exactly four fields or a
            field is not an integer.
    """
    # Duration of one frame; 0.0416666 is ~1/24 s, so presumably the
    # timecodes are 24 fps -- TODO confirm against the subtitle source.
    seconds_per_frame = 0.0416666

    (hours, minutes, seconds, frames) = timething.split(":")
    # The hours field must be counted too, otherwise every timecode at or
    # after the one-hour mark is off by a multiple of 3600 seconds.
    whole_seconds = int(hours) * 3600 + int(minutes) * 60 + int(seconds)
    return whole_seconds + int(frames) * seconds_per_frame

In [None]:
def get_csv_data(file_path):
    """Load subtitle intervals from a CSV file, keyed by their midpoint.

    The CSV must have ``Start Time`` and ``End Time`` columns holding
    timecodes understood by ``convert_colons_to_seconds``.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        dict: Maps each interval's midpoint (seconds) to a dict with the
        keys ``"start"`` and ``"end"`` (both in seconds). Rows sharing a
        midpoint collapse into one entry; the last such row wins.
    """
    with open(file_path, newline='') as handle:
        # Convert every row up front: (start, end) pairs in file order.
        spans = [
            (
                convert_colons_to_seconds(row['Start Time']),
                convert_colons_to_seconds(row['End Time']),
            )
            for row in csv.DictReader(handle)
        ]

    return {
        (begin + finish) / 2: {"start": begin, "end": finish}
        for begin, finish in spans
    }


In [None]:
def get_video_duration_in_seconds(file_path):
    """Return the duration of a media file in seconds, as reported by ffprobe.

    Args:
        file_path: Path of the media file to probe.

    Returns:
        float: The ``format.duration`` value from ffprobe's JSON output.

    Raises:
        RuntimeError: If ffprobe cannot be started, or its output does not
            contain a parseable duration (for example the file is missing
            or unreadable). The underlying exception is chained as the cause.
    """
    command = [
        "ffprobe",
        "-v", "error",
        "-show_entries", "format=duration",
        "-of", "json",
        file_path,
    ]
    try:
        result = subprocess.run(
            command,
            stdout=subprocess.PIPE,  # stdout carries the JSON we parse below
            stderr=subprocess.DEVNULL,  # ffprobe diagnostics are discarded
            text=True,
        )
        info = json.loads(result.stdout)
        return float(info['format']['duration'])
    except Exception as e:
        # Returning the error text as if it were a duration only defers the
        # failure to the caller's arithmetic; fail here with a clear message.
        raise RuntimeError(
            f"Could not determine duration of {file_path!r}: {e}"
        ) from e


In [None]:
def extract_one_frame(mkv_filepath, time_in_seconds, output_folderpath, output_filename):
    """Save a single frame of a video as an image file.

    Args:
        mkv_filepath: Path of the video to read.
        time_in_seconds: Seek position passed to ffmpeg as ``ss``.
        output_folderpath: Folder for the image; created when missing.
        output_filename: File name of the image inside that folder.

    Prints a ``.`` on success. An ``ffmpeg.Error`` is reported on stdout
    and swallowed so the caller can carry on with the next frame.
    """
    os.makedirs(output_folderpath, exist_ok=True)
    target = os.path.join(output_folderpath, output_filename)

    try:
        stream = ffmpeg.input(mkv_filepath, ss=time_in_seconds)
        stream = stream.output(target, vframes=1)
        # quiet=True keeps ffmpeg's own stdout/stderr out of the notebook.
        stream.run(overwrite_output=True, quiet=True)
    except ffmpeg.Error as err:
        print(f"An error occurred: {err}")
    else:
        # One progress dot per extracted frame.
        print(".", end="")


In [None]:
def add_one_key_frame(times):
    """Pick the timestamp in the middle of the widest gap between entries.

    Consecutive entries are compared in the dict's iteration order, so the
    mapping is expected to already be ordered by time.

    Args:
        times: Mapping whose values are dicts with ``"start"`` and
            ``"end"`` (seconds).

    Returns:
        The midpoint between the end of one entry and the start of the
        next, for the first pair with the strictly largest gap. ``0`` is
        returned when no pair has a positive gap.
    """
    ordered = list(times.values())

    widest = 0
    midpoint = 0
    # Walk adjacent (earlier, later) pairs.
    for earlier, later in zip(ordered, ordered[1:]):
        gap = later["start"] - earlier["end"]
        if gap > widest:
            widest = gap
            midpoint = (later["start"] + earlier["end"]) / 2

    return midpoint

In [None]:
def refine_endpoints(times):
    """Trim surplus synthetic entries from both ends of ``times`` in place.

    An entry counts as synthetic when its value dict contains the key
    ``"new"``. While the first two entries are both synthetic the first one
    is removed, and while the last two are both synthetic the last one is
    removed, so at most one synthetic entry remains at each end.

    Args:
        times: Mapping of timestamp -> entry dict, in time order. It is
            mutated by this call.

    Returns:
        tuple: ``(first_key, last_key)`` of the trimmed mapping.

    Raises:
        ValueError: If ``times`` is empty.
    """
    if not times:
        raise ValueError("refine_endpoints() needs at least one entry")

    def is_new(key):
        return "new" in times[key]

    # Iterative rather than recursive: one recursion level per trimmed frame
    # can exhaust the interpreter's recursion limit on long synthetic runs.
    while len(times) >= 2:
        keys = list(times)
        # Decide both trims from the same snapshot before deleting anything.
        trim_head = is_new(keys[0]) and is_new(keys[1])
        trim_tail = is_new(keys[-2]) and is_new(keys[-1])
        if not (trim_head or trim_tail):
            break

        if trim_head:
            del times[keys[0]]
        # Never trim down to nothing: if every entry is synthetic, keep one
        # so there is still an endpoint to return.
        if trim_tail and len(times) > 1:
            del times[keys[-1]]

    remaining = list(times)
    return (remaining[0], remaining[-1])

    
        



In [None]:
def calculate_key_frames(original_mkv_filepath, csv_filepath, first_pass=True, last_frame_time=0, first_frame_time=0):
    """Compute the timestamps (seconds) at which frames should be extracted.

    Subtitle midpoints from the CSV are combined with synthetic timestamps
    placed in the widest remaining gaps until the number of timestamps per
    second of ``last_frame_time`` reaches the target density. The work is
    done twice: the first pass spans from a fixed lead-in to shortly before
    the end of the video and is only used to find trimmed endpoints; the
    second pass repeats the fill between those endpoints and returns the
    result.

    Args:
        original_mkv_filepath: Video whose duration bounds the first pass.
        csv_filepath: Subtitle CSV readable by ``get_csv_data``.
        first_pass: ``True`` for the external call; the function calls
            itself with ``False`` for the second pass.
        last_frame_time: Last timestamp to use (second pass only).
        first_frame_time: First timestamp to use (second pass only).

    Returns:
        list[float]: Sorted timestamps rounded to two decimals.

    Raises:
        RuntimeError: If the video duration could not be determined.
        ValueError: If the last frame time is not positive.
    """
    tail_margin_seconds = 5           # stop this long before the end of the video
    default_first_frame_time = 5.5    # lead-in skipped at the start of the video
    # Target frames per second of video. NOTE(review): the original note
    # derived 1000 frames / 1800 s = 0.555, but the code uses 0.45.
    target_frames_per_second = 0.45

    # Subtitle intervals in seconds, keyed by midpoint.
    times = get_csv_data(csv_filepath)

    if first_pass:
        duration = get_video_duration_in_seconds(original_mkv_filepath)
        # Guard against a non-numeric result (e.g. an error message) being
        # used in arithmetic below.
        if isinstance(duration, bool) or not isinstance(duration, (int, float)):
            raise RuntimeError(
                f"Could not determine duration of {original_mkv_filepath!r}: {duration}"
            )
        last_frame_time = duration - tail_margin_seconds
        first_frame_time = default_first_frame_time

    if last_frame_time <= 0:
        # A zero or negative span would divide by zero or never reach the
        # target density in the fill loop below.
        raise ValueError(
            f"last_frame_time must be positive, got {last_frame_time!r}"
        )

    # Add synthetic endpoints, but never replace a real subtitle entry that
    # happens to sit exactly on an endpoint.
    for endpoint in (last_frame_time, first_frame_time):
        if endpoint not in times:
            times[endpoint] = {
                "start": endpoint,
                "end": endpoint,
                "new": True,
            }
    times = dict(sorted(times.items()))

    while len(times) / last_frame_time < target_frames_per_second:
        new_key_frame = add_one_key_frame(times)
        if new_key_frame in times:
            # No further progress is possible (the proposed timestamp already
            # exists); looping again would never terminate.
            break
        times[new_key_frame] = {
            "start": new_key_frame,
            "end": new_key_frame,
            "new": True,
        }
        times = dict(sorted(times.items()))

    # Trims surplus synthetic entries at both ends of `times` in place.
    (a, b) = refine_endpoints(times)

    if first_pass:
        return calculate_key_frames(original_mkv_filepath, csv_filepath, first_pass=False, last_frame_time=b, first_frame_time=a)

    # One character per timestamp: "!" synthetic, "." from the subtitles.
    string_of_stuff = "".join(["!" if "new" in value else "." for key, value in times.items()])
    print("\t" + string_of_stuff)  # Output: "!.!."

    # Sorted list of floats, rounded to 2 decimal places.
    return sorted([round(float(key), 2) for key in times.keys()])
    

In [None]:
def extract_key_frames(mkv_filepath, original_mkv_filepath, output_folderpath, csv_filepath):
    """Extract one image per computed key-frame timestamp.

    Args:
        mkv_filepath: Video the frames are read from.
        original_mkv_filepath: Video passed to ``calculate_key_frames``
            together with the CSV to work out the timestamps.
        output_folderpath: Folder that receives the images.
        csv_filepath: Subtitle CSV used to work out the timestamps.

    Images are named by their zero-padded position in the timestamp list
    (``000000.jpg``, ``000001.jpg``, ...).
    """
    timestamps = calculate_key_frames(original_mkv_filepath, csv_filepath)

    for frame_number, timestamp in enumerate(timestamps):
        # A tab precedes each progress mark printed by extract_one_frame.
        print("\t", end="")
        image_name = f"{frame_number:06d}.jpg"
        extract_one_frame(mkv_filepath, timestamp, output_folderpath, image_name)
    

In [None]:
def process(source_dir, destination_dir):
    """Extract key frames for every episode found under ``source_dir``.

    Looks for ``Season 1`` .. ``Season 7`` folders in ``source_dir``. For
    each ``.mkv`` file whose name contains an ``SxxEyy - Title`` tag, the
    episode folder ``destination_dir/<season>/<tag>`` is expected to hold
    ``video.mkv`` and ``subtitles.csv``; frames are written to its
    ``frames`` sub-folder. Episodes whose ``frames`` folder already
    contains something are skipped, so the function can be re-run.

    Args:
        source_dir: Root folder containing the season folders.
        destination_dir: Root folder containing the per-episode folders.
    """
    seasons = [f"Season {i}" for i in range(1, 8)]
    # Episode tag such as "S01E02 - Title", up to the first parenthesis.
    episode_pattern = re.compile(r"S\d{2}E\d{2} - [^()]+")

    for season in seasons:
        season_path = os.path.join(source_dir, season)
        # `season` already reads "Season N"; don't prefix it a second time.
        print(f"{season}...")

        if not os.path.isdir(season_path):
            print(f"Directory {season_path} does not exist, skipping.")
            continue

        destination_season_path = os.path.join(destination_dir, season)
        os.makedirs(destination_season_path, exist_ok=True)

        for filename in sorted(os.listdir(season_path)):
            # Only complete .mkv files; partial downloads end in ".mkv.part"
            # and therefore never pass this check.
            if not filename.endswith('.mkv'):
                continue

            match = episode_pattern.search(filename)
            if match is None:
                # Without a tag there is no episode folder to work in. Skip
                # instead of reusing the previous file's name (or crashing
                # on the first file).
                print(f"\tSkipping {filename}: no SxxExx episode tag found in the name.")
                continue

            episode_name = match.group(0).strip()
            episode_path = os.path.join(destination_season_path, episode_name)

            video_file = os.path.join(season_path, filename)
            csv_file = os.path.join(episode_path, "subtitles.csv")
            # Presumably the render with burned-in subtitles -- confirm.
            burned_videofile = os.path.join(episode_path, "video.mkv")

            frames_path = os.path.join(episode_path, "frames")
            os.makedirs(frames_path, exist_ok=True)

            # Frames already present: this episode was handled earlier.
            with os.scandir(frames_path) as entries:
                already_done = any(entries)
            if already_done:
                continue

            print("\t" + episode_name)

            extract_key_frames(burned_videofile, video_file, frames_path, csv_file)

In [None]:
# Run the extraction over the source/destination directories imported from showenv.
process(source_directory, destination_directory)
