In [None]:
import datetime
import os
import re
import subprocess

from mutagen.mp4 import MP4


def parse_srt_file(srt_path): # string
    '''Parse text file in .srt format and return list of lists
    containing track number, start/stop times and caption text.

    Each returned entry has the form [number, start, stop, text], all
    strings.  The number is the stripped line that precedes the "-->"
    line; start and stop are copied verbatim from either side of the
    arrow.  The text is every caption line, stripped, each prefixed with
    a single space (so a non-empty text always starts with a space).

    A caption ends at the first blank or whitespace-only line, or at the
    end of the file, so the final caption is kept even when the file has
    no trailing blank line.

    Raises IndexError if a line contains "-->" without a space on both
    sides of the arrow.
    '''

    arrow = "-->"
    start_pattern = re.compile(r"(.*) -->")
    end_pattern = re.compile(r"--> (.*)")

    subtitle_times = []
    subtitle_entry = []
    text_parts = []
    # Most recent line seen outside a caption body; when a "-->" line
    # arrives this holds the caption number.
    line_store = ""
    in_caption = False

    # utf-8-sig drops a leading byte-order mark, which would otherwise be
    # glued onto the first caption number.
    with open(srt_path, mode='r', encoding='utf-8-sig') as srt:
        for line in srt:
            if arrow in line:
                subtitle_entry.append(line_store.strip())
                subtitle_entry.append(start_pattern.findall(line)[0])
                subtitle_entry.append(end_pattern.findall(line)[0])
                in_caption = True
            elif not in_caption:
                line_store = line
            elif line.strip():
                text_parts.append(line.strip())
            else:
                # Blank line: the current caption is complete.
                subtitle_entry.append("".join(" " + part for part in text_parts))
                subtitle_times.append(subtitle_entry)
                subtitle_entry = []
                text_parts = []
                in_caption = False

    # The file ended in the middle of a caption (no trailing blank line):
    # flush it instead of discarding it.
    if in_caption:
        subtitle_entry.append("".join(" " + part for part in text_parts))
        subtitle_times.append(subtitle_entry)

    return subtitle_times



def parse_filepath(audio_path):   # filepath
    '''Takes filepath of source audio and returns list
    with parent directory and name of directory to be created

    The path is split at its last "/".  Everything before it is the
    parent directory ("." when the path has no "/"), and the file name
    with its final extension removed is the directory name, so
    "/shows/Episode.m4a" gives ["/shows", "Episode"].  Any extension is
    accepted, and a name without an extension is returned unchanged.

    Raises ValueError if the path does not end in a file name.
    '''

    parentdir, separator, filename = audio_path.rpartition("/")
    if not filename:
        raise ValueError(f"no file name in audio path: {audio_path!r}")
    if not separator:
        # Bare file name: the clips directory goes in the current directory.
        parentdir = "."

    # Drop only the last extension; a leading-dot name such as ".hidden"
    # has no stem to fall back on and is kept whole.
    stem, dot, _extension = filename.rpartition(".")
    dirname = stem if dot and stem else filename

    return [parentdir, dirname]




def create_dir(filepath_list):   # list of partial filepaths
    '''Takes the filepath of the source audio and creates
    a directory for output clips in the same directory.

    filepath_list is [parent directory, directory name].  The directory
    "<parent>/<name>" is created, along with any missing parents, and an
    already existing directory is left as it is.  The path of the
    directory is returned as a string.

    Raises OSError if the directory cannot be created.
    '''

    output_dir = f"{filepath_list[0]}/{filepath_list[1]}"
    # Created directly rather than through a shell command, so spaces and
    # shell metacharacters in the path are taken literally.
    os.makedirs(output_dir, exist_ok=True)

    return output_dir





def iterate_list(subtitle_times, output_dir, filepath_list, series, adjustment,
                 audio_path=None):
    '''Iterate through a list of lists of parsed .srt data
    composed of track number, start/stop times and text.

    For every caption a clip is cut from the source audio with ffmpeg
    (streams are copied, not re-encoded), written to output_dir as
    "<number>_<sanitised text>.mp4" and given title, artist and album
    tags.

    subtitle_times -- list of [number, start, stop, text] entries; start
        and stop use the "HH:MM:SS,mmm" form.
    output_dir -- existing directory that receives the clips.
    filepath_list -- [parent directory, episode name]; the episode name
        becomes the album tag.
    series -- value for the artist tag.
    adjustment -- seconds (number or numeric string) added to each
        caption's start time.  The clip still ends at the caption's stop
        time, so a negative value lengthens the clip at the front.
    audio_path -- source audio file.  Defaults to
        "<parent directory>/<episode name>.m4a"; pass it explicitly for
        any source that does not follow that naming.

    Raises RuntimeError if ffmpeg exits with a non-zero status.
    '''

    if audio_path is None:
        audio_path = f"{filepath_list[0]}/{filepath_list[1]}.m4a"

    time_format = '%H:%M:%S,%f'
    # strptime without a date yields 1900-01-01; the same day's midnight is
    # the zero point for converting timestamps to seconds.
    midnight = datetime.datetime.strptime("00:00:00,000", time_format)
    td_adjust = datetime.timedelta(seconds=float(adjustment))
    # Characters replaced by "_" in file names: the original set (space,
    # quote, ?, !) plus path separators and other characters that are
    # invalid in file names on common filesystems.
    unsafe_chars = re.compile(r"[ '?!/\\:*\"<>|]")

    for caption in subtitle_times:
        # Creates new filename for clip.
        title = caption[3].strip()
        new_file = unsafe_chars.sub("_", title.replace(".", ""))
        new_filepath = f"{output_dir}/{caption[0]}_{new_file}.mp4"

        # Formats srt times for use with ffmpeg.
        clip_start = datetime.datetime.strptime(caption[1], time_format) + td_adjust
        clip_end = datetime.datetime.strptime(caption[2], time_format)
        # A negative adjustment on a caption near 0:00 must not wrap
        # around to the previous day.
        clip_start = max(clip_start, midnight)
        start = (clip_start - midnight).total_seconds()
        duration = (clip_end - clip_start).total_seconds()

        # Create clip using ffmpeg.  Arguments are passed as a list so
        # that no shell interprets the caption text or the paths.
        command = [
            "ffmpeg",
            "-ss", f"{start:.3f}",
            "-i", audio_path,
            "-t", f"{duration:.3f}",
            "-c:v", "copy",
            "-c:a", "copy",
            new_filepath,
        ]
        result = subprocess.run(command, capture_output=True)
        if result.returncode != 0:
            error_text = result.stderr.decode("utf-8", errors="replace").strip()
            raise RuntimeError(
                f"ffmpeg failed for caption {caption[0]} ({new_filepath}): {error_text}"
            )

        # Apply mp4 labels for use in iTunes.
        clip = MP4(new_filepath)
        if clip.tags is None:
            # A file without any metadata has no tag container yet.
            clip.add_tags()
        clip.tags['\xa9nam'] = title
        clip.tags['\xa9ART'] = series
        clip.tags['\xa9alb'] = filepath_list[1]
        clip.save()

    print("Done!")


In [None]:
# Series title; iterate_list writes it to each clip's artist tag.
series = "Series Name"

# Subtitle file (.srt) to parse.
srt_path = "/Path/To/SRT/File.srt"

# Source audio for the episode.
audio_path = "/Path/To/Audio/For/Episode.m4a"

# Offset in seconds (negative = earlier) applied to the subtitle times
# to line them up with the audio; converted with float() when used.
adjustment = "-.5"

In [None]:
# Run the pipeline with the settings defined above.
# 1. Read the captions: one [number, start, stop, text] list per caption.
subtitle_times = parse_srt_file(srt_path)
# 2. Split the audio path into [parent directory, clip directory name].
filepath_list = parse_filepath(audio_path)
# 3. Create the clip directory next to the source audio.
output_dir = create_dir(filepath_list)
# 4. Cut and tag one clip per caption; prints "Done!" when finished.
iterate_list(subtitle_times, output_dir, filepath_list, series, adjustment)