In [17]:
import os
import math
import urllib.request

import pandas
from pydub import AudioSegment

df_announcements = pandas.read_csv("announcements.csv")

In [18]:
def _combine_clips(clips):
    final_clip = clips[0]
    for clip in clips[1:]:
        final_clip += clip
    return final_clip

def _combine_clips_by_id(clip_ids):
    clips = [AudioSegment.from_mp3(_fetch_mp3_path_from_clip_id(clip_id)) for clip_id in clip_ids]
    return _combine_clips(clips)

def _fetch_clip_id_for_clip_transcription(transcription):
    is_destination = df_announcements["Category"] == "Destination"
    df_destinations = df_announcements[is_destination]

    matches = df_destinations[df_destinations["Transcription"] == transcription]

    if len(matches) >= 1:
        file_id = str(matches["File"].iloc[0])
        return file_id.rjust(4, "0")
    else:
        raise Exception(f"Found {len(matches)} files for {transcription}")

def _fetch_mp3_path_from_clip_id(clip_id):
    url = f"https://github.com/matteason/scotrail-announcements-june-2022/raw/main/announcements/{clip_id}.mp3"
    mp3_path = os.path.join("tmp", f"{clip_id}.mp3")

    if os.path.isfile(mp3_path) is False:
        with urllib.request.urlopen(url) as response, open(mp3_path, "wb") as out_file:
            out_file.write(response.read())
    
    return mp3_path

def _create_custom_conjoiners():
    # because_of
    mp3 = AudioSegment.from_mp3(_fetch_mp3_path_from_clip_id("0884"))[1250:1750]
    mp3.export(os.path.join("tmp", "because_of.mp3"), format="mp3")

    # to
    mp3 = AudioSegment.from_mp3(_fetch_mp3_path_from_clip_id("0094"))[1300:]
    mp3.export(os.path.join("tmp", "to.mp3"), format="mp3")

    # and
    mp3 = AudioSegment.from_mp3(_fetch_mp3_path_from_clip_id("0488"))[:200]
    mp3.export(os.path.join("tmp", "and.mp3"), format="mp3")

    # drinks and light refreshments
    mp3 = AudioSegment.from_mp3(_fetch_mp3_path_from_clip_id("0084"))[200:]
    mp3.export(os.path.join("tmp", "drinks_and_light_refreshments.mp3"), format="mp3")

    # this
    mp3 = AudioSegment.from_mp3(_fetch_mp3_path_from_clip_id("0128"))[:325]
    mp3.export(os.path.join("tmp", "this.mp3"), format="mp3")

    # is
    mp3 = AudioSegment.from_mp3(_fetch_mp3_path_from_clip_id("0220"))[:125]
    mp3.export(os.path.join("tmp", "is.mp3"), format="mp3")

    # service
    mp3 = AudioSegment.from_mp3(_fetch_mp3_path_from_clip_id("0034"))[800:1300]
    mp3.export(os.path.join("tmp", "service.mp3"), format="mp3")

    # from
    mp3 = AudioSegment.from_mp3(_fetch_mp3_path_from_clip_id("0034"))[1300:]
    mp3.export(os.path.join("tmp", "from.mp3"), format="mp3")

In [19]:
# 
# The train's journey
# 
destination_names = ["St Pancras International", "Bedford", "Banbury", "Coventry", "Birmingham New Street", "Birmingham", "Stafford", "Derby", "Bescar Lane", "Chesterfield", "Chinley", "Bamford", "Auchinleck", "Ashburys", "Manchester Oxford Road", "Ashton-under-Lyne", "Barnsley", "Berry Brow", "Brighouse", "Church Fenton", "Darlington", "Durham", "Chester-le-Street", "Hexham", "Brampton (Cumbria)", "Carlisle", "Gretna Green", "Annan", "Dumfries", "Kirkconnel", "Auchinleck", "Kilmarnock", "Ardrossan Town", "Kilmaurs", "Dunlop", "Barrhead", "Kennishead", "Crossmyloof", "Newton", "Glasgow Queen Street", "Glasgow Central", "Exhibition Centre", "Gilshochill", "Kelvindale", "Loch Awe", "Rannoch", "Stromeferry", "Conon Bridge", "Beauly", "Georgemas", "Inverness", "Kingussie", "Aberdeen", "Carnoustie", "Arbroath", "Barry Links", "Dundee", "Cupar", "Kirkcaldy", "Dunfermline Queen Margaret", "Alloa", "Stirling", "Falkirk Grahamston", "Falkirk High", "Edinburgh International Airport", "Edinburgh"]
destination__clip_ids = [_fetch_clip_id_for_clip_transcription(destination_name) for destination_name in destination_names]

# 
# Intro announcements
# 
intro_announcements = [
    {
        "0047": "Please ignore the following announcements"
    },
    {
        "this": "this",
        "0032": "ScotRail",
        "service": "service",
        "is": "is",
        "from": "from",
        "0225": "St Pancras International",
        "to": "to",
        "0718": "Edinburgh",
        "0318": "via",
        "drinks_and_light_refreshments": "drinks and light refreshments",
        "and": "and",
        "0726": "Glasgow Queen Street"
    },
    {
        "0085": "We are sorry to announce that the",
        "0540": "Megatrain accomodation can be found at the front of the train",
        "0318": "via",
        "0883": "A shortage of train managers"
    }
]

# 
# Silly announcements
# 
silly_announcements = [
    {
        "0094": "The next service is to",
        "0808": "A fire near the railway suspected to involve gas cylinders yesterday"
    },
    {
        "0085": "We are sorry to announce that the",
        "0981": "Cattle on the railway",
        "0351": "is available on this train"
    },
    {
        "0027": "The next train at platform 8 is the",
        "0034": "ScotRail service from",
        "0999": "Expected industrial action"
    },
    {
        "0558": "You are advised that cycles are not allowed on this train",
        "because_of": "because_of",
        "0946": "A wartime bomb near the railway"
    },
    {
        "0094": "The next service is to",
        "0654": "...adverse weather conditions..."
    },
    {
        "0922": "a supermarket trolley on the track",
        "0157": "Is at present being tested by our engineers"
    },
    {
        "0085": "We are sorry to announce that the",
        "0032": "ScotRail",
        "0340": "has been cancelled",
        "because_of": "because_of",
        "0970": "Attempted theft of overhead line electrification equipment earlier today"
    },
    {
        "0155": "Attention please",
        "0564": "First class accommodation is available",
        "0318": "via",
        "0739": "...a bicycle on the track earlier on this train's journey."
    },
    {
        "0967": "Animals on the railway",
        "0227": "Should change here"
    }
]

In [20]:
# Create a set of custom conjoinign words to augment the default ScotRail set
_create_custom_conjoiners()

background_music_volume_decrease_db = 3
announcement_volume_increase_db = 5
background_music = AudioSegment.from_mp3("lofi hip hop radio - beats to relax_study to - short.mp3") - background_music_volume_decrease_db

# Build the intro announcemnts clip for the train's journey
intro_announcements_clip = AudioSegment.silent(duration=5000)
for i, clips in enumerate(intro_announcements):
    clip = _combine_clips_by_id(clips.keys())
    intro_announcements_clip += clip + announcement_volume_increase_db
    intro_announcements_clip += AudioSegment.silent(duration=4000)
intro_announcements_clip += AudioSegment.silent(duration=4000)

# Intersperse silly announcements amongst the station destination announcements
destination_clip_ids = [_fetch_clip_id_for_clip_transcription(destination_name) for destination_name in destination_names]
destination_clips = [_combine_clips_by_id([clip_id]) for clip_id in destination__clip_ids]

silly_announcement_spacing = math.floor(len(destination_clip_ids) / len(silly_announcements))
for i, clips in enumerate(silly_announcements):
    idx = silly_announcement_spacing * (i + 1)
    destination_clips.insert(idx, _combine_clips_by_id(clips.keys()))

# Build a clip of all of the announcements spanning the duration of the background music
announcements_overlay = AudioSegment.silent(duration=0)
announcements_gap_ms = (len(background_music) - len(intro_announcements_clip)) / len(destination_clips)

for i, clip in enumerate(destination_clips):
    announcements_overlay += clip + announcement_volume_increase_db
    if i < len(destination_clips) - 1:
        announcements_overlay += AudioSegment.silent(duration=announcements_gap_ms)

final_clip = background_music.overlay(intro_announcements_clip + announcements_overlay)
final_clip.export("scotrail_birthday_2022.mp3", format="mp3", tags={"title": "ScotRail Birthday 2022", "artist": "ScotRail, Alison McKay, and Lofi Girl", "album": "Helen ❤️"})

<_io.BufferedRandom name='scotrail_birthday_2022.mp3'>