# Align Songs
The aim of this file is to align the first beat of the audio downloaded from YouTube  
with the first beat in the annotations. If the first beat of the YouTube download  
occurs earlier than in the annotated version, I add a bit of silence at the start to line the songs up.  
If it occurs a little later, I remove some of the audio from the start instead.  
If the total duration of the YouTube version is longer than the annotated version, I truncate the song;  
otherwise I add silence to the end of the song, so that the total duration of the downloaded  
song lines up with the duration of the annotated song.

## Import required libraries

In [1]:
from __future__ import print_function, division
import os
import pandas as pd
import time
from pydub import AudioSegment
from tqdm import tqdm

## Global constants

In [None]:
# CSV that pairs SALAMI annotations with their YouTube downloads (onsets and lengths).
SALAMI_YT_MATCHES = "../datasets/salami/matches.csv"
# CSV with the Harmonix / YouTube onset and duration metadata.
HARMONIX_YT_MATCHES = "../datasets/harmonix/youtube_metadata.csv"
# Root folder of the downloaded .m4a files; read from its "salami/" and "harmonix/" subfolders.
DOWNLOADS_DIR = "../downloads/"
# Root folder for the aligned .mp3 files; written to its "salami/" and "harmonix/" subfolders.
OUTPUT_DIR = "../transformed/"

## Read SALAMI csv metadata

In [3]:
# Load the SALAMI/YouTube match table; the first CSV row provides the column names.
salami = pd.read_csv(SALAMI_YT_MATCHES, header=0)
# Display the first rows as a quick sanity check of the columns.
salami.head()

Unnamed: 0,salami_id,salami_length,youtube_id,youtube_length,coverage,coverage_percent,onset_in_youtube,onset_in_salami
0,2,264.8,DKzFw_i6Ffg,265.0,261.4,0.98716,0.6,0.5
1,3,276.662812,E6iGez4BLjQ,277.0,270.3,0.977002,0.3,0.5
2,4,236.094667,ct8gCRPST9s,235.0,232.5,0.984774,0.4,0.5
3,5,256.940417,IBdrzoz9o0o,258.0,248.1,0.965594,6.0,6.0
4,6,170.466667,QurVLnYoFqo,174.0,163.5,0.959132,4.8,0.2


## Read Harmonix csv metadata

In [4]:
# Load the Harmonix/YouTube metadata table; the first CSV row provides the column names.
harmonix = pd.read_csv(HARMONIX_YT_MATCHES, header=0)
# Display the first rows as a quick sanity check of the columns.
harmonix.head()

Unnamed: 0,File,yt_onset,yt_dur,dur,onset
0,0001_12step,1.718277,204.428481,142.47,0.0
1,0003_6foot7foot,1.230658,264.103764,157.347,2.857108
2,0004_abc,0.1161,210.697868,180.955,2.666656
3,0005_again,3.808073,193.747302,192.067,4.61538
4,0009_americanmusic,0.06966,222.911565,223.807,3.5


## Pad audio helper function

In [2]:
def pad_audio(youtube, actual, input_file, output_file):
    """
    Align a downloaded song with its annotations and export it as mp3.

    The audio is shifted so that the onset of the download coincides with
    the annotated onset, then cut or padded with silence so that the
    exported audio lasts exactly ``actual["length"]`` seconds.

    :param youtube - metadata of the download: "onset" and "length", in seconds
    :param actual - metadata of the annotated song: "onset" and "length", in seconds
    :param input_file - the loaded audio (a pydub AudioSegment) to be aligned
    :param output_file - the file name for the song to be written to
    """
    # pydub slices and silence durations are expressed in milliseconds, so
    # convert everything once up front instead of mixing seconds and ms.
    offset_ms = int(round((youtube["onset"] - actual["onset"]) * 1000))
    target_ms = int(round(actual["length"] * 1000))
    source_ms = int(round(youtube["length"] * 1000))

    out = input_file

    # Position (on the download's timeline) where the aligned song must end.
    end_ms = offset_ms + target_ms
    if end_ms > source_ms:
        # The download stops too early: append the missing silence.
        out = out + AudioSegment.silent(duration=end_ms - source_ms)

    if offset_ms < 0:
        # The download's onset is too early: prepend silence to delay it,
        # after which the aligned song starts at the very beginning.
        out = AudioSegment.silent(duration=-offset_ms) + out
        offset_ms = 0

    # Drop any leading audio before the aligned start and anything past the
    # annotated duration.
    out = out[offset_ms:offset_ms + target_ms]
    out.export(output_file, format="mp3")

## Helper function to extract SALAMI metadata

In [4]:
def extract_values(item):
    """
    Split one SALAMI match record into the two metadata dictionaries
    (download vs. annotation), each holding an "onset" and a "length".

    :param item - the record (indexable by column name) holding the SALAMI match data
    :return: youtube, actual - two dictionaries containing relevant metadata
    """
    # Values describing the YouTube download.
    youtube = dict(
        onset=item["onset_in_youtube"],
        length=item["youtube_length"],
    )
    # Values describing the annotated SALAMI recording.
    actual = dict(
        onset=item["onset_in_salami"],
        length=item["salami_length"],
    )
    return youtube, actual

In [10]:
# Iterate through the SALAMI data set
for _, item in tqdm(salami.iterrows(), total=len(salami)):
    filename = str(item.salami_id)
    input_path = "".join([DOWNLOADS_DIR, "salami/", filename, ".m4a"])
    output_path = "".join([OUTPUT_DIR, "salami/", filename, ".mp3"])
    # Skip songs for which we do not have a downloaded copy.
    if not os.path.exists(input_path):
        continue
    # Read the download, extract its metadata and write the aligned mp3.
    input_file = AudioSegment.from_file(input_path, "m4a")
    youtube, actual = extract_values(item)
    pad_audio(youtube, actual, input_file, output_path)

100%|██████████| 5/5 [00:12<00:00,  2.55s/it]


## Helper function to extract Harmonix metadata

In [6]:
def extract_harmonix(item):
    """
    This function extracts the relevant metadata
    from the YouTube download and the original song
    metadata for the Harmonix dataset

    :param item - the record (indexable by column name) from which the information is extracted
    :return: youtube, actual - two dictionaries containing relevant metadata
    """
    # The "yt_"-prefixed columns describe the YouTube download; the
    # unprefixed ones describe the annotated song. They were previously
    # assigned the other way round, which made the alignment target the
    # YouTube duration instead of the annotated one.
    youtube = {
        "onset": item["yt_onset"],
        "length": item["yt_dur"]
    }
    actual = {
        "onset": item["onset"],
        "length": item["dur"]
    }
    return youtube, actual

In [7]:
# Iterate through the Harmonix data set
for _, item in tqdm(harmonix.iterrows(), total=len(harmonix)):
    filename = item["File"]
    input_path = "".join([DOWNLOADS_DIR, "harmonix/", filename, ".m4a"])
    output_path = "".join([OUTPUT_DIR, "harmonix/", filename, ".mp3"])
    # Skip songs for which we do not have a downloaded copy.
    if not os.path.exists(input_path):
        continue
    # Read the download, extract its metadata and write the aligned mp3.
    input_file = AudioSegment.from_file(input_path, "m4a")
    youtube, actual = extract_harmonix(item)
    pad_audio(youtube, actual, input_file, output_path)

100%|██████████| 1/1 [00:03<00:00,  3.94s/it]
