# Load labels
Converting the audio to mel-spectrograms changes the shape of the  
original signal, so the labels have to be reshaped as well  
before they can be used with the neural network.

## Import required libraries

In [None]:
import pandas as pd
import numpy as np
import os
from tqdm import tqdm
from modules.load_labels import load_labels

## Set global constants

In [None]:
# --- Harmonix paths ---
# Metadata CSV; read into the `harmonix` DataFrame in a later cell.
HARMONIX_DS = "../datasets/harmonix/metadata.csv"
# NOTE(review): the next three paths are not used by any cell visible here;
# presumably segment labels, downloaded audio and the label CSV to write — confirm.
HARMONIX_LABEL_DIR = "../datasets/harmonix/segments/"
HARMONIX_AUDIO_DIR = "../downloads/harmonix/"
HARMONIX_OUTPUT = "../datasets/labels/harmonix/labels.csv"

# --- SALAMI paths ---
# Metadata CSV; read into the `salami` DataFrame in a later cell.
SALAMI_DS = "../datasets/salami/metadata.csv"
# Root of the per-song annotation folders
# (<SONG_ID>/parsed/textfile1_uppercase.txt and textfile2_uppercase.txt).
SALAMI_LABEL_DIR = "../datasets/salami/annotations/"
# Folder checked for <SONG_ID>.mp3; songs without a file here are skipped.
SALAMI_OPEN_AUDIO_DIR = "../downloads/s_public/"
# Base path used when the collected SALAMI labels are written to CSV.
SALAMI_OUTPUT = "../datasets/labels/salami_open/labels.csv"

## Load the Harmonix dataset

In [None]:
# Read the Harmonix metadata CSV; header=0 takes the column names from the first row.
harmonix = pd.read_csv(HARMONIX_DS, header=0)
# Last expression of the cell: shows the first rows as a quick sanity check.
harmonix.head()

## Load the SALAMI dataset

In [None]:
# Read the SALAMI metadata CSV; header=0 takes the column names from the first row.
salami = pd.read_csv(SALAMI_DS, header=0)
# Last expression of the cell: shows the first rows as a quick sanity check.
salami.head()

## Load Labels

In [None]:
# Labels gathered for every SALAMI song that is available locally.
segments = []
# Walk the SALAMI metadata one row (song) at a time, with a progress bar.
for _, row in tqdm(salami.iterrows(), total=len(salami)):
    song_id = str(row["SONG_ID"])

    # Skip songs for which no mp3 copy exists in the download folder.
    if not os.path.exists(f"{SALAMI_OPEN_AUDIO_DIR}{song_id}.mp3"):
        continue

    # SALAMI offers two annotation files per song; both paths go to load_labels.
    first_annotation = f"{SALAMI_LABEL_DIR}{song_id}/parsed/textfile1_uppercase.txt"
    second_annotation = f"{SALAMI_LABEL_DIR}{song_id}/parsed/textfile2_uppercase.txt"
    song_labels = load_labels(first_annotation, second_annotation)

    # Nothing found for this song: do not record an empty entry.
    if len(song_labels) == 0:
        continue
    segments.append(song_labels)

## Output results to CSV

In [None]:
# Build a DataFrame from the labels collected in `segments`.
out_df = pd.DataFrame(segments)
# Create the destination folder if needed so to_csv does not fail on a
# missing directory.
os.makedirs(os.path.dirname(SALAMI_OUTPUT), exist_ok=True)
# SALAMI_OUTPUT is already the complete CSV path; the previous
# `SALAMI_OUTPUT + "salami_open"` wrote to a file named "labels.csvsalami_open".
# index=False keeps the row index out of the file.
out_df.to_csv(SALAMI_OUTPUT, index=False)