# Feature Extraction
In order to train our neural network model  
we need to create the mel spectrograms  
that will be used as input data. We also  
need to reshape the labels so they match  
the shape of the spectrograms.  
Lastly, we pad the labels and spectrograms  
so they all have the same shape.

## Import Libraries

In [1]:
import pandas as pd
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
import os
import pickle
from tqdm import tqdm
from modules.mel_spectrograms import extract_mel_specs
from modules.reshape_labels import reshape
from modules.load_labels import load_labels

## Set constants
I initialise some global variables that will be used in this file  

Some of the values used for the spectrograms are as follows:  
Sample rate: 22050 Hz  
STFT window: 2048 samples  
Hop size: 512 samples  
Mel bins: 64  

In [2]:
# Directory prefix under which the SALAMI annotation files are looked up.
SALAMI_LABEL_DIR = "../datasets/salami/annotations/"

# Audio sample rate, in Hz.
SR = 22050
# Short-time Fourier transform (STFT) window size, in samples.
N_FFT = 2048
# Hop size: the shift between consecutive STFT windows, in samples.
HOP_SIZE = 512
# Number of mel bands in each mel spectrogram.
N_MELS = 64
# Window size.
WINDOW_SIZE = 64
# Fixed length that every spectrogram and label vector is zero-padded to,
# chosen as the duration of the longest song.
ZERO_PADDING = 32000

## Load Datasets

In [3]:
# Load the pickled SALAMI song items. The context manager closes the file
# handle as soon as loading finishes instead of leaving it open.
# NOTE: unpickling can execute arbitrary code, so only load dumps you trust.
with open("../dumps/salami_fifty.p", "rb") as dump_file:
    salami = pickle.load(dump_file)

In [4]:
# harm = pickle.load(open("../dumps/s_public.p", "rb"))

## Initialise empty lists

In [5]:
# Accumulators for the extraction loop: one spectrogram and one set of
# annotations per song that has labels.
features, labels = [], []

## Loop through song items and extract labels and features

In [6]:
for item in tqdm(salami, total=len(salami)):
    # Each song item carries the value handed to extract_mel_specs ("mp3")
    # and the id that names its annotation directory ("SONG_ID").
    mp3 = item["mp3"]
    file_name = str(item["SONG_ID"])
    # The SALAMI database has two label options per song.
    file_one = SALAMI_LABEL_DIR + file_name + "/parsed/textfile1_uppercase.txt"
    file_two = SALAMI_LABEL_DIR + file_name + "/parsed/textfile2_uppercase.txt"
    annotations = load_labels(file_one, file_two)
    # Skip songs without labels so that features and labels stay aligned
    # index-for-index.
    if len(annotations) == 0:
        continue
    labels.append(annotations)
    features.append(extract_mel_specs(mp3))

100%|██████████| 50/50 [00:12<00:00,  3.99it/s]


## Initialise the feature and label lists

In [7]:
# The reshaped label vectors are collected here, one per spectrogram.
label_list = []
# The spectrograms are used unchanged, so feature_list is simply a second
# name for the same list object as features.
feature_list = features

## Reshape the labels to match the spectrograms

In [8]:
# Build one label vector per song. reshape receives the raw annotations, the
# size of the spectrogram's second axis, the sample rate and the hop size.
label_list.extend(
    reshape(annotation, spectrogram.shape[1], SR, HOP_SIZE)
    for spectrogram, annotation in zip(features, labels)
)

## Dump the results to file

In [None]:
# Persist the spectrograms and label vectors before they are padded. The
# context managers guarantee each file is flushed and closed once its dump
# completes, rather than relying on garbage collection.
with open('../dumps/features.p', 'wb') as features_file:
    pickle.dump(feature_list, features_file)
with open('../dumps/labels.p', 'wb') as labels_file:
    pickle.dump(label_list, labels_file)

## Initialise empty lists

In [9]:
# Containers for the padded spectrograms (x) and padded label vectors (y).
x, y = [], []

## Pad the features and labels
Zero-pad all of the spectrograms and label vectors so they have the same length

In [10]:
for spectrogram, label_vector in zip(feature_list, label_list):
    # Zero-pad the end of the label vector up to ZERO_PADDING entries.
    padded_labels = np.pad(
        label_vector, (0, ZERO_PADDING - label_vector.shape[0]), 'constant'
    )
    # Zero-pad only the end of the second axis and leave the first axis
    # untouched. A single call replaces the per-row Python loop, which
    # recomputed the same pad width for every row.
    # np.pad rejects a negative pad width, so an input longer than
    # ZERO_PADDING raises ValueError here.
    padded_spectrogram = np.pad(
        spectrogram,
        ((0, 0), (0, ZERO_PADDING - spectrogram.shape[1])),
        'constant',
    )
    x.append(padded_spectrogram)
    y.append(padded_labels)

## Dump the results to file

In [11]:
# Stack the padded per-song entries into single arrays and persist them. The
# context managers guarantee each file is flushed and closed once its dump
# completes, rather than relying on garbage collection.
with open('../dumps/x.p', 'wb') as x_file:
    pickle.dump(np.array(x), x_file)
with open('../dumps/y.p', 'wb') as y_file:
    pickle.dump(np.array(y), y_file)

In [12]:
# Sanity check: display the shape of the stacked, padded features.
np.shape(x)

(46, 64, 32000)