In [1]:
import pandas as pd
import numpy as np
import matplotlib.pylab as plt
import seaborn as sns
import os as os
import tensorflow as tf

from joblib import delayed, Parallel
from tqdm import tqdm

import librosa
import librosa.display
import IPython.display as ipd
import skimage.io

from itertools import cycle

# Global plot styling: seaborn's "white" style, with no palette override requested.
sns.set_theme(style="white", palette=None)

# Colours of the active matplotlib property cycle: once as a plain list,
# once as an endless iterator over that same list.
color_pal = plt.rcParams["axes.prop_cycle"].by_key()["color"]
color_cycle = cycle(color_pal)

In [2]:
# Colab-only step: mount Google Drive at /content/drive, the root under which
# the dataset paths used by this notebook live.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Defining variables

class DataPath:
    """Plain container for the directory locations used by the notebook.

    Attributes:
        data: value passed as ``data``.
        audio: value passed as ``audio_path``.
        process: value passed as ``process``.
        protrain: value passed as ``processed_train``.
        protest: value passed as ``processed_test``.
    """

    def __init__(self, data, audio_path, process, processed_train, processed_test):
        # Values are stored verbatim; nothing is validated or normalised here.
        self.data, self.audio, self.process = data, audio_path, process
        self.protrain, self.protest = processed_train, processed_test



# Dataset root on the mounted Drive and the folders derived from it.
DATA_PATH = "/content/drive/MyDrive/Colab_Nb/data/"
AUDIO_PATH = f"{DATA_PATH}train/"
PROCESSED_TRAIN_DATA = "/content/drive/MyDrive/Colab_Nb/data/processed_train/"
PROCESSED_TEST_DATA = "/content/drive/MyDrive/Colab_Nb/data/processed_test/"

# One handle for every location. Note that `process` is a relative path,
# unlike the absolute Drive paths above.
PATH = DataPath(
    data=DATA_PATH,
    audio_path=AUDIO_PATH,
    process='data/processed/',
    processed_train=PROCESSED_TRAIN_DATA,
    processed_test=PROCESSED_TEST_DATA,
)


In [4]:
# Read the training metadata (train.csv in the dataset root) into a DataFrame.
df_known = pd.read_csv(os.path.join(PATH.data, 'train.csv'))
#df_test = pd.read_csv('data/kaggle-pog-series-s01e02/test.csv')
#submission = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/data/kaggle-pog-series-s01e02/sample_submission.csv')
df_known

Unnamed: 0,song_id,filename,filepath,genre_id,genre
0,10150,010150.ogg,train/010150.ogg,7,Instrumental
1,7358,007358.ogg,train/007358.ogg,2,Punk
2,20573,020573.ogg,train/020573.ogg,5,Folk
3,11170,011170.ogg,train/011170.ogg,12,Old-Time / Historic
4,16662,016662.ogg,train/016662.ogg,1,Rock
...,...,...,...,...,...
19917,9337,009337.ogg,train/009337.ogg,0,Electronic
19918,8340,008340.ogg,train/008340.ogg,4,Hip-Hop
19919,16248,016248.ogg,train/016248.ogg,4,Hip-Hop
19920,11875,011875.ogg,train/011875.ogg,2,Punk


In [5]:
# Remove metadata rows whose audio file does not exist on disk.
# `filepath` is relative to the dataset root, so it is joined onto PATH.data.
file_exists_mask = [os.path.isfile(PATH.data + rel_path) for rel_path in df_known['filepath']]

# .copy() detaches the filtered rows from the unfiltered frame, so adding
# columns to df_known later does not trigger pandas' SettingWithCopyWarning.
df_known = df_known[file_exists_mask].copy()
#df_test = df_test[[os.path.isfile('data/kaggle-pog-series-s01e02/'+i) for i in df_test['filepath']]]

df_known

Unnamed: 0,song_id,filename,filepath,genre_id,genre
0,10150,010150.ogg,train/010150.ogg,7,Instrumental
1,7358,007358.ogg,train/007358.ogg,2,Punk
2,20573,020573.ogg,train/020573.ogg,5,Folk
3,11170,011170.ogg,train/011170.ogg,12,Old-Time / Historic
4,16662,016662.ogg,train/016662.ogg,1,Rock
...,...,...,...,...,...
19917,9337,009337.ogg,train/009337.ogg,0,Electronic
19918,8340,008340.ogg,train/008340.ogg,4,Hip-Hop
19919,16248,016248.ogg,train/016248.ogg,4,Hip-Hop
19920,11875,011875.ogg,train/011875.ogg,2,Punk


In [6]:
# function to load audio file and resample it
def load_ogg_16k_mono(filename):
    """Decode an audio file into a mono waveform sampled at 16 kHz.

    Args:
        filename: Path of the audio file to decode; passed straight to
            ``librosa.load``.

    Returns:
        The waveform returned by ``librosa.load`` for ``sr=16000, mono=True``.
    """
    # Request the target rate and channel layout from librosa.load directly.
    # With its defaults (sr=22050, mono=True) the file was already down-mixed
    # and resampled once, so the extra resample to 16 kHz meant two lossy
    # rate conversions per file and the to_mono call was a no-op.
    y, _ = librosa.load(filename, sr=16000, mono=True)
    return y

In [7]:
# find max and min lengths of audio files
# Every clip is decoded in full, so this pass is slow; tqdm shows the progress.
train_dir = os.path.join(PATH.data, 'train')
lengths = []
for file in tqdm(sorted(os.listdir(train_dir)), desc='measuring clips'):
    # Skip anything that is not an .ogg clip so a stray file cannot abort the scan.
    if not file.lower().endswith('.ogg'):
        continue
    ogg_file = load_ogg_16k_mono(os.path.join(train_dir, file))
    lengths.append(len(ogg_file))

KeyboardInterrupt: 

In [None]:
# Summarise rather than print every entry: the list holds one length per audio file.
print(f"{len(lengths)} files measured; first 10 lengths in samples: {lengths[:10]}")

In [None]:
# Shortest and longest clip, converted from sample counts to seconds at 16 kHz.
tuple(n_samples / 16000 for n_samples in (min(lengths), max(lengths)))

In [None]:
# create directories to store processed data
def create_processing_dirs():
    """Create the processed-train and processed-test output directories.

    The targets are read from the module-level ``PATH`` object
    (``PATH.protrain`` and ``PATH.protest``). Directories that already exist
    are left untouched, and missing parent directories are created too.
    """
    for directory in (PATH.protrain, PATH.protest):
        # makedirs(exist_ok=True) replaces the isdir()/mkdir() pair: it has no
        # check-then-create gap and also builds missing parents, which plain
        # mkdir() refuses to do.
        os.makedirs(directory, exist_ok=True)

In [None]:
def preprocess2(file_name, dest = None):
    """Turn one audio file into a normalised, decibel-scaled mel spectrogram.

    The clip is loaded with ``load_ogg_16k_mono``, cut to at most 480000
    samples (30 s at 16 kHz), left-padded with zeros to exactly that length,
    converted to a 256-band mel spectrogram, expressed in dB relative to its
    maximum and finally passed through ``librosa.util.normalize``.

    Args:
        file_name: Path of the audio file to process.
        dest: Unused; kept so callers that pass it keep working.

    Returns:
        The normalised mel spectrogram array.
    """
    n_samples = 480000  # 30 s at the 16 kHz rate used for every clip
    ogg = load_ogg_16k_mono(file_name)[:n_samples]
    # Shorter clips get silence in front so every spectrogram has the same width.
    zero_padding = np.zeros(n_samples - np.shape(ogg)[0], dtype=np.float32)
    ogg = np.concatenate((zero_padding, ogg), 0)
    S = librosa.feature.melspectrogram(y=ogg, sr=16000, n_mels=128 * 2)
    # melspectrogram returns a power spectrogram by default (power=2.0), so the
    # matching conversion is power_to_db. amplitude_to_db treats its input as
    # magnitudes, which doubles every dB value before the dynamic-range floor
    # is applied.
    S_db_mel = librosa.power_to_db(S, ref=np.max)
    melnorm = librosa.util.normalize(S_db_mel)
    return melnorm

In [None]:
# Decode the first clip listed in the metadata as a sample waveform.
# `filename` holds bare file names, so the audio directory is prepended;
# .iloc[0] replaces as_numpy_iterator(), a tf.data API that NumPy arrays lack.
sample_audio = load_ogg_16k_mono(PATH.audio + df_known.filename.iloc[0])

In [None]:
# preprocess2 does its own loading, so it needs the path of the clip rather
# than an already-decoded waveform. Use the first clip listed in the metadata.
S_db_mel = preprocess2(PATH.audio + df_known.filename.iloc[0])
S_db_mel

In [None]:
# Dimensions of the sample spectrogram array.
S_db_mel.shape

In [None]:
fig, ax = plt.subplots(figsize=(10, 5))
# Plot the mel spectrogram. sr must match the 16 kHz used to compute it
# (specshow otherwise assumes 22050 Hz and mislabels the axes), and the rows
# are mel bands, so the frequency axis uses the mel scale instead of 'log'.
img = librosa.display.specshow(S_db_mel,
                              sr=16000,
                              x_axis='time',
                              y_axis='mel',
                              ax=ax)
ax.set_title('Mel Spectogram Example', fontsize=20)
# NOTE(review): S_db_mel has been through librosa.util.normalize, so the
# colour scale may no longer be in dB — confirm the unit in this label.
fig.colorbar(img, ax=ax, format='%0.2f dB')
plt.show()

In [None]:
# Create the processed-train and processed-test output folders if they are missing.
create_processing_dirs()

In [None]:
# Queue one preprocess2 call per metadata row; the audio directory is
# prepended to each bare file name.
delayed_preprocess = [
    delayed(preprocess2)(PATH.audio + audio_name)
    for audio_name in df_known["filename"].values
]

# Run the queue on all available cores and keep each spectrogram in a new
# 'melspec' column.
# NOTE(review): this holds every spectrogram in memory at once — confirm it
# fits in RAM for the full dataset.
df_known.loc[:, "melspec"] = Parallel(n_jobs=-1, verbose=5)(delayed_preprocess)

In [None]:
# Directory to store mel spectrograms as .npy files.
# exist_ok keeps the cell re-runnable (a bare `mkdir` fails once the folder
# exists), and the path is built from PATH.protrain instead of repeating it.
os.makedirs(os.path.join(PATH.protrain, 'mels'), exist_ok=True)

In [None]:
# NOTE(review): preprocess1 is not defined in the visible part of this notebook
# (only preprocess2 is) — confirm it exists before running this cell.
# One delayed call per metadata row, with the audio directory prepended to the file name.
delayed_preprocess = [delayed(preprocess1)(PATH.audio+path) for path in df_known.filename.values]
# The results are discarded, so preprocess1 is presumably run for its side
# effects (e.g. writing files) — TODO confirm.
_ = Parallel(n_jobs=-1, verbose=5)(delayed_preprocess)