In [12]:
import os
import json
import librosa
import pandas as pd
import numpy as np
import soundfile as sf
from PIL import Image

In [3]:
# Folder holding the training wav files; the label CSV sits inside it.
DATA_PATH = '../data/spcup_2022_training_part1'
# Folder that receives the generated spectrogram PNGs.
IMAGE_PATH = '../data/images'
# CSV with the track/algorithm table, located in the training folder.
Labeled_dir = DATA_PATH + '/labels.csv'

In [4]:
# Read the label table from the CSV named by Labeled_dir and preview it.
label_df = pd.read_csv(filepath_or_buffer=Labeled_dir)
label_df.head(n=5)

Unnamed: 0,track,algorithm
0,00050dd7458cf08e594c797930696bce.wav,4
1,00070e7c531000d3dddc735d107275a9.wav,2
2,000f0711027a69b7f3886c2dbcb7d41f.wav,3
3,001e28e66dee24408aaf3480dfb95fbe.wav,1
4,001eee950f60613869544b72cd48fe97.wav,2


In [5]:
# Build the path of every wav file by prefixing its track name with DATA_PATH.
label_df['wav_path'] = label_df['track'].map(
    lambda track_name: '/'.join((DATA_PATH, track_name))
)
label_df.head(n=5)

Unnamed: 0,track,algorithm,wav_path
0,00050dd7458cf08e594c797930696bce.wav,4,../data/spcup_2022_training_part1/00050dd7458c...
1,00070e7c531000d3dddc735d107275a9.wav,2,../data/spcup_2022_training_part1/00070e7c5310...
2,000f0711027a69b7f3886c2dbcb7d41f.wav,3,../data/spcup_2022_training_part1/000f0711027a...
3,001e28e66dee24408aaf3480dfb95fbe.wav,1,../data/spcup_2022_training_part1/001e28e66dee...
4,001eee950f60613869544b72cd48fe97.wav,2,../data/spcup_2022_training_part1/001eee950f60...


In [31]:
# Which spectrogram save_and_record renders: 'stft', 'mel' or 'chroma'.
spect_type = 'mel'


def _spec_to_uint8(spec):
    """Min-max scale an array to the full 0-255 range and cast it to uint8.

    A constant array (max == min) maps to all zeros instead of dividing by zero.
    """
    spec = np.asarray(spec, dtype=np.float64)
    lo, hi = spec.min(), spec.max()
    if hi > lo:
        scaled = (spec - lo) / (hi - lo)
    else:
        scaled = np.zeros_like(spec)
    return np.round(scaled * 255.0).astype(np.uint8)


def save_and_record(wav_path):
    """Render the spectrogram of one audio file to a grayscale PNG.

    The kind of spectrogram is selected by the module-level ``spect_type``.
    The image is written to ``IMAGE_PATH/<file stem>.png``, where the stem is
    the file name up to its first dot.

    Parameters
    ----------
    wav_path : str
        '/'-separated path of the audio file to load.

    Returns
    -------
    str
        Path of the PNG that was written.

    Raises
    ------
    ValueError
        If ``spect_type`` is not 'stft', 'mel' or 'chroma'.
    """
    y, sr = librosa.load(wav_path)
    if spect_type == 'stft':
        D = librosa.stft(y)  # STFT of y
        spec = librosa.amplitude_to_db(np.abs(D), ref=np.max)
    elif spect_type == 'mel':
        M = librosa.feature.melspectrogram(y=y, sr=sr)
        spec = librosa.power_to_db(M, ref=np.max)
    elif spect_type == 'chroma':
        spec = librosa.feature.chroma_cqt(y=y, sr=sr)
    else:
        raise ValueError(
            "spect_type must be 'stft', 'mel' or 'chroma', got %r" % (spect_type,)
        )

    # dB spectrograms computed with ref=np.max are <= 0, so multiplying them by
    # 255 and casting to uint8 wraps around. Rescale to 0-255 before the cast.
    spec_image = _spec_to_uint8(spec)

    image_name = wav_path.split('/')[-1].split('.')[0]
    image_path = IMAGE_PATH+'/'+image_name+'.png'

    # Saving fails if the target folder is missing, so create it on demand.
    os.makedirs(IMAGE_PATH, exist_ok=True)
    im = Image.fromarray(spec_image)
    im.save(image_path)
    return image_path


In [32]:
# Write a spectrogram image for every wav file and store the returned PNG path.
label_df['image_path'] = label_df['wav_path'].map(save_and_record)
label_df.head(n=5)

Unnamed: 0,track,algorithm,wav_path,image_path
0,00050dd7458cf08e594c797930696bce.wav,4,../data/spcup_2022_training_part1/00050dd7458c...,../data/images/00050dd7458cf08e594c797930696bc...
1,00070e7c531000d3dddc735d107275a9.wav,2,../data/spcup_2022_training_part1/00070e7c5310...,../data/images/00070e7c531000d3dddc735d107275a...
2,000f0711027a69b7f3886c2dbcb7d41f.wav,3,../data/spcup_2022_training_part1/000f0711027a...,../data/images/000f0711027a69b7f3886c2dbcb7d41...
3,001e28e66dee24408aaf3480dfb95fbe.wav,1,../data/spcup_2022_training_part1/001e28e66dee...,../data/images/001e28e66dee24408aaf3480dfb95fb...
4,001eee950f60613869544b72cd48fe97.wav,2,../data/spcup_2022_training_part1/001eee950f60...,../data/images/001eee950f60613869544b72cd48fe9...


In [33]:
# Keep only the part of each track name before its first dot (drops '.wav').
label_df['track'] = label_df['track'].map(
    lambda filename: filename.partition('.')[0]
)
label_df.head(n=5)

Unnamed: 0,track,algorithm,wav_path,image_path
0,00050dd7458cf08e594c797930696bce,4,../data/spcup_2022_training_part1/00050dd7458c...,../data/images/00050dd7458cf08e594c797930696bc...
1,00070e7c531000d3dddc735d107275a9,2,../data/spcup_2022_training_part1/00070e7c5310...,../data/images/00070e7c531000d3dddc735d107275a...
2,000f0711027a69b7f3886c2dbcb7d41f,3,../data/spcup_2022_training_part1/000f0711027a...,../data/images/000f0711027a69b7f3886c2dbcb7d41...
3,001e28e66dee24408aaf3480dfb95fbe,1,../data/spcup_2022_training_part1/001e28e66dee...,../data/images/001e28e66dee24408aaf3480dfb95fb...
4,001eee950f60613869544b72cd48fe97,2,../data/spcup_2022_training_part1/001eee950f60...,../data/images/001eee950f60613869544b72cd48fe9...


In [34]:
# Old column name -> name used in the exported records.
col_map = dict(
    track='audio_id',
    algorithm='label',
    wav_path='wav',
    image_path='image',
)
label_df.rename(columns=col_map, inplace=True)
label_df.head(n=5)

Unnamed: 0,audio_id,label,wav,image
0,00050dd7458cf08e594c797930696bce,4,../data/spcup_2022_training_part1/00050dd7458c...,../data/images/00050dd7458cf08e594c797930696bc...
1,00070e7c531000d3dddc735d107275a9,2,../data/spcup_2022_training_part1/00070e7c5310...,../data/images/00070e7c531000d3dddc735d107275a...
2,000f0711027a69b7f3886c2dbcb7d41f,3,../data/spcup_2022_training_part1/000f0711027a...,../data/images/000f0711027a69b7f3886c2dbcb7d41...
3,001e28e66dee24408aaf3480dfb95fbe,1,../data/spcup_2022_training_part1/001e28e66dee...,../data/images/001e28e66dee24408aaf3480dfb95fb...
4,001eee950f60613869544b72cd48fe97,2,../data/spcup_2022_training_part1/001eee950f60...,../data/images/001eee950f60613869544b72cd48fe9...


In [36]:
# One dict per row, keyed by column name.
row_dict = label_df.to_dict(orient='records')

In [37]:
# Wrap the row records under a single 'data' key and write them out as JSON.
data_dict = dict(data=row_dict)
with open("../data/train_data.json", mode="w") as outfile:
    outfile.write(json.dumps(data_dict))