# Preparing training data for the models, one feature at a time

In [1]:
import time
import librosa
from notebook_utilities import *

import_data_science(globals())

In [None]:
# --- Locations ---------------------------------------------------------------
root = os.path.join("/", "mnt", "e")
artifacts = os.path.join(root, "artifacts")
training_data = os.path.join(root, "dev", "training_data")

# --- Raw table ---------------------------------------------------------------
signals_path = os.path.join(artifacts, 'signals_10s_1750tracks.parquet')
raw = pd.read_parquet(signals_path)

# --- Labels ------------------------------------------------------------------
# The project helper is wrapped with np.vectorize so it can be applied to the
# whole 'genre' column in one call.
one_hot_encoding = np.vectorize(data_transformation.one_hot_function)
labels = one_hot_encoding(raw['genre'])
print(f"labels shape: {labels.shape}")


# --- Signals -----------------------------------------------------------------
def _parse_signal(text):
    """Parse one stored signal string into a float32 vector.

    The first and last characters (the surrounding brackets) are dropped and
    the remainder is split on commas.
    """
    return np.array(text[1:-1].split(','), dtype=np.float32)


signals = raw['y'].apply(_parse_signal).to_numpy()
print(f"signal shape {signals[0].shape}")

labels shape: (1742,)


# Splitting each signal into frames with a hop of half a frame

In [None]:

# Frame geometry: windows of 22050 samples, advanced by half a window.
FRAME_LENGTH = 22050
HOP_LENGTH = FRAME_LENGTH // 2  # 11025

# Number of whole, non-overlapping frames; the length is read from signals[1].
frames_amount = signals[1].shape[0] // FRAME_LENGTH

# Start offsets of the overlapping frames: 0, 11025, 22050, ...
frame_starts = [k * HOP_LENGTH for k in range(frames_amount * 2 - 1)]

X_train = []
y_train = []
for signal, label in zip(signals, labels):
    # Every frame cut from a signal carries that signal's label.
    X_train.extend(signal[begin:begin + FRAME_LENGTH] for begin in frame_starts)
    y_train.extend([label] * len(frame_starts))




In [None]:
# Collect the positions of frames whose length differs from 22050 samples,
# then report them in order.
faulty_positions = [
    idx for idx, frame in enumerate(X_train) if frame.shape[0] != 22050
]
for idx in faulty_positions:
    print(f"idx: {idx} was faulty")


len(y_train), len(X_train)


In [None]:
def normalize_and_make_numpy(x, y):
    """Convert features and labels to NumPy arrays with fixed dtypes.

    Despite the name, no value scaling is applied: both inputs are only
    passed through ``np.array`` and cast.

    Args:
        x: Sequence of feature rows.
        y: Sequence of labels.

    Returns:
        A ``(features, targets)`` tuple, with features cast to ``float32``
        and targets cast to ``int32``.
    """
    features = np.array(x)
    features = features.astype(np.float32)

    targets = np.array(y)
    targets = targets.astype(np.int32)

    return features, targets

# Turn the framed signals into arrays; x / y are reused by the feature cells.
x, y = normalize_and_make_numpy(X_train, y_train)
x_waveform = x
y_waveform = y

# Persist labels and raw waveform frames.
with open(f"{training_data}/labels.npy", "wb") as labels_file:
    np.save(labels_file, y_waveform)

with open(f"{training_data}/waveform.npy", "wb") as waveform_file:
    np.save(waveform_file, x_waveform)

# The metadata file is opened in append mode, so each run adds new lines.
with open(f"{training_data}/metadata", "a") as metadata_file:
    metadata_file.writelines([
        f"x_waveform.shape={x_waveform.shape}\n",
        f"labels.shape={y_waveform.shape}\n",
    ])

# Quick look at the first five frames.
for sample, target in zip(x_waveform[:5], y_waveform[:5]):
    print(f"signal shape: {sample.shape[0]}, label: {target}")

# Fourier Transform

In [None]:
from IPython.display import clear_output

# STFT magnitude of every frame in ``x``; results are stacked into ``ft`` with
# the matching labels in ``ft_y``.
ft = []
ft_y = []
pop = []        # magnitudes of the frames whose label equals 3
ft_failed = []  # indices of frames that could not be transformed

# Timings are gathered in plain lists and converted once after the loop;
# np.append would copy the whole array on every iteration.
transformation_time = []
op_time = []

for idx, (record, label) in enumerate(zip(x, y)):
    start = time.time()
    try:
        transformed = np.abs(librosa.stft(record, hop_length=256))
    except Exception as exc:
        # Best effort: skip the frame, but remember which one failed and why.
        # ``Exception`` (not a bare except) lets Ctrl-C interrupt the loop.
        ft_failed.append(idx)
        print(f"Couldn't transform to fourier transform (frame {idx}): {exc!r}")
        continue
    transformation_time.append(time.time() - start)

    if label == 3:
        pop.append(transformed)

    ft.append(transformed)
    ft_y.append(label)
    op_time.append(time.time() - start)

    print(f"{(idx/x.shape[0])*100:.2f}%")
    clear_output(wait=True)

ft = np.array(ft).astype(np.float32)
ft_y = np.array(ft_y).astype(np.int32)
transformation_time = np.array(transformation_time)
op_time = np.array(op_time)

# clear_output() wipes per-frame messages, so repeat the failures here.
if ft_failed:
    print(
        f"{len(ft_failed)} frame(s) skipped; ft.npy rows no longer line up "
        f"with labels.npy. Failed indices: {ft_failed}"
    )

print(f"mean transformation time: {transformation_time.mean():.4f}s")
print(f"mean operation time: {op_time.mean():.4f}s")

# Per-label frame counts.
values, counts = np.unique(ft_y, return_counts=True)
value_counts = dict(zip(values, counts))

with open(f"{training_data}/ft.npy", "wb") as f:
    np.save(f, ft)

with open(f"{training_data}/metadata", "a") as f:
    f.write(f"ft.shape={ft.shape}\n")

value_counts, ft.shape, ft_y.shape


In [None]:
# Stack the STFT magnitudes collected in ``pop`` (frames with label 3).
pop_ft = np.array(pop).astype(np.float32)

# Print each item's shape; the message previously interpolated the whole
# array even though it is labelled "shape".
for pop_signal in pop_ft:
    print(f"pop signal shape: {pop_signal.shape}")

# Last expression of the cell so the notebook displays it.
pop_ft.shape, pop_ft[0].shape

# Spectrogram

In [None]:
# dB-scaled STFT magnitude of every frame in ``x``, saved to spectogram.npy.
spec_x = []
spec_y = []
spec_failed = []  # indices of frames that could not be transformed

# Timings are gathered in lists and converted once after the loop.
transformation_time = []
op_time = []

for idx, (record, label) in enumerate(zip(x, y)):
    start = time.time()
    try:
        spec = librosa.amplitude_to_db(
            np.abs(librosa.stft(record, hop_length=256)), ref=np.max
        )
    except Exception as exc:
        # Best effort: skip the frame, but report which one failed and why.
        spec_failed.append(idx)
        print(f"Couldn't extract spectogram (frame {idx}): {exc!r}")
        continue
    transformation_time.append(time.time() - start)

    # Store the spectrogram just computed for this frame. The previous code
    # appended ``transformed``, a leftover variable from the Fourier cell.
    spec_x.append(spec)
    spec_y.append(label)
    op_time.append(time.time() - start)

    print(f"{(idx/x.shape[0])*100:.2f}%")

spec_x = np.array(spec_x).astype(np.float32)
spec_y = np.array(spec_y).astype(np.int32)
transformation_time = np.array(transformation_time)
op_time = np.array(op_time)

if spec_failed:
    print(
        f"{len(spec_failed)} frame(s) skipped; spectogram.npy rows no longer "
        f"line up with labels.npy. Failed indices: {spec_failed}"
    )

print(f"mean transformation time: {transformation_time.mean():.4f}s")
print(f"mean operation time: {op_time.mean():.4f}s")


with open(f"{training_data}/spectogram.npy", "wb") as f:
    np.save(f, spec_x)

with open(f"{training_data}/metadata", "a") as f:
    f.write(f"spectogram.shape={spec_x.shape}\n")

# Release the large arrays before the next feature is computed.
del spec_x
del spec_y

# Mel Spectrogram

In [None]:
# dB-scaled mel spectrogram of every frame in ``x``, saved to
# mel_spectogram.npy.
mel_spec_x = []
mel_spec_y = []
mel_failed = []  # indices of frames that could not be transformed

# Timings are gathered in lists and converted once after the loop.
transformation_time = []
op_time = []

for idx, (record, label) in enumerate(zip(x, y)):
    start = time.time()
    try:
        # NOTE(review): n_mels=1025 with n_fft=8192 is unusual; presumably
        # chosen so the output height matches the STFT features. Confirm.
        mel_spect = librosa.feature.melspectrogram(
            y=record, sr=22050, n_fft=8192, hop_length=256, n_mels=1025
        )
        mel_spect = librosa.power_to_db(mel_spect, ref=np.max)
    except Exception as exc:
        # Best effort: skip the frame, but report which one failed and why.
        mel_failed.append(idx)
        print(f"Couldn't extract mel spectrogram (frame {idx}): {exc!r}")
        continue
    transformation_time.append(time.time() - start)

    mel_spec_x.append(mel_spect)
    mel_spec_y.append(label)
    op_time.append(time.time() - start)

    print(f"{(idx/x.shape[0])*100:.2f}%")

mel_spec_x = np.array(mel_spec_x).astype(np.float32)
mel_spec_y = np.array(mel_spec_y).astype(np.int32)
transformation_time = np.array(transformation_time)
op_time = np.array(op_time)

if mel_failed:
    print(
        f"{len(mel_failed)} frame(s) skipped; mel_spectogram.npy rows no "
        f"longer line up with labels.npy. Failed indices: {mel_failed}"
    )

print(f"mean transformation time: {transformation_time.mean():.4f}s")
print(f"mean operation time: {op_time.mean():.4f}s")

# Per-label frame counts. Printed explicitly because a bare expression in the
# middle of a cell is never displayed.
values, counts = np.unique(mel_spec_y, return_counts=True)
value_counts = dict(zip(values, counts))
print(value_counts, mel_spec_x.shape, mel_spec_y.shape)

with open(f"{training_data}/mel_spectogram.npy", "wb") as f:
    np.save(f, mel_spec_x)
with open(f"{training_data}/metadata", "a") as f:
    f.write(f"mel_spectogram.shape={mel_spec_x.shape}\n")

# Release the large arrays before the next feature is computed.
del mel_spec_x
del mel_spec_y

# Power Spectrogram

In [None]:
# dB-scaled power spectrogram of every frame in ``x``, saved to
# power_spectogram.npy.
power_spec_x = []
power_failed = []  # indices of frames that could not be transformed

# Timings are gathered in lists and converted once after the loop.
transformation_time = []
op_time = []

for idx, record in enumerate(x):
    start = time.time()
    try:
        # NOTE: this rebinds the notebook-level name ``ft`` (the stacked STFT
        # array from the Fourier cell) to a single-frame complex STFT. The
        # name is kept because the shape summary cell reads ``ft.shape``.
        ft = librosa.stft(record, hop_length=256)
        power_spec = np.abs(ft) ** 2

        # Convert to the dB scale (logarithmic power scale):
        #   S_db = 10 * log10(S / ref)
        # S is the power, ref is the reference value; with ref=np.max the
        # reference is the largest power in the spectrogram.
        power_db = librosa.power_to_db(power_spec, ref=np.max)
    except Exception as exc:
        # Best effort: skip the frame, but report which one failed and why.
        power_failed.append(idx)
        print(f"Couldn't extract power spectrogram (frame {idx}): {exc!r}")
        continue
    transformation_time.append(time.time() - start)

    power_spec_x.append(power_db)
    op_time.append(time.time() - start)

    print(f"{(idx/x.shape[0])*100:.2f}%")

power_spec_x = np.array(power_spec_x).astype(np.float32)
transformation_time = np.array(transformation_time)
op_time = np.array(op_time)

# No label list is kept for this feature, so a skipped frame shifts every
# later row relative to labels.npy. Make that visible.
if power_failed:
    print(
        f"{len(power_failed)} frame(s) skipped; power_spectogram.npy rows no "
        f"longer line up with labels.npy. Failed indices: {power_failed}"
    )

print(f"mean transformation time: {transformation_time.mean():.4f}s")
print(f"mean operation time: {op_time.mean():.4f}s")

with open(f"{training_data}/power_spectogram.npy", "wb") as f:
    np.save(f, power_spec_x)
with open(f"{training_data}/metadata", "a") as f:
    f.write(f"power_spectogram.shape={power_spec_x.shape}\n")

# Release the large array before the next feature is computed.
del power_spec_x

# MFCC (Mel Frequency Cepstral Coefficients)

In [None]:
# 12 MFCCs for every frame in ``x``, saved to mfcc.npy.
mfcc = []
mfcc_failed = []  # indices of frames that could not be transformed

# Timings are gathered in lists and converted once after the loop.
transformation_time = []
op_time = []

for idx, record in enumerate(x):
    start = time.time()
    try:
        mfcc_x = librosa.feature.mfcc(y=record, sr=22050, n_mfcc=12, hop_length=256)
    except Exception as exc:
        # Best effort: skip the frame, but report which one failed and why.
        mfcc_failed.append(idx)
        print(f"Couldn't calculate MFCC (frame {idx}): {exc!r}")
        continue
    transformation_time.append(time.time() - start)

    mfcc.append(mfcc_x)
    op_time.append(time.time() - start)

    print(f"{(idx/x.shape[0])*100:.2f}%")

mfcc = np.array(mfcc).astype(np.float32)
transformation_time = np.array(transformation_time)
op_time = np.array(op_time)

# No label list is kept for this feature, so a skipped frame shifts every
# later row relative to labels.npy. Make that visible.
if mfcc_failed:
    print(
        f"{len(mfcc_failed)} frame(s) skipped; mfcc.npy rows no longer line "
        f"up with labels.npy. Failed indices: {mfcc_failed}"
    )

print(f"mean transformation time: {transformation_time.mean():.4f}s")
print(f"mean operation time: {op_time.mean():.4f}s")

with open(f"{training_data}/mfcc.npy", "wb") as f:
    np.save(f, mfcc)
with open(f"{training_data}/metadata", "a") as f:
    f.write(f"mfcc.shape={mfcc.shape}\n")

# del mfcc
# del mfcc_y

In [None]:
# Shape summary of the features computed by the cells above.
# NOTE(review): the entries are not all at the same level. Assuming the cells
# ran top to bottom, ``ft``, ``spec``, ``mel_spect`` and ``power_spec`` are the
# values left bound by the last loop iteration of their cells (single-frame
# results), whereas ``mfcc`` is the stacked array of all frames. Confirm this
# mix is intended.
print(f"""
    stft.shape:         {ft.shape}
    spec.shape:         {spec.shape}
    mel_spec.shape:     {mel_spect.shape}
    powerspect.shape:   {power_spec.shape}
    mfcc.shape:         {mfcc.shape}
      """)

# Chroma features (chroma STFT)

In [None]:
# 12-bin STFT chromagram for every frame in ``x``, saved to chroma_stft.npy.
chroma = []
chroma_failed = []  # indices of frames that could not be transformed

# Timings are gathered in lists and converted once after the loop.
transformation_time = []
op_time = []

for idx, record in enumerate(x):
    start = time.time()
    try:
        chroma_x = librosa.feature.chroma_stft(
            y=record, sr=22050, n_chroma=12, hop_length=256, n_fft=2048
        )
    except Exception as exc:
        # Best effort: skip the frame, but report which one failed and why.
        chroma_failed.append(idx)
        print(f"Couldn't calculate Chroma features for signal {idx}: {exc!r}")
        continue
    transformation_time.append(time.time() - start)

    chroma.append(chroma_x)
    op_time.append(time.time() - start)

    print(f"{(idx/x.shape[0])*100:.2f}%")

chroma = np.array(chroma).astype(np.float32)
transformation_time = np.array(transformation_time)
op_time = np.array(op_time)

# No label list is kept for this feature, so a skipped frame shifts every
# later row relative to labels.npy. Make that visible.
if chroma_failed:
    print(
        f"{len(chroma_failed)} frame(s) skipped; chroma_stft.npy rows no "
        f"longer line up with labels.npy. Failed indices: {chroma_failed}"
    )

print(f"mean transformation time: {transformation_time.mean():.4f}s")
print(f"mean operation time: {op_time.mean():.4f}s")

with open(f"{training_data}/chroma_stft.npy", "wb") as f:
    np.save(f, chroma)
with open(f"{training_data}/metadata", "a") as f:
    f.write(f"chroma_stft.shape={chroma.shape}\n")

# del mfcc
# del mfcc_y

# Chroma CQT



In [None]:
# Constant-Q chromagram (12 bins) for every frame in ``x``; the stacked result
# is written to chroma_cqt.npy and its shape appended to the metadata file.
cqt_frames = []
transform_durations = []
total_durations = []

for idx, (record, label) in enumerate(zip(x, y)):
    tick = time.time()

    chroma_cqt_x = librosa.feature.chroma_cqt(
        y=record, sr=22050, n_chroma=12, hop_length=512
    )
    transform_durations.append(time.time() - tick)

    cqt_frames.append(chroma_cqt_x)
    total_durations.append(time.time() - tick)

    print(f"{(idx/x.shape[0])*100:.2f}%")

chroma = np.array(cqt_frames).astype(np.float32)
transformation_time = np.array(transform_durations)
op_time = np.array(total_durations)

print(f"mean transformation time: {transformation_time.mean():.4f}s")
print(f"mean operation time: {op_time.mean():.4f}s")

with open(f"{training_data}/chroma_cqt.npy", "wb") as cqt_file:
    np.save(cqt_file, chroma)
with open(f"{training_data}/metadata", "a") as metadata_file:
    metadata_file.write(f"chroma_cqt.shape={chroma.shape}\n")

# del mfcc
# del mfcc_y

In [None]:


# Explicit import: older librosa releases do not expose ``librosa.display``
# through a plain ``import librosa``.
import librosa.display

# Plot the first frame's chromagram. This cell follows the chroma CQT cell, so
# ``chroma`` holds the CQT chromagram here; the title previously said CENS.
plt.figure(figsize=(10, 4))
librosa.display.specshow(chroma[0], x_axis='time', y_axis='chroma', cmap='coolwarm')
plt.colorbar()
plt.title('Chroma CQT')
plt.tight_layout()
plt.show()
chroma[0].shape

In [None]:
# CENS chromagram (12 bins) for every frame in ``x``; the stacked result is
# written to chroma_cens.npy and its shape appended to the metadata file.
cens_frames = []
transform_durations = []
total_durations = []

for idx, (record, label) in enumerate(zip(x, y)):
    tick = time.time()

    chroma_cens_x = librosa.feature.chroma_cens(
        y=record, sr=22050, n_chroma=12, hop_length=512
    )
    transform_durations.append(time.time() - tick)

    cens_frames.append(chroma_cens_x)
    total_durations.append(time.time() - tick)

    print(f"{(idx/x.shape[0])*100:.2f}%")

chroma = np.array(cens_frames).astype(np.float32)
transformation_time = np.array(transform_durations)
op_time = np.array(total_durations)

print(f"mean transformation time: {transformation_time.mean():.4f}s")
print(f"mean operation time: {op_time.mean():.4f}s")

with open(f"{training_data}/chroma_cens.npy", "wb") as cens_file:
    np.save(cens_file, chroma)
with open(f"{training_data}/metadata", "a") as metadata_file:
    metadata_file.write(f"chroma_cens.shape={chroma.shape}\n")

# del mfcc
# del mfcc_y

In [None]:


# Visualise the chromagram of the first frame and show its shape.
first_chroma = chroma[0]

plt.figure(figsize=(10, 4))
librosa.display.specshow(
    first_chroma,
    x_axis='time',
    y_axis='chroma',
    cmap='coolwarm',
)
plt.colorbar()
plt.title('Chroma CENS')
plt.tight_layout()
plt.show()

first_chroma.shape

# Tonnetz

In [None]:
# Tonnetz features (computed from a CQT chromagram) for every frame in ``x``,
# saved to tonnetz.npy.
tonnetz_arr = []

# Timings are gathered in lists and converted once after the loop.
transformation_time = []
op_time = []

for idx, record in enumerate(x):
    start = time.time()

    # NOTE: this rebinds the notebook-level name ``chroma`` to a single-frame
    # chromagram, replacing the stacked array left by the previous cell.
    chroma = librosa.feature.chroma_cqt(y=record, sr=22050)
    tonnetz = librosa.feature.tonnetz(chroma=chroma, sr=22050)

    # Was ``np.training_data(...)``, which does not exist and raised
    # AttributeError on the first iteration.
    transformation_time.append(time.time() - start)

    tonnetz_arr.append(tonnetz)
    op_time.append(time.time() - start)

    print(f"{(idx/x.shape[0])*100:.2f}%")

tonnetz_arr = np.array(tonnetz_arr).astype(np.float32)
transformation_time = np.array(transformation_time)
op_time = np.array(op_time)

print(f"mean transformation time: {transformation_time.mean():.4f}s")
print(f"mean operation time: {op_time.mean():.4f}s")

with open(f"{training_data}/tonnetz.npy", "wb") as f:
    np.save(f, tonnetz_arr)
with open(f"{training_data}/metadata", "a") as f:
    # Key corrected from the copy-pasted "chroma_cens.shape".
    f.write(f"tonnetz.shape={tonnetz_arr.shape}\n")

# del mfcc
# del mfcc_y