CRNN Model for Music Embeddings

In [13]:
# STEP 1: Imports
import sqlite3
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv1D, BatchNormalization, Dropout, GRU, Dense
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.sequence import pad_sequences

ModuleNotFoundError: No module named 'sklearn'

In [None]:
# STEP 2: Connect to SQLite database
conn = sqlite3.connect("/mnt/c/Users/dev/Desktop/MR/backend/app/db/db.sqlite3")  # Use the correct path  /mnt/c/Users/dev/Desktop/MR/backend/app/db/db.sqlite3  backend/app/db/db.sqlite3

In [None]:
# STEP 3: Load segment data (with genre for labels)
query = """
SELECT s.trackID, s.segment_index,
       s.timbre_0, s.timbre_1, s.timbre_2, s.timbre_3, s.timbre_4, s.timbre_5,
       s.timbre_6, s.timbre_7, s.timbre_8, s.timbre_9, s.timbre_10, s.timbre_11,
       s.pitch_0, s.pitch_1, s.pitch_2, s.pitch_3, s.pitch_4, s.pitch_5,
       s.pitch_6, s.pitch_7, s.pitch_8, s.pitch_9, s.pitch_10, s.pitch_11,
       s.loudness_max, s.confidence
FROM Segments s
JOIN Tracks t ON s.trackID = t.trackID
ORDER BY s.trackID, s.segment_index
"""

df = pd.read_sql_query(query, conn)

DatabaseError: Execution failed on sql '
SELECT s.trackID, s.segment_index,
       s.timbre_0, s.timbre_1, s.timbre_2, s.timbre_3, s.timbre_4, s.timbre_5,
       s.timbre_6, s.timbre_7, s.timbre_8, s.timbre_9, s.timbre_10, s.timbre_11,
       s.pitch_0, s.pitch_1, s.pitch_2, s.pitch_3, s.pitch_4, s.pitch_5,
       s.pitch_6, s.pitch_7, s.pitch_8, s.pitch_9, s.pitch_10, s.pitch_11,
       s.loudness_max, s.confidence
FROM Segments s
JOIN Tracks t ON s.trackID = t.trackID
ORDER BY s.trackID, s.segment_index
': no such table: Segments

In [None]:
# STEP 4: Preprocess features
FEATURE_COLUMNS = [col for col in df.columns if col.startswith("timbre_") or col.startswith("pitch_")] + ["loudness_max", "confidence"]
feature_dim = len(FEATURE_COLUMNS)  # should be 26

In [None]:
# STEP 5: Group by trackID and build fixed-length sequences
grouped = df.groupby("trackID")
X = []
y = []
num_segments = 500

for track_id, group in grouped:
    features = group[FEATURE_COLUMNS].to_numpy()
    label = group["genre"].iloc[0]
    
    if features.shape[0] < num_segments:
        # Pad
        padded = np.pad(features, ((0, num_segments - features.shape[0]), (0, 0)), mode='constant')
    else:
        # Truncate
        padded = features[:num_segments]

    X.append(padded)
    y.append(label)

X = np.array(X)  # shape = (num_tracks, 500, 26)

In [None]:
# STEP 6: Encode labels
le = LabelEncoder()
y_encoded = le.fit_transform(y)
y_categorical = to_categorical(y_encoded)
num_classes = y_categorical.shape[1]

In [None]:
# STEP 7: Build the CRNN model
inputs = Input(shape=(num_segments, feature_dim))

x = Conv1D(64, kernel_size=3, padding='same', activation='elu')(inputs)
x = BatchNormalization()(x)
x = Dropout(0.1)(x)

x = Conv1D(128, kernel_size=3, padding='same', activation='elu')(x)
x = BatchNormalization()(x)
x = Dropout(0.1)(x)

x = Conv1D(128, kernel_size=3, padding='same', activation='elu')(x)
x = BatchNormalization()(x)
x = Dropout(0.1)(x)

x = GRU(64, return_sequences=True)(x)
x = GRU(64)(x)

outputs = Dense(num_classes, activation='softmax')(x)

model = Model(inputs, outputs)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

In [None]:
# STEP 8: Train the model
history = model.fit(X, y_categorical, batch_size=32, epochs=20, validation_split=0.2)