In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Naya

In [None]:
import os
import librosa
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Dense, Dropout, Flatten
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import EarlyStopping

In [None]:
# CRBM Class Definition (for unsupervised pretraining)
class CRBM:
    def __init__(self, n_visible, n_hidden, learning_rate=0.01):
        self.n_visible = n_visible
        self.n_hidden = n_hidden
        self.learning_rate = learning_rate
        self.weights = np.random.normal(0, 0.1, (n_visible, n_hidden))
        self.h_bias = np.zeros(n_hidden)
        self.v_bias = np.zeros(n_visible)

    def sample_h(self, v):
        h_prob = self._sigmoid(np.dot(v, self.weights) + self.h_bias)
        return h_prob, np.random.binomial(1, h_prob)

    def sample_v(self, h):
        v_prob = self._sigmoid(np.dot(h, self.weights.T) + self.v_bias)
        return v_prob, np.random.binomial(1, v_prob)

    def train(self, data, epochs=100):
        for epoch in range(epochs):
            v0 = data
            h0_prob, h0 = self.sample_h(v0)
            v1_prob, v1 = self.sample_v(h0)
            h1_prob, h1 = self.sample_h(v1)
            self.weights += np.dot(v0.T, h0 - h1) * self.learning_rate
            self.v_bias += np.mean(v0 - v1, axis=0) * self.learning_rate
            self.h_bias += np.mean(h0 - h1, axis=0) * self.learning_rate

    def _sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

# Data Preparation: Load the audio files and extract features
data_dir = '/content/drive/MyDrive/New_indexes_mp3'  # Path to the directory containing genre_index.mp3 files
features = []
labels = []

# Loop through all files in the directory
for filename in os.listdir(data_dir):
    if filename.endswith('.mp3'):
        # Extract genre from the filename
        genre = filename.split('_')[0]  # Assuming format is 'genre_index.mp3'
        file_path = os.path.join(data_dir, filename)

        # Load audio and extract Mel spectrogram
        signal, sr = librosa.load(file_path, sr=22050)
        mel_spec = librosa.feature.melspectrogram(y=signal, sr=sr)
        mel_spec_db = librosa.power_to_db(mel_spec)  # Convert to decibel scale for better representation
        feature = mel_spec_db.flatten()  # Flatten the Mel spectrogram into a 1D vector

        # Append the features and labels
        features.append(feature)
        labels.append(genre)

# Convert features to numpy array
X = np.array(features)

# Label Encoding
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(labels)  # Convert labels to integers
y_onehot = to_categorical(y_encoded)  # One-hot encode labels

# Normalize features
X = (X - np.mean(X)) / np.std(X)  # Standard normalization

# Padding features to make all vectors the same length
max_length = max([f.shape[0] for f in features])  # Find the max length of the features
X = pad_sequences(X, maxlen=max_length, padding='post', truncating='post')  # Pad sequences to max length

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y_onehot, test_size=0.2, random_state=42)


In [None]:
# CRBM Pretraining
num_layers = 2
hidden_units = 64
batch_size = 64
epochs = 10
rbm_input = X_train
rbm_layers = []

# Train the CRBM layers
for layer in range(num_layers):
    rbm = CRBM(n_visible=rbm_input.shape[1], n_hidden=hidden_units)
    print(f"\nInitializing Layer {layer + 1} with {hidden_units} hidden units...")

    for epoch in range(epochs):
        print(f"Epoch {epoch + 1}/{epochs}")
        for i in range(0, rbm_input.shape[0], batch_size):
            batch = rbm_input[i:i + batch_size]
            rbm.train(batch, epochs=1)  # Train for 1 epoch per batch
            print(f"    Processed batch {i//batch_size + 1}/{rbm_input.shape[0]//batch_size + 1}")

    rbm_layers.append(rbm)
    print(f"Layer {layer + 1} training complete.")
    _, rbm_input = rbm.sample_h(rbm_input)  # Transform data to hidden representation for next layer
    print(f"Transformed data to hidden representation for next layer.\n")

print("RBM Pretraining Complete!")


Initializing Layer 1 with 64 hidden units...
Epoch 1/10
    Processed batch 1/40


  return 1 / (1 + np.exp(-x))


    Processed batch 2/40
    Processed batch 3/40
    Processed batch 4/40
    Processed batch 5/40
    Processed batch 6/40
    Processed batch 7/40
    Processed batch 8/40
    Processed batch 9/40
    Processed batch 10/40
    Processed batch 11/40
    Processed batch 12/40
    Processed batch 13/40
    Processed batch 14/40
    Processed batch 15/40
    Processed batch 16/40
    Processed batch 17/40
    Processed batch 18/40
    Processed batch 19/40
    Processed batch 20/40
    Processed batch 21/40
    Processed batch 22/40
    Processed batch 23/40
    Processed batch 24/40
    Processed batch 25/40
    Processed batch 26/40
    Processed batch 27/40
    Processed batch 28/40
    Processed batch 29/40
    Processed batch 30/40
    Processed batch 31/40
    Processed batch 32/40
    Processed batch 33/40
    Processed batch 34/40
    Processed batch 35/40
    Processed batch 36/40
    Processed batch 37/40
    Processed batch 38/40
    Processed batch 39/40
    Processed batch 

In [None]:
# Transform data using all CRBM layers
rbm_input = X_train
for rbm in rbm_layers:
    _, rbm_input = rbm.sample_h(rbm_input)

# Final layer transformation
_, rbm_features = rbm_layers[-1].sample_h(rbm_input)
rbm_features = np.expand_dims(rbm_features, axis=-1)  # Add an extra dimension for CNN input

  return 1 / (1 + np.exp(-x))


In [None]:
# Define CNN Model
model = Sequential([
    Conv1D(64, kernel_size=3, activation='relu', input_shape=(rbm_features.shape[1], 1), kernel_regularizer=l2(0.01)),
    MaxPooling1D(pool_size=2),
    Conv1D(128, kernel_size=3, activation='relu', kernel_regularizer=l2(0.01)),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(256, activation='relu', kernel_regularizer=l2(0.01)),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')  # Output layer with softmax for classification
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# Print the model summary
model.summary()

In [None]:
# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Early stopping to avoid overfitting
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

In [None]:
# Train the model with class weights
history = model.fit(rbm_features, y_train, epochs=50, batch_size=32, validation_split=0.2,
                    callbacks=[early_stopping], verbose=1)

Epoch 1/50
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 26ms/step - accuracy: 0.1804 - loss: 4.7147 - val_accuracy: 0.2071 - val_loss: 1.8816
Epoch 2/50
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 26ms/step - accuracy: 0.1966 - loss: 1.7810 - val_accuracy: 0.2091 - val_loss: 1.6396
Epoch 3/50
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 17ms/step - accuracy: 0.2139 - loss: 1.6302 - val_accuracy: 0.2091 - val_loss: 1.6152
Epoch 4/50
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - accuracy: 0.1776 - loss: 1.6144 - val_accuracy: 0.2091 - val_loss: 1.6114
Epoch 5/50
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - accuracy: 0.2147 - loss: 1.6111 - val_accuracy: 0.2091 - val_loss: 1.6104
Epoch 6/50
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - accuracy: 0.2094 - loss: 1.6104 - val_accuracy: 0.2091 - val_loss: 1.6099
Epoch 7/50
[1m64/64[0m [32m━━━━

Another method

Save RBM features in csv

In [None]:
import os
import numpy as np
import librosa
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Dense, Dropout, Flatten
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import EarlyStopping

# CRBM Class Definition (for unsupervised pretraining)
class CRBM:
    def __init__(self, n_visible, n_hidden, learning_rate=0.01):
        self.n_visible = n_visible
        self.n_hidden = n_hidden
        self.learning_rate = learning_rate
        self.weights = np.random.normal(0, 0.1, (n_visible, n_hidden))
        self.h_bias = np.zeros(n_hidden)
        self.v_bias = np.zeros(n_visible)

    def sample_h(self, v):
        h_prob = self._sigmoid(np.dot(v, self.weights) + self.h_bias)
        return h_prob, np.random.binomial(1, h_prob)

    def sample_v(self, h):
        v_prob = self._sigmoid(np.dot(h, self.weights.T) + self.v_bias)
        return v_prob, np.random.binomial(1, v_prob)

    def train(self, data, epochs=100):
        for epoch in range(epochs):
            v0 = data
            h0_prob, h0 = self.sample_h(v0)
            v1_prob, v1 = self.sample_v(h0)
            h1_prob, h1 = self.sample_h(v1)
            self.weights += np.dot(v0.T, h0 - h1) * self.learning_rate
            self.v_bias += np.mean(v0 - v1, axis=0) * self.learning_rate
            self.h_bias += np.mean(h0 - h1, axis=0) * self.learning_rate

    def _sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

# Load raw audio files and extract features
data_dir = '/content/drive/MyDrive/New_indexes_mp3'  # Path to the directory containing audio files
features = []
labels = []

# Loop through all files in the directory
for filename in os.listdir(data_dir):
    if filename.endswith('.mp3'):
        # Extract genre from the filename
        genre = filename.split('_')[0]  # Assuming format is 'genre_index.mp3'
        file_path = os.path.join(data_dir, filename)

        # Load raw audio signal
        signal, sr = librosa.load(file_path, sr=22050)
        signal = signal[:len(signal) // 2]  # Optionally truncate to a fixed size

        # Flatten signal to 1D vector (you can also extract other features)
        feature = signal.flatten()

        # Append the features and labels
        features.append(feature)
        labels.append(genre)

# Convert features to numpy array
X = np.array(features)

# Label Encoding
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(labels)  # Convert labels to integers
y_onehot = to_categorical(y_encoded)  # One-hot encode labels

# Normalize features
X = (X - np.mean(X)) / np.std(X)  # Standard normalization

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y_onehot, test_size=0.2, random_state=42)

# CRBM Pretraining
num_layers = 2
hidden_units = 64
batch_size = 64
epochs = 10
rbm_input = X_train
rbm_layers = []

# Train the CRBM layers
for layer in range(num_layers):
    rbm = CRBM(n_visible=rbm_input.shape[1], n_hidden=hidden_units)
    print(f"\nInitializing Layer {layer + 1} with {hidden_units} hidden units...")

    for epoch in range(epochs):
        print(f"Epoch {epoch + 1}/{epochs}")
        for i in range(0, rbm_input.shape[0], batch_size):
            batch = rbm_input[i:i + batch_size]
            rbm.train(batch, epochs=1)  # Train for 1 epoch per batch
            print(f"    Processed batch {i//batch_size + 1}/{rbm_input.shape[0]//batch_size + 1}")

    rbm_layers.append(rbm)
    print(f"Layer {layer + 1} training complete.")
    _, rbm_input = rbm.sample_h(rbm_input)  # Transform data to hidden representation for next layer
    print(f"Transformed data to hidden representation for next layer.\n")

print("RBM Pretraining Complete!")

# Transform data using all CRBM layers
rbm_input = X_train
for rbm in rbm_layers:
    _, rbm_input = rbm.sample_h(rbm_input)

# Final layer transformation
_, rbm_features = rbm_layers[-1].sample_h(rbm_input)

# Save RBM features and labels to CSV
data_with_labels = np.hstack((rbm_features, y_train))  # Add the labels as the last column
csv_file_path = '/content/drive/MyDrive/crbm_features_with_labels.csv'
pd.DataFrame(data_with_labels).to_csv(csv_file_path, index=False, header=False)

print(f"Data saved to {csv_file_path}")


In [None]:
df=pd.read_csv('/content/crbm_features_with_labels.csv')

In [None]:
df.shape

(2530, 69)

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping

# Step 1: Load the CSV file
csv_file = '/content/crbm_features_with_labels.csv'  # Replace with the actual file path
data = pd.read_csv(csv_file)

# Step 2: Separate features and labels
X = data.iloc[:, :-1].values  # All columns except the last (features)
y = data.iloc[:, -1].values   # Last column (label)

# Step 3: Label Encoding and One-hot Encoding
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)  # Encode labels as integers
y_onehot = to_categorical(y_encoded)  # One-hot encode labels

# Step 4: Normalize features
X = (X - np.mean(X, axis=0)) / np.std(X, axis=0)  # Standard normalization

# Step 5: Reshape features for CNN (1D convolution expects 3D input: samples, timesteps, features)
X = np.expand_dims(X, axis=-1)  # Add an extra dimension for CNN input

# Step 6: Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y_onehot, test_size=0.2, random_state=42)

# Step 7: Define the CNN model
model = Sequential([
    Conv1D(64, kernel_size=3, activation='relu', input_shape=(X_train.shape[1], 1)),
    MaxPooling1D(pool_size=2),
    Conv1D(128, kernel_size=3, activation='relu'),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')  # Output layer
])

# Step 8: Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Step 9: Early stopping to prevent overfitting
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Step 10: Train the model
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test),
                    callbacks=[early_stopping], verbose=1)



Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 55ms/step - accuracy: 0.7754 - loss: 0.5270 - val_accuracy: 0.8043 - val_loss: 0.4279
Epoch 2/50
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7954 - loss: 0.4488 - val_accuracy: 0.8063 - val_loss: 0.4153
Epoch 3/50
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7855 - loss: 0.4219 - val_accuracy: 0.8063 - val_loss: 0.4131
Epoch 4/50
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7978 - loss: 0.3989 - val_accuracy: 0.8043 - val_loss: 0.4187
Epoch 5/50
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8023 - loss: 0.3927 - val_accuracy: 0.8103 - val_loss: 0.4210
Epoch 6/50
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8132 - loss: 0.3633 - val_accuracy: 0.7866 - val_loss: 0.4326
Epoch 7/50
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━

In [None]:
# Save the model as well
model.save('/content/drive/MyDrive/CRBM_store_in_CSV.h5')



In [None]:
# Step 11: Evaluate the model
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=1)
print(f"Test Accuracy: {test_accuracy:.4f}")


[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8110 - loss: 0.4082 
Test Accuracy: 0.8063
