In [None]:
# Mount Google Drive inside the Colab runtime so that files under
# /content/drive (used as the dataset location further down) are readable.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Stacked_RBM

In [None]:
import os
import numpy as np
import librosa
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

# Step 1: Load and preprocess data
def load_data(dataset_path, max_files=1000, sr=22050, duration=5):
    """Load ``.mp3`` clips from a directory and turn them into Mel-spectrograms.

    File names are expected to follow the ``genre_index.mp3`` pattern; the text
    before the first underscore is used as the label.

    Args:
        dataset_path: Directory containing the audio files.
        max_files: Maximum number of ``.mp3`` files to process. The limit is
            applied after filtering by extension, so other directory entries
            do not count against it. ``None`` means no limit.
        sr: Sample rate the audio is resampled to when loading.
        duration: Number of seconds read from the start of each clip. Shorter
            clips are zero-padded to this length so that every spectrogram has
            the same number of frames and the results stack into one array.

    Returns:
        A ``(spectrograms, labels)`` tuple of NumPy arrays. ``spectrograms``
        holds one dB-scaled Mel-spectrogram per clip with a trailing channel
        axis; ``labels`` holds the matching genre strings. Both are empty
        arrays when nothing could be loaded.

    Files that fail to load (or contain no samples) are reported on stdout and
    skipped rather than aborting the whole run.
    """
    target_len = int(round(sr * duration))

    # Filter first, then apply the limit: slicing the raw directory listing
    # would let non-audio entries silently reduce the number of clips loaded.
    mp3_names = [name for name in os.listdir(dataset_path) if name.endswith(".mp3")]

    spectrograms = []
    labels = []
    for file_name in mp3_names[:max_files]:
        # Built outside the try block so the error message can always use it.
        file_path = os.path.join(dataset_path, file_name)
        try:
            # Extract genre from file name (format: genre_index.mp3)
            genre = file_name.split("_")[0]

            audio, _ = librosa.load(file_path, sr=sr, duration=duration)
            if len(audio) == 0:
                raise ValueError("audio clip contains no samples")

            # Force a fixed number of samples. Without this, clips shorter than
            # `duration` yield fewer frames and np.array() below cannot build a
            # regular 4-D array.
            audio = audio[:target_len]
            if len(audio) < target_len:
                audio = np.pad(audio, (0, target_len - len(audio)))

            spectrogram = librosa.feature.melspectrogram(y=audio, sr=sr, n_mels=128, fmax=8000)
            spectrogram = librosa.power_to_db(spectrogram, ref=np.max)  # Convert to dB scale
            spectrogram = np.expand_dims(spectrogram, axis=-1)  # Add channel dimension
            spectrograms.append(spectrogram)
            labels.append(genre)
        except Exception as e:
            # Deliberately best-effort: one unreadable file should not stop the load.
            print(f"Error loading {file_path}: {e}")
    return np.array(spectrograms), np.array(labels)

# Normalize spectrograms
def normalize_spectrograms(spectrograms):
    """Standardise each spectrogram in a batch to zero mean and unit variance.

    Statistics are computed independently for every sample (and every trailing
    channel) over axes 1 and 2, so one clip's loudness does not affect another.

    Args:
        spectrograms: Array whose first axis indexes samples and whose axes 1
            and 2 are the ones reduced over.

    Returns:
        An array of the same shape as the input. A small epsilon is added to
        the standard deviation so constant inputs map to zeros instead of
        dividing by zero.
    """
    reduce_axes = (1, 2)
    epsilon = 1e-8
    centre = np.mean(spectrograms, axis=reduce_axes, keepdims=True)
    spread = np.std(spectrograms, axis=reduce_axes, keepdims=True)
    centred = spectrograms - centre
    return centred / (spread + epsilon)

# Step 2: Define CDBN (Convolutional Deep Belief Network)
def build_cdbn(input_shape):
    """Build the convolutional feature extractor used for pre-training.

    The network is a plain Keras ``Sequential`` stack: a 32-filter 3x3
    sigmoid convolution, 2x2 max pooling, a 64-filter 3x3 sigmoid
    convolution and a 2x2 up-sampling layer. The layers are ordinary
    ``Conv2D`` layers trained by back-propagation, not contrastive-divergence
    RBMs, despite the "RBM" naming used elsewhere in this notebook.

    Args:
        input_shape: Shape of one input sample without the batch axis,
            e.g. ``X_train.shape[1:]``.

    Returns:
        An uncompiled ``Sequential`` model. Its output has 64 channels; the
        spatial size equals the input's only when both spatial dimensions are
        even, because pooling halves (rounding down) and up-sampling doubles.
    """
    # NOTE(review): the output is 64 sigmoid channels (values in [0, 1]) while
    # train_cdbn fits it against the 1-channel standardised input, so the MSE
    # can only be driven down so far. Consider a final 1-channel linear Conv2D
    # if a true reconstruction objective is wanted - confirm intent first.
    model = models.Sequential([
        # Declare the input shape with an explicit Input layer; passing
        # `input_shape=` to the first Conv2D is the deprecated form that
        # triggers a Keras warning.
        layers.Input(shape=input_shape),
        # First convolutional stage
        layers.Conv2D(32, (3, 3), activation='sigmoid', padding='same'),
        layers.MaxPooling2D(pool_size=(2, 2)),
        # Second convolutional stage
        layers.Conv2D(64, (3, 3), activation='sigmoid', padding='same'),
        layers.UpSampling2D(size=(2, 2)),  # Undo the 2x2 pooling
    ])
    return model

# Step 3: Train CDBN (unsupervised pre-training)
def train_cdbn(cdbn, spectrograms, epochs=10, batch_size=32):
    """Pre-train the feature extractor by fitting it to its own input.

    The model is compiled with the Adam optimiser and mean-squared-error loss,
    then fitted with ``spectrograms`` as both input and target (no labels are
    used). The model is modified in place.

    Args:
        cdbn: Model to train; must provide ``compile`` and ``fit``.
        spectrograms: Training inputs, also used as the targets.
        epochs: Number of passes over the data.
        batch_size: Samples per gradient update.

    Returns:
        Whatever ``cdbn.fit`` returns (for Keras models, the training history),
        so callers can inspect the loss curve.
    """
    cdbn.compile(optimizer='adam', loss='mse')
    return cdbn.fit(
        spectrograms,
        spectrograms,
        epochs=epochs,
        batch_size=batch_size,
        shuffle=True,
    )

# Step 4: Add classifier and fine-tune
def add_classifier(cdbn, num_classes):
    """Stack a dense classification head on top of the feature extractor.

    The head flattens the extractor's output, applies a 128-unit ReLU layer
    with 50% dropout, and ends in a softmax over ``num_classes``. Nothing here
    freezes ``cdbn``, so its weights keep training during fine-tuning.

    Args:
        cdbn: Feature-extractor model used as the first layer.
        num_classes: Number of output classes.

    Returns:
        A compiled ``Sequential`` model (Adam optimiser, sparse categorical
        cross-entropy loss, accuracy metric) expecting integer class labels.
    """
    head = [
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation='softmax'),
    ]
    classifier = models.Sequential([cdbn, *head])
    classifier.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

# Step 5: Evaluate the model
def evaluate_model(model, X_test, y_test):
    """Print the classification accuracy of ``model`` on a held-out set.

    Args:
        model: Object whose ``predict`` returns per-class scores, one row per
            sample.
        X_test: Inputs passed to ``model.predict``.
        y_test: True integer class labels, compared against the arg-max of the
            predicted scores.

    Returns:
        None. The accuracy is written to stdout as a percentage with two
        decimals.
    """
    class_scores = model.predict(X_test)
    predicted_labels = np.argmax(class_scores, axis=1)
    accuracy = accuracy_score(y_test, predicted_labels)
    print(f"Test Accuracy: {accuracy * 100:.2f}%")

# Main execution
# Seed Python, NumPy and TensorFlow so weight initialisation, batch shuffling
# and dropout repeat across "Restart & Run All" executions.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Load dataset
dataset_path = "/content/drive/MyDrive/New_indexes_mp3"  # Replace with your dataset path
spectrograms, labels = load_data(dataset_path, max_files=1000)  # Load up to 1000 files
if len(spectrograms) == 0:
    # Fail here with a clear message instead of deep inside normalisation or
    # train_test_split when the folder is empty or Drive is not mounted.
    raise RuntimeError(f"No .mp3 files could be loaded from {dataset_path}")
spectrograms = normalize_spectrograms(spectrograms)

# Encode labels (genre strings -> integer class ids)
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(labels)

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(
    spectrograms, labels_encoded, test_size=0.2, random_state=SEED
)

# Build and train CDBN (unsupervised pre-training)
print("Building and training CDBN...")
input_shape = X_train.shape[1:]  # Per-sample shape, batch axis dropped
cdbn = build_cdbn(input_shape)
train_cdbn(cdbn, X_train)

# Add classifier and fine-tune; 20% of the training split is held out for validation
print("Adding classifier and fine-tuning...")
num_classes = len(label_encoder.classes_)
model = add_classifier(cdbn, num_classes)
model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.2)

# Evaluate on test set
print("Evaluating on test set...")
evaluate_model(model, X_test, y_test)

Building and training CDBN...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 18ms/step - loss: 1.1216
Epoch 2/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - loss: 0.9837
Epoch 3/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - loss: 0.9601
Epoch 4/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - loss: 0.9220
Epoch 5/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - loss: 0.8646
Epoch 6/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - loss: 0.7894
Epoch 7/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: 0.7314
Epoch 8/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: 0.7036
Epoch 9/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: 0.6916
Epoch 10/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - loss: 0.6836

In [None]:
def predict_genre(model, file_path, label_encoder, sr=22050, duration=5):
    """Predict the genre label of a single audio file.

    The clip goes through the same steps as the training data: load the first
    ``duration`` seconds, compute a 128-band Mel-spectrogram in dB, add a
    channel axis and standardise it with ``normalize_spectrograms``.

    Args:
        model: Trained classifier whose ``predict`` returns per-class scores.
        file_path: Path to the audio file.
        label_encoder: Fitted encoder used to map the predicted class index
            back to a genre name via ``inverse_transform``.
        sr: Sample rate the audio is resampled to. Must match the value used
            when the training data was prepared.
        duration: Seconds of audio to analyse. Must match training. Shorter
            clips are zero-padded so the spectrogram has the number of frames
            the model was built for.

    Returns:
        The predicted genre label.

    Raises:
        ValueError: If no audio samples could be read from ``file_path``.
    """
    target_len = int(round(sr * duration))

    # Load and preprocess the audio file
    audio, _ = librosa.load(file_path, sr=sr, duration=duration)
    if len(audio) == 0:
        raise ValueError(f"No audio samples could be read from {file_path}")

    # The model has a fixed input shape: without padding, a clip shorter than
    # `duration` yields too few frames and model.predict fails on the mismatch.
    audio = audio[:target_len]
    if len(audio) < target_len:
        audio = np.pad(audio, (0, target_len - len(audio)))

    spectrogram = librosa.feature.melspectrogram(y=audio, sr=sr, n_mels=128, fmax=8000)
    spectrogram = librosa.power_to_db(spectrogram, ref=np.max)  # Convert to dB scale
    spectrogram = np.expand_dims(spectrogram, axis=-1)  # Add channel dimension
    batch = normalize_spectrograms(np.array([spectrogram]))  # Batch of one, normalized

    # Predict genre
    prediction = model.predict(batch)
    predicted_class = np.argmax(prediction, axis=1)
    predicted_genre = label_encoder.inverse_transform(predicted_class)
    return predicted_genre[0]

# Predict genre for a new audio file
# NOTE(review): this clip sits in the same folder the training data was loaded
# from, so it may have been part of the training set - confirm before reading
# the result as evidence of generalisation.
new_audio_path = "/content/drive/MyDrive/New_indexes_mp3/Gazal_389.mp3"  # Replace with the path to your new audio file
# `model` and `label_encoder` are the objects created by the training cell; run that cell first.
predicted_genre = predict_genre(model, new_audio_path, label_encoder)
print(f"Predicted Genre: {predicted_genre}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step
Predicted Genre: Gazal
