<a href="https://colab.research.google.com/github/samG406/Calculator/blob/main/Music%20Genre%20classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install required packages
!pip install librosa
!pip install tensorflow
!pip install numpy matplotlib
!pip install deeplake


Collecting deeplake
  Downloading deeplake-4.0.0-cp310-cp310-manylinux2014_x86_64.whl.metadata (322 bytes)
Downloading deeplake-4.0.0-cp310-cp310-manylinux2014_x86_64.whl (18.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.7/18.7 MB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: deeplake
Successfully installed deeplake-4.0.0


In [None]:
# Mount Google Drive at /content/drive (Colab only); the dataset path and the
# saved-model path used later in this notebook both live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
import os
import tensorflow as tf
from tensorflow.keras.utils import to_categorical

# Define the GTZAN data path: root folder that is joined with each genre name
# to locate that genre's audio files.
DATA_PATH = '/content/drive/MyDrive/gtzan/genres/'

# Function to load audio and convert to mel spectrogram
def load_audio_to_spectrogram(file_path, n_mels=128, duration=3):
    """Load an audio clip and return its mel spectrogram in decibels.

    Args:
        file_path: Path of the audio file to read.
        n_mels: Number of mel bands computed for the spectrogram.
        duration: Seconds of audio to read from the start of the file.
            ``None`` reads the whole file and disables padding.

    Returns:
        The mel spectrogram converted to dB, using the clip's own maximum
        power as the 0 dB reference.
    """
    audio, sample_rate = librosa.load(file_path, duration=duration)

    if duration is not None:
        # A file shorter than `duration` would yield fewer time frames than a
        # full-length clip, which breaks the fixed-size reshape applied to the
        # spectrograms later on. Pad with trailing silence so every clip has
        # the same number of samples, and therefore the same number of frames.
        target_len = int(duration * sample_rate)
        if len(audio) < target_len:
            audio = np.pad(audio, (0, target_len - len(audio)))

    spectrogram = librosa.feature.melspectrogram(y=audio, sr=sample_rate, n_mels=n_mels)
    spectrogram_db = librosa.power_to_db(spectrogram, ref=np.max)
    return spectrogram_db

# Example function to display a mel spectrogram
def plot_spectrogram(spectrogram, sr=22050):
    """Render a mel spectrogram (in dB) as a labelled heat map.

    Args:
        spectrogram: 2-D dB-scaled mel spectrogram, e.g. the output of
            ``load_audio_to_spectrogram``.
        sr: Sample rate used to scale the time and mel axes. Defaults to
            22050, the value that was previously hard-coded here.
    """
    # Explicit figure/axes objects instead of the implicit pyplot state, so
    # the colorbar and title are attached to exactly this plot.
    fig, ax = plt.subplots(figsize=(10, 4))
    image = librosa.display.specshow(spectrogram, sr=sr, x_axis='time', y_axis='mel', ax=ax)
    fig.colorbar(image, ax=ax, format='%+2.0f dB')
    ax.set_title('Mel Spectrogram')
    fig.tight_layout()
    plt.show()

# Load data (note: despite the name, no augmentation is applied here)
def load_and_augment_data(data_path, genres, num_samples=100):
    """Load up to ``num_samples`` clips per genre as CNN-ready spectrograms.

    Args:
        data_path: Root folder containing one sub-folder per genre.
        genres: Sequence of genre names. A genre's position in this sequence
            is used as its integer class label.
        num_samples: Maximum number of files to read from each genre folder.

    Returns:
        Tuple ``(X, y)``: ``X`` has shape (n_clips, 128, 130, 1) and ``y`` is
        the one-hot label matrix with ``len(genres)`` columns.

    Raises:
        ValueError: If a file does not produce a 128 x 130 spectrogram.
    """
    expected_shape = (128, 130)
    X, y = [], []
    for label, genre in enumerate(genres):
        genre_path = os.path.join(data_path, genre)
        # os.listdir returns entries in arbitrary order; sort first so the
        # [:num_samples] subset is the same on every run.
        files = sorted(os.listdir(genre_path))[:num_samples]
        for file in files:
            file_path = os.path.join(genre_path, file)
            spectrogram = load_audio_to_spectrogram(file_path)
            # Fail early and name the offending file. A blind reshape would
            # either raise an opaque error or silently scramble the data.
            if spectrogram.shape != expected_shape:
                raise ValueError(
                    f"{file_path}: expected a spectrogram of shape {expected_shape}, "
                    f"got {spectrogram.shape}"
                )
            X.append(spectrogram)
            y.append(label)
    X = np.array(X).reshape(-1, 128, 130, 1)  # Shape for CNN (height, width, channels)
    y = to_categorical(y, num_classes=len(genres))  # One-hot encode labels
    return X, y

# The GTZAN genre names; a genre's position in this list is its class index
genres = [
    'blues', 'classical', 'country', 'disco', 'hiphop',
    'jazz', 'metal', 'pop', 'reggae', 'rock',
]

# Build the spectrogram array and one-hot labels from the dataset folder
X, y = load_and_augment_data(data_path=DATA_PATH, genres=genres)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam

# Define a simple CNN model for audio classification
def create_cnn_model(input_shape, num_classes):
    """Build and compile a small 2-D CNN classifier.

    Args:
        input_shape: Shape of one input sample as (height, width, channels).
        num_classes: Number of output classes (size of the softmax layer).

    Returns:
        A compiled ``Sequential`` model using the Adam optimizer,
        categorical cross-entropy loss and an accuracy metric.
    """
    model = Sequential([
        # Declare the input with an explicit Input layer. Passing input_shape=
        # to the first Conv2D is deprecated and emits a warning on recent Keras.
        tf.keras.Input(shape=input_shape),

        Conv2D(32, kernel_size=(3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),

        Conv2D(64, kernel_size=(3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),

        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Create model
input_shape = (128, 130, 1)  # Input shape matching the spectrogram dimensions
num_classes = len(genres)
model = create_cnn_model(input_shape, num_classes)

# Shuffle the samples once, with a fixed seed, before training.
# Keras carves the validation split off the END of the arrays before it does
# any shuffling, and X / y are ordered genre by genre. Without this step the
# validation set holds only the last genres, which then never appear in the
# training portion, so validation accuracy stays at zero.
shuffle_rng = np.random.default_rng(42)
shuffled_order = shuffle_rng.permutation(len(X))
X_shuffled, y_shuffled = X[shuffled_order], y[shuffled_order]

# Train the model
history = model.fit(X_shuffled, y_shuffled, epochs=10, batch_size=32, validation_split=0.2)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 996ms/step - accuracy: 0.1118 - loss: 97.5354 - val_accuracy: 0.0000e+00 - val_loss: 2.3120
Epoch 2/10
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 936ms/step - accuracy: 0.1421 - loss: 2.2912 - val_accuracy: 0.0000e+00 - val_loss: 2.3357
Epoch 3/10
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 925ms/step - accuracy: 0.1592 - loss: 2.2390 - val_accuracy: 0.0000e+00 - val_loss: 2.3795
Epoch 4/10
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 928ms/step - accuracy: 0.2012 - loss: 2.1649 - val_accuracy: 0.0000e+00 - val_loss: 2.5560
Epoch 5/10
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 962ms/step - accuracy: 0.3319 - loss: 1.8605 - val_accuracy: 0.0000e+00 - val_loss: 2.7706
Epoch 6/10
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 926ms/step - accuracy: 0.3780 - loss: 1.7089 - val_accuracy: 0.0000e+00 - val_loss: 3.09

In [None]:
# Save the model to Google Drive so it can be reloaded without retraining.
# NOTE(review): the .h5 suffix selects the legacy HDF5 format; newer Keras
# releases recommend the native .keras format. Confirm before changing it,
# because the cell that reloads the model reads this exact path.
model.save('/content/drive/MyDrive/gtzan_model.h5')

# Load the model (if needed)
# model = tf.keras.models.load_model('/content/drive/MyDrive/gtzan_model.h5')



In [None]:
import tensorflow as tf

# Load the previously saved model from the same path the save cell wrote to;
# this rebinds `model`, replacing any in-memory model from the training cell.
model = tf.keras.models.load_model('/content/drive/MyDrive/gtzan_model.h5')



In [None]:
import numpy as np
import librosa

# Define genres in the order we used during training. This must stay identical
# to the list used to build the training labels, because predicted class
# indices are mapped back to names by position in this list.
genres = ['blues', 'classical', 'country', 'disco', 'hiphop', 'jazz', 'metal', 'pop', 'reggae', 'rock']

def predict_genre(file_path, model, genre_labels=None):
    """Predict the genre of one audio file and print the result.

    Args:
        file_path: Path of the audio file to classify.
        model: Trained model that accepts input of shape
            (batch_size, 128, 130, 1).
        genre_labels: Class names in the order used during training.
            Defaults to the notebook-level ``genres`` list.

    Returns:
        Tuple ``(predicted_genre, confidence)``: the name of the most probable
        class and the probability the model assigned to it.

    Raises:
        ValueError: If the clip does not produce a 128 x 130 spectrogram.
    """
    # Fall back to the global list so existing two-argument calls keep working.
    labels = genres if genre_labels is None else genre_labels

    # Load and preprocess the audio file
    spectrogram = load_audio_to_spectrogram(file_path)

    # Check the shape explicitly so a bad clip gives an actionable message
    # instead of an opaque reshape error.
    expected_shape = (128, 130)
    if spectrogram.shape != expected_shape:
        raise ValueError(
            f"{file_path}: expected a spectrogram of shape {expected_shape}, "
            f"got {spectrogram.shape}"
        )

    # Add the batch and channel axes: (batch_size, height, width, channels)
    model_input = spectrogram.reshape(1, 128, 130, 1)

    # Make prediction; row 0 holds the class probabilities of our single clip
    probabilities = model.predict(model_input)[0]

    # Get the index with the highest probability
    predicted_genre_index = int(np.argmax(probabilities))

    # Map index to genre label
    predicted_genre = labels[predicted_genre_index]
    confidence = probabilities[predicted_genre_index]

    print(f"Predicted Genre: {predicted_genre}")
    print(f"Confidence: {confidence * 100:.2f}%")
    return predicted_genre, confidence

In [None]:
# Demo: classify a single song stored on Drive with the loaded model
song_path = '/content/drive/MyDrive/BabyElephantWalk60.wav'  # Replace with your audio file path
predicted_genre, confidence = predict_genre(file_path=song_path, model=model)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 93ms/step
Predicted Genre: disco
Confidence: 16.15%
