In [4]:
#==================================================================
#                    IMPORTING DEPENDENCIES
#==================================================================

# import tensorflow as tf
# import glob
# import pandas as pd
# import numpy as np
# import matplotlib.pyplot as plt
# import seaborn as sns
# import librosa as lb
# import librosa.display
# import IPython.display as ipd
# import os
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import Conv2D,Dense, Dropout,  MaxPooling2D, Flatten
# from tensorflow.keras.metrics import Accuracy, Precision
# from tensorflow.keras.utils import to_categorical
# from sklearn.model_selection import train_test_split
# from sklearn.preprocessing import LabelEncoder

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Dense, Dropout, MaxPooling2D, Flatten
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.metrics import Accuracy, Precision

import glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import librosa as lb
import librosa.display
import IPython.display as ipd
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [5]:
#-------------------------------------------------------------------
#                       Feature Selection
#-------------------------------------------------------------------
# ipd.Audio(audio_files[0])


# Number of audio samples in one 30-second clip at a 22050 Hz sample rate.
SAMPLES_PER_TRACK = 30 * 22050

# Collect every .wav file that sits exactly one folder below the dataset root.
audio_files = glob.glob("C:\\Datasets\\Data\\genres_original\\*\\*.wav")



# def extract_mel_spectrogram(file_path, n_mels = 128):
#     y, sr = librosa.load(file_path, duration=30)
#     S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels)
#     S_DB = librosa.power_to_db(S, ref=np.max)
#     return S_DB


def extract_mel_spectrogram(file_path, n_mels=128, duration=30, sr=22050):
    """Compute a dB-scaled mel spectrogram for a single audio file.

    Parameters
    ----------
    file_path : str
        Path of the audio file to load.
    n_mels : int, default 128
        Number of mel bands requested from ``librosa.feature.melspectrogram``.
    duration : float, default 30
        Maximum number of seconds read from the start of the file.
    sr : int, default 22050
        Sample rate passed to ``librosa.load``. Previously hard-coded inside
        the body; the default keeps the old behaviour.

    Returns
    -------
    numpy.ndarray or None
        The power spectrogram converted to dB relative to its own maximum
        (``ref=np.max``), or ``None`` when the file is empty or any step
        raises.

    Notes
    -----
    Failures are deliberately best-effort: the error is printed and ``None``
    is returned so that one unreadable file does not abort a whole dataset
    scan. Callers must check for ``None``.
    """
    try:
        # Load at most `duration` seconds at the requested sample rate.
        y, sr = librosa.load(file_path, duration=duration, sr=sr)

        # A zero-length signal cannot produce a usable spectrogram.
        if y.size == 0:
            print(f"Warning: Empty audio file {file_path}")
            return None

        # Mel power spectrogram, then power -> dB relative to the peak value.
        S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels)
        S_DB = librosa.power_to_db(S, ref=np.max)

        return S_DB

    except Exception as e:
        # Broad on purpose: report the file and let the caller skip it.
        print(f"Error loading {file_path}: {e}")
        return None



In [49]:
#--------------------------------------------------------------------
#                        DATA PREPROCESSING
#--------------------------------------------------------------------


# Below is the earlier feature-extraction loop over the dataset; it doesn't really help much.
# Kept only for reference.
# data = []
# genres = 'blues classical country disco hiphop jazz metal pop reggae rock'.split()
# for genre in genres:
#     folder = f'C:\Datasets\Data\genres_original\{genre}'
#     for filename in os.listdir(folder):
#         file_path = os.path.join(folder, filename)
#         features = extract_features(file_path)
#         data.append([*features, genre])

# df = pd.DataFrame(data)




  folder = f'C:\Datasets\Data\genres_original\{genre}'
  folder = f'C:\Datasets\Data\genres_original\{genre}'


In [9]:
#================================================================================
#                               DATASET LOADING
#================================================================================


# Loading the dataset.
# Preparing the X (features) / y (labels) pairs.
# genres = "blues classical country disco hiphop jazz metal pop reggae rock".split(" ")
# X, y = [], []
# for genre in genres:
#     folder = os.path.join(r"C:\Datasets\Data\genres_original", genre)
#     for filename in os.listdir(folder):
#         if filename.endswith(".wav"):
#             filepath = os.path.join(folder, filename)
#             mel = extract_mel_spectrogram(filepath)
#             if mel.shape[1] >= 660:  # Ensure fixed width
#                 mel = mel[:, :660]   # Shape: (128, 660)
#                 X.append(mel)
#                 y.append(genre)

# X = np.array(X)
# X = X[..., np.newaxis]  # Shape: (samples, 128, 660, 1)

# label_encoder = LabelEncoder()
# y_encoded = label_encoder.fit_transform(y)
# y_cat = to_categorical(y_encoded)


def load_dataset_safe(base_path, genres=None, n_frames=660):
    """Load mel spectrograms and one-hot labels from per-genre folders.

    Each genre is expected to be a sub-folder of ``base_path`` containing
    ``.wav`` files. Every file is passed to ``extract_mel_spectrogram``;
    spectrograms with at least ``n_frames`` columns are cropped to exactly
    ``n_frames`` columns and kept, everything else is recorded as skipped.

    Parameters
    ----------
    base_path : str
        Directory that contains one sub-folder per genre.
    genres : iterable of str, optional
        Genre folder names to scan. Defaults to the ten genre names
        blues ... rock that were previously hard-coded here.
    n_frames : int, default 660
        Required spectrogram width; wider spectrograms are truncated to it.

    Returns
    -------
    X : numpy.ndarray
        Stacked spectrograms with a trailing channel axis of size 1.
    y_cat : numpy.ndarray
        One-hot encoded labels, one column per genre actually loaded.
    label_encoder : LabelEncoder
        The fitted encoder; ``label_encoder.classes_`` maps columns to names.

    Raises
    ------
    ValueError
        If no usable file was found under ``base_path``.
    """
    if genres is None:
        genres = "blues classical country disco hiphop jazz metal pop reggae rock".split()
    X, y = [], []
    skipped_files = []

    for genre in genres:
        folder = os.path.join(base_path, genre)
        print(f"Processing {genre}...")

        if not os.path.isdir(folder):
            print(f"Folder {folder} does not exist!")
            continue

        file_count = 0
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # sample order (and any seeded split made from it) reproducible.
        for filename in sorted(os.listdir(folder)):
            # Accept ".wav" regardless of case (e.g. ".WAV").
            if not filename.lower().endswith(".wav"):
                continue

            filepath = os.path.join(folder, filename)
            mel = extract_mel_spectrogram(filepath)

            # Keep only spectrograms wide enough to crop to a fixed width,
            # so every sample stacks into one array.
            if mel is not None and mel.shape[1] >= n_frames:
                X.append(mel[:, :n_frames])
                y.append(genre)
                file_count += 1
            else:
                skipped_files.append(filepath)

        print(f"Loaded {file_count} files from {genre}")

    if skipped_files:
        print(f"\nSkipped {len(skipped_files)} problematic files:")
        for file in skipped_files[:5]:  # Show first 5
            print(f"  {file}")
        if len(skipped_files) > 5:
            print(f"  ... and {len(skipped_files) - 5} more")

    # Fail early with a clear message rather than an obscure reduction error
    # from encoding an empty label list further down.
    if not X:
        raise ValueError(
            f"No usable .wav files found under {base_path!r}; "
            "check the path and the genre folder names."
        )

    # Stack into one array and add the channel axis expected by Conv2D.
    X = np.array(X)
    X = X[..., np.newaxis]

    # Map genre names to integers, then to one-hot rows.
    label_encoder = LabelEncoder()
    y_encoded = label_encoder.fit_transform(y)
    y_cat = to_categorical(y_encoded, num_classes=len(label_encoder.classes_))

    print("\nDataset loaded successfully!")
    print(f"Total samples: {len(X)}")
    print(f"Input shape: {X.shape}")
    print(f"Output shape: {y_cat.shape}")

    return X, y_cat, label_encoder




In [12]:
#=============================================================================
#                               CNN MODEL
#=============================================================================



# # Previous model (performed poorly)
# # Uses x:y system 
# # music_model = Sequential([
# #     Dense(256, activation='relu', input_shape=(X_train.shape[1],)),
# #     Dropout(0.3),
# #     Dense(128, activation='relu'),
# #     Dropout(0.3),
# #     Dense(64, activation='relu'),
# #     Dense(10, activation='softmax')
# # ])

# # music_model.compile(optimizer = 'adam', loss = 'sparse_categorical_crossentropy',metrics = ['accuracy', 'precision'] )

# # music_model.fit(X_train, y_train, epochs=30, validation_data=(X_test, y_test))

# #Strings of feature 
# # x = df.iloc[:, :-1]  
# # y = df.iloc[:, -1]   

# #This thing encodes above stuff
# label_encoder = LabelEncoder()
# y = label_encoder.fit_transform(y)

# # Normal stuff yk (train test split) --> NO need to explain
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# # This model trains a CNN (Conv2D and MaxPooling2D layers) on the mel spectrograms extracted from the audio.
# # Much better than the previous dense model.
# music_model = Sequential([
#     Conv2D(32, (3, 3), activation='relu', input_shape=(128, 660, 1)),
#     MaxPooling2D((2, 2)),
#     Dropout(0.3),

#     Conv2D(64, (3, 3), activation='relu'),
#     MaxPooling2D((2, 2)),
#     Dropout(0.3),

#     Conv2D(128, (3, 3), activation='relu'),
#     MaxPooling2D((2, 2)),
#     Dropout(0.3),

#     Flatten(),
#     Dense(256, activation='relu'),
#     Dropout(0.4),
#     Dense(10, activation='softmax')
# ])

# music_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy', 'precision'])

# music_model.summary()
# # Train
# music_model.fit(X_train, y_train, epochs=30, validation_data=(X_test, y_test))



# Load spectrograms (X), one-hot labels (y) and the fitted label encoder.
base_path = r"C:\Datasets\Data\genres_original"
X, y, label_encoder = load_dataset_safe(base_path)

# Hold out 20% of the samples; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print(f"\nTraining set: {X_train.shape}")
print(f"Test set: {X_test.shape}")

# Build the model: three Conv2D -> MaxPooling2D -> Dropout stages followed by
# a dense classifier head.
music_model = Sequential([
    # Declare the input with an explicit Input object instead of passing
    # `input_shape=` to the first Conv2D (which makes Keras emit a warning).
    # The shape is taken from the data so it always matches what was loaded.
    keras.Input(shape=X_train.shape[1:]),
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Dropout(0.25),

    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Dropout(0.25),

    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Dropout(0.25),

    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    # One output unit per one-hot label column, so the output width always
    # matches the targets even if fewer genres were loaded.
    Dense(y_train.shape[1], activation='softmax')
])



  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/30


ValueError: Arguments `target` and `output` must have the same rank (ndim). Received: target.shape=(None,), output.shape=(None, 10)

In [66]:
#=================================================================
#                      COMPILATION AND TRAINING
#=================================================================

# Configure training. categorical_crossentropy is used with the one-hot
# targets held in y_train / y_test.
music_model.compile(
    loss='categorical_crossentropy',
    metrics=['accuracy'],
    optimizer='adam',
)

print("\nModel Summary:")
music_model.summary()

# Fit for 30 epochs in batches of 32, passing the held-out split as
# validation data; the returned History object is kept in `history`.
print("\nStarting training...")
history = music_model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_test, y_test),
    batch_size=32,
    epochs=30,
    verbose=1,
)

print("\nTraining completed!")
print(f"Genre classes: {label_encoder.classes_}")

In [67]:
# `genre_names` is not assigned in any cell shown in this notebook, so a fresh
# kernel would raise NameError here. Define it from the fitted encoder first.
genre_names = label_encoder.classes_
print(genre_names)

['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop'
 'reggae' 'rock']
