<a href="https://colab.research.google.com/github/nirmalaselvaraj963-sys/ml-work/blob/main/ann%2Ccnn%2Clstm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import zipfile
import os

# Paths to your zip files
melody_zip = "/content/melody songs.zip"
rap_zip = "/content/Rap song.zip"

# Extract them
with zipfile.ZipFile(melody_zip, 'r') as zip_ref:
    zip_ref.extractall("/content/dataset/Melody")

with zipfile.ZipFile(rap_zip, 'r') as zip_ref:
    zip_ref.extractall("/content/dataset/Rap")


In [17]:
import os

for folder in ["/content/audio_dataset/melody", "/content/audio_dataset/rap"]:
    print(f"\n📂 Checking: {folder}")
    if os.path.exists(folder):
        files = os.listdir(folder)
        print(f"Total files: {len(files)}")
        for f in files[:10]:
            print("  →", f)
    else:
        print("❌ Folder does not exist")



📂 Checking: /content/audio_dataset/melody
Total files: 1
  → melody songs

📂 Checking: /content/audio_dataset/rap
Total files: 1
  → Rap song


In [21]:
import shutil, os

# Remove any old extraction folders
for folder in ["/content/audio_dataset", "/content/temp_extract"]:
    if os.path.exists(folder):
        shutil.rmtree(folder)

print("🧹 Cleaned old folders successfully!")


🧹 Cleaned old folders successfully!


In [23]:
import zipfile, os, shutil  #cnn

def extract_nested_zip(zip_path, target_folder):
    temp_dir = "/content/temp_extract"
    os.makedirs(temp_dir, exist_ok=True)
    with zipfile.ZipFile(zip_path, 'r') as z:
        z.extractall(temp_dir)

    # Move all audio files (.mp3/.wav)
    os.makedirs(target_folder, exist_ok=True)
    for root, _, files in os.walk(temp_dir):
        for f in files:
            if f.lower().endswith(('.mp3', '.wav')):
                shutil.move(os.path.join(root, f), os.path.join(target_folder, f))
    shutil.rmtree(temp_dir)

# Base dataset folder
os.makedirs("/content/audio_dataset", exist_ok=True)

# Your uploaded ZIP paths (change names if needed)
rap_zip = "/content/Rap song.zip"
melody_zip = "/content/melody songs.zip"

# Extract both
extract_nested_zip(rap_zip, "/content/audio_dataset/rap")
extract_nested_zip(melody_zip, "/content/audio_dataset/melody")

print("✅ Extracted and organized all audio files correctly!")


✅ Extracted and organized all audio files correctly!


In [32]:
!find /content/audio_dataset -type f | grep -E ".mp3|.wav"


/content/audio_dataset/rap/Kappe-Varroh.mp3
/content/audio_dataset/rap/170CM.mp3
/content/audio_dataset/rap/Engeyum Eppothum.mp3
/content/audio_dataset/rap/Champagini.mp3
/content/audio_dataset/rap/Naan Kudikka Poren.mp3
/content/audio_dataset/rap/Madai-Thiranthu-MassTamilan.com.mp3
/content/audio_dataset/rap/Ini-Illaye-Hum.mp3
/content/audio_dataset/rap/Sollu Thamizhan (Somberi) - Havoc Brothers  Official Lyrics Video.mp3
/content/audio_dataset/rap/Rekka-Rekka-MassTamilan.dev.mp3
/content/audio_dataset/rap/Paiya Dei.mp3
/content/audio_dataset/rap/Aathichudi.mp3
/content/audio_dataset/rap/His Name is John.mp3
/content/audio_dataset/rap/AK - The Tiger.mp3
/content/audio_dataset/rap/Porkkalam-Tamil-Rap.mp3
/content/audio_dataset/rap/Vaadi-Pulla-Vaadi.mp3
/content/audio_dataset/rap/Semma-Weightu-MassTamilan.io.mp3
/content/audio_dataset/rap/Club-La-Mabula.mp3
/content/audio_dataset/rap/Pakkam-Vanthu.mp3
/content/audio_dataset/rap/Magudi-Magudi.mp3
/content/audio_dataset/rap/Kaathu Mela.mp

In [36]:
# Step 4: CNN for Song Classification
import os
import librosa
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tqdm import tqdm

# Path to dataset (replace if different)
data_dir = "/content/audio_dataset" # Changed from /content/dataset

# Parameters
sample_rate = 22050
duration = 30  # seconds
samples_per_track = sample_rate * duration

# Helper function to extract MFCC features
def extract_features(file_path, n_mfcc=40):
    try:
        y, sr = librosa.load(file_path, duration=30)
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
        return mfcc
    except Exception as e:
        print(f"❌ Error processing {file_path}: {e}")
        return None

# Load dataset
X, y = [], []
labels = sorted(os.listdir(data_dir))  # Example: ['melody', 'rap']

print("🎧 Loading dataset and extracting features...")
print(f"Looking for labels in: {data_dir}") # Debugging print
for label_idx, label in enumerate(labels):
    folder_path = os.path.join(data_dir, label)
    print(f"Checking folder: {folder_path}") # Debugging print
    if not os.path.isdir(folder_path):
        print(f"Skipping non-directory: {folder_path}") # Debugging print
        continue
    print(f"Processing label: {label}") # Debugging print
    file_count = 0 # Debugging counter
    for file_name in tqdm(os.listdir(folder_path), desc=f"Processing {label}"):
        if file_name.lower().endswith((".mp3", ".wav")):
            file_path = os.path.join(folder_path, file_name)
            # print(f"Attempting to extract features from: {file_path}") # Debugging print (can be noisy)
            mfcc = extract_features(file_path)
            if mfcc is not None:
                X.append(mfcc)
                y.append(label_idx)
                file_count += 1 # Debugging counter
    print(f"Finished processing label {label}, found {file_count} audio files.") # Debugging print


# Convert lists to arrays
if len(X) == 0:
    raise ValueError("No data loaded. Please check the dataset directory and file formats.")

X = np.array(X)
y = np.array(y)
X = X[..., np.newaxis]  # Add channel dimension
y = to_categorical(y, num_classes=len(labels))  # ✅ fixed line

print(f"✅ Data loaded successfully! Shape: {X.shape}, Labels: {y.shape}")

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# CNN model
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=X_train.shape[1:]),
    MaxPooling2D((2, 2)),
    Dropout(0.3),

    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Dropout(0.3),

    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(len(labels), activation='softmax')
])

model.compile(optimizer=Adam(learning_rate=0.001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

print("🚀 Training CNN model...")
history = model.fit(
    X_train, y_train,
    epochs=30,
    batch_size=16, # Added closing parenthesis and batch_size
    verbose=1 # Added verbose for training progress
) # Added closing parenthesis

🎧 Loading dataset and extracting features...
Looking for labels in: /content/audio_dataset
Checking folder: /content/audio_dataset/melody
Processing label: melody


Processing melody: 100%|██████████| 20/20 [00:03<00:00,  5.19it/s]


Finished processing label melody, found 20 audio files.
Checking folder: /content/audio_dataset/rap
Processing label: rap


Processing rap: 100%|██████████| 21/21 [00:03<00:00,  6.48it/s]
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Finished processing label rap, found 21 audio files.
✅ Data loaded successfully! Shape: (41, 40, 1292, 1), Labels: (41, 2)
🚀 Training CNN model...
Epoch 1/30
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1s/step - accuracy: 0.5417 - loss: 77.4874
Epoch 2/30
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1s/step - accuracy: 0.5208 - loss: 458.1194
Epoch 3/30
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1s/step - accuracy: 0.6458 - loss: 81.5623
Epoch 4/30
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1s/step - accuracy: 0.5208 - loss: 145.9682
Epoch 5/30
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1s/step - accuracy: 0.5000 - loss: 77.2188
Epoch 6/30
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1s/step - accuracy: 0.8542 - loss: 3.5010
Epoch 7/30
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1s/step - accuracy: 0.7708 - loss: 6.0600
Epoch 8/30
[1m2/2[0m [32m━━━━━━━━━━━━━

In [37]:
# 🎯 Evaluate model on test data
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"\n✅ Test Accuracy: {test_accuracy * 100:.2f}%")
print(f"🧮 Test Loss: {test_loss:.4f}")




✅ Test Accuracy: 66.67%
🧮 Test Loss: 0.5380


In [38]:
# 🎵 Song Classification using ANN
import os
import librosa
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from tqdm import tqdm

# ✅ Path to dataset
data_dir = "/content/audio_dataset"  # contains folders like /rap and /melody

# 🎧 Feature extraction function
def extract_features(file_path, n_mfcc=40):
    try:
        y, sr = librosa.load(file_path, duration=30)
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
        mfcc_scaled = np.mean(mfcc.T, axis=0)  # Average over time
        return mfcc_scaled
    except Exception as e:
        print(f"❌ Error processing {file_path}: {e}")
        return None

# 🎵 Load dataset
X, y = [], []
labels = sorted(os.listdir(data_dir))

print("🎧 Loading dataset and extracting features...")
for label in labels:
    folder_path = os.path.join(data_dir, label)
    if not os.path.isdir(folder_path):
        continue
    for file_name in tqdm(os.listdir(folder_path), desc=f"Processing {label}"):
        if file_name.lower().endswith((".mp3", ".wav")):
            file_path = os.path.join(folder_path, file_name)
            features = extract_features(file_path)
            if features is not None:
                X.append(features)
                y.append(label)

# 🧮 Convert lists to arrays
if len(X) == 0:
    raise ValueError("No data loaded. Please check the dataset directory and file formats.")

X = np.array(X)
y = np.array(y)

# 🔢 Encode labels
le = LabelEncoder()
y_encoded = le.fit_transform(y)
y_categorical = to_categorical(y_encoded)

# ⚙️ Normalize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 🧠 Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y_categorical, test_size=0.2, random_state=42, stratify=y_categorical
)

print(f"✅ Dataset ready! X_train: {X_train.shape}, y_train: {y_train.shape}")

# 🏗️ ANN model
model = Sequential([
    Dense(256, activation='relu', input_shape=(X_train.shape[1],)),
    Dropout(0.3),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dense(y_categorical.shape[1], activation='softmax')
])

# ⚙️ Compile model
model.compile(optimizer=Adam(learning_rate=0.001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# 🚀 Train model
print("🚀 Training ANN model...")
history = model.fit(
    X_train, y_train,
    epochs=30,
    batch_size=16,
    validation_data=(X_test, y_test),
    verbose=1
)

# 🎯 Evaluate model
loss, acc = model.evaluate(X_test, y_test, verbose=0)
print(f"\n🎵 ANN Test Accuracy: {acc * 100:.2f}%")

# 💾 Save model
model.save("/content/song_ann_model.h5")
print("💾 Model saved successfully at /content/song_ann_model.h5")


🎧 Loading dataset and extracting features...


Processing melody: 100%|██████████| 20/20 [00:02<00:00,  6.68it/s]
Processing rap: 100%|██████████| 21/21 [00:04<00:00,  5.15it/s]

✅ Dataset ready! X_train: (32, 40), y_train: (32, 2)
🚀 Training ANN model...
Epoch 1/30



  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 240ms/step - accuracy: 0.5417 - loss: 0.7564 - val_accuracy: 0.3333 - val_loss: 0.7471
Epoch 2/30
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step - accuracy: 0.5000 - loss: 0.7122 - val_accuracy: 0.4444 - val_loss: 0.7154
Epoch 3/30
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step - accuracy: 0.6250 - loss: 0.6292 - val_accuracy: 0.5556 - val_loss: 0.6912
Epoch 4/30
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step - accuracy: 0.8125 - loss: 0.5257 - val_accuracy: 0.6667 - val_loss: 0.6679
Epoch 5/30
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step - accuracy: 0.8542 - loss: 0.4985 - val_accuracy: 0.6667 - val_loss: 0.6534
Epoch 6/30
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step - accuracy: 0.8333 - loss: 0.4648 - val_accuracy: 0.6667 - val_loss: 0.6386
Epoch 7/30
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37




🎵 ANN Test Accuracy: 66.67%
💾 Model saved successfully at /content/song_ann_model.h5


In [41]:
import os
import numpy as np
import librosa
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from tqdm import tqdm # Import tqdm for progress bars

# --- Step 1: Load Dataset ---
data_dir = "/content/audio_dataset"  # Corrected path to where files were extracted
genres = os.listdir(data_dir)

features = []
labels = []

print(f"Loading dataset from: {data_dir}") # Debugging print
for genre in genres:
    genre_dir = os.path.join(data_dir, genre)
    print(f"Checking directory: {genre_dir}") # Debugging print
    if not os.path.isdir(genre_dir):
        print(f"Skipping non-directory: {genre_dir}") # Debugging print
        continue
    print(f"Processing genre: {genre}") # Debugging print
    file_count = 0 # Debugging counter
    for file in tqdm(os.listdir(genre_dir), desc=f"Processing {genre}"): # Added tqdm
        file_path = os.path.join(genre_dir, file)
        if file.lower().endswith(('.mp3', '.wav')): # Check for audio file extensions
            try:
                y, sr = librosa.load(file_path, duration=30)  # load 30 sec
                mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40)
                mfcc = np.mean(mfcc.T, axis=0)  # average over time
                features.append(mfcc)
                labels.append(genres.index(genre))
                file_count += 1 # Debugging counter
            except Exception as e:
                print(f"❌ Error loading {file_path}: {e}") # Changed print format
    print(f"Finished processing genre {genre}, found {file_count} audio files.") # Debugging print


features = np.array(features)
labels = np.array(labels)

if len(features) == 0: # Check if any features were loaded
    raise ValueError("No audio features loaded. Please check the dataset directory and file formats.")

# --- Step 2: Prepare Data for LSTM ---
# LSTM expects 3D input: (samples, timesteps, features)
features = np.expand_dims(features, axis=1)  # (samples, 1, features)
labels = to_categorical(labels)  # one-hot encoding

print(f"✅ Data loaded successfully! Features shape: {features.shape}, Labels shape: {labels.shape}") # Debugging print


X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

# --- Step 3: Build LSTM Model ---
model = Sequential()
model.add(LSTM(128, input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=True))
model.add(Dropout(0.3))
model.add(LSTM(64))
model.add(Dropout(0.3))
model.add(Dense(len(genres), activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# --- Step 4: Train Model ---
print("🚀 Training LSTM model...") # Debugging print
history = model.fit(X_train, y_train, epochs=50, batch_size=8, validation_data=(X_test, y_test), verbose=1) # Added verbose

# --- Step 5: Evaluate ---
loss, accuracy = model.evaluate(X_test, y_test, verbose=0) # Added verbose
print(f"🎯 Test Accuracy: {accuracy*100:.2f}%")

# --- Step 6: Save Model (Optional) ---
# model.save("/content/song_lstm_model.h5")
# print("💾 Model saved successfully at /content/song_lstm_model.h5")

Loading dataset from: /content/audio_dataset
Checking directory: /content/audio_dataset/rap
Processing genre: rap


Processing rap: 100%|██████████| 21/21 [00:03<00:00,  6.51it/s]


Finished processing genre rap, found 21 audio files.
Checking directory: /content/audio_dataset/melody
Processing genre: melody


Processing melody: 100%|██████████| 20/20 [00:03<00:00,  6.47it/s]
  super().__init__(**kwargs)


Finished processing genre melody, found 20 audio files.
✅ Data loaded successfully! Features shape: (41, 1, 40), Labels shape: (41, 2)
🚀 Training LSTM model...
Epoch 1/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 180ms/step - accuracy: 0.5083 - loss: 0.6933 - val_accuracy: 0.6667 - val_loss: 0.6634
Epoch 2/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - accuracy: 0.7250 - loss: 0.6642 - val_accuracy: 0.8889 - val_loss: 0.6523
Epoch 3/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - accuracy: 0.6500 - loss: 0.6596 - val_accuracy: 0.6667 - val_loss: 0.6449
Epoch 4/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - accuracy: 0.7708 - loss: 0.6114 - val_accuracy: 0.6667 - val_loss: 0.6331
Epoch 5/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - accuracy: 0.8833 - loss: 0.5731 - val_accuracy: 0.6667 - val_loss: 0.6206
Epoch 6/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━

In [43]:
import os
import numpy as np
import librosa
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, LSTM, Dense, Dropout, Flatten
from tensorflow.keras.utils import to_categorical
from tqdm import tqdm # Import tqdm for progress bars

# --- Step 1: Load Dataset ---
data_dir = "/content/audio_dataset"  # Corrected path
genres = [d for d in os.listdir(data_dir) if os.path.isdir(os.path.join(data_dir, d))]

features = []
labels = []

print(f"Loading dataset from: {data_dir}") # Debugging print
print(f"Found genres: {genres}") # Debugging print

for genre in genres:
    genre_dir = os.path.join(data_dir, genre)
    print(f"Checking directory: {genre_dir}") # Debugging print
    for file in tqdm(os.listdir(genre_dir), desc=f"Processing {genre}"): # Added tqdm
        file_path = os.path.join(genre_dir, file)
        if file.lower().endswith(('.mp3', '.wav')): # Check for audio file extensions
            try:
                y, sr = librosa.load(file_path, duration=30)
                mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40)
                mfcc = mfcc.T  # transpose to (timesteps, features)
                features.append(mfcc)
                labels.append(genres.index(genre))
            except Exception as e:
                print(f"❌ Error loading {file_path}: {e}") # Changed print format

print(f"Finished loading features. Found {len(features)} feature sets and {len(labels)} labels.") # Debugging print

# --- Step 2: Pad sequences to same length ---
from tensorflow.keras.preprocessing.sequence import pad_sequences

if len(features) == 0:
    raise ValueError("No audio features loaded. Please check the dataset directory and file formats.")

maxlen = max(f.shape[0] for f in features) if features else 0 # Calculate maxlen from loaded features

features = pad_sequences(features, maxlen=maxlen, dtype='float32', padding='post')
labels = to_categorical(labels, num_classes=len(genres)) # Added num_classes

print(f"✅ Data loaded and padded successfully! Features shape: {features.shape}, Labels shape: {labels.shape}") # Debugging print

# --- Step 3: Split Data ---
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

# --- Step 4: Build CNN + LSTM Model ---
model = Sequential()
model.add(Conv1D(64, kernel_size=3, activation='relu', input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(MaxPooling1D(pool_size=2))
model.add(Dropout(0.3))
model.add(LSTM(128, return_sequences=False))
model.add(Dropout(0.3))
model.add(Dense(len(genres), activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# --- Step 5: Train Model ---
print("🚀 Training CNN + LSTM model...") # Debugging print
history = model.fit(X_train, y_train, epochs=20, batch_size=8, validation_data=(X_test, y_test), verbose=1) # Reduced epochs for faster execution

# --- Step 6: Evaluate ---
loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"🎯 Test Accuracy: {accuracy*100:.2f}%")

# --- Step 7: Save Model (Optional) ---
# model.save("/content/song_cnn_lstm_model.h5")
# print("💾 Model saved successfully at /content/song_cnn_lstm_model.h5")

Loading dataset from: /content/audio_dataset
Found genres: ['rap', 'melody']
Checking directory: /content/audio_dataset/rap


Processing rap: 100%|██████████| 21/21 [00:03<00:00,  6.55it/s]


Checking directory: /content/audio_dataset/melody


Processing melody: 100%|██████████| 20/20 [00:03<00:00,  6.52it/s]
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Finished loading features. Found 41 feature sets and 41 labels.
✅ Data loaded and padded successfully! Features shape: (41, 1292, 40), Labels shape: (41, 2)
🚀 Training CNN + LSTM model...
Epoch 1/20
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 473ms/step - accuracy: 0.4875 - loss: 0.7056 - val_accuracy: 0.8889 - val_loss: 0.5465
Epoch 2/20
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 354ms/step - accuracy: 0.6583 - loss: 0.6204 - val_accuracy: 0.7778 - val_loss: 0.5003
Epoch 3/20
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 408ms/step - accuracy: 0.8333 - loss: 0.4494 - val_accuracy: 0.7778 - val_loss: 0.4786
Epoch 4/20
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 360ms/step - accuracy: 0.8833 - loss: 0.4283 - val_accuracy: 0.7778 - val_loss: 0.4945
Epoch 5/20
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 354ms/step - accuracy: 0.7375 - loss: 0.3828 - val_accuracy: 0.7778 - val_loss: 0.4715
Epoch 6/20


In [44]:
# Save model
model.save('cnn_lstm_audio_model.h5')

# Load model
from tensorflow.keras.models import load_model
model = load_model('cnn_lstm_audio_model.h5')




In [47]:
def predict_genre(file_path):
    y, sr = librosa.load(file_path, duration=30)
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40).T
    mfcc = pad_sequences([mfcc], maxlen=1292, dtype='float32', padding='post') # Corrected maxlen
    pred = model.predict(mfcc)
    genre_idx = np.argmax(pred)
    return genres[genre_idx]

# Example
print(predict_genre("/content/audio_dataset/rap/Kappe-Varroh.mp3")) # Corrected file path
print(predict_genre(""))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 114ms/step
rap
