In [2]:
!pip install librosa




In [13]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import librosa
import numpy as np
import os
import glob
import logging

from sklearn.preprocessing import LabelEncoder

logging.basicConfig(filename='music_genre_classification.log', level=logging.INFO,
                    format='%(asctime)s - %(levelname)s - %(message)s')

dataset_path="/content/drive/MyDrive/Colab Notebooks/GenreRecog/Data/genres_original"

def extract_mfcc(file_path, n_mfcc=13):
  logging.info(f"Extracting MFCCs from {file_path}")
  y, sr = librosa.load(file_path,sr=None)
  mfcc = librosa.feature.mfcc(y=y,sr=sr,n_mfcc=n_mfcc)
  mfcc=np.mean(mfcc, axis=1)
  return mfcc

def load_dataset(dataset_path):
  genres =os.listdir(dataset_path)
  data = []
  labels=[]

  logging.info("Loading dataset...")
  for genre in genres:
      genre_path=os.path.join(dataset_path, genre)
      audio_files=glob.glob(os.path.join(genre_path, '*.wav'))

      for audio_file in audio_files:
        mfcc=extract_mfcc(audio_file)
        data.append(mfcc)
        labels.append(genre)

  logging.info(f"Loaded {len(data)} audio files.")
  print(audio_files)

  return np.array(data), np.array(labels)

X, y = load_dataset(dataset_path)

label_encoder=LabelEncoder()
y_encoded=label_encoder.fit_transform(y)




['/content/drive/MyDrive/Colab Notebooks/GenreRecog/Data/genres_original/jazz/jazz_0005.wav', '/content/drive/MyDrive/Colab Notebooks/GenreRecog/Data/genres_original/jazz/jazz_0011.wav', '/content/drive/MyDrive/Colab Notebooks/GenreRecog/Data/genres_original/jazz/jazz_0010.wav', '/content/drive/MyDrive/Colab Notebooks/GenreRecog/Data/genres_original/jazz/jazz_0004.wav', '/content/drive/MyDrive/Colab Notebooks/GenreRecog/Data/genres_original/jazz/jazz_0038.wav', '/content/drive/MyDrive/Colab Notebooks/GenreRecog/Data/genres_original/jazz/jazz_0000.wav', '/content/drive/MyDrive/Colab Notebooks/GenreRecog/Data/genres_original/jazz/jazz_0014.wav', '/content/drive/MyDrive/Colab Notebooks/GenreRecog/Data/genres_original/jazz/jazz_0028.wav', '/content/drive/MyDrive/Colab Notebooks/GenreRecog/Data/genres_original/jazz/jazz_0029.wav', '/content/drive/MyDrive/Colab Notebooks/GenreRecog/Data/genres_original/jazz/jazz_0015.wav', '/content/drive/MyDrive/Colab Notebooks/GenreRecog/Data/genres_origin

In [3]:
!pip install torch torchvision



In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset

class MusicGenreNN(nn.Module):
  def __init__(self, input_dim, num_classes):
        super(MusicGenreNN, self).__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, num_classes)
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)

  def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return self.softmax(x)


input_size=13
num_classes=len(np.unique(y_encoded))

X_tensor=torch.tensor(X, dtype=torch.float32)
y_tensor = torch.tensor(y_encoded, dtype=torch.long)

X_train, X_test, y_train, y_test = train_test_split(X_tensor, y_tensor, test_size=0.2, random_state=42)

train_data=TensorDataset(X_train, y_train)
test_data=TensorDataset(X_test, y_test)

train_loader=DataLoader(train_data, batch_size=32, shuffle=True)
test_loader=DataLoader(test_data, batch_size=32, shuffle=True)

model=MusicGenreNN(input_dim=input_size, num_classes=num_classes)

criterion=nn.CrossEntropyLoss()
optimizer=optim.Adam(model.parameters(), lr=0.001)



In [5]:

num_epochs = 20
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}")


Epoch [1/20], Loss: 2.3168
Epoch [2/20], Loss: 2.2568
Epoch [3/20], Loss: 2.2255
Epoch [4/20], Loss: 2.2034
Epoch [5/20], Loss: 2.1943
Epoch [6/20], Loss: 2.1847
Epoch [7/20], Loss: 2.1755
Epoch [8/20], Loss: 2.1801
Epoch [9/20], Loss: 2.1583
Epoch [10/20], Loss: 2.1522
Epoch [11/20], Loss: 2.1606
Epoch [12/20], Loss: 2.1413
Epoch [13/20], Loss: 2.1308
Epoch [14/20], Loss: 2.1424
Epoch [15/20], Loss: 2.1444
Epoch [16/20], Loss: 2.1171
Epoch [17/20], Loss: 2.1138
Epoch [18/20], Loss: 2.1064
Epoch [19/20], Loss: 2.1030
Epoch [20/20], Loss: 2.0986


In [6]:
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f"Accuracy on the test set: {accuracy:.2f}%")


Accuracy on the test set: 23.50%
