In [6]:
!pip install librosa




In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
import librosa
import numpy as np
import os
import glob
import logging

log_file='/content/drive/MyDrive/Colab Notebooks/GenreRecog/extraction.log'
logging.basicConfig(
    filename=log_file,
    filemode='w',
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
    force=True
)

from sklearn.preprocessing import LabelEncoder

dataset_path="/content/drive/MyDrive/Colab Notebooks/GenreRecog/Data/genres_original"

#def extract_mfcc(file_path, n_mfcc=13):
  #y, sr = librosa.load(file_path,sr=None)
  #mfcc = librosa.feature.mfcc(y=y,sr=sr,n_mfcc=n_mfcc)
  #mfcc=np.mean(mfcc, axis=1)
  #return mfcc

def extract_mfcc(file_path, n_mfcc=13):
    """
    Extracts MFCC features from an audio file.

    Args:
        file_path (str): Path to the audio file.
        n_mfcc (int): Number of MFCC coefficients to extract.

    Returns:
        np.ndarray: MFCC features.
    """
    try:
        y, sr = librosa.load(file_path, sr=None)
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
        mfcc = np.mean(mfcc, axis=1)
        logging.info(f"Extracted MFCCs from {file_path}")
        return mfcc
    except Exception as e:
        logging.error(f"Error extracting MFCCs from {file_path}: {e}")
        return None


# def load_dataset(dataset_path):
#   genres =os.listdir(dataset_path)
#   data = []
#   labels=[]


#   for genre in genres:
#       genre_path=os.path.join(dataset_path, genre)
#       audio_files=glob.glob(os.path.join(genre_path, '*.wav'))

#       for audio_file in audio_files:
#         mfcc=extract_mfcc(audio_file)
#         data.append(mfcc)
#         labels.append(genre)
#         logging.info(f"Loaded {len(data)} audio files.")
#   return np.array(data), np.array(labels)

def load_dataset(dataset_path):
    """
    Loads the audio dataset and extracts MFCC features.

    Args:
        dataset_path (str): Path to the dataset.

    Returns:
        tuple: A tuple containing the data (MFCC features) and labels.
    """
    genres = os.listdir(dataset_path)
    data = []
    labels = []

    logging.info(f"Loading dataset from {dataset_path}")
    for genre in genres:
        genre_path = os.path.join(dataset_path, genre)
        audio_files = glob.glob(os.path.join(genre_path, '*.wav'))
        logging.info(f"Processing genre: {genre}")

        for audio_file in audio_files:
            mfcc = extract_mfcc(audio_file)
            if mfcc is not None:
                data.append(mfcc)
                labels.append(genre)
                logging.info(f"Loaded {len(data)} audio files.")
            else:
                logging.warning(f"Skipping file {audio_file} due to MFCC extraction error.")

    logging.info(f"Dataset loading complete. Total samples: {len(data)}")
    return np.array(data), np.array(labels)



X, y = load_dataset(dataset_path)

label_encoder=LabelEncoder()
y_encoded=label_encoder.fit_transform(y)
logging.info("Labels encoded successfully.")




  y, sr = librosa.load(file_path, sr=None)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


In [None]:
!pip install torch torchvision

In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset
import logging
import os

log_file='/content/drive/MyDrive/Colab Notebooks/GenreRecog/model.log'
logging.basicConfig(
    filename=log_file,
    filemode='w',
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
    force=True
)

class MusicGenreNN(nn.Module):
  def __init__(self, input_dim, num_classes):
        super(MusicGenreNN, self).__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, num_classes)
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)

  def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return self.softmax(x)


input_size=13
num_classes=len(np.unique(y_encoded))

logging.info(f"Input size: {input_size}, Number of classes: {num_classes}")

X_tensor=torch.tensor(X, dtype=torch.float32)
y_tensor = torch.tensor(y_encoded, dtype=torch.long)

X_train, X_test, y_train, y_test = train_test_split(X_tensor, y_tensor, test_size=0.2, random_state=42)

logging.info(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
logging.info(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")

train_data=TensorDataset(X_train, y_train)
test_data=TensorDataset(X_test, y_test)

train_loader=DataLoader(train_data, batch_size=32, shuffle=True)
test_loader=DataLoader(test_data, batch_size=32, shuffle=True)

model=MusicGenreNN(input_dim=input_size, num_classes=num_classes)

criterion=nn.CrossEntropyLoss()
optimizer=optim.Adam(model.parameters(), lr=0.001)

logging.info(f"Model architecture: {model}")
logging.info(f"Optimizer: {optimizer}")


In [9]:
import logging
import os

log_file='/content/drive/MyDrive/Colab Notebooks/GenreRecog/training.log'
logging.basicConfig(
    filename=log_file,
    filemode='w',
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
    force=True
)

num_epochs = 20
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    logging.info(f"Starting epoch {epoch + 1}/{num_epochs}")

    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    epoch_loss = running_loss / len(train_loader)
    print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {epoch_loss:.4f}")
    logging.info(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {epoch_loss:.4f}")

logging.info("Training complete")

    #print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}")


Epoch [1/20], Loss: 2.0624
Epoch [2/20], Loss: 2.0644
Epoch [3/20], Loss: 2.0691
Epoch [4/20], Loss: 2.0634
Epoch [5/20], Loss: 2.0578
Epoch [6/20], Loss: 2.0597
Epoch [7/20], Loss: 2.0646
Epoch [8/20], Loss: 2.0590
Epoch [9/20], Loss: 2.0603
Epoch [10/20], Loss: 2.0612
Epoch [11/20], Loss: 2.0645
Epoch [12/20], Loss: 2.0547
Epoch [13/20], Loss: 2.0499
Epoch [14/20], Loss: 2.0541
Epoch [15/20], Loss: 2.0503
Epoch [16/20], Loss: 2.0430
Epoch [17/20], Loss: 2.0474
Epoch [18/20], Loss: 2.0463
Epoch [19/20], Loss: 2.0458
Epoch [20/20], Loss: 2.0491


In [10]:
import logging
import os

log_file='/content/drive/MyDrive/Colab Notebooks/GenreRecog/evaluation.log'
logging.basicConfig(
    filename=log_file,
    filemode='w',
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
    force=True
)

model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f"Accuracy on the test set: {accuracy:.2f}%")


Accuracy on the test set: 32.50%


In [11]:
#neccesary to create blank log file to force previous log file to show in Drive

import logging
import os

log_file='/content/drive/MyDrive/Colab Notebooks/GenreRecog/log.log'
logging.basicConfig(
    filename=log_file,
    filemode='w',
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
    force=True
)