In [None]:
# Shell escape: runs the external command `git-commit` with the message "Music Genre".
# NOTE(review): `git-commit` (hyphenated, no `-m`) is not the stock `git commit -m ...` form;
# presumably a local wrapper script or alias — confirm it exists on PATH.
# NOTE(review): as the first cell, this side-effecting command runs on every "Run All" — confirm that is intended.
!git-commit "Music Genre"

# Music Genre Classification using Audio Features

### Uses these libraries: `torch`, `pandas`, `scipy`, `evaluate`, `numpy`, `sklearn`, `datasets`, `librosa`, `matplotlib` and `seaborn`

### 1. Import the libraries

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

import pandas as pd
import numpy as np
import scipy.stats as stats
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.pipeline import Pipeline

import evaluate
from datasets import load_dataset
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

ModuleNotFoundError: No module named 'matplotlib'

In [4]:
!pip install matplotlib

ERROR: Could not find a version that satisfies the requirement matplotlib (from versions: none)
ERROR: No matching distribution found for matplotlib

### 2. Load the GTZAN dataset from Hugging Face 

In [None]:
# Load the "audio" configuration of the marsyas/gtzan dataset via `datasets.load_dataset`.
# NOTE(review): confirm that "audio" is a valid configuration name for this dataset.
print("Loading dataset...")
dataset = load_dataset(path="marsyas/gtzan", name="audio")

#### Get audio features using librosa

In [None]:
def _summary_stats(prefix, values):
    """Return mean, std, skew and kurtosis of `values`, keyed as '<prefix>_<stat>'."""
    return {
        f'{prefix}_mean': np.mean(values),
        f'{prefix}_std': np.std(values),
        f'{prefix}_skew': stats.skew(values),
        f'{prefix}_kurtosis': stats.kurtosis(values),
    }


def extract_features(audio_array, sample_rate):
    """Summarise one audio clip as a flat dict of scalar features.

    The clip is described by 13 MFCCs, spectral centroid, spectral bandwidth,
    spectral rolloff and zero-crossing rate. Each of these per-frame series is
    reduced to four statistics (mean, std, skew, kurtosis). A single tempo
    estimate from librosa's beat tracker is appended last.

    Args:
        audio_array: Waveform samples, passed to librosa as `y`
            (presumably a 1-D mono float array — confirm against the caller).
        sample_rate: Sampling rate of `audio_array`, passed to librosa as `sr`.

    Returns:
        dict mapping feature name to value. Keys are, in insertion order:
        'mfcc{1..13}_{mean,std,skew,kurtosis}', then
        '{spectral_centroid,spectral_bandwidth,spectral_rolloff,zero_crossing_rate}_{mean,std,skew,kurtosis}',
        then 'tempo' (always a plain Python float).
    """
    # Kept as a function-local import, as in the original ("to avoid conflicts").
    import librosa

    # Mel-frequency cepstral coefficients: one row per coefficient.
    mfccs = librosa.feature.mfcc(y=audio_array, sr=sample_rate, n_mfcc=13)

    # Spectral descriptors; [0] takes the first row of each 2-D result.
    spectral_centroid = librosa.feature.spectral_centroid(y=audio_array, sr=sample_rate)[0]
    spectral_bandwidth = librosa.feature.spectral_bandwidth(y=audio_array, sr=sample_rate)[0]
    spectral_rolloff = librosa.feature.spectral_rolloff(y=audio_array, sr=sample_rate)[0]

    # Rhythm: only the tempo estimate is used, the beat positions are discarded.
    tempo, _ = librosa.beat.beat_track(y=audio_array, sr=sample_rate)

    zero_crossing_rate = librosa.feature.zero_crossing_rate(audio_array)[0]

    features = {}

    # Per-coefficient MFCC statistics (coefficients are numbered from 1 in the keys).
    for i in range(mfccs.shape[0]):
        features.update(_summary_stats(f'mfcc{i+1}', mfccs[i]))

    # Statistics of the remaining per-frame series.
    for name, series in [
        ('spectral_centroid', spectral_centroid),
        ('spectral_bandwidth', spectral_bandwidth),
        ('spectral_rolloff', spectral_rolloff),
        ('zero_crossing_rate', zero_crossing_rate),
    ]:
        features.update(_summary_stats(name, series))

    # beat_track may return tempo either as a scalar or as a 1-element ndarray
    # depending on the librosa version. Storing the ndarray would put array
    # objects (dtype=object) in the 'tempo' column of the resulting DataFrame,
    # so always reduce it to a float.
    features['tempo'] = float(np.atleast_1d(tempo).flat[0])

    return features

#### Process audio files and extract features

In [None]:
def process_dataset(dataset_split):
    """Run `extract_features` over every item of a split.

    Each item is read as a mapping with an 'audio' entry (itself holding
    'array' and 'sampling_rate') and a 'genre' entry.

    Returns:
        (df, labels): a DataFrame with one row of extracted features per item,
        and a list of the items' 'genre' values in the same order.
    """
    records = []

    # Single pass over the split: pair each feature dict with its label.
    for example in dataset_split:
        clip = example['audio']
        waveform, rate = clip['array'], clip['sampling_rate']
        label = example['genre']
        records.append((extract_features(waveform, rate), label))

    feature_rows = [row for row, _ in records]
    genre_labels = [label for _, label in records]

    return pd.DataFrame(feature_rows), genre_labels

#### Process a smaller subset for quick demonstration (adjust as needed)

In [None]:
print("Extracting audio features (this may take a while)...")
# Number of clips to sample for training and for testing;
# lower these if feature extraction is too slow on your machine.
train_size, test_size = 800, 100

#### Randomly sample from the dataset

In [None]:
# Seeded generator so the sampled subsets are the same on every Restart & Run All.
rng = np.random.default_rng(42)

train_pool = dataset['train']

if 'test' in dataset:
    test_pool = dataset['test']
    # Clamp the requested sizes: sampling without replacement raises ValueError
    # when more items are requested than the split contains.
    train_indices = rng.choice(
        len(train_pool), size=min(train_size, len(train_pool)), replace=False
    )
    test_indices = rng.choice(
        len(test_pool), size=min(test_size, len(test_pool)), replace=False
    )
else:
    # The loaded dataset has no dedicated 'test' split: carve two disjoint
    # samples out of 'train' so no clip lands in both subsets.
    test_pool = train_pool
    shuffled = rng.permutation(len(train_pool))
    n_train = min(train_size, len(train_pool))
    n_test = min(test_size, len(train_pool) - n_train)
    train_indices = shuffled[:n_train]
    test_indices = shuffled[n_train:n_train + n_test]

# Index with plain Python ints rather than numpy integer scalars.
train_subset = [train_pool[int(i)] for i in train_indices]
test_subset = [test_pool[int(i)] for i in test_indices]

#### Extract features

### Further improvements you could make:

- Implement k-fold cross-validation
- Try different neural network architectures
- Implement early stopping
- Explore feature importance
- Add data augmentation techniques
- Implement transfer learning using pretrained audio models