In [1]:
import numpy as np # type: ignore
import pandas as pd # type: ignore
import librosa # type: ignore

import sys
from pathlib import Path

# Add parent directory to Python path
sys.path.append(str(Path.cwd().parent))

# Now you can import FeaturesCompute
from audio_extract.features_compute import FeaturesCompute

In [2]:
extract_full = FeaturesCompute().compute_features(r'D:\#ALLMYMUSIC - Copy\00. Future Funk\Abstract Idea - Dokutā Slump\02. Kiss.mp3')

[[[PROCESSING MIDDLE PART: 127.11s]]]
[[[PROCESSING START PART: 25.00s]]]
[[[PROCESSING END PART: 25.00s]]]


In [3]:
class FeaturesComputeNoStream:
  """
  Compute features for a given audio file. Saves to CSV format.
  """
  def columns(self):
    """Defines the structure of features and their column names"""
    feature_sizes = dict(
        chroma_stft=12, 
        chroma_cqt=12, 
        chroma_cens=12,
        tonnetz=6, 
        mfcc=20, 
        rmse=1, 
        zcr=1,
        spectral_centroid=1, 
        spectral_bandwidth=1,
        spectral_contrast=7, 
        spectral_rolloff=1
    )
    # Descriptive stats
    stats = ('mean', 'std')
    # Column names
    tuples = []
    for feat, size in feature_sizes.items():
      for stat in stats:
        # (chroma_cens, kurtosis, 01..12)
        it = ((feat, stat, '{:02d}'.format(i+1)) for i in range(size))
        tuples.extend(it)
    
    # Create MultiIndex with explicit names and sort it
    columns = pd.MultiIndex.from_tuples(tuples,
                                     names=['feature', 'statistics', 'compo#'])

    return columns.sort_values()  # Sort the index and return sorted version
  
  def compute_features(self, audio_path, uid=None, mid_perc: int = 0.6):
    """Compute the features."""
    
    features = pd.Series(index=self.columns(), dtype=np.float32)
    features.sort_index()
    
    features[('path', '', '')] = audio_path
    if uid:
      features[('uid', '', '')] = uid

    def feature_stats(name, values):
      """Calculates descriptive statistics for each feature."""
      values = np.asarray(values, dtype=np.float32)
      values = values.T
      features[name, 'mean'] = np.around(np.mean(values, axis=0), decimals=4)
      features[name, 'std'] = np.around(np.std(values, axis=0), decimals=4)
      
      print(f"Finished {name}: added {len(values)} components")

    file = audio_path
    # Load audio file
    try:
      # Get duration first
      duration = librosa.get_duration(path=file)
      
      # Calculate middle %
      start_percent = (1 - mid_perc) / 2  # = 0.2 for middle 60%
      offset = duration * start_percent
      clip_duration = duration * mid_perc
      
      # Load middle part
      y, sr = librosa.load(file, 
                          offset=offset, 
                          duration=clip_duration,
                          sr=None)
      y_sr = [(y, sr)]

      if mid_perc < 1:
        # Load beginning part
        y_start, sr_start = librosa.load(file, 
                            offset=0, 
                            duration=offset,
                            sr=None)
        y_sr.append((y_start, sr_start))
        # Load end part
        y_end, sr_end = librosa.load(file, 
                            offset=offset+clip_duration, 
                            duration=offset,
                            sr=None)
        y_sr.append((y_end, sr_end))

    except Exception as e:
      print(f"Failed to load audio: {e}")
      return None


    # Compute features for each part
    features_part_list = []
    for y, sr in y_sr:
      # Compute tempo
      tempo, _ = librosa.beat.beat_track(y=y, sr=sr, start_bpm=50)
      features[('tempo', 'mean', '01')] = np.around(tempo[0], decimals=3)

      # Compute CQT for Chroma features
      cqt = np.abs(librosa.cqt(y, sr=sr, 
                              hop_length=512, 
                              bins_per_octave=12,
                              n_bins=7*12, 
                              tuning=None))
      assert cqt.shape[0] == 7 * 12
      assert np.ceil(len(y)/512) <= cqt.shape[1] <= np.ceil(len(y)/512)+1

      # Chroma Features - rely on resampling, so will take a while
      f = librosa.feature.chroma_cqt(C=cqt, n_chroma=12, n_octaves=7)
      feature_stats('chroma_cqt', f)
      f = librosa.feature.chroma_cens(C=cqt, n_chroma=12, n_octaves=7)
      feature_stats('chroma_cens', f)

      # STFT-based analysis: Compute STFT for Spectral features
      del cqt
      D = np.abs(librosa.stft(y, 
                              n_fft=2048,
                              hop_length=512))
      # Compute Mel-scaled Spectrogram from STFT
      mel = librosa.feature.melspectrogram(sr=sr, S=D**2)
      # MFCC from mel spectrogram
      f = librosa.feature.mfcc(S=librosa.power_to_db(mel), n_mfcc=20)
      feature_stats('mfcc', f)
      del mel

      # Zero-Crossing Rate
      f = librosa.feature.zero_crossing_rate(y, frame_length=2048, hop_length=512)
      feature_stats('zcr', f)
      del y
      
      # Chroma STFT
      f = librosa.feature.chroma_stft(S=D**2, n_chroma=12)
      feature_stats('chroma_stft', f)

      # Tonnetz
      f = librosa.feature.tonnetz(chroma=f)
      feature_stats('tonnetz', f)

      # Root Mean Square Energy
      f = librosa.feature.rms(S=D)
      feature_stats('rmse', f)

      # Spectral features
      f = librosa.feature.spectral_centroid(S=D)
      feature_stats('spectral_centroid', f)

      f = librosa.feature.spectral_bandwidth(S=D)
      feature_stats('spectral_bandwidth', f)

      f = librosa.feature.spectral_contrast(S=D, n_bands=6)
      feature_stats('spectral_contrast', f)

      f = librosa.feature.spectral_rolloff(S=D)
      feature_stats('spectral_rolloff', f)

      del D

      features_part_list.append(features)

    return features_part_list


In [4]:
extract_stream = FeaturesComputeNoStream().compute_features(r'D:\#ALLMYMUSIC - Copy\00. Future Funk\Abstract Idea - Dokutā Slump\02. Kiss.mp3')


Finished chroma_cqt: added 10949 components
Finished chroma_cens: added 10949 components
Finished mfcc: added 10949 components
Finished zcr: added 10949 components
Finished chroma_stft: added 10949 components
Finished tonnetz: added 10949 components
Finished rmse: added 10949 components
Finished spectral_centroid: added 10949 components
Finished spectral_bandwidth: added 10949 components
Finished spectral_contrast: added 10949 components
Finished spectral_rolloff: added 10949 components
Finished chroma_cqt: added 3650 components
Finished chroma_cens: added 3650 components
Finished mfcc: added 3650 components
Finished zcr: added 3650 components
Finished chroma_stft: added 3650 components
Finished tonnetz: added 3650 components
Finished rmse: added 3650 components
Finished spectral_centroid: added 3650 components
Finished spectral_bandwidth: added 3650 components
Finished spectral_contrast: added 3650 components
Finished spectral_rolloff: added 3650 components
Finished chroma_cqt: added 