# Librosa Library (Somos Data)

Now that we know what data is important and what the expected results of those metrics should be, let's calculate each metric for each individual audio file given in the dataset.

To do this, I'll be using the Librosa Library, looping through the audiofiles folder in our repo. We'll eventually join the metrics calculated using the Librosa library with the initial dataset provided in the first worksheet.

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
import pickle
import time
import os
import json


import librosa
from pydub import AudioSegment
import wave
import audioop
from scipy.stats import skew
from sklearn.preprocessing import normalize

### Importing Data

In [16]:
with open('pickles/df.pkl', 'rb') as f:
    df = pickle.load(f)
    
# with open('pickles/df_backup.pkl', 'rb') as f:
#     df_backup = pickle.load(f)   

df_meta = df

### Librosa Library

Librosa is a Python library that enables users to analyze and process audio signals. It provides tools for loading and saving audio files in various formats, manipulating audio waveforms, and extracting various audio features such as mel-frequency cepstral coefficients (MFCCs), spectral contrast, and chroma features. Librosa also provides tools for visualizing audio data using waveplots, spectrograms, and chroma visualizations, as well as for decomposing audio signals into components such as harmonic and percussive components. 

The package is built on top of NumPy and SciPy, and is commonly used in fields such as music information retrieval, speech recognition, and other applications that involve the analysis of audio signals.

In [3]:
folder_path = 'datasets/somos/audios/'

In [4]:
#df_librosa = pd.DataFrame(columns=['filename', 'mfccs', 'spectral_centroids_mean', 'spectral_bandwidth_mean', 'spectral_contrast_mean', 'zero_crossing_rate_mean', 'chroma_cens_mean', 'mel_spectrogram_mean'])

# Load background noise signal
bg_level = 0.05  # Adjust this to control the background noise level
bg_sr = 44100
bg_duration = 5
bg_samples = int(bg_sr * bg_duration)
bg_noise = np.random.uniform(-1, 1, size=bg_samples) * bg_level

for filename in os.listdir(folder_path):
    file_path = os.path.join(folder_path, filename)
    if os.path.isfile(file_path):
        wav = wave.open(file_path, 'rb')
        sr = wav.getframerate()
        data = wav.readframes(wav.getnframes())
        y, _ = librosa.load(file_path, sr=None, mono=True, dtype=np.float32)
        y = np.nan_to_num(y, nan=0.0, posinf=0.0, neginf=0.0)

        # Perform peak normalization
        max_amp = 0.5
        peak = max(abs(y))
        if peak > 0:
            y = y * (max_amp / peak)

        # Perform data augmentation
        y = librosa.effects.preemphasis(y, coef=0.97)
        y = librosa.effects.pitch_shift(y, sr, n_steps=2)
        y = librosa.effects.time_stretch(y, rate=0.8)

        # Add background noise to the audio signal
        bg_start = np.random.randint(0, max(1, bg_samples - len(y)))
        bg_slice = bg_noise[bg_start : bg_start + len(y)]
        if bg_slice.shape[0] < y.shape[0]:
            bg_slice = np.tile(bg_slice, int(np.ceil(y.shape[0] / bg_slice.shape[0])))[:y.shape[0]]
        y = np.add(y, bg_slice)

        # Apply reverb to the augmented audio signal
        #y = audioop.reverb(y, sr, room_size=0.5)

        # Calculate features for the augmented audio data
        mfccs = librosa.feature.mfcc(y, sr, n_mels=40, n_fft=2048, hop_length=512, win_length=1024)[:13]
        spectral_centroids = librosa.feature.spectral_centroid(y=y, sr=sr)[0].mean()
        spectral_bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr)[0].mean()
        spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)[0].mean()
        zero_crossing_rate = librosa.feature.zero_crossing_rate(y)[0].mean()
        chroma_cens = librosa.feature.chroma_cens(y=y, sr=sr)[0].mean()
        mel_spectrogram = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, fmax=8000).mean()

        # Store feature values for augmented audio data in DataFrame
        librosa_dict = {
        'filename': filename,
        'mfccs': mfccs,
        'spectral_centroids': spectral_centroids,
        'spectral_bandwidth': spectral_bandwidth,
        'spectral_contrast': spectral_contrast,
        'zero_crossing_rate': zero_crossing_rate,
        'chroma_cens': chroma_cens,
        'mel_spectrogram': mel_spectrogram
    }
#     df_librosa = df_librosa.append(librosa_dict, ignore_index=True)

  y = librosa.effects.pitch_shift(y, sr, n_steps=2)
  0.01621181], sr=24000 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  mfccs = librosa.feature.mfcc(y, sr, n_mels=40, n_fft=2048, hop_length=512, win_length=1024)[0][:13]
  y = librosa.effects.pitch_shift(y, sr, n_steps=2)
 -0.01313907], sr=24000 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  mfccs = librosa.feature.mfcc(y, sr, n_mels=40, n_fft=2048, hop_length=512, win_length=1024)[0][:13]
  0.01822375], sr=24000 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  mfccs = librosa.feature.mfcc(y, sr, n_mels=40, n_fft=2048, hop_length=512, win_length=1024)[0][:13]
 -0.00868214], sr=24000 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  mfccs = librosa.feature.mfcc(y, sr, n_mels=40, n_fft=2048, hop_length=512, win_length=1024)[0][:13]
 -0.

  0.04538493], sr=24000 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  mfccs = librosa.feature.mfcc(y, sr, n_mels=40, n_fft=2048, hop_length=512, win_length=1024)[0][:13]
  0.03992378], sr=24000 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  mfccs = librosa.feature.mfcc(y, sr, n_mels=40, n_fft=2048, hop_length=512, win_length=1024)[0][:13]
  0.02681942], sr=24000 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  mfccs = librosa.feature.mfcc(y, sr, n_mels=40, n_fft=2048, hop_length=512, win_length=1024)[0][:13]
  0.00232566], sr=24000 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  mfccs = librosa.feature.mfcc(y, sr, n_mels=40, n_fft=2048, hop_length=512, win_length=1024)[0][:13]
 -0.02660864], sr=24000 as keyword args. From version 0.10 passing these as positional arguments will result

 -0.02402928], sr=24000 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  mfccs = librosa.feature.mfcc(y, sr, n_mels=40, n_fft=2048, hop_length=512, win_length=1024)[0][:13]
  0.00885352], sr=24000 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  mfccs = librosa.feature.mfcc(y, sr, n_mels=40, n_fft=2048, hop_length=512, win_length=1024)[0][:13]
 -0.03831215], sr=24000 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  mfccs = librosa.feature.mfcc(y, sr, n_mels=40, n_fft=2048, hop_length=512, win_length=1024)[0][:13]
 -0.01023872], sr=24000 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  mfccs = librosa.feature.mfcc(y, sr, n_mels=40, n_fft=2048, hop_length=512, win_length=1024)[0][:13]
  0.04595517], sr=24000 as keyword args. From version 0.10 passing these as positional arguments will result

 -0.03808211], sr=24000 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  mfccs = librosa.feature.mfcc(y, sr, n_mels=40, n_fft=2048, hop_length=512, win_length=1024)[0][:13]
  0.01328029], sr=24000 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  mfccs = librosa.feature.mfcc(y, sr, n_mels=40, n_fft=2048, hop_length=512, win_length=1024)[0][:13]
 -0.00120402], sr=24000 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  mfccs = librosa.feature.mfcc(y, sr, n_mels=40, n_fft=2048, hop_length=512, win_length=1024)[0][:13]
 -0.02876277], sr=24000 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  mfccs = librosa.feature.mfcc(y, sr, n_mels=40, n_fft=2048, hop_length=512, win_length=1024)[0][:13]
 -0.02223506], sr=24000 as keyword args. From version 0.10 passing these as positional arguments will result

 -0.02122389], sr=24000 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  mfccs = librosa.feature.mfcc(y, sr, n_mels=40, n_fft=2048, hop_length=512, win_length=1024)[0][:13]
 -0.00603473], sr=24000 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  mfccs = librosa.feature.mfcc(y, sr, n_mels=40, n_fft=2048, hop_length=512, win_length=1024)[0][:13]
 -0.02820518], sr=24000 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  mfccs = librosa.feature.mfcc(y, sr, n_mels=40, n_fft=2048, hop_length=512, win_length=1024)[0][:13]
  0.02883324], sr=24000 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  mfccs = librosa.feature.mfcc(y, sr, n_mels=40, n_fft=2048, hop_length=512, win_length=1024)[0][:13]
  0.04483196], sr=24000 as keyword args. From version 0.10 passing these as positional arguments will result

  0.01417161], sr=24000 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  mfccs = librosa.feature.mfcc(y, sr, n_mels=40, n_fft=2048, hop_length=512, win_length=1024)[0][:13]
  0.03696659], sr=24000 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  mfccs = librosa.feature.mfcc(y, sr, n_mels=40, n_fft=2048, hop_length=512, win_length=1024)[0][:13]
 -0.04100632], sr=24000 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  mfccs = librosa.feature.mfcc(y, sr, n_mels=40, n_fft=2048, hop_length=512, win_length=1024)[0][:13]
 -0.0461096 ], sr=24000 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  mfccs = librosa.feature.mfcc(y, sr, n_mels=40, n_fft=2048, hop_length=512, win_length=1024)[0][:13]
  0.04357428], sr=24000 as keyword args. From version 0.10 passing these as positional arguments will result

KeyboardInterrupt: 

In [17]:
df_librosa = df_backup

In [18]:
df_librosa = df_librosa.dropna(axis = 1)

In [19]:
df_librosa['filename'].sort_values(ascending= False)

10188     wiki_0100_180.wav
16304     wiki_0100_123.wav
16192     wiki_0100_121.wav
20017     wiki_0100_118.wav
2600      wiki_0100_113.wav
                ...        
743      LJ002-0181_079.wav
15778    LJ002-0181_070.wav
15751    LJ002-0181_058.wav
2028     LJ002-0181_046.wav
5578     LJ002-0181_033.wav
Name: filename, Length: 20100, dtype: object

In [20]:
df_librosa['utteranceId'] = df_librosa['filename'].apply(lambda x: os.path.splitext(x)[0])

In [21]:
df_meta

Unnamed: 0,utteranceId,choice,sentenceId,systemId,modelId,testpageId,locale,listenerId,isNative,wrongValidation,lowNatural,sameScores,highSynthetic,clean,listenerReliability,MOS
0,novel_2007_0098_015,4,novel_2007_0098,15,m1,0,gb,KEXM49572020611127,1,1,1,1,1,1,1.00,3.590156
1,novel_2007_0098_015,2,novel_2007_0098,15,m1,0,gb,ONJP34545176526892,0,1,1,1,1,1,1.00,3.590156
2,novel_2007_0098_015,2,novel_2007_0098,15,m1,0,gb,BRHE82716530388427,1,1,1,1,1,1,1.00,3.590156
3,novel_2007_0098_015,5,novel_2007_0098,15,m1,0,gb,IJWO22856390758743,1,1,1,1,1,1,1.00,3.590156
4,novel_2007_0098_015,5,novel_2007_0098,15,m1,0,gb,BFXL60735888172229,1,0,0,1,0,0,0.46,3.590156
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
339203,reportorial_2011_0141_160,3,reportorial_2011_0141,160,m5,993,us,SQZP20987165412056,1,1,1,1,0,0,0.71,3.436844
339204,reportorial_2011_0141_160,4,reportorial_2011_0141,160,m5,993,us,VOPC28548555753770,1,1,1,1,0,0,0.29,3.436844
339205,reportorial_2011_0141_160,4,reportorial_2011_0141,160,m5,993,us,QQXJ36618242908678,1,1,0,1,0,0,0.32,3.436844
339206,reportorial_2011_0141_160,2,reportorial_2011_0141,160,m5,993,us,KRQC01203593359590,1,1,1,1,1,1,0.44,3.436844


### Joining With MetaData

In [22]:
df = pd.merge(df_meta, df_librosa, on = 'utteranceId')
df.columns

Index(['utteranceId', 'choice', 'sentenceId', 'systemId', 'modelId',
       'testpageId', 'locale', 'listenerId', 'isNative', 'wrongValidation',
       'lowNatural', 'sameScores', 'highSynthetic', 'clean',
       'listenerReliability', 'MOS', 'filename', 'mfccs', 'spectral_centroids',
       'spectral_bandwidth', 'spectral_contrast', 'zero_crossing_rate',
       'chroma_cens', 'mel_spectrogram'],
      dtype='object')

In [23]:
with open('pickles/df.pkl', 'wb') as f:
    pickle.dump(df, f)
    
with open('pickles/df_librosa.pkl', 'wb') as f:
    pickle.dump(df_librosa, f)
    
with open('pickles/df_meta.pkl', 'wb') as f:
    pickle.dump(df_meta, f)