Sascha Spors,
Professorship Signal Theory and Digital Signal Processing,
Institute of Communications Engineering (INT),
Faculty of Computer Science and Electrical Engineering (IEF),
University of Rostock,
Germany

# Data Driven Audio Signal Processing - A Tutorial with Computational Examples

Winter Semester 2021/22 (Master Course #24512)

- lecture: https://github.com/spatialaudio/data-driven-audio-signal-processing-lecture
- tutorial: https://github.com/spatialaudio/data-driven-audio-signal-processing-exercise

Feel free to contact the lecturer: frank.schultz@uni-rostock.de

# Exercise 2: Audio Features

## Objectives

## Special Python Package
- in this exercise we use the `pyloudnorm` package from https://github.com/csteinmetz1/pyloudnorm
- it can be installed with `pip install pyloudnorm`

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import os
import pyloudnorm as pyln
import warnings
from matplotlib.cm import get_cmap
from matplotlib.colors import BoundaryNorm
from scipy.io import wavfile
from scipy.signal import resample, stft


def set_cmap(xmax, dbrange):
    """Build colormap and norm for an STFT level plot 20log10(|X[t,f]|) in dB.

    The 'magma' colormap is discretized by `dbrange` equally spaced
    boundaries between `xmax-dbrange` and `xmax` (both included).
    Levels above the boundaries are drawn in 'C3', levels below in 'C7'.

    Parameters
    ----------
    xmax : upper end of the displayed level range in dB
    dbrange : width of the displayed level range in dB, also used as the
        number of boundaries

    Returns
    -------
    (cmap, norm) : the colormap copy and the matching BoundaryNorm
    """
    boundaries = np.linspace(
        start=xmax-dbrange, stop=xmax, num=dbrange, endpoint=True)
    # work on a copy so that the registered 'magma' colormap stays untouched
    colormap = get_cmap('magma').copy()
    colormap.set_under('C7')
    colormap.set_over('C3')
    return colormap, BoundaryNorm(boundaries, colormap.N)


# oversampling factor applied before the peak search (true peak estimate)
N_OS = 8

# silence all warnings; meant for the annoying scipy.io.wavfile.read ones
warnings.simplefilter('ignore')


## Calculate BS.1770 Loudness, True Peak and Histogram

In [None]:
# For every wav file in `folder`: normalize to the desired true peak level,
# measure the integrated loudness, print both and save a sample histogram.
folder = 'audio_ex02/'
# sorted, because os.listdir returns the names in arbitrary order
files = sorted(os.listdir(folder))  # get file names in this folder
# print(files)

# for debug tests choose certain files
# files = [files[1], files[7]]
# files = [files[7]]

dbtp_des = 0.0  # desired true peak level in dBTP, same for all files

for i in files:
    if not i.lower().endswith('.wav'):  # consider only wav files
        continue
    fs, x = wavfile.read(os.path.join(folder, i))

    # wavfile.read returns integer arrays for PCM files, these can neither
    # be scaled in-place by a float gain nor are they relative to full scale
    # -> convert to float with full scale at +-1
    if np.issubdtype(x.dtype, np.integer):
        if x.dtype == np.uint8:  # 8-bit PCM is unsigned, centered at 128
            x = (x.astype(np.float64) - 128.0) / 128.0
        else:
            x = x.astype(np.float64) / -float(np.iinfo(x.dtype).min)

    if x.size == 0:  # nothing to resample / measure
        print(f"{'file: ' + i:<30}skipped (no samples)")
        continue

    # true peak in decibel true peak (dBTP),
    # estimated as the peak of the N_OS times oversampled signal
    xr = resample(x, N_OS*x.shape[0])
    peak = np.max(np.abs(xr))
    if peak == 0:  # log10(0) = -inf, the normalization gain is undefined
        print(f"{'file: ' + i:<30}skipped (digital silence)")
        continue
    dbtp = 20*np.log10(peak)

    # normalize to desired dBTP
    x *= 10**((dbtp_des - dbtp)/20)

    # check dBTP of the normalized signal
    xr = resample(x, N_OS*x.shape[0])
    dbtp = 20*np.log10(np.max(np.abs(xr)))

    # measure loudness in
    # 'loudness units relative to full scale' (LUFS)
    # according to https://www.itu.int/rec/R-REC-BS.1770
    meter = pyln.Meter(fs)
    lufs = meter.integrated_loudness(x)

    msg = (
        f"{'file: ' + i:<30}"
        f"{'dBTP: '} {dbtp:+4.2f} \t"
        f"{'LUFS: '} {lufs:+4.2f}"
    )
    print(msg)

    # histogram of the sample values of the normalized signal
    plt.figure()
    plt.hist(x, range=(-1, 1), bins='auto', density=True)
    plt.title(
        f"{i}"
        f"{', dBTP:'} {dbtp:+4.2f}"
        f"{', LUFS:'} {lufs:+4.2f}"
    )
    plt.xlabel('sample value')
    plt.ylabel('density-like occurence')
    plt.savefig(
        os.path.join(folder, os.path.splitext(i)[0] + '_LUFS_hist.png'))
        

## STFT Magnitude Spectrum

In [None]:
# For every wav file in `folder`: plot the STFT magnitude of one channel
# in dB over a logarithmic frequency axis and save the figure as png.
# ch = 0  # take left channel
ch = 1  # take right channel

dbrange = 72  # displayed level range in dB below the spectrum maximum

folder = 'audio_ex02/'
# sorted, because os.listdir returns the names in arbitrary order
files = sorted(os.listdir(folder))  # get file names in this folder
# print(files)

# for debug tests choose certain files
# files = [files[1], files[7]]
# files = [files[7]]

for i in files:
    if not i.lower().endswith('.wav'):  # consider only wav files
        continue
    fs, x = wavfile.read(os.path.join(folder, i))

    # mono files are read as 1-D array, there is no channel axis to index
    sig = x if x.ndim == 1 else x[:, ch]

    f, t, X = stft(sig, fs, window='hamming', nperseg=2**14, nfft=2**18)
    # level in dB, calculated once and used for maximum and plot
    Xdb = 20*np.log10(np.abs(X))
    Xmaxdb = np.max(Xdb)

    cbticks = np.arange(Xmaxdb-dbrange, Xmaxdb+6, 6)  # tick every 6 dB
    cmap, norm = set_cmap(Xmaxdb, dbrange)
    fig, ax = plt.subplots(figsize=(5, 5), nrows=1, ncols=1)
    srf = ax.pcolormesh(t, f, Xdb, cmap=cmap, norm=norm)
    # the colorbar takes cmap and norm from the mappable srf
    cax = plt.colorbar(srf, ticks=cbticks)
    ax.set_ylim(2e1, 2e4)
    ax.set_yscale('log')
    ax.set_title('STFT Magnitude: '+i)
    ax.set_ylabel('f / Hz')
    ax.set_xlabel('t / s')
    cax.ax.set_xlabel('dB')
    plt.savefig(os.path.join(folder, os.path.splitext(i)[0] + '_STFT.png'))


## Copyright

- the notebooks are provided as [Open Educational Resources](https://en.wikipedia.org/wiki/Open_educational_resources)
- feel free to use the notebooks for your own purposes
- the text is licensed under [Creative Commons Attribution 4.0](https://creativecommons.org/licenses/by/4.0/)
- the code of the IPython examples is licensed under the [MIT license](https://opensource.org/licenses/MIT)
- please attribute the work as follows: *Frank Schultz, Data Driven Audio Signal Processing - A Tutorial Featuring Computational Examples, University of Rostock* ideally with relevant file(s), github URL https://github.com/spatialaudio/data-driven-audio-signal-processing-exercise, commit number and/or version tag, year.
