# Uniform Quantization of an Audio Signal

This code is provided as supplementary material for the lecture Quellencodierung.

This code illustrates
* Uniform scalar quantization of an audio file

In [1]:
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import shutil
from scipy.io import wavfile
import IPython.display as ipd

from ipywidgets import interactive
import ipywidgets as widgets

In [2]:
# plotting options
font = {'size': 20}
# render text with LaTeX only if a `latex` executable is found on the PATH
latex_available = bool(shutil.which('latex'))

plt.rc('font', **font)
plt.rc('text', usetex=latex_available)

# default figure size (width, height) in inches
matplotlib.rc('figure', figsize=(18, 6))

Load wave file and convert to mono if stereo

In [3]:
fs, data = wavfile.read('./audio/33711__acclivity__excessiveexposure.wav')
#fs, data = wavfile.read('./audio/E-Core - Pingouin-Banquise_45s.wav')
#fs, data = wavfile.read('./audio/KIMIKO ISHIZAKA - Goldberg Variations BWV 988 - 01 - Aria_45s.wav')


# work on a floating-point copy of the samples (astype already returns a new array)
audiodata = data.astype(float)
# Convert audio to mono by averaging over all channels.
# For a stereo file this is exactly (left + right) / 2; files with one or
# more than two channel columns are handled as well.
if audiodata.ndim > 1:
    audiodata = audiodata.mean(axis=1)

# time axis in seconds, one entry per sample
time = np.arange(len(audiodata)) / fs

Uniform Quantization. The quantizer is given by
$$
\bar{b}[k] = \mathrm{sign}(b[k])\cdot \Delta \cdot \left(\left\lfloor\frac{|b[k]|}{\Delta}\right\rfloor+\frac{1}{2}\right)
$$
where $\lfloor x \rfloor$ denotes the largest integer less than or equal to $x$.

In [4]:
def quantize(b, w):
    """Uniformly quantize a signal using a step size derived from its peak value.

    The step size is ``Delta = 2*b_max / 2**w`` with ``b_max = max(|b|)``.
    Every sample is replaced by
    ``sign(b) * Delta * (floor(|b| / Delta) + 0.5)``, i.e. by an odd multiple
    of ``Delta/2``. Because ``sign(0) == 0``, samples that are exactly zero
    stay zero.

    Parameters
    ----------
    b : array_like
        Signal samples.
    w : int
        Word length in bits; ``2**w`` is the number of quantization cells
        spanning ``[-b_max, b_max]``.

    Returns
    -------
    numpy.ndarray
        Quantized signal as a float array with the same shape as ``b``.
        An empty input yields an empty array and an all-zero input yields
        an all-zero array.
    """
    b = np.asarray(b, dtype=float)

    # nothing to quantize; np.max below would raise on an empty array
    if b.size == 0:
        return b.copy()

    # b_max from signal
    b_max = np.max(np.abs(b))

    # an all-zero signal would give Delta = 0 and 0/0 = NaN further down
    if b_max == 0:
        return np.zeros_like(b)

    # saturation and clipping (slightly less than maximum value to avoid numerical issues):
    # without it a sample with |b| = b_max would land in cell 2**w / 2,
    # one step beyond the outermost reconstruction level
    b_scaled = np.clip(b, -b_max*0.999, b_max*0.999)

    K = 2**w

    Delta = 2*b_max / K

    b_bar = np.sign(b_scaled)*Delta*(np.floor(np.abs(b_scaled)/Delta)+0.5)
    return b_bar

In [8]:
def show_and_play(w):
    """Quantize the loaded audio with ``w`` bits, then play and plot the result.

    Displays an audio player for the quantized signal and a two-panel figure:
    the original signal on top and the quantized signal below. For ``w < 7``
    the quantization levels are drawn as dotted horizontal lines in both panels.

    Reads the notebook globals ``audiodata``, ``time`` and ``fs``.
    """
    quantized = quantize(audiodata, w)

    peak = np.max(np.abs(audiodata))
    n_levels = 2**w
    step = 2*peak / n_levels

    # reconstruction levels: n_levels values spaced by `step`, symmetric about zero
    outermost = (n_levels - 1)*step/2
    q_levels = np.linspace(-outermost, outermost, n_levels)

    ipd.display(ipd.Audio(quantized, rate=fs))

    t_start, t_end = time[0], time[-1]
    panels = (
        (audiodata, '$b[k]$', 'Signal and quantization levels'),
        (quantized, r'$\bar{b}[k]$', 'Quantized signal'),
    )

    fig, axes = plt.subplots(2, 1, figsize=(12, 8))
    for ax, (signal, y_label, heading) in zip(axes, panels):
        ax.plot(time, signal)
        # beyond 6 bits the level lines would be too dense to be readable
        if w < 7:
            ax.hlines(q_levels, t_start, t_end, colors='k', linestyles='dotted', linewidth=1)
        ax.set_xlim((t_start, t_end))
        ax.set_ylim((-peak, peak))
        ax.set_xlabel('Time $t$ (s)', fontsize=20)
        ax.set_ylabel(y_label, fontsize=20)
        ax.set_title(heading, fontsize=20)
    fig.tight_layout()
    plt.show()
    #plt.savefig('audio_quantized_w%d.pdf' % w, bbox_inches='tight')
    
# slider selecting the word length w (1 to 12 bits, initially 4)
w_slider = widgets.IntSlider(min=1, max=12, step=1, value=4, continuous_update=False, description='w')
interactive_update = interactive(show_and_play, w=w_slider)

# the last child of the interactive container is its Output widget; give it a fixed height
output = interactive_update.children[-1]
output.layout.height = '850px'
interactive_update


interactive(children=(IntSlider(value=4, continuous_update=False, description='w', max=12, min=1), Output(layo…

In [7]:
# Write the quantization indices for w = 8 to a binary file (one unsigned byte per sample)

w = 8
b = audiodata

# b_max from signal
b_max = np.max(np.abs(b))

# saturation and clipping (slightly less than maximum value to avoid numerical issues)
b_scaled = np.clip(b, -b_max*0.999, b_max*0.999)

K = 2**w
Delta = 2*b_max / K

# Number of whole steps between zero and each sample (0 ... K/2 - 1 after clipping).
# An all-zero signal has Delta = 0; every sample then belongs to cell 0.
cell = np.floor(np.abs(b_scaled)/Delta) if Delta > 0 else np.zeros_like(b_scaled)

# Number the K quantization cells from the most negative (0) to the most positive (K-1):
# negative samples occupy 0 ... K/2 - 1, zero and positive samples K/2 ... K - 1.
# The previous formula produced 1 ... K, which for w = 8 does not fit into 8 bits,
# and the cast to signed int8 was out of range for every index above 127.
indices = np.where(b_scaled < 0, K//2 - 1 - cell, K//2 + cell)
indices.astype('uint8').tofile('indices.bin')