# Rollers Pitch-Shifting Algorithm

The "Rollers" pitch-shifting algorithm is based on narrow subband frequency shifting.
To achieve low latency, an IIR filter bank is used.


## Prototype Design

* constant Q IIR filter bank
    - third-octave butterworth filter bank
    - with 28 bands

* frequency shifting per band using single sideband modulation
    - analytic signal via `scipy.signal.hilbert()`
    - modulation with a complex sinusoid

In [None]:
# import the needed modules
import numpy as np
from scipy import signal
from scipy.io import wavfile
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = [15, 3]
from scipy.fft import rfft, fft, rfftfreq, fftfreq
import IPython.display as ipd

In [None]:
# let's define a function for plotting the magnitude spectrum of a signal
def plot_mag_spec(sig, fs, name="", negative=False, f_range=(20, 20000), db_range=(-40, 0), f_log=True):
    """
    Plot the magnitude spectrum of the signal 'sig'.

    The signal is weighted with a Hann window before the FFT and the spectrum
    is scaled by 1/sig.size. The gain of the window is not compensated.

    sig:    signal to be analyzed (numpy array, must not be empty)
    fs:     sampling rate of sig
    name:   title of the plot
    negative: show negative frequencies as well (default: False)
    f_range: frequency range (f_min, f_max) of the plot (default: 20Hz to 20kHz);
             with negative=True the axis spans -f_max to +f_max
    db_range: magnitude range in dB of the plot (default: -40dB to 0dB)
    f_log:  logarithmic or linear frequency axis? (default: True is logarithmic)

    Raises ValueError if sig is empty.
    """
    if sig.size == 0:
        raise ValueError("cannot plot the spectrum of an empty signal")

    # the window functions live in scipy.signal.windows; the old alias
    # signal.hann is no longer available in current SciPy releases
    w = signal.windows.hann(sig.size)
    # smallest positive float: keeps log10() finite for bins that are exactly 0
    floor = np.finfo(float).tiny

    if not negative:
        # just positive frequencies
        freq = rfftfreq(sig.size, 1 / fs)
        spec = rfft(sig * w)
    else:
        # positive and negative frequencies, sorted by ascending frequency so
        # that the plotted line does not jump from +fs/2 back to -fs/2
        freq = np.fft.fftshift(fftfreq(sig.size, 1 / fs))
        spec = np.fft.fftshift(fft(sig * w))
        if f_log:
            # omit 0Hz
            keep = freq != 0
            freq = freq[keep]
            spec = spec[keep]

    mag = 20 * np.log10(np.maximum(np.abs(spec) / sig.size, floor))

    if not negative:
        if f_log:
            plt.semilogx(freq, mag)
        else:
            plt.plot(freq, mag)
        plt.xlim(f_range)
    else:
        plt.plot(freq, mag)
        plt.xlim((-f_range[1], f_range[1]))
        if f_log:
            # symlog: logarithmic on both sides of 0Hz
            plt.xscale('symlog')

    plt.ylim(db_range)
    plt.xlabel("f [Hz]")
    plt.ylabel("amplitude [dB]")
    plt.title(name)
    plt.show()

Let's define classes for an object-oriented design of the filter bank and the frequency shifter.

In [None]:
class Filterbank:
    """
    A constant Q IIR Butterworth filter bank for audio signal processing.

    Attributes:
    n:         number of bands
    order:     order passed to the Butterworth design of every band
    fs:        sampling rate
    filt_type: layout of the bands ("third octave" or log spaced)
    fcs:       center frequencies, fcs[k] belongs to filters[k]
    filters:   list of band-pass filters in 'sos' format, one per band
    """

    def __init__(self, n, order, fs, filt_type=""):
        """
        Design the band-pass filters of the filter bank.
        n:         number of bands
        order:     order of the Butterworth filters
        fs:        sampling rate
        filt_type: "third octave" for a third-octave layout around 1kHz,
                   anything else for a log spaced layout from 16Hz to about 20kHz

        Raises ValueError if a band edge is not below the Nyquist frequency.
        """
        self.n = n
        self.order = order
        self.fs = fs
        self.filt_type = filt_type

        if filt_type == "third octave":
            # third-octave filter bank
            freq_offset = 2
            # one extra band index on each side, only needed for the band edges
            idx = np.arange(n + 2) - n // 2 - freq_offset

            # center frequencies are defined relative to a bandpass with center frequency at 1kHz
            grid = np.power(2, idx / 3) * 1000

            # the cutoff frequencies are the geometric means of neighboring center frequencies
            f_cls = np.sqrt(grid[:-2] * grid[1:-1])  # low cutoff frequencies
            f_cs = grid[1:-1]                        # center frequencies
            f_chs = np.sqrt(grid[1:-1] * grid[2:])   # high cutoff frequencies
        else:
            # log spaced filter bank
            # 2**4 = 16Hz
            # 2**14.3 = 20171Hz
            f = np.logspace(4, 14.3, n*3, base=2)

            # every band takes three consecutive grid points (low, center, high).
            # NOTE: neighboring bands therefore do not share a cutoff frequency,
            # band k ends at f[3k+2] and band k+1 starts at f[3k+3].
            f_cls = f[::3]   # low cutoff frequencies
            f_cs = f[1::3]   # center frequencies
            f_chs = f[2::3]  # high cutoff frequencies

        # one filter per band, so that fcs[k] is the center frequency of filters[k]
        self.fcs = f_cs
        self.filters = [self.butter_bp(lo, hi) for lo, hi in zip(f_cls, f_chs)]

    def butter_bp(self, lowcut, highcut):
        """
        Design a Butterworth bandpass filter and return the 'sos' filter.
        lowcut: low cutoff frequency
        highcut: high cutoff frequency

        Raises ValueError if not 0 < lowcut < highcut < fs/2.
        """
        f_nyq = 0.5 * self.fs
        if not 0 < lowcut < highcut < f_nyq:
            raise ValueError(
                "band edges must satisfy 0 < lowcut < highcut < fs/2, got "
                f"lowcut={lowcut}, highcut={highcut}, fs={self.fs}"
            )
        # signal.butter() expects the cutoff frequencies normalized to the Nyquist frequency
        low = lowcut / f_nyq
        high = highcut / f_nyq
        return signal.butter(self.order, [low, high], btype='band', output='sos')

    def plot_filters(self):
        """Plot the magnitude response of all filters of the filter bank."""
        for sos in self.filters:
            w, h = signal.sosfreqz(sos, worN=10000)
            # w is in rad/sample; skip the first point (0Hz) for the logarithmic axis
            plt.semilogx((self.fs * 0.5 / np.pi) * w[1:], 20*np.log10(np.abs(h[1:])))
        plt.ylim((-100, 5))
        plt.xlim((10, 20000))
        plt.ylabel('H [dB]')
        plt.xlabel('f [Hz]')
        if self.filt_type == "third octave":
            plt.title('third-octave filter bank')
        else:
            plt.title('log spaced filter bank')
        plt.show()

    def filt(self, in_sig):
        """Filter the in_sig with every band and return a list of the filtered signals."""
        return [signal.sosfilt(sos, in_sig) for sos in self.filters]

# test the Filterbank class
#filtb = Filterbank(28, 4, 44100)
#filtb.plot_filters()
#noise = np.random.normal(0, 1, 10*1024)
#filtered_noise = filtb.filt(noise)
#plot_mag_spec(filtered_noise[10], 44100, db_range=[-100, -30])

In [None]:
class Frequency_shifter:
    """A frequency shifter for audio processing using single sideband modulation"""

    def shift(self, in_sig, fs, f_shift):
        """
        Do frequency shifting of the signal 'in_sig' by the frequency 'f_shift'.
        in_sig:  real input signal (numpy array)
        fs:      sampling rate of in_sig
        f_shift: frequency shift in Hz, same unit as fs (negative values shift down)

        Returns the real, frequency shifted signal with the size of in_sig.
        """
        # sample instants k/fs. np.linspace(0, N/fs, N) would include the end
        # point and space the samples by N/((N-1)*fs) instead of 1/fs, which
        # scales the applied shift by N/(N-1).
        t = np.arange(in_sig.size) / fs
        # the carrier is a complex exponential
        carrier = np.exp(1j*2*np.pi*f_shift*t)
        # analytic signal times carrier moves the spectrum by f_shift,
        # the real part is the single sideband output
        return (signal.hilbert(in_sig) * carrier).real

# test the Frequency_shifter class
#filtb = Filterbank(28, 4, 44100)
#noise = np.random.normal(0, 1, 10*1024)
#filtered_noise = filtb.filt(noise)
#plot_mag_spec(filtered_noise[20], 44100, "original", db_range=[-100, -30], f_log=False)
#
#f_shifter = Frequency_shifter()
#shifted_filtered_noise = f_shifter.shift(filtered_noise[20], 44100, 5000)
#plot_mag_spec(shifted_filtered_noise, 44100, "shifted", db_range=[-100, -30], f_log=False)

Now to the actual signal processing using the defined classes from above.

In [None]:
# load audio material
fs, x = wavfile.read("../../samples/Toms_diner.wav")

# work in floating point: taking abs() of integer samples could overflow
# for the most negative value of an integer dtype
x = x.astype(np.float64)

# normalize to a peak of 1. The peak is the largest absolute sample,
# x.max() alone would ignore a negative peak that is larger than the positive one.
peak = np.max(np.abs(x))
if peak > 0:
    # leave an all-zero signal untouched instead of dividing by zero
    x = x / peak

plt.plot(x)
plt.title("original")
ipd.Audio(x, rate=fs)

In [None]:
# pitch shift ratio
psr = 2

# the per-band shifter and the third-octave analysis filter bank
freq_shifter = Frequency_shifter()
filt_bank = Filterbank(28, 4, fs, filt_type="third octave")

# split the input into its subbands
x_filtered = filt_bank.filt(x)

# move every subband by the distance its center frequency has to travel
out_signals = [
    freq_shifter.shift(band, fs, fc * psr - fc)
    for fc, band in zip(filt_bank.fcs, x_filtered)
]

# mix the shifted subbands, band after band
out_sig = sum(out_signals, np.zeros(out_signals[0].size))

In [None]:
# show the waveform of the result and an audio player for it
plt.title("'Rollers' pitch shifted")
plt.plot(out_sig)
ipd.Audio(data=out_sig, rate=fs)

With the very coarse filter bank used here, the result is shifted in pitch, but it also has inharmonic frequency content and sounds like a chorus effect, since the frequency bands are too wide.
This is an interesting artifact, but to increase the sound quality, we have to design a filter bank with a lot more filters.
The paper suggests $n=73$ filters for low quality and $n=1500$ for perfect quality.
*Good results* can be achieved with $n=200$ bands.


## A design with a larger filter bank



In [None]:
# define the rollers algorithm
def rollers(x, psr, fs, n, order, filt_type=""):
    """
    Rollers pitch-shifting algorithm.

    The input is split into subbands, every subband is frequency shifted by
    fc * psr - fc (fc: center frequency of the band) and the shifted subbands
    are summed.

    x: input signal to be pitch-shifted (numpy array)
    psr: pitch shifting ratio (1: unison, 2: octave up, 0.5: octave down, etc.)
    fs:  sampling rate of x
    n: number of frequency bands of the filter bank
    order: order of the butterworth filters in the filter bank
    filt_type: layout of the filter bank, passed on to Filterbank
               (default: "" is the log spaced layout)

    Returns the pitch-shifted signal with the size of x.
    """
    filt_bank = Filterbank(n, order, fs, filt_type=filt_type)
    freq_shifter = Frequency_shifter()

    # Filter, shift and accumulate one band at a time. This is the same
    # filtering as filt_bank.filt(x), but only a single subband is kept in
    # memory instead of two lists with one full-length signal per band.
    y = np.zeros(x.size)
    for sos, fc in zip(filt_bank.filters, filt_bank.fcs):
        band = signal.sosfilt(sos, x)
        f_shift = fc * psr - fc
        y += freq_shifter.shift(band, fs, f_shift)
    return y

In [None]:
# pitch shift ratio, filter order and number of bands
psr, order, n = 1.25, 4, 200

out_sig = rollers(x, psr=psr, fs=fs, n=n, order=order)

In [None]:
# show the waveform of the result and an audio player for it
plt.title("'Rollers' pitch shifted")
plt.plot(out_sig)
ipd.Audio(data=out_sig, rate=fs)

There are stronger downward chirp artifacts with $n=200$, but the inharmonicities and the chorus-type effect are far less prominent.

In [None]:
# Same ratio and number of bands as before, but with a lower filter order
psr, order, n = 1.25, 2, 200

out_sig = rollers(x, psr=psr, fs=fs, n=n, order=order)

In [None]:
# show the waveform of the result and an audio player for it
plt.title("'Rollers' pitch shifted")
plt.plot(out_sig)
ipd.Audio(data=out_sig, rate=fs)