In [None]:
import librosa

# Audio clips analysed in this notebook; both names are passed to librosa.load
# in the following cells. They are relative paths, so they resolve against the
# kernel's working directory.
mp3_file = "captain-scurvy.mp3"
wav_file = "fsm-team-escp-stardrive.wav"

In [None]:
# Decode the MP3 clip; librosa.load returns the sample array and its sample rate.
mp3, mp3_sr = librosa.load(mp3_file)
# Echo the sample array as the cell output for a quick sanity check.
mp3

In [None]:
# Decode the WAV clip; librosa.load returns the sample array and its sample rate.
wav, wav_sr = librosa.load(wav_file)
# Echo the sample array as the cell output for a quick sanity check.
wav

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import librosa.display

plt.figure(figsize=(14, 5))

## MP3 Waveform

In [None]:
# Create the figure in the cell that draws on it; a figure opened in an earlier
# cell is already closed by the inline backend and would not set this size.
plt.figure(figsize=(14, 5))
# Pass the sample rate returned by librosa.load so the time axis does not
# depend on waveplot's default sample rate.
# NOTE(review): newer librosa releases replace waveplot with
# librosa.display.waveshow - confirm against the installed version.
librosa.display.waveplot(mp3, sr=mp3_sr)

## WAV Waveform

In [None]:
# Create the figure in the cell that draws on it; a figure opened in an earlier
# cell is already closed by the inline backend and would not set this size.
plt.figure(figsize=(14, 5))
# Pass the sample rate returned by librosa.load so the time axis does not
# depend on waveplot's default sample rate.
# NOTE(review): newer librosa releases replace waveplot with
# librosa.display.waveshow - confirm against the installed version.
librosa.display.waveplot(wav, sr=wav_sr)

A spectrogram is a visual representation of the spectrum of frequencies of a signal as it varies with time; in other words, it is a time-frequency portrait of the signal. Using a spectrogram, we can see how energy levels (dB) vary over time.

## MP3 Spectrogram

In [None]:
# Short-time Fourier transform of the clip, then magnitude converted to decibels.
mp3_transformed = librosa.stft(mp3)
mp3_db = librosa.amplitude_to_db(abs(mp3_transformed))

plt.figure(figsize=(20, 5))
# Pass the clip's own sample rate so the time and frequency axes are scaled
# from it rather than from specshow's default.
librosa.display.specshow(mp3_db, sr=mp3_sr, x_axis="time", y_axis="hz")
plt.title("MP3 spectrogram")
# Label the colour scale with its unit so the figure can be read on its own.
plt.colorbar(format="%+2.0f dB")

## WAV Spectrogram

In [None]:
# Short-time Fourier transform of the clip, then magnitude converted to decibels.
wav_transformed = librosa.stft(wav)
wav_db = librosa.amplitude_to_db(abs(wav_transformed))

plt.figure(figsize=(20, 5))
# Pass the clip's own sample rate so the time and frequency axes are scaled
# from it rather than from specshow's default.
librosa.display.specshow(wav_db, sr=wav_sr, x_axis="time", y_axis="hz")
plt.title("WAV spectrogram")
# Label the colour scale with its unit so the figure can be read on its own.
plt.colorbar(format="%+2.0f dB")

## Normalizing Volume

In [None]:
# `import sklearn` alone does not load the `preprocessing` submodule, so the
# attribute access inside normalize() would only work if some other import had
# already pulled it in. Import the submodule explicitly (this also binds the
# top-level `sklearn` name).
import sklearn.preprocessing


def normalize(x, axis=0):
    """Rescale ``x`` with scikit-learn's ``minmax_scale``.

    Parameters
    ----------
    x : array-like
        Data to rescale (here: the audio samples).
    axis : int, default 0
        Axis along which the minimum and maximum are taken; forwarded
        unchanged to ``minmax_scale``.

    Returns
    -------
    The rescaled data as returned by ``minmax_scale``. No ``feature_range``
    is passed, so scikit-learn's default range applies.
    """
    return sklearn.preprocessing.minmax_scale(x, axis=axis)


# Reference plot of the raw (un-normalized) WAV signal.
librosa.display.waveplot(wav, alpha=0.7)


In [None]:
# Same WAV signal after min-max scaling with normalize(), drawn with the same
# alpha as the raw-signal plot.
librosa.display.waveplot(normalize(wav), alpha=0.7)

## Pre-emphasis

Pre-emphasis boosts only the signal’s high-frequency components while leaving the low-frequency components in their original state. This is done to compensate for the high-frequency part of the spectrum, which is naturally suppressed when humans produce sounds.



In [None]:
# The pre-emphasis step that follows is expensive: it crashed the author's
# 2015 laptop running WSL 2. A smaller WAV file is therefore loaded here and
# used for the rest of the notebook.

small_wav, small_wav_sr = librosa.load("strange_wobble.wav")
# Quick look at the waveform of the smaller clip.
librosa.display.waveplot(small_wav)
   

In [None]:
import numpy as np

def _to_db_spectrogram(signal):
    """Return the STFT magnitude of ``signal`` in dB, using ``np.max`` as the reference."""
    magnitude = np.abs(librosa.stft(signal))
    return librosa.amplitude_to_db(magnitude, ref=np.max)


# Pre-emphasized copy of the small clip.
small_wav_preemph = librosa.effects.preemphasis(small_wav)

# dB spectrograms of the clip before and after pre-emphasis.
spectogram_original = _to_db_spectrogram(small_wav)
spectogram_preemph = _to_db_spectrogram(small_wav_preemph)

# This cell shows the original; the pre-emphasized version is plotted separately.
librosa.display.specshow(spectogram_original, x_axis='time', y_axis='log')
plt.title('Original signal')

In [None]:
# Spectrogram of the pre-emphasized signal, drawn with the same log-frequency
# and time axes as the original-signal plot.
librosa.display.specshow(spectogram_preemph, y_axis='log', x_axis='time')
plt.title('Pre-emphasized signal')

# Extracting Features

## Zero Crossing Rate

The zero crossing rate is the number of times, over a given interval, that the signal’s amplitude crosses zero. Essentially, it counts how often the signal changes sign (from positive to negative or vice versa) in the given time period. If the count of zero crossings is higher for a given signal, the signal is said to change rapidly, which implies that it contains high-frequency information, and vice versa.


In [None]:
# Display the sample array of the small clip before extracting features from it.
small_wav

In [None]:
# Bounds (in samples) of the short window that is plotted below.
n0 = 9000
n1 = 9100
# Named `wav_segment` rather than `slice` so the built-in `slice` is not shadowed.
wav_segment = small_wav[n0:n1]

plt.figure(figsize=(20, 5))
plt.plot(wav_segment)
plt.grid()

# Zero-crossing indicator for every sample of the clip; pad=False is forwarded
# to librosa as in the original call.
zero_crossing = librosa.zero_crossings(small_wav, pad=False)
# Show the number of crossings inside the plotted window. The array's shape
# would only repeat the signal length and say nothing about the crossings.
zero_crossing[n0:n1].sum()

## Spectral Rolloff

The rolloff frequency is defined as the frequency below which a given percentage of the total spectral energy (e.g. 85%) is contained. It can be used to distinguish between harmonic and noisy sounds.

In [None]:
# The audio is passed by keyword (`y=`) because newer librosa releases no longer
# accept it positionally, and `sr` is given explicitly in both calls so they use
# the same sample rate as the loaded clip.

# Approximate maximum frequencies with roll_percent=0.85 (default)
rolloff = librosa.feature.spectral_rolloff(y=small_wav, sr=small_wav_sr)
print(rolloff)

# Approximate minimum frequencies with roll_percent=0.1
# (this reassigns `rolloff`; the first result is only kept in the printed output)
rolloff = librosa.feature.spectral_rolloff(y=small_wav, sr=small_wav_sr, roll_percent=0.1)
print(rolloff)



## Chroma Frequencies

The entire spectrum is projected onto 12 bins representing the 12 distinct semitones (or chroma) of the musical octave. The human perception of pitch is periodic in the sense that two pitches are perceived as similar if they differ by one or several octaves (where 1 octave = 12 semitones).


In [None]:
# Hop size in samples, shared by the feature extraction and the plot so the
# time axis matches the frames that were actually computed.
hop_length = 512

# The audio is passed by keyword (`y=`) because newer librosa releases no longer
# accept it positionally.
chromagram = librosa.feature.chroma_stft(y=small_wav, sr=small_wav_sr, hop_length=hop_length)

plt.figure(figsize=(15, 5))
# `sr` is passed so the time axis uses the clip's sample rate rather than
# specshow's default.
librosa.display.specshow(chromagram, sr=small_wav_sr, x_axis="time", y_axis="chroma", hop_length=hop_length, cmap="coolwarm")

## Stuff you learn only after reading the docs

In [None]:
# List the example recordings that librosa provides; one of them ("nutcracker")
# is loaded in the next cell.
librosa.util.list_examples()

In [None]:
# Look up librosa's bundled "nutcracker" example; the returned value is passed
# straight to librosa.load, like the file names used earlier.
sample = librosa.util.example("nutcracker")
nutcracker, nutcracker_sr = librosa.load(sample)
# Plot the waveform of the example recording.
librosa.display.waveplot(nutcracker)