In [1]:
import librosa
import librosa.display
from split_transients import transients_from_sound_file, transients_from_midi
import matplotlib.pyplot as plt
import numpy as np

In [13]:
soundFile = '../sound-files/first-four-seconds.wav'
midiFile = '../midi/first-four-seconds.mid'

sr = librosa.get_samplerate(soundFile)
midiTimes, midiSamples = transients_from_midi(midiFile, soundFile)
transientTimes, transientSamples = transients_from_sound_file(soundFile)

Short-time Fourier transform (STFT).

The STFT represents a signal in the time-frequency domain by computing discrete Fourier transforms (DFT) over short overlapping windows.

This function returns a complex-valued matrix D such that
    
    np.abs(D[f, t]) is the magnitude of frequency bin f at frame t, and
    
    np.angle(D[f, t]) is the phase of frequency bin f at frame t.

The integers t and f can be converted to physical units by means of the utility functions frames_to_sample and fft_frequencies.

In [18]:
S = [np.abs(librosa.stft(y)) for y in transientSamples[0:3]]
with open("S.txt", "w") as file:
    file.write(S)
print(S)

[array([[5.4624897e-01, 2.4768096e-01, 1.4556460e-01, ..., 5.7867712e-01,
        4.5554334e-01, 5.4170746e-01],
       [2.2452552e+00, 1.5994334e+00, 1.1147156e+00, ..., 2.4819484e-01,
        3.9017826e-01, 2.6280621e-01],
       [4.1671777e+00, 3.8175552e+00, 3.0202076e+00, ..., 6.3510722e-01,
        6.0488397e-01, 8.2328826e-01],
       ...,
       [4.5242737e-04, 2.0710655e-04, 2.2738833e-04, ..., 1.3031939e-04,
        2.1871932e-04, 3.3146524e-04],
       [5.5690703e-04, 3.5128437e-04, 3.1929201e-04, ..., 2.3604959e-04,
        4.4018662e-04, 5.8832398e-04],
       [5.9645117e-04, 2.4223674e-04, 3.1842748e-04, ..., 2.6871334e-04,
        2.3753567e-04, 6.7172351e-04]], dtype=float32), array([[5.28261960e-01, 3.40773761e-01, 3.64203081e-02, ...,
        4.94968325e-01, 3.40660214e-02, 6.10241592e-02],
       [1.47714257e+00, 8.36813867e-01, 6.81992233e-01, ...,
        1.36160457e+00, 7.20932424e-01, 5.77513456e-01],
       [2.13315511e+00, 2.37118530e+00, 2.42295027e+00, ...,
 

Display a Spectrogram

Other Spectral Representation

istft(stft_matrix\[, hop_length, win_length, …\]): Inverse short-time Fourier transform (ISTFT).

reassigned_spectrogram(y\[, sr, S, n_fft, …\]): Time-frequency reassigned spectrogram.

cqt(y\[, sr, hop_length, fmin, n_bins, …\]): Compute the constant-Q transform of an audio signal.

icqt(C\[, sr, hop_length, fmin, …\]): Compute the inverse constant-Q transform.

hybrid_cqt(y\[, sr, hop_length, fmin, …\]): Compute the hybrid constant-Q transform of an audio signal.

pseudo_cqt(y\[, sr, hop_length, fmin, …\]): Compute the pseudo constant-Q transform of an audio signal.

vqt(y\[, sr, hop_length, fmin, n_bins, …\]): Compute the variable-Q transform of an audio signal.

iirt(y\[, sr, win_length, hop_length, …\]): Time-frequency representation using IIR filters

fmt(y\[, t_min, n_fmt, kind, beta, …\]): The fast Mellin transform (FMT)

magphase(D\[, power\]): Separate a complex-valued spectrogram D into its magnitude (S) and phase (P) components, so that D = S * P.

In [9]:
for stft in range(len(S)):
    fig, ax = plt.subplots()
    img = librosa.display.specshow(librosa.amplitude_to_db(S[stft], ref=np.max), y_axis='log', x_axis='time', ax=ax)
    ax.set_title('Power spectrogram' + str(stft))
    fig.colorbar(img, ax=ax, format="%+2.0f dB")

KeyboardInterrupt: 

In [12]:
rms = [librosa.feature.rms(S=s) for s in S]
print(rms)

[array([[0.00429413, 0.00384224, 0.00338724, 0.0028724 , 0.00228636,
        0.00276139, 0.00298986, 0.00289107, 0.00315976, 0.00269093]]), array([[0.00291965, 0.00305441, 0.00401216, 0.00320351, 0.00291339,
        0.00276227, 0.00296722, 0.00290575, 0.00390277, 0.00355683,
        0.00272943]]), array([[0.00282906, 0.00228784, 0.00133313, 0.00089199, 0.00076035,
        0.00069476, 0.00062414, 0.00197207, 0.00340721]]), array([[0.00446064, 0.0055251 , 0.00611241, 0.00612323, 0.00572041,
        0.0057641 , 0.00571464, 0.00568629, 0.00526499, 0.00486434,
        0.00446898, 0.00405936]]), array([[0.00448394, 0.00411636, 0.00388532, 0.00338925, 0.0026411 ,
        0.00166554, 0.00123548, 0.00231988, 0.00255702, 0.00217523,
        0.00250295, 0.00224835]]), array([[0.00385085, 0.00316517, 0.00322562, 0.00303316, 0.00287986,
        0.00200742, 0.00237705, 0.00251053, 0.00330807, 0.00322049,
        0.00227288]]), array([[0.00177823, 0.00187517, 0.00197392, 0.00180171, 0.00175348,
     

In [16]:
centroids = [librosa.feature.spectral_centroid(y=y, sr=sr) for y in transientSamples]
print(centroids)

[array([[2162.17322044, 2244.38481973, 2224.86084737, 2184.82947721,
        2134.44378149, 2162.09206232, 2411.75879016, 2839.34718661,
        2652.48061739, 2679.86105619]]), array([[2776.06175546, 2608.00825785, 2045.91961584, 1885.75710635,
        1894.13433458, 1998.32327491, 1931.34797485, 2048.03978704,
        1719.74582678, 1980.35129966, 2321.43878781]]), array([[1676.61172704, 1643.76212351, 1629.6757366 , 1876.26572623,
        1944.43001912, 2344.11440805, 3411.8412324 , 4356.8072871 ,
        4373.13974623]]), array([[4293.28112215, 3795.33190315, 3168.91037167, 3388.498056  ,
        3632.54714163, 3676.32506642, 3580.27735209, 3595.293699  ,
        3600.43892592, 3202.81372472, 2824.49474979, 2657.94001958]]), array([[2447.15538666, 2434.2874017 , 2090.14178014, 2061.63327754,
        1890.59040487, 1884.20782788, 2058.13088779, 1761.4262888 ,
        2268.15352069, 2445.27661272, 2267.20876274, 2207.71005211]]), array([[1612.09893112, 1672.90883401, 1181.52179297,  

In [17]:
mfccs = [librosa.feature.mfcc(y=y, sr=sr) for y in transientSamples]
print(mfccs)

5.01197767e+00,
        -9.77977812e-02,  2.33585835e+00, -7.62857103e+00,
        -1.42265472e+01, -1.44108486e+01, -6.34838057e+00,
         7.18356943e+00,  6.25568247e+00,  5.67472219e-01,
        -3.27755809e+00],
       [ 2.65037465e+00,  8.73551178e+00,  1.38274555e+01,
         1.71054344e+01,  1.95783539e+01,  2.52506371e+01,
         2.58503342e+01,  2.30438652e+01,  1.57013474e+01,
         1.45158634e+01,  1.23559990e+01,  8.27306938e+00,
         4.98283100e+00],
       [-1.00222282e+01, -7.63079453e+00, -3.87188244e+00,
        -8.98261738e+00, -1.24867001e+01, -1.06669960e+01,
        -9.44159222e+00, -9.89570999e+00, -1.24445038e+01,
        -5.36530018e+00,  2.07193184e+00,  2.37079692e+00,
        -2.48349762e+00],
       [ 3.11457014e+00,  8.59628105e+00,  1.45811348e+01,
         1.51149197e+01,  4.28073740e+00, -1.14298325e+01,
        -1.51694412e+01, -1.09431458e+01, -2.00556111e+00,
         8.98732567e+00,  1.44348640e+01,  1.62094002e+01,
         7.84238720e+