diff --git a/realbook/__init__.py b/realbook/__init__.py
index 3c114bf..193e4ea 100644
--- a/realbook/__init__.py
+++ b/realbook/__init__.py
@@ -16,7 +16,7 @@
 # limitations under the License.
 
 __author__ = "Spotify"
-__version__ = "1.0.1"
+__version__ = "1.0.2"
 __email__ = "realbook@spotify.com"
 __description__ = "Python libraries for easier machine learning on audio"
 __url__ = "https://github.com/spotify/realbook"
diff --git a/realbook/callbacks/spectrogram_visualization.py b/realbook/callbacks/spectrogram_visualization.py
index fbca66c..e700a31 100644
--- a/realbook/callbacks/spectrogram_visualization.py
+++ b/realbook/callbacks/spectrogram_visualization.py
@@ -106,7 +106,7 @@ def on_train_begin(self, logs: Any = None) -> None:
 
             with self.tensorboard_writer.as_default():
                 # Pull n random batches from the dataset and send them to TensorBoard.
-                for (data, _) in self.example_batches:
+                for data, _ in self.example_batches:
                     assert tf.rank(data) == 2, "Expected input data to be of rank 2, with shape (batch, audio)."
                     assert tf.shape(data)[0] < tf.shape(data)[1], (
                         "Expected input data to be of rank 2, with shape (batch, audio), but got shape"
diff --git a/realbook/layers/signal.py b/realbook/layers/signal.py
index 95848ca..9499423 100644
--- a/realbook/layers/signal.py
+++ b/realbook/layers/signal.py
@@ -18,11 +18,11 @@
 import warnings
 from typing import Any, Callable, Dict, Optional, Union
 
-import librosa
 import tensorflow as tf
 import numpy as np
 
 from realbook.layers.math import log_base_b
+from realbook.vendor import librosa_filters
 
 
 def _create_padded_window(
@@ -209,7 +209,7 @@ def build(self, input_shape: tf.TensorShape) -> None:
             self.fft_length
         )  # type: ignore
 
-        self.window_sum = librosa.filters.window_sumsquare(
+        self.window_sum = librosa_filters.window_sumsquare(  # type: ignore
             window=self.window.numpy(),
             n_frames=input_shape[0] if input_shape.rank == 2 else input_shape[1],
             win_length=self.window_length,
@@ -353,7 +353,7 @@ def build(self, input_shape: tf.TensorShape) -> None:
         super().build(input_shape)
 
         self.mel_weight_matrix = tf.constant(
-            librosa.filters.mel(
+            librosa_filters.mel(  # type: ignore
                 sr=self.sample_rate,
                 n_fft=self.fft_length,
                 n_mels=self.n_mels,
diff --git a/realbook/vendor/__init__.py b/realbook/vendor/__init__.py
new file mode 100644
index 0000000..d984363
--- /dev/null
+++ b/realbook/vendor/__init__.py
@@ -0,0 +1,16 @@
+#!/usr/bin/env python
+# encoding: utf-8
+#
+# Copyright 2023 Spotify AB
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/realbook/vendor/librosa_filters.py b/realbook/vendor/librosa_filters.py
new file mode 100644
index 0000000..0a6d0f1
--- /dev/null
+++ b/realbook/vendor/librosa_filters.py
@@ -0,0 +1,918 @@
+#!/usr/bin/env python
+# encoding: utf-8
+#
+# Copyright (c) 2013--2023, librosa development team.
+# Permission to use, copy, modify, and/or distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notice and this permission notice appear in all copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+# AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
+# INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
+# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+# TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+# OF THIS SOFTWARE.
+
+# This is all copied from Librosa: don't bother type checking with MyPy.
+# type: ignore
+
+import warnings
+import numpy as np
+
+from numpy.typing import ArrayLike, DTypeLike
+from typing import Optional, Union, Tuple, Any, Callable, Sequence, TypeVar
+from typing_extensions import Literal
+
+_BoolLike_co = Union[bool, np.bool_]
+_IntLike_co = Union[_BoolLike_co, int, "np.integer[Any]"]
+_FloatLike_co = Union[_IntLike_co, float, "np.floating[Any]"]
+
+_WindowSpec = Union[str, Tuple[Any, ...], float, Callable[[int], np.ndarray], ArrayLike]
+_T = TypeVar("_T")
+_SequenceLike = Union[Sequence[_T], np.ndarray]
+_ScalarOrSequence = Union[_T, _SequenceLike[_T]]
+
+
+def tiny(x: Union[float, np.ndarray]) -> _FloatLike_co:
+    """Compute the tiny-value corresponding to an input's data type.
+
+    This is the smallest "usable" number representable in ``x.dtype``
+    (e.g., float32).
+
+    This is primarily useful for determining a threshold for
+    numerical underflow in division or multiplication operations.
+
+    Parameters
+    ----------
+    x : number or np.ndarray
+        The array to compute the tiny-value for.
+        All that matters here is ``x.dtype``.
+
+    Returns
+    -------
+    tiny_value : float
+        The smallest positive usable number for the type of ``x``.
+        If ``x`` is neither real- nor complex-floating (e.g. integer or
+        boolean), the tiny value for ``np.float32`` is returned instead.
+
+    See Also
+    --------
+    numpy.finfo
+
+    Examples
+    --------
+    For a standard double-precision floating point number:
+
+    >>> librosa.util.tiny(1.0)
+    2.2250738585072014e-308
+
+    Or explicitly as double-precision
+
+    >>> librosa.util.tiny(np.asarray(1e-5, dtype=np.float64))
+    2.2250738585072014e-308
+
+    Or complex numbers
+
+    >>> librosa.util.tiny(1j)
+    2.2250738585072014e-308
+
+    Single-precision floating point:
+
+    >>> librosa.util.tiny(np.asarray(1e-5, dtype=np.float32))
+    1.1754944e-38
+
+    Integer
+
+    >>> librosa.util.tiny(5)
+    1.1754944e-38
+    """
+
+    # Make sure we have an array view
+    x = np.asarray(x)
+
+    # Real and complex floating dtypes keep their own dtype; everything else falls back to float32
+    if np.issubdtype(x.dtype, np.floating) or np.issubdtype(x.dtype, np.complexfloating):
+        dtype = x.dtype
+    else:
+        dtype = np.dtype(np.float32)
+
+    return np.finfo(dtype).tiny
+
+
+def normalize(
+    S: np.ndarray,
+    *,
+    norm: Optional[float] = np.inf,
+    axis: Optional[int] = 0,
+    threshold: Optional[_FloatLike_co] = None,
+    fill: Optional[bool] = None,
+) -> np.ndarray:
+    """Normalize an array along a chosen axis.
+
+    Given a norm (described below) and a target axis, the input
+    array is scaled so that::
+
+        norm(S, axis=axis) == 1
+
+    For example, ``axis=0`` normalizes each column of a 2-d array
+    by aggregating over the rows (0-axis).
+    Similarly, ``axis=1`` normalizes each row of a 2-d array.
+
+    This function also supports thresholding small-norm slices:
+    any slice (i.e., row or column) with norm below a specified
+    ``threshold`` can be left un-normalized, set to all-zeros, or
+    filled with uniform non-zero values that normalize to 1.
+
+    Note: the semantics of this function differ from
+    `scipy.linalg.norm` in two ways: multi-dimensional arrays
+    are supported, but matrix-norms are not.
+
+    Parameters
+    ----------
+    S : np.ndarray
+        The array to normalize
+
+    norm : {np.inf, -np.inf, 0, float > 0, None}
+        - `np.inf`  : maximum absolute value
+        - `-np.inf` : minimum absolute value
+        - `0`    : number of non-zeros (the support)
+        - float  : corresponding l_p norm
+            See `scipy.linalg.norm` for details.
+        - None : no normalization is performed; ``S`` itself is returned
+
+    axis : int [scalar]
+        Axis along which to compute the norm.
+
+    threshold : number > 0 [optional]
+        Only the columns (or rows) with norm at least ``threshold`` are
+        normalized.
+
+        By default, the threshold is determined from
+        the numerical precision of ``S.dtype``.
+
+    fill : None or bool
+        If None, then columns (or rows) with norm below ``threshold``
+        are left as is.
+
+        If False, then columns (rows) with norm below ``threshold``
+        are set to 0.
+
+        If True, then columns (rows) with norm below ``threshold``
+        are filled uniformly such that the corresponding norm is 1.
+
+        .. note:: ``fill=True`` is incompatible with ``norm=0`` because
+            no uniform vector exists with l0 "norm" equal to 1.
+
+    Returns
+    -------
+    S_norm : np.ndarray [shape=S.shape]
+        Normalized array
+
+    Raises
+    ------
+    ValueError
+        If ``norm`` is not among the valid types defined above
+        If ``S`` is not finite
+        If ``fill=True`` and ``norm=0``
+        If ``threshold`` is given and is not strictly positive
+        If ``fill`` is not None or a boolean
+
+    See Also
+    --------
+    scipy.linalg.norm
+
+    Notes
+    -----
+    Upstream librosa caches this function (level 40); this vendored copy does not.
+
+    Examples
+    --------
+    >>> # Construct an example matrix
+    >>> S = np.vander(np.arange(-2.0, 2.0))
+    >>> S
+    array([[-8.,  4., -2.,  1.],
+           [-1.,  1., -1.,  1.],
+           [ 0.,  0.,  0.,  1.],
+           [ 1.,  1.,  1.,  1.]])
+    >>> # Max (l-infinity)-normalize the columns
+    >>> librosa.util.normalize(S)
+    array([[-1.   ,  1.   , -1.   ,  1.   ],
+           [-0.125,  0.25 , -0.5  ,  1.   ],
+           [ 0.   ,  0.   ,  0.   ,  1.   ],
+           [ 0.125,  0.25 ,  0.5  ,  1.   ]])
+    >>> # Max (l-infinity)-normalize the rows
+    >>> librosa.util.normalize(S, axis=1)
+    array([[-1.   ,  0.5  , -0.25 ,  0.125],
+           [-1.   ,  1.   , -1.   ,  1.   ],
+           [ 0.   ,  0.   ,  0.   ,  1.   ],
+           [ 1.   ,  1.   ,  1.   ,  1.   ]])
+    >>> # l1-normalize the columns
+    >>> librosa.util.normalize(S, norm=1)
+    array([[-0.8  ,  0.667, -0.5  ,  0.25 ],
+           [-0.1  ,  0.167, -0.25 ,  0.25 ],
+           [ 0.   ,  0.   ,  0.   ,  0.25 ],
+           [ 0.1  ,  0.167,  0.25 ,  0.25 ]])
+    >>> # l2-normalize the columns
+    >>> librosa.util.normalize(S, norm=2)
+    array([[-0.985,  0.943, -0.816,  0.5  ],
+           [-0.123,  0.236, -0.408,  0.5  ],
+           [ 0.   ,  0.   ,  0.   ,  0.5  ],
+           [ 0.123,  0.236,  0.408,  0.5  ]])
+
+    >>> # Thresholding and filling
+    >>> S[:, -1] = 1e-308
+    >>> S
+    array([[ -8.000e+000,   4.000e+000,  -2.000e+000,
+              1.000e-308],
+           [ -1.000e+000,   1.000e+000,  -1.000e+000,
+              1.000e-308],
+           [  0.000e+000,   0.000e+000,   0.000e+000,
+              1.000e-308],
+           [  1.000e+000,   1.000e+000,   1.000e+000,
+              1.000e-308]])
+
+    >>> # By default, small-norm columns are left untouched
+    >>> librosa.util.normalize(S)
+    array([[ -1.000e+000,   1.000e+000,  -1.000e+000,
+              1.000e-308],
+           [ -1.250e-001,   2.500e-001,  -5.000e-001,
+              1.000e-308],
+           [  0.000e+000,   0.000e+000,   0.000e+000,
+              1.000e-308],
+           [  1.250e-001,   2.500e-001,   5.000e-001,
+              1.000e-308]])
+    >>> # Small-norm columns can be zeroed out
+    >>> librosa.util.normalize(S, fill=False)
+    array([[-1.   ,  1.   , -1.   ,  0.   ],
+           [-0.125,  0.25 , -0.5  ,  0.   ],
+           [ 0.   ,  0.   ,  0.   ,  0.   ],
+           [ 0.125,  0.25 ,  0.5  ,  0.   ]])
+    >>> # Or set to constant with unit-norm
+    >>> librosa.util.normalize(S, fill=True)
+    array([[-1.   ,  1.   , -1.   ,  1.   ],
+           [-0.125,  0.25 , -0.5  ,  1.   ],
+           [ 0.   ,  0.   ,  0.   ,  1.   ],
+           [ 0.125,  0.25 ,  0.5  ,  1.   ]])
+    >>> # With an l1 norm instead of max-norm
+    >>> librosa.util.normalize(S, norm=1, fill=True)
+    array([[-0.8  ,  0.667, -0.5  ,  0.25 ],
+           [-0.1  ,  0.167, -0.25 ,  0.25 ],
+           [ 0.   ,  0.   ,  0.   ,  0.25 ],
+           [ 0.1  ,  0.167,  0.25 ,  0.25 ]])
+    """
+
+    # Avoid div-by-zero
+    if threshold is None:
+        threshold = tiny(S)
+
+    elif threshold <= 0:
+        raise ValueError(f"threshold={threshold} must be strictly positive")
+
+    if fill not in [None, False, True]:
+        raise ValueError(f"fill={fill} must be None or boolean")
+
+    if not np.all(np.isfinite(S)):
+        raise ValueError("Input must be finite")
+
+    # All norms only depend on magnitude, let's do that first
+    mag = np.abs(S).astype(float)
+
+    # For max/min norms, filling with 1 works
+    fill_norm = 1
+
+    if norm is None:
+        return S
+
+    elif norm == np.inf:
+        length = np.max(mag, axis=axis, keepdims=True)
+
+    elif norm == -np.inf:
+        length = np.min(mag, axis=axis, keepdims=True)
+
+    elif norm == 0:
+        if fill is True:
+            raise ValueError("Cannot normalize with norm=0 and fill=True")
+
+        length = np.sum(mag > 0, axis=axis, keepdims=True, dtype=mag.dtype)
+
+    elif np.issubdtype(type(norm), np.number) and norm > 0:
+        length = np.sum(mag**norm, axis=axis, keepdims=True) ** (1.0 / norm)
+
+        if axis is None:
+            fill_norm = mag.size ** (-1.0 / norm)
+        else:
+            fill_norm = mag.shape[axis] ** (-1.0 / norm)
+
+    else:
+        raise ValueError(f"Unsupported norm: {repr(norm)}")
+
+    # indices where norm is below the threshold
+    small_idx = length < threshold
+
+    Snorm = np.empty_like(S)
+    if fill is None:
+        # Leave small indices un-normalized
+        length[small_idx] = 1.0
+        Snorm[:] = S / length
+
+    elif fill:
+        # If we have a non-zero fill value, we locate those entries by
+        # doing a nan-divide.
+        # If S was finite, then length is finite (except for small positions)
+        length[small_idx] = np.nan
+        Snorm[:] = S / length
+        Snorm[np.isnan(Snorm)] = fill_norm
+    else:
+        # Set small values to zero by doing an inf-divide.
+        # This is safe (by IEEE-754) as long as S is finite.
+        length[small_idx] = np.inf
+        Snorm[:] = S / length
+
+    return Snorm
+
+
+def fft_frequencies(*, sr: float = 22050, n_fft: int = 2048) -> np.ndarray:
+    """Frequencies of the real-FFT bins; a thin wrapper around `np.fft.rfftfreq`
+
+    Parameters
+    ----------
+    sr : number > 0 [scalar]
+        Audio sampling rate
+    n_fft : int > 0 [scalar]
+        FFT window size
+
+    Returns
+    -------
+    freqs : np.ndarray [shape=(1 + n_fft/2,)]
+        Frequencies ``(0, sr/n_fft, 2*sr/n_fft, ..., sr/2)``
+
+    Examples
+    --------
+    >>> librosa.fft_frequencies(sr=22050, n_fft=16)
+    array([     0.   ,   1378.125,   2756.25 ,   4134.375,
+             5512.5  ,   6890.625,   8268.75 ,   9646.875,  11025.   ])
+
+    """
+
+    return np.fft.rfftfreq(n=n_fft, d=1.0 / sr)
+
+
+def hz_to_mel(frequencies: _ScalarOrSequence[_FloatLike_co], *, htk: bool = False) -> np.ndarray:
+    """Convert Hz to Mels
+
+    Examples
+    --------
+    >>> librosa.hz_to_mel(60)
+    0.9
+    >>> librosa.hz_to_mel([110, 220, 440])
+    array([ 1.65,  3.3 ,  6.6 ])
+
+    Parameters
+    ----------
+    frequencies : number or np.ndarray [shape=(n,)] , float
+        scalar or array of frequencies
+    htk : bool
+        use HTK formula instead of Slaney
+
+    Returns
+    -------
+    mels : number or np.ndarray [shape=(n,)]
+        input frequencies in Mels
+
+    See Also
+    --------
+    mel_to_hz
+    """
+
+    frequencies = np.asanyarray(frequencies)
+
+    if htk:
+        mels: np.ndarray = 2595.0 * np.log10(1.0 + frequencies / 700.0)
+        return mels
+
+    # Fill in the linear part
+    f_min = 0.0
+    f_sp = 200.0 / 3
+
+    mels = (frequencies - f_min) / f_sp
+
+    # Fill in the log-scale part
+
+    min_log_hz = 1000.0  # beginning of log region (Hz)
+    min_log_mel = (min_log_hz - f_min) / f_sp  # same (Mels)
+    logstep = np.log(6.4) / 27.0  # step size for log region
+
+    if frequencies.ndim:
+        # If we have array data, vectorize
+        log_t = frequencies >= min_log_hz
+        mels[log_t] = min_log_mel + np.log(frequencies[log_t] / min_log_hz) / logstep
+    elif frequencies >= min_log_hz:
+        # If we have scalar data, check directly
+        mels = min_log_mel + np.log(frequencies / min_log_hz) / logstep
+
+    return mels
+
+
+def mel_to_hz(mels: _ScalarOrSequence[_FloatLike_co], *, htk: bool = False) -> np.ndarray:
+    """Convert mel bin numbers to frequencies
+
+    Examples
+    --------
+    >>> librosa.mel_to_hz(3)
+    200.
+
+    >>> librosa.mel_to_hz([1,2,3,4,5])
+    array([  66.667,  133.333,  200.   ,  266.667,  333.333])
+
+    Parameters
+    ----------
+    mels : number or np.ndarray [shape=(n,)], float
+        scalar or array of mel bins to convert
+    htk : bool
+        use HTK formula instead of Slaney
+
+    Returns
+    -------
+    frequencies : number or np.ndarray [shape=(n,)]
+        input mels in Hz
+
+    See Also
+    --------
+    hz_to_mel
+    """
+
+    mels = np.asanyarray(mels)
+
+    if htk:
+        return 700.0 * (10.0 ** (mels / 2595.0) - 1.0)
+
+    # Fill in the linear scale
+    f_min = 0.0
+    f_sp = 200.0 / 3
+    freqs = f_min + f_sp * mels
+
+    # And now the nonlinear scale
+    min_log_hz = 1000.0  # beginning of log region (Hz)
+    min_log_mel = (min_log_hz - f_min) / f_sp  # same (Mels)
+    logstep = np.log(6.4) / 27.0  # step size for log region
+
+    if mels.ndim:
+        # If we have vector data, vectorize
+        log_t = mels >= min_log_mel
+        freqs[log_t] = min_log_hz * np.exp(logstep * (mels[log_t] - min_log_mel))
+    elif mels >= min_log_mel:
+        # If we have scalar data, check directly
+        freqs = min_log_hz * np.exp(logstep * (mels - min_log_mel))
+
+    return freqs
+
+
+def mel_frequencies(n_mels: int = 128, *, fmin: float = 0.0, fmax: float = 11025.0, htk: bool = False) -> np.ndarray:
+    """Compute an array of acoustic frequencies tuned to the mel scale.
+
+    The mel scale is a quasi-logarithmic function of acoustic frequency
+    designed such that perceptually similar pitch intervals (e.g. octaves)
+    appear equal in width over the full hearing range.
+
+    Because the definition of the mel scale is conditioned by a finite number
+    of subjective psychoacoustical experiments, several implementations coexist
+    in the audio signal processing literature [#]_. By default, librosa replicates
+    the behavior of the well-established MATLAB Auditory Toolbox of Slaney [#]_.
+    According to this default implementation, the conversion from Hertz to mel is
+    linear below 1 kHz and logarithmic above 1 kHz. Another available implementation
+    replicates the Hidden Markov Toolkit [#]_ (HTK) according to the following formula::
+
+        mel = 2595.0 * np.log10(1.0 + f / 700.0).
+
+    The choice of implementation is determined by the ``htk`` keyword argument: setting
+    ``htk=False`` leads to the Auditory toolbox implementation, whereas setting ``htk=True``
+    leads to the HTK implementation.
+
+    .. [#] Umesh, S., Cohen, L., & Nelson, D. Fitting the mel scale.
+        In Proc. International Conference on Acoustics, Speech, and Signal Processing
+        (ICASSP), vol. 1, pp. 217-220, 1998.
+
+    .. [#] Slaney, M. Auditory Toolbox: A MATLAB Toolbox for Auditory
+        Modeling Work. Technical Report, version 2, Interval Research Corporation, 1998.
+
+    .. [#] Young, S., Evermann, G., Gales, M., Hain, T., Kershaw, D., Liu, X.,
+        Moore, G., Odell, J., Ollason, D., Povey, D., Valtchev, V., & Woodland, P.
+        The HTK book, version 3.4. Cambridge University, March 2009.
+
+    See Also
+    --------
+    hz_to_mel
+    mel_to_hz
+    librosa.feature.melspectrogram
+    librosa.feature.mfcc
+
+    Parameters
+    ----------
+    n_mels : int > 0 [scalar]
+        Number of mel bins.
+    fmin : float >= 0 [scalar]
+        Minimum frequency (Hz).
+    fmax : float >= 0 [scalar]
+        Maximum frequency (Hz).
+    htk : bool
+        If True, use HTK formula to convert Hz to mel.
+        Otherwise (False), use Slaney's Auditory Toolbox.
+
+    Returns
+    -------
+    bin_frequencies : ndarray [shape=(n_mels,)]
+        Vector of ``n_mels`` frequencies in Hz which are uniformly spaced on the Mel
+        axis.
+
+    Examples
+    --------
+    >>> librosa.mel_frequencies(n_mels=40)
+    array([     0.   ,     85.317,    170.635,    255.952,
+              341.269,    426.586,    511.904,    597.221,
+              682.538,    767.855,    853.173,    938.49 ,
+             1024.856,   1119.114,   1222.042,   1334.436,
+             1457.167,   1591.187,   1737.532,   1897.337,
+             2071.84 ,   2262.393,   2470.47 ,   2697.686,
+             2945.799,   3216.731,   3512.582,   3835.643,
+             4188.417,   4573.636,   4994.285,   5453.621,
+             5955.205,   6502.92 ,   7101.009,   7754.107,
+             8467.272,   9246.028,  10096.408,  11025.   ])
+
+    """
+
+    # 'Center freqs' of mel bands - uniformly spaced between limits
+    min_mel = hz_to_mel(fmin, htk=htk)
+    max_mel = hz_to_mel(fmax, htk=htk)
+
+    mels = np.linspace(min_mel, max_mel, n_mels)
+
+    hz: np.ndarray = mel_to_hz(mels, htk=htk)
+    return hz
+
+
+def mel(
+    *,
+    sr: float,
+    n_fft: int,
+    n_mels: int = 128,
+    fmin: float = 0.0,
+    fmax: Optional[float] = None,
+    htk: bool = False,
+    norm: Optional[Union[Literal["slaney"], float]] = "slaney",
+    dtype: DTypeLike = np.float32,
+) -> np.ndarray:
+    """Create a Mel filter-bank.
+
+    This produces a linear transformation matrix to project
+    FFT bins onto Mel-frequency bins.
+
+    Parameters
+    ----------
+    sr : number > 0 [scalar]
+        sampling rate of the incoming signal
+
+    n_fft : int > 0 [scalar]
+        number of FFT components
+
+    n_mels : int > 0 [scalar]
+        number of Mel bands to generate
+
+    fmin : float >= 0 [scalar]
+        lowest frequency (in Hz)
+
+    fmax : float >= 0 [scalar]
+        highest frequency (in Hz).
+        If `None`, use ``fmax = sr / 2.0``
+
+    htk : bool [scalar]
+        use HTK formula instead of Slaney
+
+    norm : {None, 'slaney', or number} [scalar]
+        If 'slaney', divide the triangular mel weights by the width of the mel band
+        (area normalization).
+
+        If numeric, use `librosa.util.normalize` to normalize each filter to unit l_p norm.
+        See `librosa.util.normalize` for a full description of supported norm values
+        (including `+-np.inf`).
+
+        Otherwise, leave all the triangles aiming for a peak value of 1.0
+
+    dtype : np.dtype
+        The data type of the output basis.
+        By default, uses 32-bit (single-precision) floating point.
+
+    Returns
+    -------
+    M : np.ndarray [shape=(n_mels, 1 + n_fft/2)]
+        Mel transform matrix
+
+    See Also
+    --------
+    librosa.util.normalize
+
+    Notes
+    -----
+    Upstream librosa caches this function (level 10); this vendored copy does not.
+
+    Examples
+    --------
+    >>> melfb = librosa.filters.mel(sr=22050, n_fft=2048)
+    >>> melfb
+    array([[ 0.   ,  0.016, ...,  0.   ,  0.   ],
+           [ 0.   ,  0.   , ...,  0.   ,  0.   ],
+           ...,
+           [ 0.   ,  0.   , ...,  0.   ,  0.   ],
+           [ 0.   ,  0.   , ...,  0.   ,  0.   ]])
+
+    Clip the maximum frequency to 8KHz
+
+    >>> librosa.filters.mel(sr=22050, n_fft=2048, fmax=8000)
+    array([[ 0.  ,  0.02, ...,  0.  ,  0.  ],
+           [ 0.  ,  0.  , ...,  0.  ,  0.  ],
+           ...,
+           [ 0.  ,  0.  , ...,  0.  ,  0.  ],
+           [ 0.  ,  0.  , ...,  0.  ,  0.  ]])
+
+    >>> import matplotlib.pyplot as plt
+    >>> fig, ax = plt.subplots()
+    >>> img = librosa.display.specshow(melfb, x_axis='linear', ax=ax)
+    >>> ax.set(ylabel='Mel filter', title='Mel filter bank')
+    >>> fig.colorbar(img, ax=ax)
+    """
+
+    if fmax is None:
+        fmax = float(sr) / 2
+
+    # Initialize the weights
+    n_mels = int(n_mels)
+    weights = np.zeros((n_mels, int(1 + n_fft // 2)), dtype=dtype)
+
+    # Center freqs of each FFT bin
+    fftfreqs = fft_frequencies(sr=sr, n_fft=n_fft)
+
+    # 'Center freqs' of mel bands - uniformly spaced between limits
+    mel_f = mel_frequencies(n_mels + 2, fmin=fmin, fmax=fmax, htk=htk)
+
+    fdiff = np.diff(mel_f)
+    ramps = np.subtract.outer(mel_f, fftfreqs)
+
+    for i in range(n_mels):
+        # lower and upper slopes for all bins
+        lower = -ramps[i] / fdiff[i]
+        upper = ramps[i + 2] / fdiff[i + 1]
+
+        # .. then intersect them with each other and zero
+        weights[i] = np.maximum(0, np.minimum(lower, upper))
+
+    if isinstance(norm, str):
+        if norm == "slaney":
+            # Slaney-style mel is scaled to be approx constant energy per channel
+            enorm = 2.0 / (mel_f[2 : n_mels + 2] - mel_f[:n_mels])
+            weights *= enorm[:, np.newaxis]
+        else:
+            raise ValueError(f"Unsupported norm={norm}")
+    else:
+        weights = normalize(weights, norm=norm, axis=-1)
+
+    # A filter passes this check if its lower edge (mel_f[i]) is 0 Hz or it has any positive weight
+    if not np.all((mel_f[:-2] == 0) | (weights.max(axis=1) > 0)):
+        # This means we have an empty channel somewhere
+        warnings.warn(
+            "Empty filters detected in mel frequency basis. "
+            "Some channels will produce empty responses. "
+            "Try increasing your sampling rate (and fmax) or "
+            "reducing n_mels.",
+            stacklevel=2,
+        )
+
+    return weights
+
+
+def pad_center(data: np.ndarray, *, size: int, axis: int = -1, **kwargs: Any) -> np.ndarray:
+    """Pad an array to a target length along a target axis.
+
+    This differs from `np.pad` by centering the data prior to padding,
+    analogous to `str.center`
+
+    Examples
+    --------
+    >>> # Generate a vector
+    >>> data = np.ones(5)
+    >>> librosa.util.pad_center(data, size=10, mode='constant')
+    array([ 0.,  0.,  1.,  1.,  1.,  1.,  1.,  0.,  0.,  0.])
+
+    >>> # Pad a matrix along its first dimension
+    >>> data = np.ones((3, 5))
+    >>> librosa.util.pad_center(data, size=7, axis=0)
+    array([[ 0.,  0.,  0.,  0.,  0.],
+           [ 0.,  0.,  0.,  0.,  0.],
+           [ 1.,  1.,  1.,  1.,  1.],
+           [ 1.,  1.,  1.,  1.,  1.],
+           [ 1.,  1.,  1.,  1.,  1.],
+           [ 0.,  0.,  0.,  0.,  0.],
+           [ 0.,  0.,  0.,  0.,  0.]])
+    >>> # Or its second dimension
+    >>> librosa.util.pad_center(data, size=7, axis=1)
+    array([[ 0.,  1.,  1.,  1.,  1.,  1.,  0.],
+           [ 0.,  1.,  1.,  1.,  1.,  1.,  0.],
+           [ 0.,  1.,  1.,  1.,  1.,  1.,  0.]])
+
+    Parameters
+    ----------
+    data : np.ndarray
+        Array to be padded and centered
+    size : int >= data.shape[axis] [scalar]
+        Length to pad ``data`` to along ``axis``
+    axis : int
+        Axis along which to pad and center the data
+    **kwargs : additional keyword arguments
+        arguments passed to `np.pad` (``mode`` defaults to ``"constant"``)
+
+    Returns
+    -------
+    data_padded : np.ndarray
+        ``data`` centered and padded to length ``size`` along the
+        specified axis
+
+    Raises
+    ------
+    ValueError
+        If ``size < data.shape[axis]``
+
+    See Also
+    --------
+    numpy.pad
+    """
+
+    kwargs.setdefault("mode", "constant")
+
+    n = data.shape[axis]
+
+    lpad = int((size - n) // 2)
+    # Left padding is floored, so any odd remainder ends up on the right-hand side
+    lengths = [(0, 0)] * data.ndim
+    lengths[axis] = (lpad, int(size - n - lpad))
+
+    if lpad < 0:
+        raise ValueError(f"Target size ({size:d}) must be at least input size ({n:d})")
+
+    return np.pad(data, lengths, **kwargs)
+
+
+def __window_ss_fill(x, win_sq, n_frames, hop_length):  # pragma: no cover
+    """Accumulate ``win_sq`` into ``x`` in place, once per frame at offset ``i * hop_length``, clipped at ``len(x)``."""
+
+    n = len(x)
+    n_fft = len(win_sq)
+    for i in range(n_frames):
+        sample = i * hop_length
+        x[sample : min(n, sample + n_fft)] += win_sq[: max(0, min(n_fft, n - sample))]
+
+
+def get_window(
+    window: _WindowSpec,
+    Nx: int,
+    *,
+    fftbins: Optional[bool] = True,
+) -> np.ndarray:
+    """Compute a window function.
+
+    This is a wrapper for `scipy.signal.get_window` that additionally
+    supports callable or pre-computed windows.
+
+    Parameters
+    ----------
+    window : string, tuple, number, callable, or list-like
+        The window specification:
+
+        - If string, it's the name of the window function (e.g., `'hann'`)
+        - If tuple, it's the name of the window function and any parameters
+          (e.g., `('kaiser', 4.0)`)
+        - If numeric, it is treated as the beta parameter of the `'kaiser'`
+          window, as in `scipy.signal.get_window`.
+        - If callable, it's a function that accepts one integer argument
+          (the window length)
+        - If list-like, it's a pre-computed window of the correct length `Nx`
+
+    Nx : int > 0
+        The length of the window
+
+    fftbins : bool, optional
+        If True (default), create a periodic window for use with FFT
+        If False, create a symmetric window for filter design applications.
+
+    Returns
+    -------
+    get_window : np.ndarray
+        A window of length `Nx` and type `window`
+
+    See Also
+    --------
+    scipy.signal.get_window
+
+    Raises
+    ------
+    ValueError
+        If `window` is supplied as a vector of length != `Nx`,
+        or is otherwise mis-specified.
+
+    NotImplementedError
+        If `window` is a string, tuple, or scalar and SciPy cannot be
+        imported; SciPy is only needed for that one code path.
+    """
+    if callable(window):
+        return window(Nx)
+
+    elif isinstance(window, (str, tuple)) or np.isscalar(window):
+        # TODO: if we add custom window functions in librosa, call them here
+
+        try:
+            import scipy.signal  # import the submodule: a bare `import scipy` may not bind `scipy.signal`
+        except ImportError as err:
+            raise NotImplementedError("Scipy not included in Realbook's vendored Librosa code.") from err
+
+        win: np.ndarray = scipy.signal.get_window(window, Nx, fftbins=fftbins)
+        return win
+
+    elif isinstance(window, (np.ndarray, list)):
+        if len(window) == Nx:
+            return np.asarray(window)
+
+        raise ValueError(f"Window size mismatch: {len(window):d} != {Nx:d}")
+    else:
+        raise ValueError(f"Invalid window specification: {window!r}")
+
+
+def window_sumsquare(
+    *,
+    window: _WindowSpec,
+    n_frames: int,
+    hop_length: int = 512,
+    win_length: Optional[int] = None,
+    n_fft: int = 2048,
+    dtype: DTypeLike = np.float32,
+    norm: Optional[float] = None,
+) -> np.ndarray:
+    """Compute the sum-square envelope of a window function at a given hop length.
+
+    This is used to estimate modulation effects induced by windowing observations
+    in short-time Fourier transforms.
+
+    Parameters
+    ----------
+    window : string, tuple, number, callable, or list-like
+        Window specification, as in `get_window`
+    n_frames : int > 0
+        The number of analysis frames
+    hop_length : int > 0
+        The number of samples to advance between frames
+    win_length : int > 0 [optional]
+        The length of the window function.  By default, this matches ``n_fft``.
+    n_fft : int > 0
+        The length of each analysis frame.
+    dtype : np.dtype
+        The data type of the output
+    norm : {np.inf, -np.inf, 0, float > 0, None}
+        Normalization mode used in window construction.
+        Note that this does not affect the squaring operation.
+
+    Returns
+    -------
+    wss : np.ndarray, shape=``(n_fft + hop_length * (n_frames - 1))``
+        The sum-squared envelope of the window function
+
+    Examples
+    --------
+    For a fixed frame length (2048), compare modulation effects for a Hann window
+    at different hop lengths:
+
+    >>> n_frames = 50
+    >>> wss_256 = librosa.filters.window_sumsquare(window='hann', n_frames=n_frames, hop_length=256)
+    >>> wss_512 = librosa.filters.window_sumsquare(window='hann', n_frames=n_frames, hop_length=512)
+    >>> wss_1024 = librosa.filters.window_sumsquare(window='hann', n_frames=n_frames, hop_length=1024)
+
+    >>> import matplotlib.pyplot as plt
+    >>> fig, ax = plt.subplots(nrows=3, sharey=True)
+    >>> ax[0].plot(wss_256)
+    >>> ax[0].set(title='hop_length=256')
+    >>> ax[1].plot(wss_512)
+    >>> ax[1].set(title='hop_length=512')
+    >>> ax[2].plot(wss_1024)
+    >>> ax[2].set(title='hop_length=1024')
+    """
+    if win_length is None:
+        win_length = n_fft
+
+    n = n_fft + hop_length * (n_frames - 1)
+    x = np.zeros(n, dtype=dtype)
+
+    # Build the window at win_length, normalize it, square it, then center-pad it to n_fft
+    win_sq = get_window(window, win_length)
+    win_sq = normalize(win_sq, norm=norm) ** 2
+    win_sq = pad_center(win_sq, size=n_fft)
+
+    # Fill the envelope (the helper accumulates into ``x`` in place)
+    __window_ss_fill(x, win_sq, n_frames, hop_length)
+
+    return x
diff --git a/setup.cfg b/setup.cfg
index 660e65c..67ad185 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.0.1
+current_version = 1.0.2
 commit = True
 tag = True
 
@@ -35,20 +35,20 @@ include_package_data = True
 install_requires = 
 	tensorflow>=2.4; sys_platform != 'darwin' or platform.machine != 'arm64'
 	tensorflow-macos>=2.4; sys_platform == 'darwin' and platform.machine == 'arm64'
-	tensorboard>=2.4
-	librosa>=0.9,<0.10
+	tensorboard
 	types-protobuf
+	numpy
+	typing_extensions
 
 [options.extras_require]
 dev = 
 	realbook[tensorboard,test]
 	bumpversion>=0.5.3
-	ipython
-	ipdb
 tensorboard = 
 	matplotlib
 	psutil
 	nvsmi
+	librosa>=0.9,<0.10
 test = 
 	coverage>=5.0.2
 	pytest>=7.1.1
@@ -57,8 +57,13 @@ test =
 	tox
 	torch
 	nnaudio
+	numpy==1.21.6
+	librosa>=0.9,<0.10
+	tensorflow>=2.4,<2.11; sys_platform != 'darwin' or platform.machine != 'arm64'
+	tensorflow-macos>=2.4,<2.11; sys_platform == 'darwin' and platform.machine == 'arm64'
 
 [bumpversion:file:realbook/__init__.py]
 
 [bdist_wheel]
 universal = 1
+
diff --git a/tests/callbacks/test_spectrogram_visualization.py b/tests/callbacks/test_spectrogram_visualization.py
index 7762978..62d514c 100644
--- a/tests/callbacks/test_spectrogram_visualization.py
+++ b/tests/callbacks/test_spectrogram_visualization.py
@@ -17,11 +17,19 @@
 
 from typing import Any
 
+import platform
 import pytest
 import numpy as np
 import tensorflow as tf
 
-from realbook.callbacks.spectrogram_visualization import SpectrogramVisualizationCallback
+try:  # guarded import: a None placeholder makes the skipif markers below skip the tests
+    from realbook.callbacks.spectrogram_visualization import SpectrogramVisualizationCallback
+except ImportError as e:  # tolerate only the numpy.core.multiarray import failure on Windows
+    if "numpy.core.multiarray failed to import" in str(e) and platform.system() == "Windows":
+        SpectrogramVisualizationCallback = None  # type: ignore
+    else:
+        raise  # any other ImportError is re-raised unchanged
+
 from realbook.layers.signal import Spectrogram
 
 
@@ -52,6 +60,10 @@ def flush(self) -> None:
 TEST_AUDIO = np.linspace(0, 1, num=DEFAULT_SAMPLE_RATE * 10)
 
 
+@pytest.mark.skipif(
+    SpectrogramVisualizationCallback is None,
+    reason="SpectrogramVisualizationCallback import fails on this platform",
+)
 def test_spectrogram_visualization_callback() -> None:
     fake_data = tf.data.Dataset.zip(
         (
@@ -80,6 +92,10 @@ def test_spectrogram_visualization_callback() -> None:
     assert True
 
 
+@pytest.mark.skipif(
+    SpectrogramVisualizationCallback is None,
+    reason="SpectrogramVisualizationCallback import fails on this platform",
+)
 def test_callback_fails_on_unbatched_input() -> None:
     fake_data = tf.data.Dataset.zip(
         (
@@ -110,6 +126,10 @@ def test_callback_fails_on_unbatched_input() -> None:
     assert "shape" in str(excinfo.value)
 
 
+@pytest.mark.skipif(
+    SpectrogramVisualizationCallback is None,
+    reason="SpectrogramVisualizationCallback import fails on this platform",
+)
 def test_callback_logs_but_doesnt_throw_by_default(caplog: pytest.LogCaptureFixture) -> None:
     fake_data = tf.data.Dataset.zip(
         (
@@ -133,6 +153,10 @@ def test_callback_logs_but_doesnt_throw_by_default(caplog: pytest.LogCaptureFixt
     assert "shape" in caplog.text
 
 
+@pytest.mark.skipif(
+    SpectrogramVisualizationCallback is None,
+    reason="SpectrogramVisualizationCallback import fails on this platform",
+)
 def test_fails_on_no_image_like_layers() -> None:
     fake_data = tf.data.Dataset.zip(
         (
@@ -162,6 +186,10 @@ def test_fails_on_no_image_like_layers() -> None:
     assert "spectrogram" in str(excinfo.value)
 
 
+@pytest.mark.skipif(
+    SpectrogramVisualizationCallback is None,
+    reason="SpectrogramVisualizationCallback import fails on this platform",
+)
 def test_flexible_with_input_shapes() -> None:
     fake_data = tf.data.Dataset.zip(
         (
@@ -192,6 +220,10 @@ def test_flexible_with_input_shapes() -> None:
     assert True
 
 
+@pytest.mark.skipif(
+    SpectrogramVisualizationCallback is None,
+    reason="SpectrogramVisualizationCallback import fails on this platform",
+)
 def test_keras_functional_api_with_tfop_lambda() -> None:
     fake_data = tf.data.Dataset.zip(
         (
diff --git a/tests/layers/test_nnaudio.py b/tests/layers/test_nnaudio.py
index 9726d01..8e0ce86 100644
--- a/tests/layers/test_nnaudio.py
+++ b/tests/layers/test_nnaudio.py
@@ -19,50 +19,27 @@
 import torch
 import numpy as np
 import pytest
-import librosa
-import librosa.display
+import platform
 
-from typing import List, Tuple, Union
+try:  # guarded imports: None placeholders make the skipif markers below skip the tests
+    import librosa
+    from realbook.layers import nnaudio as our_nnaudio
+    from nnAudio.Spectrogram import CQT2010v2
+except ImportError as e:  # tolerate only the numpy.core.multiarray import failure on Windows
+    if "numpy.core.multiarray failed to import" in str(e) and platform.system() == "Windows":
+        librosa = None
+        our_nnaudio = None  # type: ignore
+        CQT2010v2 = None
+    else:
+        raise  # any other ImportError is re-raised unchanged
 
-from realbook.layers import nnaudio as our_nnaudio
-from nnAudio.Spectrogram import CQT2010v2
-
-TEST_SAMPLE_RATE = 22050
+from typing import Tuple, Union
 
 
-# Test using this model directly, as well as wrapping it in a Lambda layer.
-def get_parameterized_model_variants(
-    match_torch_exactly_values: Tuple[bool, bool] = (True, False)
-) -> List[tf.keras.layers.Layer]:
-    possible_models = [
-        our_nnaudio.CQT(match_torch_exactly=v, trainable=trainable)
-        for v in match_torch_exactly_values
-        for trainable in (True, False)
-    ]
-
-    return [
-        item
-        for models in [
-            [tf.keras.Sequential([tf.keras.layers.InputLayer((TEST_SAMPLE_RATE,)), model])]
-            + (
-                [
-                    tf.keras.Sequential(
-                        [
-                            tf.keras.layers.InputLayer((TEST_SAMPLE_RATE,)),
-                            tf.keras.layers.Lambda(lambda x: model(x)),
-                        ]
-                    )
-                ]
-                # Using a layer with trainable weights inside a Lambda layer isn't supported.
-                if not model.trainable
-                else []
-            )
-            for model in possible_models
-        ]
-        for item in models
-    ]
+TEST_SAMPLE_RATE = 22050
 
 
+@pytest.mark.skipif(librosa is None, reason="Librosa failed to import on this platform.")
 @pytest.mark.parametrize(
     "match_torch_exactly,threshold,trainable",
     (
@@ -83,12 +60,14 @@ def test_cqt(match_torch_exactly: bool, threshold: float, trainable: bool) -> No
 
 
 def build_layer(
-    layer: tf.keras.layers.Layer, input_shape: Union[Tuple[int], Tuple[int, int]] = (1, TEST_SAMPLE_RATE)
+    layer: tf.keras.layers.Layer,
+    input_shape: Union[Tuple[int], Tuple[int, int]] = (1, TEST_SAMPLE_RATE),
 ) -> tf.keras.layers.Layer:
     layer.build(input_shape)
     return layer
 
 
+@pytest.mark.skipif(our_nnaudio is None, reason="nnaudio failed to import on this platform.")
 def test_cqt_trainable_weights() -> None:
     assert not build_layer(our_nnaudio.CQT(trainable=False)).trainable
     assert not build_layer(our_nnaudio.CQT(trainable=False)).trainable_weights
@@ -98,6 +77,8 @@ def test_cqt_trainable_weights() -> None:
     assert len(build_layer(our_nnaudio.CQT(trainable=True)).trainable_weights) == 2
 
 
+@pytest.mark.skipif(librosa is None, reason="Librosa failed to import on this platform.")
+@pytest.mark.skipif(our_nnaudio is None, reason="nnaudio failed to import on this platform.")
 @pytest.mark.parametrize("train", (True, False))
 def test_cqt_trainable_layers_change_on_training(train: bool) -> None:
     # Make a model that's trainable, then train it and ensure the weights change from the default.
diff --git a/tests/layers/test_signal.py b/tests/layers/test_signal.py
index b1f7dfb..c9c8419 100644
--- a/tests/layers/test_signal.py
+++ b/tests/layers/test_signal.py
@@ -17,12 +17,20 @@
 
 from typing import Optional, Union, List
 
-import librosa
+import platform
 import numpy as np
 import pytest
 import tensorflow as tf
-from librosa.core.spectrum import _spectrogram
-from librosa.feature.spectral import melspectrogram
+
+try:  # guarded imports: librosa is set to None so the skipif markers below skip the tests
+    import librosa
+    from librosa.core.spectrum import _spectrogram
+    from librosa.feature.spectral import melspectrogram
+except ImportError as e:  # tolerate only the numpy.core.multiarray import failure on Windows
+    if "numpy.core.multiarray failed to import" in str(e) and platform.system() == "Windows":
+        librosa = None  # only librosa is rebound; _spectrogram and melspectrogram stay unbound
+    else:
+        raise  # any other ImportError is re-raised unchanged
 
 from realbook.layers import signal
 
@@ -39,6 +47,7 @@ def test_stft_channels_should_raise() -> None:
         )(x)
 
 
+@pytest.mark.skipif(librosa is None, reason="Librosa failed to import on this platform.")
 @pytest.mark.parametrize(
     "center,input_length,fft_length,hop_length,win_length",
     [
@@ -68,6 +77,7 @@ def test_stft(center: bool, input_length: int, fft_length: int, hop_length: int,
     assert np.allclose(librosa_stft.imag, rgp_stft.imag, atol=1e-3, rtol=0)
 
 
+@pytest.mark.skipif(librosa is None, reason="Librosa failed to import on this platform.")
 def test_stft_batch() -> None:
     x = np.random.normal(0, 1, 1024)
     librosa_stft = librosa.stft(
@@ -105,6 +115,7 @@ def test_istft_channels_should_raise() -> None:
         )(tf.expand_dims(x_stft, -1))
 
 
+@pytest.mark.skipif(librosa is None, reason="Librosa failed to import on this platform.")
 @pytest.mark.parametrize(
     "center,input_length,fft_length,hop_length,win_length",
     [
@@ -185,6 +196,7 @@ def test_istft_batch() -> None:
     assert np.allclose(x, np.squeeze(x_istft), atol=1e-3, rtol=0)
 
 
+@pytest.mark.skipif(librosa is None, reason="Librosa failed to import on this platform.")
 def test_spectrogram() -> None:
     x = np.random.normal(0, 1, 1024).astype(np.float32)
     librosa_spec, _ = _spectrogram(
@@ -205,6 +217,7 @@ def test_spectrogram() -> None:
     assert np.allclose(librosa_spec, rgp_spec, atol=1e-2, rtol=0)
 
 
+@pytest.mark.skipif(librosa is None, reason="Librosa failed to import on this platform.")
 @pytest.mark.parametrize(
     "center,normalization,fmin,htk",
     [
@@ -248,6 +261,7 @@ def test_mel_spectrogram(
     assert np.allclose(librosa_spec, rgp_spec, atol=1e-2, rtol=0)
 
 
+@pytest.mark.skipif(librosa is None, reason="Librosa failed to import on this platform.")
 @pytest.mark.parametrize(
     "input_spec",
     [
@@ -287,6 +301,7 @@ def test_magnitude(input_spec: Optional[List[np.complex64]]) -> None:
     assert np.allclose(np_magnitude, layer_magnitude, atol=1e-3, rtol=0)
 
 
+@pytest.mark.skipif(librosa is None, reason="Librosa failed to import on this platform.")
 @pytest.mark.parametrize(
     "input_spec",
     [
@@ -326,6 +341,7 @@ def test_phase(input_spec: Optional[List[np.complex64]]) -> None:
     assert np.allclose(np_phase, layer_phase, atol=1e-3, rtol=0)
 
 
+@pytest.mark.skipif(librosa is None, reason="Librosa failed to import on this platform.")
 @pytest.mark.parametrize(
     "ref,amin,top_db",
     [