In [2]:

# ==========================================
# CELL 1: PITCH ANALYSIS (MEL-SPECTROGRAM)
# ==========================================
import librosa
import numpy as np
import plotly.graph_objects as go
from IPython.display import Audio, display
import os

# 1. Download the raw .wav file from GitHub
# Pure-Python download (instead of shelling out to wget) so that a failed
# request raises here rather than silently leaving a broken file on disk.
import urllib.request

# NOTE(review): the remote file name is spelled "naterthrush" -- presumably that
# matches the repository; verify the path if the download raises HTTP 404.
url = "https://raw.githubusercontent.com/ronniross/biosignal-translator/main/taxonomy/cellular-life/eukarya/animalia/chordata/vertebrata/avians/passeriformes/passeri/parulidae/parkesia/parkesia-noveboracensis-northern-waterthrush/northern-naterthrush-1.wav"
file_name = "northern-waterthrush.wav"

# A zero-byte file is what an interrupted or failed earlier download
# (e.g. `wget -O`) leaves behind, so treat it the same as a missing file.
if not os.path.exists(file_name) or os.path.getsize(file_name) == 0:
    partial_name = file_name + ".part"
    try:
        # Fetch under a temporary name and only promote it once the transfer
        # has completed, so `file_name` never points at a truncated download.
        urllib.request.urlretrieve(url, partial_name)
        os.replace(partial_name, file_name)
    finally:
        # Only still present if the download or the rename failed.
        if os.path.exists(partial_name):
            os.remove(partial_name)
    print("Audio file downloaded successfully.")

# 2. Load the audio file
# sr=None keeps the file's native sample rate instead of resampling.
y, sr = librosa.load(file_name, sr=None)
print("Listen to the original audio:")
display(Audio(y, rate=sr))

# 3. Calculate Mel-Spectrogram (Focuses on Perceived Pitch)
hop_length = 512
# n_mels defines the resolution of the Y-axis (Pitch). It is named once so the
# spectrogram and the frequency axis built from it cannot drift apart.
n_mels = 128
S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels, hop_length=hop_length)
# Convert power to Decibels (Z-axis / Loudness); 0 dB is the maximum of S
S_db = librosa.power_to_db(S, ref=np.max)

# 4. Generate the X (Time) and Y (Pitch/Frequency) axes
# One time stamp per spectrogram column, one frequency per mel band.
times = librosa.frames_to_time(np.arange(S_db.shape[1]), sr=sr, hop_length=hop_length)
frequencies = librosa.mel_frequencies(n_mels=n_mels, fmin=0.0, fmax=sr/2)

# Memory Management: thin the time axis so the 3D browser plot doesn't lag.
# Clips with more than `frame_threshold` columns are reduced to at most
# `target_frames` columns by keeping every `step`-th frame.
frame_threshold = 600
target_frames = 400
if len(times) > frame_threshold:
    # Ceiling division: floor division would give step == 1 (a no-op) for
    # 601-799 frames and could leave up to 799 columns in the plot.
    step = -(-len(times) // target_frames)
    S_db = S_db[:, ::step]
    times = times[::step]

# 5. Render Interactive 3D Plotly Surface
# Time runs along x, mel frequency along y, and the dB level is the height (z).
surface = go.Surface(x=times, y=frequencies, z=S_db, colorscale='Viridis')
fig = go.Figure(data=[surface])

# Axis captions plus the default viewing angle of the 3D scene.
scene_settings = {
    'xaxis_title': 'X: Time (s)',
    'yaxis_title': 'Y: Pitch (Mel-Frequency Hz)',
    'zaxis_title': 'Z: Amplitude (dB)',
    'camera': {'eye': {'x': 1.5, 'y': -1.5, 'z': 1.2}},
}

fig.update_layout(
    title='3D Acoustic Landscape: PITCH (Mel-Spectrogram)',
    scene=scene_settings,
    # Fixed canvas size; only the top margin is kept (room for the title).
    autosize=False,
    width=900,
    height=700,
    margin={'l': 0, 'r': 0, 'b': 0, 't': 40},
)

# Display in Colab
fig.show()

Audio file downloaded successfully.
Listen to the original audio:


In [3]:
# ==========================================
# CELL 2: TONE ANALYSIS (CONSTANT-Q TRANSFORM)
# ==========================================
import librosa
import numpy as np
import plotly.graph_objects as go
import os

# 1. Ensure audio exists (will skip downloading if Cell 1 was run)
# Pure-Python download (instead of shelling out to wget) so that a failed
# request raises here rather than silently leaving a broken file on disk.
import urllib.request

# NOTE(review): the remote file name is spelled "naterthrush" -- presumably that
# matches the repository; verify the path if the download raises HTTP 404.
url = "https://raw.githubusercontent.com/ronniross/biosignal-translator/main/taxonomy/cellular-life/eukarya/animalia/chordata/vertebrata/avians/passeriformes/passeri/parulidae/parkesia/parkesia-noveboracensis-northern-waterthrush/northern-naterthrush-1.wav"
file_name = "northern-waterthrush.wav"

# A zero-byte file is what an interrupted or failed earlier download
# (e.g. `wget -O`) leaves behind, so treat it the same as a missing file.
if not os.path.exists(file_name) or os.path.getsize(file_name) == 0:
    partial_name = file_name + ".part"
    try:
        # Fetch under a temporary name and only promote it once the transfer
        # has completed, so `file_name` never points at a truncated download.
        urllib.request.urlretrieve(url, partial_name)
        os.replace(partial_name, file_name)
    finally:
        # Only still present if the download or the rename failed.
        if os.path.exists(partial_name):
            os.remove(partial_name)

# 2. Load the audio file
# sr=None keeps the file's native sample rate instead of resampling.
y, sr = librosa.load(file_name, sr=None)

# 3. Calculate Constant-Q Transform (Focuses on Tone, Harmonics, and Musical spacing)
hop_length = 512
# Set minimum frequency to C2 (~65.4 Hz) to catch standard lower tones
fmin = librosa.note_to_hz('C2')
# Number of CQT bins, named once so the transform and its frequency axis agree.
# With librosa's default of 12 bins per octave this spans 7 octaves above C2
# (top bin near 7.9 kHz), so the audio's Nyquist frequency (sr/2) must lie above that.
n_bins = 84
# Calculate CQT (magnitude of the complex coefficients)
C = np.abs(librosa.cqt(y, sr=sr, fmin=fmin, n_bins=n_bins, hop_length=hop_length))
# Convert magnitude to Decibels (Z-axis / Loudness); 0 dB is the maximum of C
C_db = librosa.amplitude_to_db(C, ref=np.max)

# 4. Generate the X (Time) and Y (Tone Frequencies) axes
# One time stamp per CQT column, one centre frequency per CQT bin.
times = librosa.frames_to_time(np.arange(C_db.shape[1]), sr=sr, hop_length=hop_length)
cqt_freqs = librosa.cqt_frequencies(n_bins=n_bins, fmin=fmin)

# Memory Management: thin the time axis so the 3D browser plot doesn't lag.
# Clips with more than `frame_threshold` columns are reduced to at most
# `target_frames` columns by keeping every `step`-th frame.
frame_threshold = 600
target_frames = 400
if len(times) > frame_threshold:
    # Ceiling division: floor division would give step == 1 (a no-op) for
    # 601-799 frames and could leave up to 799 columns in the plot.
    step = -(-len(times) // target_frames)
    C_db = C_db[:, ::step]
    times = times[::step]

# 5. Render Interactive 3D Plotly Surface (using 'Plasma' heatmap colors for contrast)
# Time runs along x, CQT bin frequency along y, and the dB level is the height (z).
cqt_surface = go.Surface(x=times, y=cqt_freqs, z=C_db, colorscale='Plasma')
fig2 = go.Figure(data=[cqt_surface])

# Axis captions plus the default viewing angle of the 3D scene.
cqt_scene_settings = {
    'xaxis_title': 'X: Time (s)',
    'yaxis_title': 'Y: Tone (Harmonic Frequencies Hz)',
    'zaxis_title': 'Z: Amplitude (dB)',
    'camera': {'eye': {'x': 1.5, 'y': -1.5, 'z': 1.2}},
}

fig2.update_layout(
    title='3D Acoustic Landscape: TONE/TIMBRE (Constant-Q Transform)',
    scene=cqt_scene_settings,
    # Fixed canvas size; only the top margin is kept (room for the title).
    autosize=False,
    width=900,
    height=700,
    margin={'l': 0, 'r': 0, 'b': 0, 't': 40},
)

# Display in Colab
fig2.show()