# Pitch and Timbre Analysis

This notebook demonstrates how to analyze pitch and timbre characteristics of audio files using the CTC-SpeechRefinement package. We'll explore various features related to pitch (fundamental frequency) and timbre (sound quality) that are important for speech analysis.

## Setup

First, let's import the necessary libraries and set up the environment.

In [None]:
# Add the project root to the Python path
import sys
import os
sys.path.append(os.path.abspath('..'))

# Import libraries
import numpy as np
import matplotlib.pyplot as plt
import librosa
import librosa.display
import pandas as pd
import seaborn as sns
from IPython.display import Audio, display
import glob
from pathlib import Path

# Import from the project
from ctc_speech_refinement.core.preprocessing.audio import preprocess_audio
from ctc_speech_refinement.core.eda.pitch_timbre import analyze_pitch_timbre

# Global Matplotlib appearance: whitegrid theme, default figure size and resolution
plt.style.use('seaborn-v0_8-whitegrid')
plt.rcParams.update({
    'figure.figsize': (12, 6),
    'figure.dpi': 100,
})

## Load Audio Data

Let's load an audio file and examine its basic properties.

In [None]:
# Recording to analyze
audio_file = "../data/test1/test1_01.wav"  # Update this path to your audio file

# Read the waveform and its sample rate from disk
audio_data, sample_rate = librosa.load(audio_file, sr=None)

# Summarize the basic properties of the signal
num_samples = len(audio_data)
summary_lines = [
    f"Audio file: {audio_file}",
    f"Sample rate: {sample_rate} Hz",
    f"Duration: {num_samples / sample_rate:.2f} seconds",
    f"Number of samples: {num_samples}",
]
print(*summary_lines, sep="\n")

# Embed an audio player for listening
display(Audio(audio_data, rate=sample_rate))

## 1. Pitch Analysis

Let's analyze the pitch (fundamental frequency) of the audio signal.

In [None]:
# Estimate pitch candidates with librosa's piptrack.
# n_fft and hop_length are defined here and reused by the later feature cells.
n_fft = 2048
hop_length = 512
pitches, magnitudes = librosa.piptrack(y=audio_data, sr=sample_rate, n_fft=n_fft, hop_length=hop_length)
pitch_times = librosa.times_like(pitches, sr=sample_rate, hop_length=hop_length)

# For every frame keep the candidate with the largest magnitude
# (vectorized argmax over the frequency-bin axis instead of a Python loop)
strongest_bin = magnitudes.argmax(axis=0)
pitch = pitches[strongest_bin, np.arange(pitches.shape[1])]

# piptrack reports 0 Hz where it found no pitch candidate; a log axis cannot draw 0,
# so mask those frames as NaN and let the plot show a gap instead
pitch_for_plot = np.where(pitch > 0, pitch, np.nan)

# Plot pitch
plt.figure(figsize=(14, 5))
plt.semilogy(pitch_times, pitch_for_plot)
plt.title('Pitch (Fundamental Frequency)')
plt.xlabel('Time (s)')
plt.ylabel('Frequency (Hz)')
plt.tight_layout()
plt.show()

In [None]:
# Compute pitch using PYIN algorithm.
# Frame and hop sizes are passed explicitly (same values as the other feature cells) so
# the f0 track and its time axis stay aligned with them even if those settings are changed.
f0, voiced_flag, voiced_probs = librosa.pyin(
    audio_data,
    fmin=librosa.note_to_hz('C2'),
    fmax=librosa.note_to_hz('C7'),
    sr=sample_rate,
    frame_length=n_fft,
    hop_length=hop_length,
)
f0_times = librosa.times_like(f0, sr=sample_rate, hop_length=hop_length)

# Plot PYIN pitch
plt.figure(figsize=(14, 5))
plt.semilogy(f0_times, f0, label='f0')
plt.title('Pitch (PYIN Algorithm)')
plt.xlabel('Time (s)')
plt.ylabel('Frequency (Hz)')
plt.legend()  # the line carries a label, so actually show it
plt.tight_layout()
plt.show()

In [None]:
# Summary statistics of the PYIN pitch track
# Keep only frames that have a pitch estimate (drop NaN entries)
f0_clean = f0[~np.isnan(f0)]

if len(f0_clean) == 0:
    print("No valid pitch values detected.")
else:
    lowest_f0 = np.min(f0_clean)
    highest_f0 = np.max(f0_clean)
    pitch_stats = {
        'Mean': np.mean(f0_clean),
        'Median': np.median(f0_clean),
        'Std Dev': np.std(f0_clean),
        'Min': lowest_f0,
        'Max': highest_f0,
        'Range': highest_f0 - lowest_f0,
        # share of frames flagged as voiced, in percent
        'Voiced Percentage': np.mean(voiced_flag) * 100
    }

    # Print one "name: value" line per statistic
    for stat, value in pitch_stats.items():
        print(f"{stat}: {value:.2f}")

## 2. Pitch Contour Analysis

Let's analyze the pitch contour in more detail.

In [None]:
# Compute pitch contour features.
# Differentiate the full f0 track (NaN = frame without a pitch estimate) instead of the
# NaN-stripped f0_clean: stripping first glues frames from different voiced segments
# together, so the jump across each gap would be counted as pitch movement.
pitch_velocity = np.diff(f0)            # first derivative, Hz/frame (NaN next to a gap)
pitch_acceleration = np.diff(f0, n=2)   # second derivative, Hz/frame² (NaN next to a gap)

# Derivative values that come from consecutive voiced frames only
velocity_valid = pitch_velocity[~np.isnan(pitch_velocity)]
acceleration_valid = pitch_acceleration[~np.isnan(pitch_acceleration)]

# The statistics need at least one valid value each (i.e. three consecutive voiced
# frames somewhere in the track); np.max on an empty array would raise otherwise.
if velocity_valid.size > 0 and acceleration_valid.size > 0:
    # Plot pitch contour and its derivatives; all three panels share the same frame axis
    plt.figure(figsize=(14, 12))

    plt.subplot(3, 1, 1)
    plt.plot(f0)
    plt.title('Pitch Contour')
    plt.xlabel('Frame')
    plt.ylabel('Frequency (Hz)')

    plt.subplot(3, 1, 2)
    plt.plot(pitch_velocity)
    plt.title('Pitch Velocity (First Derivative)')
    plt.xlabel('Frame')
    plt.ylabel('Frequency Change (Hz/frame)')

    plt.subplot(3, 1, 3)
    plt.plot(pitch_acceleration)
    plt.title('Pitch Acceleration (Second Derivative)')
    plt.xlabel('Frame')
    plt.ylabel('Velocity Change (Hz/frame²)')

    plt.tight_layout()
    plt.show()

    # Compute pitch contour statistics ("Max" entries are the largest absolute value)
    pitch_contour_stats = {
        'Velocity Mean': np.mean(velocity_valid),
        'Velocity Std Dev': np.std(velocity_valid),
        'Velocity Max': np.max(np.abs(velocity_valid)),
        'Acceleration Mean': np.mean(acceleration_valid),
        'Acceleration Std Dev': np.std(acceleration_valid),
        'Acceleration Max': np.max(np.abs(acceleration_valid))
    }

    # Display pitch contour statistics
    print("Pitch Contour Statistics:")
    for stat, value in pitch_contour_stats.items():
        print(f"{stat}: {value:.2f}")
else:
    print("No valid pitch values detected for contour analysis.")

## 3. Timbre Analysis with MFCCs

Mel-Frequency Cepstral Coefficients (MFCCs) are widely used for timbre analysis.

In [None]:
# Extract MFCCs with the same frame/hop settings as the pitch cells
n_mfcc = 13
mfccs = librosa.feature.mfcc(
    y=audio_data,
    sr=sample_rate,
    n_mfcc=n_mfcc,
    n_fft=n_fft,
    hop_length=hop_length,
)

# Show the coefficient matrix over time
fig, ax = plt.subplots(figsize=(14, 5))
mfcc_image = librosa.display.specshow(mfccs, sr=sample_rate, x_axis='time', hop_length=hop_length, ax=ax)
fig.colorbar(mfcc_image, ax=ax)
ax.set_title('MFCCs')
fig.tight_layout()
plt.show()

In [None]:
# Per-coefficient mean and standard deviation across frames
mfcc_means = np.mean(mfccs, axis=1)
mfcc_stds = np.std(mfccs, axis=1)

# One bar chart per statistic, stacked vertically
stat_panels = [
    (mfcc_means, 'MFCC Means', 'Mean Value'),
    (mfcc_stds, 'MFCC Standard Deviations', 'Standard Deviation'),
]

plt.figure(figsize=(14, 8))
for row, (values, panel_title, y_label) in enumerate(stat_panels, start=1):
    plt.subplot(len(stat_panels), 1, row)
    plt.bar(range(n_mfcc), values)
    plt.title(panel_title)
    plt.xlabel('MFCC Coefficient')
    plt.ylabel(y_label)

plt.tight_layout()
plt.show()

In [None]:
# First- and second-order temporal derivatives of the MFCCs
mfcc_delta = librosa.feature.delta(mfccs)
mfcc_delta2 = librosa.feature.delta(mfccs, order=2)

# Draw the MFCCs and both derivatives as three stacked panels
delta_panels = [
    (mfccs, 'MFCCs'),
    (mfcc_delta, 'MFCC Delta (First Derivative)'),
    (mfcc_delta2, 'MFCC Delta-Delta (Second Derivative)'),
]

plt.figure(figsize=(14, 12))
for row, (features, panel_title) in enumerate(delta_panels, start=1):
    plt.subplot(len(delta_panels), 1, row)
    librosa.display.specshow(features, sr=sample_rate, x_axis='time', hop_length=hop_length)
    plt.colorbar()
    plt.title(panel_title)

plt.tight_layout()
plt.show()

## 4. Additional Timbre Features

Let's compute and visualize additional timbre features.

In [None]:
# Spectral contrast, using the shared frame/hop settings
contrast = librosa.feature.spectral_contrast(
    y=audio_data,
    sr=sample_rate,
    n_fft=n_fft,
    hop_length=hop_length,
)

# Show the contrast matrix over time
fig, ax = plt.subplots(figsize=(14, 5))
contrast_image = librosa.display.specshow(contrast, sr=sample_rate, x_axis='time', hop_length=hop_length, ax=ax)
fig.colorbar(contrast_image, ax=ax)
ax.set_title('Spectral Contrast')
fig.tight_layout()
plt.show()

In [None]:
# Compute tonnetz (tonal centroid features)
tonnetz = librosa.feature.tonnetz(y=audio_data, sr=sample_rate)

# Plot tonnetz; y_axis='tonnetz' labels each row with its tonal-centroid dimension
# so the rows can be told apart
plt.figure(figsize=(14, 5))
librosa.display.specshow(tonnetz, sr=sample_rate, x_axis='time', y_axis='tonnetz')
plt.colorbar()
plt.title('Tonnetz')
plt.tight_layout()
plt.show()

## 5. Using the Package's Pitch and Timbre Analysis

Let's use the package's built-in pitch and timbre analysis function.

In [None]:
# Use the package's pitch and timbre analysis function
pitch_timbre_results = analyze_pitch_timbre(
    audio_data, 
    sample_rate, 
    title_prefix="Sample Audio"
)

# Display the figures one at a time.
# Calling plt.show() inside the loop is unreliable in a notebook: with the inline backend
# the first call renders and closes every open figure, so later iterations would re-create
# empty figures. Render each figure explicitly and close it afterwards instead.
# NOTE(review): assumes the 'figures' values are Matplotlib Figure objects — confirm
# against analyze_pitch_timbre.
for fig_name, fig in pitch_timbre_results['figures'].items():
    fig.tight_layout()
    display(fig)
    plt.close(fig)  # avoids a second automatic rendering at the end of the cell

In [None]:
# Print the pitch and MFCC statistics returned by the package, one section each
stat_sections = (
    ("Pitch Statistics:", 'pitch_stats'),
    ("\nMFCC Statistics:", 'mfcc_stats'),
)

for heading, result_key in stat_sections:
    print(heading)
    for stat, value in pitch_timbre_results[result_key].items():
        print(f"{stat}: {value}")

## Conclusion

In this notebook, we've performed a comprehensive analysis of pitch and timbre characteristics of an audio file. We've examined pitch using different algorithms, analyzed pitch contours, and explored timbre features using MFCCs and other spectral features.

This analysis provides valuable insights into the pitch and timbre characteristics of speech signals, which are important for understanding speech patterns, speaker identification, and emotion detection in speech recognition tasks.