In [None]:
import parselmouth
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Path of the recording that the next cells load into `snd` for the waveform
# plot and the summary tables.
# NOTE(review): hard-coded absolute path — the notebook only runs where this
# workspace layout exists; consider a configurable data directory.
file_path = "/workspaces/voice-analysis/notebooks-pocs/audio/fast.wav"

In [None]:
# Switch every subsequent matplotlib figure to seaborn's default theme.
sns.set_theme()

# Load the recording with parselmouth and draw its waveform with plain
# matplotlib: sample times on the x axis, sample values on the y axis.
snd = parselmouth.Sound(file_path)
plt.figure()
plt.plot(snd.xs(), snd.values.T)
# Clamp the x axis to the sound's own time span and label both axes in one call.
plt.gca().set(
    xlim=(snd.xmin, snd.xmax),
    xlabel="time [s]",
    ylabel="amplitude",
)
# To write the figure to disk instead, use plt.savefig("sound.png") or
# plt.savefig("sound.pdf") in place of plt.show().
plt.show()

In [None]:
# Tabulate the sound's sample time stamps and print their summary statistics.
dfXs = pd.DataFrame(data=snd.xs())
xs_summary = dfXs.describe()
print(xs_summary)



In [None]:
# Tabulate the sound's sample values (transposed, so each row of `snd.values`
# becomes one column) and print their summary statistics.
dfValues = pd.DataFrame(data=snd.values.T)
values_summary = dfValues.describe()
print(values_summary)


In [None]:
import librosa
import librosa.display

In [None]:

# Function to extract audio features
def _pitch_statistics(voiced_pitch):
    """Summarise voiced pitch values as min/mean/std/max/range (NaN when empty)."""
    if voiced_pitch.size == 0:
        # np.min/np.max raise ValueError on an empty array and np.mean/np.std
        # emit RuntimeWarnings, so report explicit NaNs instead.
        return {
            'min_pitch': np.nan,
            'mean_pitch': np.nan,
            'std_pitch': np.nan,
            'max_pitch': np.nan,
            'pitch_range': np.nan,
        }

    min_pitch = np.min(voiced_pitch)
    max_pitch = np.max(voiced_pitch)
    return {
        'min_pitch': min_pitch,
        'mean_pitch': np.mean(voiced_pitch),
        'std_pitch': np.std(voiced_pitch),
        'max_pitch': max_pitch,
        'pitch_range': max_pitch - min_pitch,
    }


def extract_features(file_path):
    """Extract pitch, speaking-rate and MFCC features from one audio file.

    The file is read twice: once with ``librosa.load`` (default settings) for
    duration, segmentation and MFCCs, and once with ``parselmouth.Sound`` for
    the pitch track.

    Parameters
    ----------
    file_path : str
        Path of the audio file to analyse.

    Returns
    -------
    dict
        Keys, in order:

        - ``'min_pitch'``, ``'mean_pitch'``, ``'std_pitch'``, ``'max_pitch'``,
          ``'pitch_range'``: statistics over the pitch frames whose frequency
          is non-zero (zero marks an unvoiced frame). All five are NaN when
          the recording has no voiced frame.
        - ``'wpm'``: number of segments returned by ``librosa.effects.split``
          per minute of audio. Each non-silent segment is counted as one
          "word", so this is a proxy for speaking rate rather than a true
          word count. NaN when the duration is zero.
        - ``'mfccs'``: result of ``librosa.feature.mfcc`` with ``n_mfcc=13``.
        - ``'duration'``: length of the signal as reported by
          ``librosa.get_duration``.
    """
    y, sr = librosa.load(file_path)

    # Pitch (F0), its variation and its range; unvoiced frames (frequency 0)
    # are dropped before the statistics are taken.
    sound = parselmouth.Sound(file_path)
    pitch_values = sound.to_pitch().selected_array['frequency']
    pitch_stats = _pitch_statistics(pitch_values[pitch_values != 0])

    # Speaking rate: non-silent segments per minute.
    duration = librosa.get_duration(y=y, sr=sr)
    segments = librosa.effects.split(y)
    # A zero-length signal would otherwise raise ZeroDivisionError.
    wpm = (len(segments) / duration) * 60 if duration > 0 else np.nan

    # Tone and timbre (MFCCs).
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)

    return {
        **pitch_stats,
        'wpm': wpm,
        'mfccs': mfccs,
        'duration': duration,
    }

# Function to plot audio features
def plot_features(features):
    """Plot the MFCCs of a feature dictionary in two stacked panels.

    The top panel traces the first row of the MFCC matrix as a line; the
    bottom panel shows the whole matrix as a heat map with a colour bar.

    Parameters
    ----------
    features : dict
        Mapping with an ``'mfccs'`` entry holding a 2-D array, as produced by
        ``extract_features``. No other key is read.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    mfccs = features['mfccs']
    fig, (ax_first, ax_matrix) = plt.subplots(2, 1, figsize=(14, 8))

    # Top panel: only row 0 of the matrix is drawn, so label it as such
    # (it is not a pitch track and not the full MFCC set).
    ax_first.plot(mfccs[0])
    ax_first.set(title='First MFCC coefficient', xlabel='frame', ylabel='value')

    # Bottom panel: every coefficient over time, colour-coded by value.
    mesh = librosa.display.specshow(mfccs, x_axis='time', ax=ax_matrix)
    fig.colorbar(mesh, ax=ax_matrix)
    ax_matrix.set(title='MFCCs', ylabel='coefficient')

    fig.tight_layout()
    plt.show()

In [None]:
def show(file_path, plot=False):
    """Extract the features of one recording and print a short text report.

    Parameters
    ----------
    file_path : str
        Path of the audio file, passed straight to ``extract_features``.
    plot : bool, optional
        When true, also draw the MFCC figure via ``plot_features`` before
        printing. Defaults to ``False``, which prints the report only.

    Returns
    -------
    None
        The report is written to standard output, one ``label: value`` line
        per feature.
    """
    features = extract_features(file_path)
    if plot:
        plot_features(features)

    # (label, feature key) pairs in the order they are printed.
    report_lines = (
        ("Min Pitch", 'min_pitch'),
        ("Mean Pitch", 'mean_pitch'),
        ("Pitch Variation", 'std_pitch'),
        ("Max Pitch", 'max_pitch'),
        ("Pitch Range", 'pitch_range'),
        ("Words Per Minute", 'wpm'),
    )
    for label, key in report_lines:
        print(f"{label}: {features[key]}")

In [None]:
# Print the pitch and speaking-rate report for the fast.wav recording.
show("/workspaces/voice-analysis/notebooks-pocs/audio/fast.wav")

In [None]:
# Print the pitch and speaking-rate report for the slow.wav recording.
show("/workspaces/voice-analysis/notebooks-pocs/audio/slow.wav")

In [None]:
# Print the pitch and speaking-rate report for the tone-variety.wav recording.
show("/workspaces/voice-analysis/notebooks-pocs/audio/tone-variety.wav")

In [53]:
# Print the feature report for the high.wav and low.wav recordings, each
# preceded by its banner line so the two reports can be told apart.
for banner, wav_path in (
    ("***HIGH***", "/workspaces/voice-analysis/notebooks-pocs/audio/high.wav"),
    ("***LOW***", "/workspaces/voice-analysis/notebooks-pocs/audio/low.wav"),
):
    print(banner)
    show(wav_path)

***HIGH***
Min Pitch: 293.30663065805834
Mean Pitch: 390.03246660734777
Pitch Variation: 26.84226478533146
Max Pitch: 446.65750082110793
Pitch Range: 153.3508701630496
Words Per Minute: 38.516405135520685
***LOW***
Min Pitch: 75.55221978702873
Mean Pitch: 112.77171178343373
Pitch Variation: 45.579179480323624
Max Pitch: 447.4033545486049
Pitch Range: 371.8511347615762
Words Per Minute: 48.56504037909632
