In [9]:
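# NOTE: the PyPI package named `parselmouth` is unrelated to Praat; `import parselmouth`
# (used below for jitter/shimmer/HNR) is provided by `praat-parselmouth`.
#!pip install parselmouth --no-deps
#!pip install stopit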

In [14]:
#!pip install googleads==13.0.0
#!pip install praat-parselmouth
from urllib.parse import quote


In [16]:
import librosa
import numpy as np
import pandas as pd
import parselmouth  # For jitter, shimmer, and HNR

In [None]:
# Path of the recording to analyse -- replace with the path to your own audio file.
audio_path = "extracted_audio.mp3"

# Decode the recording: `audio` receives the waveform samples and `sr` the
# sampling rate reported by librosa (library defaults are used for everything else).
audio, sr = librosa.load(path=audio_path)

# Function to extract prosodic and spectral features
def extract_prosodic_spectral_features(audio, sr, n_mfcc=13):
    """Summarise a waveform as a flat dict of scalar prosodic and spectral features.

    Every frame-wise librosa descriptor is collapsed to a single number
    (its mean, or standard deviation for ``pitch_variability``) so the result
    can be used directly as one row of a feature table.

    Parameters
    ----------
    audio : np.ndarray
        Audio samples, as returned by ``librosa.load``.
    sr : int or float
        Sampling rate of ``audio`` in Hz.
    n_mfcc : int, optional
        Number of MFCC coefficients to summarise (default 13, which yields
        the keys ``mfcc_1`` .. ``mfcc_13``).

    Returns
    -------
    dict
        Keys: ``mean_pitch``, ``pitch_variability``, ``speech_rate``,
        ``zero_crossing_rate``, ``spectral_centroid``, ``spectral_bandwidth``,
        ``spectral_flatness``, ``spectral_rolloff``, ``rms_energy`` and
        ``mfcc_1`` .. ``mfcc_<n_mfcc>``.

    Notes
    -----
    ``speech_rate`` is librosa's global tempo estimate; it is only a rough
    proxy for speaking rate. The key name is kept for compatibility with
    existing consumers of the feature table.
    """
    # Pass `sr` explicitly: without it yin assumes its own default sampling
    # rate, which mis-scales every pitch estimate when `audio` uses another rate.
    pitch = librosa.yin(
        audio,
        fmin=librosa.note_to_hz('C2'),
        fmax=librosa.note_to_hz('C7'),
        sr=sr,
    )

    # Newer librosa exposes the tempo estimator as `librosa.feature.tempo` and
    # deprecates `librosa.beat.tempo`; fall back to the old location otherwise.
    # The waveform must be passed by keyword: recent signatures are keyword-only.
    estimate_tempo = getattr(librosa.feature, 'tempo', None) or librosa.beat.tempo

    features = {
        # Prosodic Features
        'mean_pitch': np.mean(pitch),
        'pitch_variability': np.std(pitch),
        'speech_rate': estimate_tempo(y=audio, sr=sr)[0],

        # Spectral Features
        'zero_crossing_rate': np.mean(librosa.feature.zero_crossing_rate(audio)),
        'spectral_centroid': np.mean(librosa.feature.spectral_centroid(y=audio, sr=sr)),
        'spectral_bandwidth': np.mean(librosa.feature.spectral_bandwidth(y=audio, sr=sr)),
        'spectral_flatness': np.mean(librosa.feature.spectral_flatness(y=audio)),
        'spectral_rolloff': np.mean(librosa.feature.spectral_rolloff(y=audio, sr=sr)),

        # Energy and Rhythm
        'rms_energy': np.mean(librosa.feature.rms(y=audio)),
    }

    # Mean of each MFCC coefficient over time. Iterating over the rows that
    # were actually returned keeps the key count in sync with `n_mfcc`.
    mfcc = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=n_mfcc)
    for index, coefficient_track in enumerate(mfcc, start=1):
        features[f'mfcc_{index}'] = np.mean(coefficient_track)

    return features

# Function to extract voice quality features using Praat
def extract_voice_quality_features(audio_path, pitch_floor=75.0, pitch_ceiling=600.0):
    """Measure jitter, shimmer and mean HNR of a recording via Praat (Parselmouth).

    The measurements are issued as Praat commands through
    ``parselmouth.praat.call``; jitter and shimmer are computed from a
    periodic (cross-correlation) point process derived from the sound.

    Parameters
    ----------
    audio_path : str
        Path of the audio file, passed to ``parselmouth.Sound``.
    pitch_floor : float, optional
        Lowest candidate pitch in Hz used when locating glottal pulses
        (default 75.0).
    pitch_ceiling : float, optional
        Highest candidate pitch in Hz used when locating glottal pulses
        (default 600.0).

    Returns
    -------
    dict
        ``{'jitter': ..., 'shimmer': ..., 'hnr': ...}`` holding the local
        jitter, the local shimmer and the mean of the autocorrelation-based
        harmonicity contour over the whole file.
    """
    praat_call = parselmouth.praat.call

    snd = parselmouth.Sound(audio_path)

    # Glottal pulse positions; both perturbation measures are defined on these.
    point_process = praat_call(
        snd, "To PointProcess (periodic, cc)", pitch_floor, pitch_ceiling
    )

    # Arguments: time range (0, 0 = whole file), period floor, period ceiling,
    # maximum period factor.
    jitter = praat_call(
        point_process, "Get jitter (local)", 0.0, 0.0, 0.0001, 0.02, 1.3
    )
    # Shimmer needs the waveform as well as the pulses, plus one extra
    # argument: the maximum amplitude factor.
    shimmer = praat_call(
        [snd, point_process], "Get shimmer (local)", 0.0, 0.0, 0.0001, 0.02, 1.3, 1.6
    )

    # Mean harmonics-to-noise ratio over the whole file (0, 0 = full time range).
    hnr = praat_call(snd.to_harmonicity_ac(), "Get mean", 0.0, 0.0)

    return {
        'jitter': jitter,
        'shimmer': shimmer,
        'hnr': hnr
    }

# Run both extractors: one works on the decoded waveform, the other re-reads the file by path
prosodic_spectral_features = extract_prosodic_spectral_features(audio, sr)
voice_quality_features = extract_voice_quality_features(audio_path)

# Merge the two feature dicts into one flat record (on a key clash the
# voice-quality value wins) and wrap it as a single-row DataFrame
final_features = dict(prosodic_spectral_features)
final_features.update(voice_quality_features)
final_features_df = pd.DataFrame([final_features])

# Persist the feature row to CSV, without the index column
final_features_df.to_csv("final_audio_features.csv", index=False)

# Show a heading followed by the feature table
print("Final Extracted Audio Features:", final_features_df, sep="\n")