<a href="https://colab.research.google.com/github/studiomd2025/notebooks/blob/main/kva.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# @title 📦 Install Dependencies {"display-mode":"form"}
# @markdown Run this cell once per Colab session to install required packages.

# Install the Python packages for the analysis cell (-q keeps pip output quiet).
!pip install -q yt-dlp librosa matplotlib numpy soundfile pandas
# Install ffmpeg via apt: the YouTube download step converts audio to mp3 with
# yt-dlp's FFmpegExtractAudio post-processor, which needs the ffmpeg binary.
!apt-get update -qq && apt-get install -y -qq ffmpeg
print("✅ Dependencies installed.")

In [None]:
# @title 🎤 Vocal Analyzer – Files / YouTube {"display-mode":"form"}
# @markdown Provide file paths or YouTube URLs with optional time ranges.
# @markdown Format:
# @markdown ```
# @markdown youtube_urls = "https://youtu.be/abcd1234 | 0:30-1:00, 2:15-2:45; https://youtu.be/efgh5678"
# @markdown file_paths = "/content/audio1.mp3 | 0:10-0:40, 1:20-1:45; /content/audio2.wav"
# @markdown ```
from yt_dlp import YoutubeDL, DownloadError
import librosa, librosa.display
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os, glob, warnings
from scipy.signal import lfilter, find_peaks
warnings.filterwarnings("ignore")

# -----------------------
# ▶️ Form inputs
# -----------------------
# YouTube URLs, ";"-separated; each may be followed by "| start-end, start-end" time ranges.
youtube_urls = ""  # @param {type:"string"}
# Local audio file paths, written in the same format as youtube_urls.
file_paths = ""   # @param {type:"string"}
# NOTE(review): tolerance_cents, min_duration and show_spectrograms are not read
# anywhere in the code visible in this cell — confirm whether they are still needed.
tolerance_cents = 50  # @param {type:"slider", min:5, max:100, step:1}
min_duration = 0.10  # @param {type:"number"}
show_spectrograms = False  # @param {type:"boolean"}

# -----------------------
# Helpers
# -----------------------
def _timestamp_to_seconds(stamp):
    """Convert ``SS``, ``MM:SS`` or ``HH:MM:SS`` text into seconds.

    Args:
        stamp: Timestamp text; each colon-separated field is parsed with
            ``float`` (so surrounding whitespace and decimals are accepted).

    Returns:
        The timestamp as a float number of seconds.

    Raises:
        ValueError: If a field is not numeric or there are more than three
            colon-separated fields.
    """
    parts = [float(field) for field in stamp.split(":")]
    if len(parts) > 3:
        raise ValueError(f"too many ':' fields in timestamp {stamp!r}")
    # Align the weights to the right so "SS" -> (1,), "MM:SS" -> (60, 1), etc.
    weights = (3600, 60, 1)[-len(parts):]
    return sum(value * weight for value, weight in zip(parts, weights))


def parse_entries(raw_str):
    """Parse a form string into a list of ``{"path", "ranges"}`` entries.

    Two input shapes are accepted:

    * A plain comma-separated list of paths/URLs (no ``;`` and no ``|``):
      every item becomes an entry with no time ranges.
    * ``;``-separated items, each optionally ``path | start-end, start-end``.

    Malformed time ranges are skipped (with a printed warning) rather than
    aborting the whole parse; empty range fields are ignored silently.

    Args:
        raw_str: Raw text from the form field.

    Returns:
        A list of dicts with keys ``"path"`` (str) and ``"ranges"`` (list of
        ``(start_seconds, end_seconds)`` tuples; empty means "whole file").
    """
    raw_str = raw_str.strip()
    if not raw_str:
        return []

    # Simple form: just paths separated by commas.
    if ";" not in raw_str and "|" not in raw_str:
        return [
            {"path": p.strip(), "ranges": []}
            for p in raw_str.split(",")
            if p.strip()
        ]

    entries = []
    for item in raw_str.split(";"):
        item = item.strip()
        if not item:
            continue
        path, sep, range_spec = item.partition("|")
        path = path.strip()
        ranges = []
        if sep:
            for chunk in range_spec.split(","):
                chunk = chunk.strip()
                if not chunk:
                    continue
                try:
                    # Unpacking raises ValueError unless there is exactly one "-".
                    start, end = chunk.split("-")
                    ranges.append(
                        (_timestamp_to_seconds(start), _timestamp_to_seconds(end))
                    )
                except ValueError:
                    print(f"⚠️ Ignoring malformed time range {chunk!r} for {path}")
        entries.append({"path": path, "ranges": ranges})
    return entries


# Turn both raw form strings into entry lists (YouTube URLs first, then local files).
yt_entries, file_entries = map(parse_entries, (youtube_urls, file_paths))

def download_youtube(url, idx, out_dir="/content"):
    """Download the audio of a YouTube URL and convert it to mp3.

    Args:
        url: Video URL handed to yt-dlp (playlists are not expanded).
        idx: Number used to build the output file name ``youtube_<idx>``.
        out_dir: Directory that receives the download. Defaults to Colab's
            ``/content`` so existing callers behave as before.

    Returns:
        Path of the resulting ``.mp3`` file, or ``None`` if the download or
        conversion failed (the error is printed, not raised).
    """
    out_basename = os.path.join(out_dir, f"youtube_{idx}")
    ydl_opts = {
        "format": "bestaudio[ext=m4a]/bestaudio/best",
        # yt-dlp fills in the extension; the post-processor then produces .mp3.
        "outtmpl": out_basename + ".%(ext)s",
        "noplaylist": True,
        "quiet": True,
        "postprocessors": [{"key": "FFmpegExtractAudio", "preferredcodec": "mp3"}],
    }
    try:
        with YoutubeDL(ydl_opts) as ydl:
            ydl.extract_info(url, download=True)
    except Exception as e:
        # Best effort: one failing URL must not abort the whole batch.
        print("yt-dlp error:", e)
        return None

    # Escape the base path so glob metacharacters in out_dir are taken literally.
    candidates = glob.glob(f"{glob.escape(out_basename)}.*")
    mp3s = [p for p in candidates if p.lower().endswith(".mp3")]
    return mp3s[0] if mp3s else None

# -----------------------
# Feature helpers
# -----------------------
def hz_to_note_name(hz):
    """Return the note name (with octave) for a frequency in Hz.

    Returns ``None`` when ``hz`` is ``None``, non-positive, or NaN.
    """
    if hz is None:
        return None
    if hz <= 0 or np.isnan(hz):
        return None
    return librosa.hz_to_note(hz, octave=True)

def compute_hnr(y_segment):
    """Estimate a harmonic-to-noise style ratio in dB for an audio segment.

    The segment is split with ``librosa.effects.hpss`` and the result is
    ``20*log10`` of the RMS ratio of the two components returned by it
    (a small epsilon keeps the ratio finite for silent components).

    Args:
        y_segment: Audio samples for the segment.

    Returns:
        The ratio in dB, or NaN if the separation or the computation fails.
    """
    try:
        y_h, y_p = librosa.effects.hpss(y_segment)
        rms_h = np.sqrt(np.mean(y_h**2)) + 1e-9
        rms_p = np.sqrt(np.mean(y_p**2)) + 1e-9
        return 20 * np.log10(rms_h / rms_p)
    except Exception:
        # Deliberately best-effort, but do not swallow KeyboardInterrupt /
        # SystemExit so a long-running cell can still be interrupted.
        return float('nan')

def compute_shimmer_pct(y, sr, hop_length=512):
    """Return the frame-RMS variability of ``y`` as a percentage.

    Computed as ``std(rms) / mean(rms) * 100`` over the frames produced by
    ``librosa.feature.rms``; NaN when fewer than two frames exist.
    ``sr`` is accepted but not used by the computation.
    """
    frame_rms = librosa.feature.rms(y=y, hop_length=hop_length)[0]
    if len(frame_rms) >= 2:
        spread = np.std(frame_rms)
        level = np.mean(frame_rms) + 1e-9  # epsilon avoids division by zero
        return (spread / level) * 100
    return float('nan')

def compute_jitter_pct(f0):
    """Return frame-to-frame pitch variability as a percentage.

    NaN entries of ``f0`` are dropped first. The result is the standard
    deviation of the absolute successive differences divided by the mean
    of the remaining values, times 100. NaN if fewer than two values remain.
    """
    voiced = f0[np.logical_not(np.isnan(f0))]
    if len(voiced) < 2:
        return float('nan')
    step_sizes = np.abs(np.diff(voiced))
    mean_pitch = np.mean(voiced) + 1e-9  # epsilon avoids division by zero
    return (np.std(step_sizes) / mean_pitch) * 100

def compute_spectral_slope(y, sr):
    """Fit a line to the time-averaged spectrum (in dB) between 100 and 6000 Hz.

    Returns the slope of the least-squares line (dB per Hz) as a float, or
    NaN when fewer than two frequency bins fall inside the band.
    """
    magnitude = np.abs(librosa.stft(y, n_fft=2048))
    mean_mag = np.mean(magnitude, axis=1) + 1e-9  # epsilon keeps log10 finite
    bin_freqs = np.linspace(0, sr/2, len(mean_mag))
    in_band = (bin_freqs >= 100) & (bin_freqs <= 6000)
    if in_band.sum() < 2:
        return float('nan')
    level_db = 20*np.log10(mean_mag[in_band])
    slope, _intercept = np.polyfit(bin_freqs[in_band], level_db, 1)
    return float(slope)

def estimate_cpp(y, sr):
    """Return a peak-to-mean ratio of a cepstrum-like transform of ``y``.

    The time-averaged STFT magnitude is log-scaled and passed through a real
    FFT; the result is ``max / mean`` of that transform's magnitude.
    ``sr`` is accepted but not used. NaN if the transform is empty.
    """
    avg_spectrum = np.mean(np.abs(librosa.stft(y, n_fft=2048)), axis=1) + 1e-9
    cepstrum = np.abs(np.fft.rfft(np.log(avg_spectrum)))
    if len(cepstrum) == 0:
        return float('nan')
    return float(np.max(cepstrum) / (np.mean(cepstrum) + 1e-9))

# --- Formants via LPC ---
def compute_formants(y, sr, order=12):
    """Estimate up to four formant frequencies (Hz) from LPC roots.

    The signal is pre-emphasised (``[1, -0.97]`` FIR), an LPC polynomial of
    the given order is fitted, and the angles of its roots with imaginary
    part >= 0.01 are converted to frequencies and sorted ascending.

    Args:
        y: Audio samples.
        sr: Sample rate in Hz.
        order: LPC order.

    Returns:
        A list of exactly four floats. Missing formants (fewer than four
        qualifying roots) and any failure are reported as NaN, so callers
        can always unpack four values.
    """
    nan_result = [float('nan')] * 4
    try:
        y_pre = lfilter([1, -0.97], 1, y)
        coeffs = librosa.lpc(y_pre, order=order)
        roots = np.roots(coeffs)
        # Keep one root of each complex-conjugate pair (upper half plane).
        upper = roots[np.imag(roots) >= 0.01]
        angles = np.arctan2(np.imag(upper), np.real(upper))
        freqs = np.sort(angles * (sr / (2 * np.pi)))
    except Exception:
        # Best effort, but let KeyboardInterrupt / SystemExit propagate.
        return nan_result
    formants = [float(f) for f in freqs[:4]]
    # Pad so the result always has four entries.
    return formants + nan_result[len(formants):]

# --- Runs ---
def analyze_runs(f0, sr, hop_length=512):
    """Summarise pitch movement over the non-NaN frames of ``f0``.

    Returns a 4-tuple:
      * number of non-NaN frames divided by the time they span when laid
        out back to back at ``hop_length / sr`` seconds per frame,
      * pitch range (max - min),
      * standard deviation of the pitch values,
      * standard deviation of the successive pitch differences.

    All four values are NaN when fewer than six non-NaN frames exist.
    """
    voiced = f0[~np.isnan(f0)]
    n_frames = len(voiced)
    if n_frames < 6:
        return (np.nan, np.nan, np.nan, np.nan)

    frame_times = np.arange(n_frames) * (hop_length / sr)
    if len(frame_times) > 1:
        speed = n_frames / (frame_times[-1] - frame_times[0])
    else:
        speed = np.nan

    pitch_span = np.max(voiced) - np.min(voiced)
    return speed, pitch_span, np.std(voiced), np.std(np.diff(voiced))

# --- Vibrato ---
def analyze_vibrato(f0, sr, hop_length=512):
    """Estimate vibrato rate and extent from the non-NaN frames of ``f0``.

    The mean-removed pitch track is Fourier transformed; the rate is the
    frequency (Hz) of the strongest non-DC bin, and the extent is the
    standard deviation of the mean-removed track (Hz).

    Returns ``(rate, extent)``, or ``(nan, nan)`` when fewer than 20
    non-NaN frames are available.
    """
    no_result = (np.nan, np.nan)
    voiced = f0[~np.isnan(f0)]
    if len(voiced) < 20:
        return no_result

    centered = voiced - np.mean(voiced)
    magnitudes = np.abs(np.fft.rfft(centered))
    bin_freqs = np.fft.rfftfreq(len(centered), d=hop_length/sr)
    if len(bin_freqs) < 2:
        return no_result

    # Skip bin 0 (DC) when looking for the dominant modulation frequency.
    dominant_bin = 1 + np.argmax(magnitudes[1:])
    return bin_freqs[dominant_bin], np.std(centered)

# -----------------------
# Main analyzer
# -----------------------
all_rows = []     # one feature dict per analysed segment
entry_idx = 0     # counts YouTube entries; used to name the downloaded files
hop_length = 512  # hop size (samples) shared by pYIN, RMS and the f0-based helpers

for e in yt_entries + file_entries:
    path = e["path"]
    ranges = e["ranges"]

    if path.startswith("http"):
        entry_idx += 1
        downloaded = download_youtube(path, entry_idx)
        if not downloaded:
            print(f"⚠️ Download failed, skipping: {path}")
            continue
        path = downloaded

    if not os.path.exists(path):
        print(f"⚠️ File not found, skipping: {path}")
        continue

    # A single unreadable file should not discard the rows collected so far
    # (the CSV is only written after the loop).
    try:
        y, sr = librosa.load(path, sr=None)
    except Exception as err:
        print(f"⚠️ Could not load {path}: {err}")
        continue

    # No explicit ranges means "analyse the whole file".
    if not ranges:
        ranges = [(0, len(y) / sr)]

    for seg_start, seg_end in ranges:
        print(f"Analyzing file: {os.path.basename(path)} from {seg_start} to {seg_end}")
        y_seg = y[int(seg_start * sr):int(seg_end * sr)]
        if len(y_seg) < sr * 0.1:
            print(f"Segment too short: {path} from {seg_start} to {seg_end}")
            continue

        f0, _, _ = librosa.pyin(
            y_seg,
            fmin=librosa.note_to_hz("C2"),
            fmax=librosa.note_to_hz("C6"),
            sr=sr,
            hop_length=hop_length,
        )
        if f0 is None or np.all(np.isnan(f0)):
            print(f"Pitch estimation failed for segment: {path} from {seg_start} to {seg_end}")
            continue

        mean_f0 = float(np.nanmean(f0))
        row = {
            "file": os.path.basename(path),
            "path": path,
            "range_start": seg_start,
            "range_end": seg_end,
            "dur": seg_end - seg_start,
            "mean_f0_hz": mean_f0,
            "std_f0_hz": float(np.nanstd(f0)),
            "note": hz_to_note_name(mean_f0),
            "rms": float(np.mean(librosa.feature.rms(y=y_seg, hop_length=hop_length))),
            "jitter_pct": compute_jitter_pct(f0),
            "shimmer_pct": compute_shimmer_pct(y_seg, sr, hop_length),
            "hnr_db": compute_hnr(y_seg),
            "slope_db_per_hz": compute_spectral_slope(y_seg, sr),
            "cpp": estimate_cpp(y_seg, sr),
        }

        # Runs
        speed, rr, prec, wob = analyze_runs(f0, sr, hop_length)
        row.update({
            "run_speed_notes_per_sec": speed,
            "run_range_hz": rr,
            "run_precision_std_dev": prec,
            "run_wobbliness_std_dev": wob,
        })

        # Formants: pad with NaN so the unpack cannot fail when fewer than
        # four formants are returned.
        formants = list(compute_formants(y_seg, sr))
        f1, f2, f3, f4 = (formants + [float("nan")] * 4)[:4]
        row.update({
            "formant_f1_hz": f1,
            "formant_f2_hz": f2,
            "formant_f3_hz": f3,
            "formant_f4_hz": f4,
        })

        # Vibrato
        rate, extent = analyze_vibrato(f0, sr, hop_length)
        row.update({"vibrato_rate_hz": rate, "vibrato_extent_hz": extent})

        all_rows.append(row)

if all_rows:
    df = pd.DataFrame(all_rows)
    out_csv = "/content/vocal_analysis_results.csv"
    df.to_csv(out_csv, index=False)
    print("✅ Analysis complete. Results saved to:", out_csv)
    # `display` is provided by the IPython/Colab runtime.
    display(df.head(40))
else:
    print("⚠️ No qualifying segments detected.")