<a href="https://colab.research.google.com/github/senudidinaya/Smart-Agri-Suite/blob/main/AI/Cultivatior%20Intent%20Module/audio_files_breacking_down.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install ffmpeg (which also provides ffprobe); the processing cell shells out
# to both binaries through subprocess.
!apt-get -y update
!apt-get -y install ffmpeg

0% [Working]            Hit:1 http://archive.ubuntu.com/ubuntu jammy InRelease
0% [Waiting for headers] [Connecting to security.ubuntu.com (185.125.190.83)] [                                                                               Get:2 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]
                                                                               Get:3 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,632 B]
Get:4 https://cli.github.com/packages stable InRelease [3,917 B]
Get:5 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
Get:6 http://archive.ubuntu.com/ubuntu jammy-backports InRelease [127 kB]
Get:7 https://r2u.stat.illinois.edu/ubuntu jammy InRelease [6,555 B]
Hit:8 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:9 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Get:10 http://archive.ubuntu.com/ubuntu jammy-updates/multiverse amd64 Packages [69.

In [2]:
from google.colab import files

# Ask the user for audio files; the returned mapping is keyed by filename.
uploaded = files.upload()

# Filenames only -- this list drives the per-file processing loop later on.
all_files = [name for name in uploaded]
print("Uploaded files:", all_files)

Saving agri.aac to agri.aac
Saving WhatsApp Audio 2025-12-26 at 10.36.36 PM.aac to WhatsApp Audio 2025-12-26 at 10.36.36 PM.aac
Uploaded files: ['agri.aac', 'WhatsApp Audio 2025-12-26 at 10.36.36 PM.aac']


In [3]:
import os, re, subprocess, glob, math

def duration_sec(path):
    """Return the duration of the media file at ``path`` in seconds.

    Runs ``ffprobe`` asking only for the container-level ``duration`` field,
    printed as a bare number, and converts that text to ``float``.

    Raises:
        subprocess.CalledProcessError: if ffprobe exits with a non-zero status.
        ValueError: if ffprobe's output is not parseable as a float.
    """
    probe_cmd = [
        "ffprobe",
        "-v", "error",
        "-show_entries", "format=duration",
        # Strip the "[FORMAT]" wrapper and the "duration=" key from the output.
        "-of", "default=noprint_wrappers=1:nokey=1",
        path,
    ]
    raw_bytes = subprocess.check_output(probe_cmd)
    duration_text = raw_bytes.decode().strip()
    return float(duration_text)

def get_max_volume_db(path):
    """Return the peak level of ``path`` in dB, as reported by ffmpeg's ``volumedetect``.

    ffmpeg decodes the whole file to a null sink; the filter's report is read
    from stderr and the ``max_volume: <x> dB`` value is extracted.

    Returns:
        The parsed peak as a float, or ``-1.0`` when no ``max_volume`` line is
        found in ffmpeg's stderr.
    """
    result = subprocess.run(
        ["ffmpeg", "-i", path, "-af", "volumedetect", "-f", "null", "-"],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )
    match = re.search(r"max_volume:\s*(-?\d+(\.\d+)?) dB", result.stderr)
    if match is None:
        return -1.0
    return float(match.group(1))

def detect_silence(path, noise_db, d=0.35):
    """Run ffmpeg's ``silencedetect`` on ``path`` and save its log to a text file.

    Args:
        path: Input audio file.
        noise_db: Level (in dB) passed as the filter's ``noise`` threshold.
        d: Value passed as the filter's ``d`` (minimum silence duration) option.

    Returns:
        Name of the log file, ``silences_<basename>.txt`` with spaces replaced
        by underscores, created in the current working directory.
    """
    sil_file = f"silences_{os.path.basename(path)}.txt".replace(" ", "_")
    # silencedetect reports on stderr; redirect it into the log file. The
    # context manager guarantees the handle is flushed and closed before the
    # caller reads the file back (the bare open() used previously leaked it).
    with open(sil_file, "w") as log:
        subprocess.run(
            ["ffmpeg", "-i", path, "-af", f"silencedetect=noise={noise_db}dB:d={d}", "-f", "null", "-"],
            stdout=subprocess.DEVNULL,
            stderr=log,
        )
    return sil_file

def parse_silences(sil_file):
    """Extract silence start/end timestamps from a ``silencedetect`` log file.

    Args:
        sil_file: Path to a text file containing ffmpeg's stderr output.

    Returns:
        ``(starts, ends)``: two lists of floats, in the order the
        ``silence_start:`` / ``silence_end:`` markers appear in the log.
    """
    with open(sil_file, "r", encoding="utf-8", errors="ignore") as fh:
        txt = fh.read()
    # Accept an optional sign and exponent: ffmpeg can report a slightly
    # negative silence_start for silence at the very beginning of a file, and
    # a digits-only pattern would drop that start and misalign the two lists.
    number = r"(-?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?)"
    starts = [float(x) for x in re.findall(r"silence_start: " + number, txt)]
    ends = [float(x) for x in re.findall(r"silence_end: " + number, txt)]
    return starts, ends

def build_speech_intervals(starts, ends, T, min_speech=0.2):
    """Invert a list of silences into the non-silent intervals of a recording.

    Args:
        starts: Silence start times in seconds, in chronological order.
        ends: Silence end times in seconds, paired positionally with ``starts``.
        T: Total duration of the recording in seconds.
        min_speech: Intervals shorter than this many seconds are discarded.

    Returns:
        List of ``(begin, end)`` tuples covering the gaps between silences,
        including the lead-in before the first silence and the tail after the
        last one.
    """
    starts = list(starts)
    ends = list(ends)
    silences = list(zip(starts, ends))
    # A silence that is still running when the file ends may have no matching
    # end marker. Close it at T so the silent tail is not reported as speech.
    if len(starts) > len(ends):
        silences.append((starts[len(ends)], T))

    intervals = []
    t0 = 0.0  # end of the most recent silence == start of the next speech gap
    for s, e in silences:
        if s > t0:
            intervals.append((t0, s))
        t0 = e
    if t0 < T:
        intervals.append((t0, T))
    return [(a, b) for a, b in intervals if (b - a) >= min_speech]

def merge_gaps(intervals, gap_merge=0.4):
    """Join consecutive intervals separated by at most ``gap_merge`` seconds.

    Intervals are processed in the order given. When the gap between the end
    of the current region and the start of the next interval is no larger than
    ``gap_merge``, the region's end is replaced by that interval's end;
    otherwise a new region is started.

    Returns:
        List of ``(begin, end)`` tuples.
    """
    regions = []
    for begin, end in intervals:
        if regions and begin - regions[-1][1] <= gap_merge:
            # Close enough to the previous region: extend it.
            regions[-1] = (regions[-1][0], end)
        else:
            regions.append((begin, end))
    return regions

def make_fixed_5s_windows(regions, T, target=5.0):
    """Replace each region with a ``target``-second window centred on it.

    The window is shifted as needed to stay inside ``[0, T]``; if the recording
    itself is shorter than ``target`` the window is clipped to ``(0.0, T)``.
    One window is produced per input region, so windows may overlap.

    Returns:
        List of ``(start, end)`` tuples, one per region, in input order.
    """
    half = target / 2
    windows = []
    for lo, hi in regions:
        centre = (lo + hi) / 2
        win_start = max(0.0, centre - half)
        win_end = win_start + target
        if win_end > T:
            # Ran past the end of the recording: pin the window to the end.
            win_start, win_end = max(0.0, T - target), T
        windows.append((win_start, win_end))
    return windows

def fallback_sequential_windows(T, target=5.0):
    """Cut ``[0, T]`` into back-to-back windows of ``target`` seconds.

    The final window is truncated at ``T``, so it may be shorter than
    ``target``. Returns an empty list when ``T`` is not positive.
    """
    windows = []
    cursor = 0.0
    while cursor < T:
        upcoming = cursor + target
        windows.append((cursor, min(T, upcoming)))
        cursor = upcoming
    return windows

def export_clips(in_file, out_dir, clips):
    """Cut ``in_file`` into WAV clips, one per ``(start, end)`` pair in ``clips``.

    Each clip is written to ``out_dir/clip_NNNN.wav`` (created if missing,
    existing files overwritten) as 16 kHz mono audio.

    Args:
        in_file: Source audio file.
        out_dir: Destination directory.
        clips: Iterable of ``(start_sec, end_sec)`` tuples.

    Returns:
        List of paths of the clips ffmpeg exported successfully. A clip whose
        ffmpeg run fails is reported with a ``[WARN]`` line and skipped rather
        than raising, so one bad segment does not abort the batch.
    """
    os.makedirs(out_dir, exist_ok=True)
    written = []
    for i, (a, b) in enumerate(clips):
        out = os.path.join(out_dir, f"clip_{i:04d}.wav")
        proc = subprocess.run([
            "ffmpeg", "-y", "-i", in_file,
            "-ss", str(a), "-to", str(b),
            "-ar", "16000", "-ac", "1",
            out
        ], stdout=subprocess.DEVNULL, stderr=subprocess.PIPE)
        if proc.returncode != 0:
            # Surface the failure instead of silently producing fewer clips.
            # Decode leniently: ffmpeg may echo non-UTF-8 metadata.
            err_lines = (proc.stderr or b"").decode("utf-8", errors="replace").strip().splitlines()
            reason = err_lines[-1] if err_lines else "no error output"
            print(f"[WARN] ffmpeg failed for {out} ({a}-{b}s): {reason}")
            continue
        written.append(out)
    return written

def process_one(in_file, target=5.0, d=0.35, gap_merge=0.4):
    """Split one audio file into fixed-length WAV clips around detected speech.

    Pipeline: measure duration and peak level, run silence detection with a
    threshold derived from the peak, invert the silences into speech regions,
    merge regions separated by short gaps, turn each region into a
    ``target``-second window and export the windows. If no speech region is
    found, the whole file is cut into sequential ``target``-second windows.

    Args:
        in_file: Path of the audio file to process.
        target: Clip length in seconds.
        d: Minimum silence duration passed to silence detection.
        gap_merge: Regions separated by at most this many seconds are merged.

    Returns:
        ``(out_dir, wavs)``: the output folder (``chunks_<basename>`` with
        spaces replaced by underscores) and the sorted list of ``.wav`` files
        it contains.
    """
    T = duration_sec(in_file)
    max_db = get_max_volume_db(in_file)

    # WhatsApp-friendly threshold: 35 dB below the peak, clamped to [-60, -25] dB.
    noise_db = max(min(max_db - 35.0, -25.0), -60.0)

    sil_file = detect_silence(in_file, noise_db=noise_db, d=d)
    starts, ends = parse_silences(sil_file)

    intervals = build_speech_intervals(starts, ends, T)
    regions = merge_gaps(intervals, gap_merge=gap_merge)

    # If no speech region was found, fall back to plain sequential chunks.
    if len(regions) == 0:
        clips = fallback_sequential_windows(T, target=target)
    else:
        clips = make_fixed_5s_windows(regions, T, target=target)

    base = os.path.splitext(os.path.basename(in_file))[0].replace(" ", "_")
    out_dir = f"chunks_{base}"

    # Drop clips left over from a previous run into the same folder; otherwise
    # a re-run that yields fewer clips would still count (and later ship) the
    # stale ones. glob.escape keeps brackets in the folder name literal.
    for stale in glob.glob(os.path.join(glob.escape(out_dir), "clip_*.wav")):
        os.remove(stale)

    export_clips(in_file, out_dir, clips)

    wavs = sorted(glob.glob(os.path.join(glob.escape(out_dir), "*.wav")))
    print(f"[OK] {in_file} -> {len(wavs)} clips in {out_dir}")
    return out_dir, wavs

# Run for ALL uploaded files.
# `all_files` comes from the upload cell; each file gets its own output folder,
# and the folder names are collected so they can be reported once at the end.
out_folders = []
for f in all_files:
    # `wavs` (the per-file clip list) is not used here; process_one prints the count itself.
    out_dir, wavs = process_one(f, target=5.0, d=0.35, gap_merge=0.4)
    out_folders.append(out_dir)

print("Done. Output folders:", out_folders)


[OK] agri.aac -> 12 clips in chunks_agri
[OK] WhatsApp Audio 2025-12-26 at 10.36.36 PM.aac -> 62 clips in chunks_WhatsApp_Audio_2025-12-26_at_10.36.36_PM
Done. Output folders: ['chunks_agri', 'chunks_WhatsApp_Audio_2025-12-26_at_10.36.36_PM']


In [4]:
!zip -r all_chunks.zip chunks_*

  adding: chunks_agri/ (stored 0%)
  adding: chunks_agri/clip_0002.wav (deflated 18%)
  adding: chunks_agri/clip_0004.wav (deflated 16%)
  adding: chunks_agri/clip_0003.wav (deflated 21%)
  adding: chunks_agri/clip_0000.wav (deflated 18%)
  adding: chunks_agri/clip_0007.wav (deflated 17%)
  adding: chunks_agri/clip_0009.wav (deflated 18%)
  adding: chunks_agri/clip_0008.wav (deflated 19%)
  adding: chunks_agri/clip_0010.wav (deflated 18%)
  adding: chunks_agri/clip_0011.wav (deflated 18%)
  adding: chunks_agri/clip_0006.wav (deflated 17%)
  adding: chunks_agri/clip_0005.wav (deflated 18%)
  adding: chunks_agri/clip_0001.wav (deflated 18%)
  adding: chunks_WhatsApp_Audio_2025-12-26_at_10.36.36_PM/ (stored 0%)
  adding: chunks_WhatsApp_Audio_2025-12-26_at_10.36.36_PM/clip_0047.wav (deflated 15%)
  adding: chunks_WhatsApp_Audio_2025-12-26_at_10.36.36_PM/clip_0017.wav (deflated 17%)
  adding: chunks_WhatsApp_Audio_2025-12-26_at_10.36.36_PM/clip_0045.wav (deflated 20%)
  adding: chunks_What

In [5]:
# Hand the archive built by the zip cell to Colab's download helper.
# (`files` is re-imported so this cell can be run on its own.)
from google.colab import files
files.download("all_chunks.zip")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>