In [1]:
# ================================
# Kaggle Video/Data Augmentation (Robust v3 with clear logging)
# ================================
# - Fixes "arrays to stack..." by extracting audio via ffmpeg -> WAV -> soundfile (no to_soundarray()).
# - Adds explicit "Created new video..." print after each successful file.
# - Keeps safe fallbacks and attempt caps.
#
# Outputs:
#   /kaggle/working/augmented_videos/*.mp4
#   /kaggle/working/augmented_labels.csv
#
# Assumes dataset structure:
#   /kaggle/input/interview-videos/videos/*.mp4
#   /kaggle/input/interview-videos/labels_filenames_only.csv  (cols: video_filename, score)

!pip -q install moviepy==1.0.3 librosa==0.10.1 soundfile==0.12.1 numba==0.58.1 --no-input

import os, random, tempfile, numpy as np, pandas as pd
from moviepy.editor import VideoFileClip, AudioFileClip, vfx
import librosa, soundfile as sf

# ---------- config ----------
# Seed both RNGs used by the augmentations so a fresh run is repeatable.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# Input locations (Kaggle dataset mount).
DATASET_NAME = "interview-videos"  # change if your dataset slug differs
INPUT_DIR = f"/kaggle/input/{DATASET_NAME}"
VIDEO_DIR = os.path.join(INPUT_DIR, "videos")
LABELS_CSV = os.path.join(INPUT_DIR, "labels_filenames_only.csv")

# Output locations (Kaggle working directory); the video folder is created up front.
OUTPUT_DIR = "/kaggle/working/augmented_videos"
OUTPUT_CSV = "/kaggle/working/augmented_labels.csv"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Number of new augmented samples to create, keyed by class label.
TARGET_NEW = {
    0: 14,
    1: 5,
    2: 18,
}

# ---------- utility ----------
def rms(x):
    """Return the root-mean-square of array ``x`` as a Python float.

    The samples are promoted to float64 first, and a 1e-12 floor is added
    under the square root so an all-zero signal yields a tiny positive value
    rather than exactly zero.
    """
    samples = x.astype(np.float64)
    mean_square = np.mean(samples * samples)
    return float(np.sqrt(mean_square + 1e-12))
def db_to_ratio(db):
    """Convert a level in decibels to a linear amplitude ratio (10 ** (db / 20))."""
    exponent = db / 20.0
    return 10.0 ** exponent

def has_audio(clip):
    """Return True when ``clip.audio`` can be read and is not None.

    Any exception raised while reading the attribute is treated as "no audio".
    """
    try:
        track = clip.audio
    except Exception:
        return False
    return track is not None

def safe_basename_noext(path):
    """Return the final path component of ``path`` with its last extension removed."""
    filename = os.path.basename(path)
    stem, _extension = os.path.splitext(filename)
    return stem

def write_clip(clip, out_path, fps=None):
    """Encode ``clip`` to ``out_path`` as H.264 video with AAC audio.

    Frame-rate selection: an explicit ``fps`` wins; otherwise the clip's own
    truthy ``fps`` attribute is used; otherwise 25 is assumed.
    The intermediate audio file is written to the system temp directory and
    removed by MoviePy afterwards.
    """
    if fps is not None:
        final_fps = fps
    else:
        final_fps = getattr(clip, "fps", None) or 25

    scratch_audio = os.path.join(tempfile.gettempdir(), "temp-audio.m4a")
    encode_options = dict(
        codec="libx264",
        audio_codec="aac",
        fps=final_fps,
        threads=2,
        verbose=False,
        logger=None,
        temp_audiofile=scratch_audio,
        remove_temp=True,
    )
    clip.write_videofile(out_path, **encode_options)

# ---------- audio I/O (robust) ----------
def extract_audio_array_via_ffmpeg(clip, target_sr=44100):
    """
    Extract a clip's audio as a numpy array via a temporary WAV file.

    The audio track is written with MoviePy (``write_audiofile`` at
    ``target_sr``) and read back with soundfile, so the resulting array has a
    consistent layout regardless of the source container.

    Args:
        clip: object exposing an ``audio`` attribute with ``write_audiofile``.
        target_sr: sample rate requested when writing the temporary WAV.

    Returns:
        (audio_array, sr): ``audio_array`` is float32 with shape (N,) when the
        WAV has one channel, otherwise (N, C); ``sr`` is the rate reported by
        soundfile.

    Raises:
        ValueError: with message "no-audio" when the clip has no audio track.
    """
    if not has_audio(clip):
        raise ValueError("no-audio")

    # Reserve a unique path, then release the handle so ffmpeg can write to it.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as handle:
        tmpwav_path = handle.name

    try:
        # Use moviepy/ffmpeg to resample consistently
        clip.audio.write_audiofile(tmpwav_path, fps=target_sr, verbose=False, logger=None)
        y, sr = sf.read(tmpwav_path, dtype="float32", always_2d=True)  # shape (N, C>=1)
    finally:
        # Always remove the scratch WAV, including when the write or read fails,
        # so repeated failed attempts do not pile files up in the temp directory.
        if os.path.exists(tmpwav_path):
            os.remove(tmpwav_path)

    if y.shape[1] == 1:
        y = y[:, 0]  # mono -> 1D
    return y, sr

def array_to_audioclip(y, sr):
    """Write ``y`` to a temporary 16-bit PCM WAV and return an AudioFileClip for it.

    The path of the temporary file is stored on the returned clip as
    ``_temp_wav_path`` so that ``close_temp_audio`` can delete it later.

    Args:
        y: numpy audio array; a 2-D array with a single column is flattened to 1-D.
        sr: sample rate passed to soundfile when writing.

    Returns:
        An ``AudioFileClip`` backed by the temporary WAV.

    Raises:
        Whatever soundfile or MoviePy raise; the temporary file is removed
        before the exception propagates.
    """
    # Reserve a unique path, then release the handle so soundfile can write to it.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as handle:
        tmp_path = handle.name

    try:
        if y.ndim == 2 and y.shape[1] == 1:
            y = y[:, 0]
        sf.write(tmp_path, y, sr, subtype="PCM_16")
        clip = AudioFileClip(tmp_path)
    except Exception:
        # Nobody will ever get a clip carrying this path, so clean it up here.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
        raise

    clip._temp_wav_path = tmp_path
    return clip

def close_temp_audio(clip):
    """Close an audio clip and delete its backing temp WAV (best effort).

    The temp path is read from the clip's ``_temp_wav_path`` attribute, if any.
    Closing and deleting are handled independently: a failure in
    ``clip.close()`` no longer prevents the temporary file from being removed.
    This function never raises.
    """
    temp_path = getattr(clip, "_temp_wav_path", None)

    try:
        clip.close()
    except Exception:
        # Best effort: a clip that cannot be closed must not block file cleanup.
        pass

    try:
        if temp_path and os.path.exists(temp_path):
            os.remove(temp_path)
    except Exception:
        # Best effort: leave the file behind rather than abort the pipeline.
        pass

# ---------- video-only augments ----------
def aug_brightness_contrast(clip, brightness_scale=1.0, contrast=0):
    """Apply a mild brightness and contrast change to ``clip``.

    ``contrast`` is clamped to [-25, 25] and ``brightness_scale`` to
    [0.9, 1.1] before use. Brightness is applied first (``vfx.colorx``),
    then contrast (``vfx.lum_contrast`` with lum=0, contrast_thr=127).
    """
    contrast_amount = float(np.clip(contrast, -25, 25))
    brightness_factor = float(np.clip(brightness_scale, 0.9, 1.1))

    brightened = clip.fx(vfx.colorx, factor=brightness_factor)
    return brightened.fx(
        vfx.lum_contrast,
        lum=0,
        contrast=contrast_amount,
        contrast_thr=127,
    )

def aug_small_crop(clip, max_border_pct=0.05):
    """Crop an equal fractional border off every side of ``clip``.

    The border fraction is drawn uniformly between 0.015 and
    ``max_border_pct`` using the ``random`` module; pixel coordinates are
    truncated to ints before being passed to ``clip.crop``.
    """
    width, height = clip.w, clip.h
    border = float(random.uniform(0.015, max_border_pct))
    keep = 1 - border

    left = int(width * border)
    right = int(width * keep)
    top = int(height * border)
    bottom = int(height * keep)
    return clip.crop(x1=left, y1=top, x2=right, y2=bottom)

def aug_micro_speed_jitter(clip, strength=0.02):
    """Re-time ``clip`` by a random factor in [1 - strength, 1 + strength] via ``vfx.speedx``."""
    jitter = random.uniform(-strength, strength)
    speed_factor = 1.0 + jitter
    return clip.fx(vfx.speedx, speed_factor)

def aug_speed_change_matched(clip, max_change=0.05):
    """Re-time ``clip`` by a random factor in [1 - max_change, 1 + max_change] via ``vfx.speedx``."""
    offset = random.uniform(-max_change, max_change)
    speed_factor = 1.0 + offset
    return clip.fx(vfx.speedx, speed_factor)

# ---------- audio numpy transforms ----------
def aug_add_light_noise(a, sr, snr_db=25):
    """Add Gaussian noise to ``a`` at roughly ``snr_db`` below the signal's RMS.

    ``sr`` is accepted for a uniform transform signature but is not used.
    The result is clipped to [-1.0, 1.0].
    """
    signal_level = rms(a)
    noise_level = signal_level / db_to_ratio(snr_db)
    hiss = np.random.normal(0, noise_level, size=a.shape).astype(np.float32)
    noisy = a + hiss
    return np.clip(noisy, -1.0, 1.0)

def aug_volume_db(a, sr, db_change=-2.0):
    """Scale ``a`` by ``db_change`` decibels and clip the result to [-1.0, 1.0].

    ``sr`` is accepted for a uniform transform signature but is not used.
    """
    gain = db_to_ratio(db_change)
    scaled = a * gain
    return np.clip(scaled, -1.0, 1.0)

def aug_simple_reverb(a, sr, decay=0.25, delay_ms=40):
    """Add a single delayed, attenuated copy of the signal to itself.

    Args:
        a: audio array, shape (N,) or (N, C); samples run along axis 0.
        sr: sample rate, used to convert ``delay_ms`` to a sample count.
        decay: gain applied to the delayed copy.
        delay_ms: echo delay in milliseconds; negative values are treated as 0.

    Returns:
        A new array (the input is not modified) with the same shape as ``a``.
        Only the samples from the delay onward are changed, and those are
        clipped to [-1.0, 1.0]. When the delay is at least as long as the
        signal, an unmodified copy is returned.
    """
    delay = max(0, int(sr * (delay_ms / 1000.0)))
    n = len(a)
    out = a.copy()
    if delay < n:
        # Slice the source as a[:n - delay] rather than a[:-delay]: with a
        # zero-sample delay, a[:-0] is empty and the addition would fail.
        # Row slicing works unchanged for both mono (N,) and multi-channel (N, C).
        out[delay:] = np.clip(out[delay:] + decay * a[:n - delay], -1.0, 1.0)
    return out

def aug_pitch_shift_safe(a, sr, semitones=0.5):
    """
    Pitch-shift mono or multi-channel audio with librosa.

    - 1-D input is shifted directly.
    - Otherwise each column is shifted on its own, every channel is trimmed
      to the shortest shifted length, and the columns are re-stacked.

    The result is clipped to [-1.0, 1.0] and returned as float32.
    """
    def _shift(signal):
        # One librosa call per channel, always on float32 samples.
        return librosa.effects.pitch_shift(
            signal.astype(np.float32), sr=sr, n_steps=float(semitones)
        )

    if a.ndim == 1:
        shifted = _shift(a)
    else:
        per_channel = [_shift(a[:, idx]) for idx in range(a.shape[1])]
        shortest = min(len(channel) for channel in per_channel)
        trimmed = tuple(channel[:shortest].astype(np.float32) for channel in per_channel)
        shifted = np.column_stack(trimmed)

    return np.clip(shifted, -1.0, 1.0).astype(np.float32)

def apply_audio_numpy_transform(clip, transform_fn, **kwargs):
    """
    Run a numpy-level audio transform on a clip's soundtrack.

    Pipeline: extract audio to a numpy array (44100 Hz request) ->
    ``transform_fn(array, sr, **kwargs)`` -> temp WAV -> AudioFileClip ->
    ``clip.set_audio``.

    Returns:
        (new_clip, audio_new): the clip with replaced audio, and the
        AudioFileClip itself so the caller can close it afterwards.

    Raises:
        ValueError: with message "no-audio" when the clip has no audio track.
    """
    if not has_audio(clip):
        raise ValueError("no-audio")

    samples, rate = extract_audio_array_via_ffmpeg(clip, target_sr=44100)
    transformed = transform_fn(samples, rate, **kwargs)
    replacement_audio = array_to_audioclip(transformed, rate)
    return clip.set_audio(replacement_audio), replacement_audio

# ---------- per-class strategies (return tag too) ----------
def choose_aug_for_class0(clip):
    """Pick one random augmentation for a class-0 clip.

    Candidates are four video-level options (brightness/contrast, crop,
    micro speed jitter, speed change) plus, when the clip has audio, light
    noise and pitch shift.

    Returns:
        (aug, temp_audio, tag): the augmented clip, the temporary audio clip
        that must be closed after writing (None for video-only options), and
        a short tag used in the output filename. The pitch tag uses the same
        ``pitch+1.0`` form as the class-2 strategy, so filenames contain no
        parentheses.
    """
    options = ["vid_bc", "vid_crop", "vid_jitter", "av_speed"]
    if has_audio(clip):
        options += ["aud_noise", "aud_pitch"]
    choice = random.choice(options)

    temp_audio = None
    if choice == "vid_bc":
        aug = aug_brightness_contrast(clip, 1.0 + random.uniform(-0.08, 0.08), random.uniform(-15, 15))
        tag = "bc"
    elif choice == "vid_crop":
        aug = aug_small_crop(clip, 0.05)
        tag = "crop"
    elif choice == "vid_jitter":
        aug = aug_micro_speed_jitter(clip, 0.02)
        tag = "micro_speed"
    elif choice == "av_speed":
        aug = aug_speed_change_matched(clip, 0.05)
        tag = "speed"
    elif choice == "aud_noise":
        aug, temp_audio = apply_audio_numpy_transform(clip, aug_add_light_noise, snr_db=random.uniform(22, 30))
        tag = "noise"
    else:
        # Only "aud_pitch" remains; every other option is handled above.
        semis = random.choice([-1.0, -0.5, 0.5, 1.0])
        aug, temp_audio = apply_audio_numpy_transform(clip, aug_pitch_shift_safe, semitones=semis)
        tag = f"pitch{semis:+.1f}"
    return aug, temp_audio, tag

def choose_aug_for_class1(clip):
    """Pick a video-only augmentation (brightness/contrast or crop) for a class-1 clip.

    Returns:
        (aug, temp_audio, tag); ``temp_audio`` is always None for this class.
    """
    picked = random.choice(["vid_bc", "vid_crop"])
    if picked != "vid_bc":
        return aug_small_crop(clip, 0.04), None, "crop"

    brightness = 1.0 + random.uniform(-0.06, 0.06)
    contrast = random.uniform(-10, 10)
    return aug_brightness_contrast(clip, brightness, contrast), None, "bc"

def choose_aug_for_class2(clip):
    """Build a stacked augmentation for a class-2 clip.

    Always applies brightness/contrast, adds a speed change with probability
    0.5, and, when the clip has audio, exactly one audio transform chosen
    from noise, pitch shift, volume change or reverb.

    Returns:
        (aug, temp_audio, tag): the augmented clip, the temporary audio clip
        to close after writing (None when no audio transform ran), and a tag
        made of the applied step names joined with underscores.
    """
    audio_ok = has_audio(clip)
    include_speed = random.random() < 0.5

    brightness = 1.0 + random.uniform(-0.1, 0.1)
    contrast = random.uniform(-18, 18)
    aug = aug_brightness_contrast(clip, brightness, contrast)
    tag_parts = ["bc"]

    if include_speed:
        aug = aug_speed_change_matched(aug, 0.05)
        tag_parts.append("speed")

    temp_audio = None
    if not audio_ok:
        return aug, temp_audio, "_".join(tag_parts)

    # Select the transform, its parameters and its tag; apply it once below.
    which_audio = random.choice(["noise", "pitch", "vol", "reverb"])
    if which_audio == "noise":
        transform = aug_add_light_noise
        params = {"snr_db": random.uniform(22, 30)}
        audio_tag = "noise"
    elif which_audio == "pitch":
        semis = random.choice([-1.0, -0.5, 0.5, 1.0])
        transform = aug_pitch_shift_safe
        params = {"semitones": semis}
        audio_tag = f"pitch{semis:+.1f}"
    elif which_audio == "vol":
        db = random.uniform(-3.0, 2.0)
        transform = aug_volume_db
        params = {"db_change": db}
        audio_tag = f"vol{db:+.1f}dB"
    else:
        transform = aug_simple_reverb
        params = {
            "decay": random.uniform(0.15,0.3),
            "delay_ms": random.uniform(25,55),
        }
        audio_tag = "reverb"

    aug, temp_audio = apply_audio_numpy_transform(aug, transform, **params)
    tag_parts.append(audio_tag)
    return aug, temp_audio, "_".join(tag_parts)

# ---------- data ----------
# Read the label sheet and check that the two required columns are present.
df = pd.read_csv(LABELS_CSV)
assert {"video_filename", "score"} <= set(df.columns), "CSV must have columns: video_filename, score"

# Group existing source videos by class label; rows whose label is not 0/1/2
# or whose file is missing under VIDEO_DIR are skipped.
class_to_files = {0: [], 1: [], 2: []}
for raw_name, raw_score in zip(df["video_filename"], df["score"]):
    fn = str(raw_name).strip()
    lab = int(raw_score)
    full = os.path.join(VIDEO_DIR, fn)
    if lab in class_to_files and os.path.exists(full):
        class_to_files[lab].append(full)

print("Found per-class originals:")
for class_label, class_paths in class_to_files.items():
    print(f"  Class {class_label}: {len(class_paths)} files")

# ---------- main ----------
# Accumulates one (output_filename, label) tuple per augmented video written;
# turned into the labels CSV once all classes have been processed.
rows_for_csv = []

def safe_video_only_fallback(clip):
    """Return ``(augmented_clip, tag)`` using a video-only augmentation.

    With equal probability this is a mild brightness/contrast change
    (tag "fallback_bc") or a small crop (tag "fallback_crop"). It is used
    when the primary augmentation for a clip has failed.
    """
    if random.random() >= 0.5:
        return aug_small_crop(clip, 0.04), "fallback_crop"

    brightness = 1.0 + random.uniform(-0.06, 0.06)
    contrast = random.uniform(-10, 10)
    return aug_brightness_contrast(clip, brightness, contrast), "fallback_bc"

def _close_quietly(clip):
    """Close a clip, ignoring errors so cleanup never masks the real outcome."""
    try:
        clip.close()
    except Exception:
        pass


def _remove_partial_output(path):
    """Delete a partially written output file left behind by a failed encode."""
    if path and os.path.exists(path):
        try:
            os.remove(path)
        except OSError:
            pass


def _fallback_as_triple(clip):
    """Adapt safe_video_only_fallback to the (clip, temp_audio, tag) shape."""
    safe_clip, tag = safe_video_only_fallback(clip)
    return safe_clip, None, tag


def _render_one(src_path, base, index, aug_fn):
    """Augment ``src_path`` with ``aug_fn`` and encode it; return (out_name, tag).

    Temporary audio and the augmented clip are always released, and a
    half-written output file is removed before any exception propagates.
    """
    out_path = None
    try:
        with VideoFileClip(src_path) as clip:
            aug_clip, temp_aud, tag = aug_fn(clip)
            try:
                out_name = f"{base}_aug{index:02d}_{tag}.mp4"
                out_path = os.path.join(OUTPUT_DIR, out_name)
                write_clip(aug_clip, out_path, fps=getattr(clip, "fps", None))
            finally:
                # Release resources whether or not the encode succeeded.
                if temp_aud is not None:
                    close_temp_audio(temp_aud)
                _close_quietly(aug_clip)
    except Exception:
        # Do not leave a corrupt file in OUTPUT_DIR: it has no CSV row and
        # would otherwise be picked up when the folder is packaged.
        _remove_partial_output(out_path)
        raise
    return out_name, tag


def generate_for_class(label, n_new, choose_aug_fn, cycle_all=False):
    """Create up to ``n_new`` augmented videos for one class.

    Args:
        label: class label; key into ``class_to_files`` and value written to the CSV rows.
        n_new: number of new videos to create.
        choose_aug_fn: callable ``clip -> (aug_clip, temp_audio, tag)``.
        cycle_all: when True, iterate over the originals round-robin;
            otherwise pick a random original on every attempt.

    Each attempt first tries ``choose_aug_fn``; if that fails, a video-only
    fallback is tried on the same source. A row is appended to
    ``rows_for_csv`` only after the file has been fully written. Attempts are
    capped at ``max(60, n_new * 20)``; a note is printed if the target is not
    reached.

    Raises:
        AssertionError: when the class has no original videos.
    """
    originals = class_to_files[label]
    assert len(originals) > 0, f"No originals for class {label}"

    created, attempts = 0, 0
    max_attempts = max(60, n_new * 20)

    while created < n_new and attempts < max_attempts:
        src_path = originals[attempts % len(originals)] if cycle_all else random.choice(originals)
        attempts += 1
        base = safe_basename_noext(src_path)
        index = created + 1

        try:
            out_name, tag = _render_one(src_path, base, index, choose_aug_fn)
        except Exception as e:
            print(f"[WARN] Primary augment failed on {src_path}: {e}. Trying safe video-only fallback...")
            try:
                out_name, tag = _render_one(src_path, base, index, _fallback_as_triple)
            except Exception as e2:
                print(f"[WARN] Fallback also failed on {src_path}: {e2}. Moving on...")
                continue

        # Bookkeeping happens only once the output is complete on disk.
        rows_for_csv.append((out_name, label))
        created += 1
        print(f"Created new video using technique '{tag}' → {out_name}")

    if created < n_new:
        print(f"[NOTE] Class {label}: created {created}/{n_new} (attempts={attempts}, cap={max_attempts}).")

# Per-class plan: (label, strategy, cycle through all originals?).
# Class 0: +14, Class 1: +5, Class 2: +18 (cycle through all originals).
GENERATION_PLAN = (
    (0, choose_aug_for_class0, False),
    (1, choose_aug_for_class1, False),
    (2, choose_aug_for_class2, True),
)
for plan_label, plan_strategy, plan_cycle in GENERATION_PLAN:
    generate_for_class(
        plan_label,
        TARGET_NEW[plan_label],
        choose_aug_fn=plan_strategy,
        cycle_all=plan_cycle,
    )

# ---------- write CSV for new items ----------
augmented_labels = pd.DataFrame(rows_for_csv, columns=["video_filename", "score"])
augmented_labels.to_csv(OUTPUT_CSV, index=False)
print(f"\nCreated {len(rows_for_csv)} augmented videos in: {OUTPUT_DIR}")
print(f"Wrote CSV: {OUTPUT_CSV}")


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m253.7/253.7 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m23.0 MB/s[0m eta [36m0:00:00[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.6/3.6 MB[0m [31m75.5 MB/s[0m eta [36m0:00:00[0m:00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 MB[0m [31m33.8 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
numba-cuda 0.2.0 requires numba>=0.59.1, but you have numba 0.58.1 which is incompatible.
dask-cuda 25.2.0 requires numba<0.61.0a0,>=0.59.1, but you have numba 0.58.1 which is incompatible.
cuml-cu12 25.2.1 requires numba<0.61.0a0,>=0.59.1, but you have numba 0.58.1 which is incompati

error: XDG_RUNTIME_DIR not set in the environment.
ALSA lib confmisc.c:855:(parse_card) cannot find card '0'
ALSA lib conf.c:5178:(_snd_config_evaluate) function snd_func_card_inum returned error: No such file or directory
ALSA lib confmisc.c:422:(snd_func_concat) error evaluating strings
ALSA lib conf.c:5178:(_snd_config_evaluate) function snd_func_concat returned error: No such file or directory
ALSA lib confmisc.c:1334:(snd_func_refer) error evaluating name
ALSA lib conf.c:5178:(_snd_config_evaluate) function snd_func_refer returned error: No such file or directory
ALSA lib conf.c:5701:(snd_config_expand) Evaluate error: No such file or directory
ALSA lib pcm.c:2664:(snd_pcm_open_noupdate) Unknown PCM default
ALSA lib confmisc.c:855:(parse_card) cannot find card '0'
ALSA lib conf.c:5178:(_snd_config_evaluate) function snd_func_card_inum returned error: No such file or directory
ALSA lib confmisc.c:422:(snd_func_concat) error evaluating strings
ALSA lib conf.c:5178:(_snd_config_evalu

Found per-class originals:
  Class 0: 11 files
  Class 1: 24 files
  Class 2: 7 files
Created new video using technique 'bc' → raj8_aug01_bc.mp4
Created new video using technique 'crop' → jani6_aug02_crop.mp4
Created new video using technique 'pitch(-1.0)' → raj8_aug03_pitch(-1.0).mp4
Created new video using technique 'speed' → raj5_aug04_speed.mp4
Created new video using technique 'crop' → jani4_aug05_crop.mp4
Created new video using technique 'bc' → raj5_aug06_bc.mp4
Created new video using technique 'speed' → raj4_aug07_speed.mp4
Created new video using technique 'micro_speed' → raj5_aug08_micro_speed.mp4
Created new video using technique 'crop' → jani3_aug09_crop.mp4
Created new video using technique 'micro_speed' → jani9_aug10_micro_speed.mp4
Created new video using technique 'bc' → jani9_aug11_bc.mp4
Created new video using technique 'noise' → jani9_aug12_noise.mp4
Created new video using technique 'pitch(+1.0)' → jani3_aug13_pitch(+1.0).mp4
Created new video using technique 'bc'

In [2]:
# Zip up the augmented videos + CSV and provide a download link (Kaggle)
import os
from zipfile import ZipFile, ZIP_DEFLATED
from IPython.display import FileLink, display

# Locations written by the augmentation cell above, plus the archive to build.
OUTPUT_DIR = "/kaggle/working/augmented_videos"
OUTPUT_CSV = "/kaggle/working/augmented_labels.csv"

ZIP_PATH = "/kaggle/working/augmented_package.zip"

# Stop early if the augmentation cell has not produced its outputs.
assert os.path.isdir(OUTPUT_DIR), f"Missing folder: {OUTPUT_DIR}"
assert os.path.isfile(OUTPUT_CSV), f"Missing CSV: {OUTPUT_CSV}"

# Collect every file under the output folder first, in os.walk order.
video_paths = [
    os.path.join(root, fn)
    for root, _, files in os.walk(OUTPUT_DIR)
    for fn in files
]

# Build the archive (mode "w" overwrites an existing zip).
with ZipFile(ZIP_PATH, mode="w", compression=ZIP_DEFLATED) as zf:
    for src_path in video_paths:
        # Keep the videos under an 'augmented_videos/' folder inside the zip.
        relative_path = os.path.relpath(src_path, OUTPUT_DIR)
        zf.write(src_path, os.path.join("augmented_videos", relative_path))
    # The labels CSV sits at the top level of the archive.
    zf.write(OUTPUT_CSV, arcname="augmented_labels.csv")

file_count = len(video_paths) + 1  # videos plus the CSV

size_mb = os.path.getsize(ZIP_PATH) / (1024 * 1024)
print(f"Zipped {file_count} files into: {ZIP_PATH}  ({size_mb:.2f} MB)")

# Show a clickable link (you can also grab it from the right 'Output' panel)
display(FileLink(ZIP_PATH, result_html_prefix="Click to download → "))


Zipped 38 files into: /kaggle/working/augmented_package.zip  (204.90 MB)
