In [35]:
# Mount Google Drive at /content/gdrive/ so the project files stored on Drive
# are reachable from the Colab runtime.
from google.colab import drive

drive.mount('/content/gdrive/')

Drive already mounted at /content/gdrive/; to attempt to forcibly remount, call drive.mount("/content/gdrive/", force_remount=True).


In [36]:
# Make the project folder on Drive the working directory; relative paths such
# as "noises/train" used later in the notebook resolve against it.
%cd /content/gdrive/MyDrive/ColabNotebooks/CapstoneBreath

/content/gdrive/MyDrive/ColabNotebooks/CapstoneBreath


In [None]:
!pip install audiomentations soundfile librosa numpy
!pip install torchaudio soundfile
!pip install torch

In [39]:
import os
from glob import glob
import time
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
from IPython.display import Audio
import torch.nn.functional as F
import time
from tqdm import tqdm
import torchaudio
import random
import soundfile as sf
import numpy as np
import subprocess
from glob import glob
from torch.utils.data import Dataset
from pathlib import Path
from IPython.display import Audio, display
import matplotlib.pyplot as plt
import os
import random
import numpy as np
import torch
import torchaudio
import soundfile as sf
import librosa
from audiomentations import Compose, AddGaussianNoise, PitchShift, Shift
from pathlib import Path

In [47]:
# Parameters
INPUT_DIR = "noises/train"            # source .wav files
OUTPUT_DIR = "noises/augmented"       # destination for generated segments
SR = 16000                            # sample rate (Hz) used for loading and saving
SEGMENT_SECONDS = 16                  # length of every generated segment
SEGMENT_SAMPLES = SR * SEGMENT_SECONDS
VARIANTS_PER_FILE = 50                # target number of segments per source file

# Augmentations: each transform is applied independently with its own probability p.
augment = Compose([
    # Low-level additive Gaussian noise.
    AddGaussianNoise(
        min_amplitude=0.0005,
        max_amplitude=0.002,
        p=0.3,
    ),
    # Pitch shift of at most one semitone in either direction.
    PitchShift(
        min_semitones=-1,
        max_semitones=1,
        p=0.3,
    ),
    # Time shift; presumably 0.1-0.3 is a fraction of the clip length
    # (audiomentations' default shift unit) - TODO confirm for the installed version.
    Shift(
        min_shift=0.1,
        max_shift=0.3,
        p=0.5,
    ),
])

# Mask feature functions
def sliding_rms(waveform, frame_size, hop_size):
    """Compute the frame-wise root-mean-square level of a single-channel waveform.

    Args:
        waveform: Floating-point tensor of shape (1, num_samples).
        frame_size: Length of each analysis frame, in samples.
        hop_size: Step between the starts of consecutive frames, in samples.

    Returns:
        1-D tensor with one RMS value per frame; there are
        (num_samples - frame_size) // hop_size + 1 frames. The result has the
        same dtype and device as ``waveform``.
    """
    power = waveform ** 2
    # Build the summing kernel with the input's dtype/device: conv1d rejects a
    # float32 CPU kernel when the signal is float64 or lives on another device.
    window = torch.ones(1, 1, frame_size, dtype=waveform.dtype, device=waveform.device)
    mean_power = torch.nn.functional.conv1d(power.unsqueeze(0), window, stride=hop_size) / frame_size
    # reshape(-1) instead of squeeze(): keeps the frame axis even when only a
    # single frame fits, so callers can always read the frame count from the shape.
    return torch.sqrt(mean_power).reshape(-1)

def zero_crossing_rate(waveform, frame_size, hop_size):
    """Compute a frame-wise zero-crossing measure for the first channel of a waveform.

    The measure is the sum of ``|diff(sign(x))|`` over each frame divided by
    ``frame_size``. A direct flip between -1 and +1 therefore contributes 2
    (a step to or from an exact zero contributes 1), so values range up to 2.0
    and are roughly twice the conventional zero-crossing rate. Thresholds
    applied to this function's output must be chosen on that scale.

    Args:
        waveform: Floating-point tensor of shape (channels, num_samples); only
            channel 0 is analysed.
        frame_size: Length of each analysis frame, in samples.
        hop_size: Step between the starts of consecutive frames, in samples.

    Returns:
        1-D tensor with one value per frame. Because differencing shortens the
        signal by one sample, there are
        (num_samples - 1 - frame_size) // hop_size + 1 frames.
    """
    mono = waveform[0]
    flips = torch.diff(torch.sign(mono)).abs().unsqueeze(0).unsqueeze(0)
    # Match the kernel to the signal's dtype/device so float64 or GPU input
    # does not fail inside conv1d.
    window = torch.ones(1, 1, frame_size, dtype=flips.dtype, device=flips.device)
    per_frame = torch.nn.functional.conv1d(flips, window, stride=hop_size)
    # reshape(-1) keeps the frame axis even when only one frame fits.
    return per_frame.reshape(-1) / frame_size

# Mask parameters
frame_size = int(0.025 * SR)   # 25 ms analysis frame, in samples
hop_size = int(0.010 * SR)     # 10 ms hop, in samples
mask_threshold = 0.01          # minimum fraction of active frames for a segment to be kept

# Named thresholds for the frame mask and the segment search, kept in one place for tuning.
RMS_THRESHOLD = 0.03           # a frame is active only if its RMS exceeds this ...
ZCR_THRESHOLD = 0.2            # ... and its zero_crossing_rate() value exceeds this
MIN_PEAK_AMPLITUDE = 0.01      # segments whose peak |sample| is below this are skipped
MAX_ATTEMPTS_PER_VARIANT = 3   # attempt budget per file = VARIANTS_PER_FILE * this
SEED = 42

# Seed the global RNGs so segment offsets are reproducible across runs.
# NOTE(review): audiomentations is expected to draw from these same global
# generators - confirm for the installed version.
random.seed(SEED)
np.random.seed(SEED)

# Preparation
os.makedirs(OUTPUT_DIR, exist_ok=True)
# Sorted so the processing order (and thus the RNG consumption order) does not
# depend on the filesystem's listing order.
files = sorted(f for f in os.listdir(INPUT_DIR) if f.endswith(".wav"))
saved_count = 0
mask_ratios = []

# Generation
for file in files:
    wav, sr = librosa.load(os.path.join(INPUT_DIR, file), sr=SR)

    # An empty recording cannot be tiled (division by zero below); skip it explicitly.
    if len(wav) == 0:
        print(f"⚠️ Пропущен пустой файл: {file}")
        continue

    # Repeat the recording if it is shorter than one segment
    if len(wav) < SEGMENT_SAMPLES:
        repeat = SEGMENT_SAMPLES // len(wav) + 1
        wav = np.tile(wav, repeat)

    basename = Path(file).stem
    attempts = 0
    generated = 0
    max_attempts = VARIANTS_PER_FILE * MAX_ATTEMPTS_PER_VARIANT

    while generated < VARIANTS_PER_FILE and attempts < max_attempts:
        attempts += 1
        # Random crop of exactly SEGMENT_SAMPLES samples (randint is inclusive on both ends).
        start = random.randint(0, len(wav) - SEGMENT_SAMPLES)
        segment = wav[start:start + SEGMENT_SAMPLES]

        # Skip segments that are too quiet
        if np.max(np.abs(segment)) < MIN_PEAK_AMPLITUDE:
            continue

        augmented = augment(samples=segment, sample_rate=SR)

        # Mask check: fraction of frames that are both loud enough and have
        # enough zero crossings.
        tensor = torch.tensor(augmented, dtype=torch.float32).unsqueeze(0)
        rms = sliding_rms(tensor, frame_size, hop_size)
        zcr = zero_crossing_rate(tensor, frame_size, hop_size)
        mask = ((rms > RMS_THRESHOLD) & (zcr > ZCR_THRESHOLD)).float()
        ratio = mask.sum().item() / mask.numel()

        if ratio < mask_threshold:
            continue

        # Files are numbered by accepted variant, so indices are contiguous per source file.
        out_path = os.path.join(OUTPUT_DIR, f"{basename}_aug{generated}.wav")
        sf.write(out_path, augmented, SR)
        generated += 1
        saved_count += 1
        mask_ratios.append(ratio)

avg_ratio = sum(mask_ratios) / len(mask_ratios) if mask_ratios else 0.0
# The reported threshold is derived from mask_threshold so the message stays correct if it is tuned.
print(f"✅ Сохранено {saved_count} файлов с mask_ratio ≥ {mask_threshold * 100:g}% в {OUTPUT_DIR}")
print(f"📊 Средний процент маски: {avg_ratio * 100:.2f}%")
