In [15]:
import numpy as np
import pandas as pd
import io
import librosa
from IPython.display import Audio
from pydub import AudioSegment
import wave
import pyaudio
import IPython.display as ipd
import pyarrow.parquet as pq
import soundfile as sf
import os

## 1# Enhance the power of the signal over the noise

In [16]:
def preemphasis(signal, preemphasis_coeff=0.97):
    """Apply a first-order pre-emphasis filter to a signal.

    Computes ``y[0] = x[0]`` and ``y[n] = x[n] - preemphasis_coeff * x[n - 1]``
    for every ``n >= 1``.

    Parameters
    ----------
    signal : array-like
        Input samples (intended for a 1-D sequence). Lists and tuples are
        accepted and converted to a NumPy array.
    preemphasis_coeff : float, default 0.97
        Weight of the previous sample subtracted from the current sample.

    Returns
    -------
    numpy.ndarray
        Filtered signal with as many samples as the input. An empty input
        yields an empty array.
    """
    # Coerce so that plain Python sequences support the vectorised subtraction.
    signal = np.asarray(signal)
    # An empty signal has no first sample; signal[0] would raise IndexError.
    if signal.size == 0:
        return signal.copy()
    return np.append(signal[0], signal[1:] - preemphasis_coeff * signal[:-1])


## 2# Cut silence at the beginning and at the end only

In [17]:
import librosa
import numpy as np

# merge short pauses with voiced segments
def merge_segments(intervals, min_silence_duration=0.5, sample_rate=None):
    """Merge consecutive intervals separated by a short gap.

    Walks ``intervals`` in order. Whenever the gap between the end of the
    last kept interval and the start of the next one, converted to seconds
    via ``sample_rate``, is shorter than ``min_silence_duration``, the two
    are fused into a single interval.

    Parameters
    ----------
    intervals : iterable of (start, end)
        Interval bounds expressed in samples, in ascending order.
    min_silence_duration : float, default 0.5
        Gaps strictly shorter than this many seconds are merged away.
    sample_rate : number
        Samples per second, used to convert a gap in samples to seconds.
        Required as soon as two or more intervals have to be compared.

    Returns
    -------
    list of tuple
        The merged ``(start, end)`` pairs.

    Raises
    ------
    TypeError
        If a gap has to be evaluated but ``sample_rate`` was left as ``None``.
    """
    merged = []
    for start, end in intervals:
        if merged:
            if sample_rate is None:
                # Fail with an explicit message instead of the opaque
                # "unsupported operand" error the division would produce.
                raise TypeError(
                    "merge_segments() requires sample_rate to convert gaps "
                    "between intervals from samples to seconds"
                )
            last_start, last_end = merged[-1]
            gap_seconds = (start - last_end) / sample_rate
            if gap_seconds < min_silence_duration:
                # Short pause: extend the previous interval over the gap.
                merged[-1] = (last_start, end)
                continue
        merged.append((start, end))
    return merged

def remove_silence(audio, sample_rate, top_db=30, min_silence_duration=0.5):
    """Drop the parts of ``audio`` that fall outside the detected voiced intervals.

    Non-silent intervals are obtained from ``librosa.effects.split``, intervals
    separated by a gap shorter than ``min_silence_duration`` seconds are fused
    by ``merge_segments``, and the resulting slices of ``audio`` are
    concatenated. Everything outside those merged intervals is discarded.

    Parameters
    ----------
    audio : numpy.ndarray
        Audio samples to process.
    sample_rate : number
        Samples per second, used to express gaps between intervals in seconds.
    top_db : number, default 30
        Threshold forwarded to ``librosa.effects.split``.
    min_silence_duration : float, default 0.5
        Gaps shorter than this many seconds are kept as part of the
        surrounding voiced segment.

    Returns
    -------
    numpy.ndarray
        The concatenated voiced samples as ``float32``. An empty ``float32``
        array is returned when no non-silent interval is found.
    """
    non_silent_intervals = librosa.effects.split(audio, top_db=top_db)

    merged_intervals = merge_segments(non_silent_intervals, min_silence_duration, sample_rate)

    # Extract voiced segments
    segments = [audio[start:end] for start, end in merged_intervals]

    # np.concatenate raises ValueError on an empty list, so return an empty
    # clip explicitly when nothing was detected.
    if not segments:
        return np.zeros(0, dtype=np.float32)

    return np.concatenate(segments).astype(np.float32)


### 3# Removing the echo noise in the background using the spectral subtraction technique

In [18]:
def spectral_subtraction(audio_data, alpha=2.0):
    """Attenuate background noise by subtracting a noise estimate in the STFT domain.

    The magnitude spectrogram of ``audio_data`` is reduced by ``alpha`` times
    its median taken along axis 1, negative results are clamped to zero, and
    the signal is rebuilt using the original phase.

    Parameters
    ----------
    audio_data : numpy.ndarray
        Time-domain samples passed to ``librosa.stft``.
    alpha : float, default 2.0
        Multiplier applied to the noise estimate before subtraction.

    Returns
    -------
    numpy.ndarray
        Time-domain signal produced by ``librosa.istft``.
    """
    spectrum = librosa.stft(audio_data)
    mag, angle = np.abs(spectrum), np.angle(spectrum)

    # Noise estimate: median magnitude along axis 1, kept as a broadcastable
    # column. Assumes a mono signal, for which axis 1 should be the frame
    # axis of the STFT - TODO confirm against the caller.
    noise_floor = np.median(mag, axis=1, keepdims=True)

    # Subtract the scaled estimate and clamp at zero so magnitudes stay valid.
    denoised_mag = np.maximum(mag - alpha * noise_floor, 0)

    # Re-attach the original phase and go back to the time domain.
    # NOTE(review): istft is called without `length`, so the result may not
    # have exactly as many samples as `audio_data` - confirm this is acceptable.
    return librosa.istft(denoised_mag * np.exp(1j * angle))

In [19]:
from scipy.io.wavfile import write

def preprocessing(audio_data, sr, row_number, output_dir='training_wav/'):
    """Clean one audio clip and save it as a WAV file.

    Runs, in order, ``spectral_subtraction``, ``preemphasis`` and
    ``remove_silence`` on ``audio_data``, then writes the result to
    ``<output_dir>/sample_<row_number>.wav``.

    Parameters
    ----------
    audio_data : numpy.ndarray
        Raw time-domain samples of the clip.
    sr : int
        Sample rate, forwarded to ``remove_silence`` and used as the WAV rate.
    row_number : int or str
        Identifier embedded in the output file name.
    output_dir : str, default 'training_wav/'
        Directory receiving the WAV file; created if it does not exist.

    Returns
    -------
    numpy.ndarray
        The cleaned audio that was written to disk.
    """
    cleaned_audio = spectral_subtraction(audio_data)
    cleaned_audio = preemphasis(cleaned_audio)
    cleaned_audio = remove_silence(cleaned_audio, sr)

    # Writing fails with FileNotFoundError if the target directory is missing,
    # so make sure it exists first.
    os.makedirs(output_dir, exist_ok=True)
    output_file = os.path.join(output_dir, f"sample_{row_number}.wav")
    write(output_file, sr, cleaned_audio)
    return cleaned_audio

In [None]:
################################

# Write the file out once you have finished collecting the training data

In [22]:
# Save the collected entries as JSON Lines: one JSON document per line,
# with non-ASCII characters written as-is (ensure_ascii=False).
# `json` and `json_data` are expected to come from earlier cells.
with open('training.json', 'w', encoding='utf-8') as out_file:
    for record in json_data:
        out_file.write(json.dumps(record, ensure_ascii=False) + '\n')