In [2]:
# Manipulate wav files
import os
import sys
import numpy as np
from scipy.io import wavfile
import matplotlib.pyplot as plt
import pyaudio


# Folder that is scanned for .wav files, and (presumably) the folder modified
# files are written to.
# NOTE(review): both names are reassigned in a later cell ("./raw" and
# "./raw_modified"), so these values only apply until that cell runs.
PATH = "./llhf/phonemes-whc"
WRITE_PATH = "./phonemes-whc_modified"

In [3]:
class WAV:
    """A WAV clip held in memory, with plotting, playback and editing helpers.

    The samples returned by ``scipy.io.wavfile.read`` are stored as ``float32``
    in ``self.data`` (1-D for a mono file, ``(frames, channels)`` otherwise).
    ``write`` and ``play`` convert the samples to 16-bit integers.

    NOTE(review): samples are not rescaled on load, so the class presumably
    expects 16-bit PCM input -- confirm before feeding it 32-bit or float files.

    Attributes:
        path: Path the clip was loaded from.
        rate: Sample rate in Hz.
        data: Samples as a ``float32`` array; replaced by every editing method.
        name: File name component of ``path``.
        duration: Clip length in seconds (computed once, at load time).
        duration_ms: Clip length in milliseconds (computed once, at load time).
    """

    def __init__(self, path):
        """Load the WAV file at ``path``."""
        self.path = path
        self.rate, self.data = wavfile.read(path)
        self.data = self.data.astype(np.float32)
        self.name = os.path.basename(path)
        self.duration = len(self.data) / self.rate
        self.duration_ms = self.duration * 1000

    # ------------------------------------------------------------------ helpers

    @staticmethod
    def _as_int16(data):
        """Return ``data`` as int16, saturating instead of wrapping around."""
        limits = np.iinfo(np.int16)
        return np.clip(data, limits.min, limits.max).astype(np.int16)

    def _mixdown(self):
        """Return a 1-D signal: the per-frame channel mean, or the data itself if mono."""
        if self.data.ndim == 1:
            return self.data
        return np.mean(self.data, axis=1)

    def _smoothing_coeff(self, seconds):
        """Return the one-pole smoothing coefficient for a time constant in seconds."""
        # A non-positive time constant means "follow the signal instantly".
        if seconds <= 0:
            return 0.0
        return float(np.exp(-1.0 / (seconds * self.rate)))

    def _draw_waveform(self, ax, data, title):
        """Draw ``data`` on ``ax`` with a millisecond x-axis ticked every 50 ms."""
        ax.plot(data)
        ax.set_title(title)
        ax.set_ylabel("Amplitude")
        ax.set_xlabel("Time (ms)")
        # Positions are derived from the labels so both always have the same
        # length, whatever the sample rate is.
        tick_ms = np.arange(0, len(data) / self.rate * 1000, 50)
        ax.set_xticks(tick_ms * self.rate / 1000)
        ax.set_xticklabels([f"{ms:g}" for ms in tick_ms])
        # legend if multiple channels (L, R)
        if data.ndim > 1:
            ax.legend(["L", "R"])

    # ----------------------------------------------------------------- plotting

    def plot(self, title: str = ""):
        """Plot the clip; the file name is used as title when ``title`` is empty."""
        self._draw_waveform(plt.gca(), self.data, title if title != "" else self.name)
        plt.show()

    def _plot_side_by_side(self, original_data, transformed_data):
        """Show ``original_data`` and ``transformed_data`` in two adjacent panels."""
        fig, (ax_before, ax_after) = plt.subplots(1, 2, figsize=(12, 6))
        self._draw_waveform(ax_before, original_data, f"Before - {self.name}")
        self._draw_waveform(ax_after, transformed_data, f"After - {self.name}")
        fig.tight_layout()
        plt.show()

    # ---------------------------------------------------------------------- I/O

    def write(self, path, name=None):
        """Save the clip as 16-bit PCM inside the directory ``path``.

        Args:
            path: Target directory; created if it does not exist.
            name: File name without extension (".wav" is appended). When
                omitted, the original file name is reused.
        """
        os.makedirs(path, exist_ok=True)
        filename = f"{name}.wav" if name else self.name
        wavfile.write(os.path.join(path, filename), self.rate, self._as_int16(self.data))

    def play(self):
        """Play the clip on the default output device (blocks until finished)."""
        channels = 1 if self.data.ndim == 1 else self.data.shape[1]
        # tobytes() emits C order, i.e. interleaved frames for multi-channel data.
        frames = self._as_int16(self.data).tobytes()
        p = pyaudio.PyAudio()
        try:
            stream = p.open(
                format=pyaudio.paInt16,
                channels=channels,
                rate=self.rate,
                output=True,
            )
            try:
                stream.write(frames)
                stream.stop_stream()
            finally:
                stream.close()
        finally:
            # Release the audio backend even if opening or writing failed.
            p.terminate()

    # ------------------------------------------------------------------ editing

    def amplify(self, factor: float, preview=False):
        """Multiply every sample by ``factor``; optionally plot before/after."""
        res = self.data * factor
        if preview:
            self._plot_side_by_side(self.data, res)
        self.data = res

    def mono_left(self, preview=False):
        """Mix down to one signal and put it in the left channel (right is silent)."""
        tmp = self._mixdown()
        res = np.column_stack((tmp, np.zeros_like(tmp)))
        if preview:
            self._plot_side_by_side(self.data, res)
        self.data = res

    def mono_right(self, preview=False):
        """Mix down to one signal and put it in the right channel (left is silent)."""
        tmp = self._mixdown()
        res = np.column_stack((np.zeros_like(tmp), tmp))
        if preview:
            self._plot_side_by_side(self.data, res)
        self.data = res

    def mono(self, preview=False):
        """Mix down to one signal and copy it to both channels."""
        tmp = self._mixdown()
        res = np.column_stack((tmp, tmp))
        if preview:
            self._plot_side_by_side(self.data, res)
        self.data = res

    def compress(self, threshold_dB, ratio, attack_time, release_time, makeup_gain_dB, preview=False):
        """Apply downward dynamic-range compression to the clip.

        An envelope follower tracks the signal level. While the envelope is
        above the threshold, the excess is divided by ``ratio``; samples below
        the threshold pass through unchanged.

        Args:
            threshold_dB: Threshold in dB relative to a sample amplitude of 1.0
                (``threshold = 10 ** (threshold_dB / 20)`` in sample units).
            ratio: Compression ratio; 1 leaves the signal untouched, larger
                values compress harder. Must be positive.
            attack_time: Envelope attack time constant in seconds (0 = instant).
            release_time: Envelope release time constant in seconds (0 = instant).
            makeup_gain_dB: Gain in dB applied after compression.
            preview: When true, plot the clip before and after processing.

        Raises:
            ValueError: If ``ratio`` is not positive.
        """
        if ratio <= 0:
            raise ValueError("ratio must be positive")

        threshold = 10 ** (threshold_dB / 20)
        makeup_gain = 10 ** (makeup_gain_dB / 20)
        attack_coeff = self._smoothing_coeff(attack_time)
        release_coeff = self._smoothing_coeff(release_time)

        # One level per frame. For multi-channel data the loudest channel drives
        # the envelope so every channel of a frame receives the same gain.
        levels = np.abs(self.data)
        if levels.ndim > 1:
            levels = levels.max(axis=1)

        gains = np.ones(len(levels), dtype=np.float64)
        envelope = 0.0
        for i, level in enumerate(levels.tolist()):
            coeff = attack_coeff if level > envelope else release_coeff
            envelope = coeff * (envelope - level) + level
            # envelope > threshold > 0 here, so the division is always safe.
            if envelope > threshold:
                gains[i] = (threshold + (envelope - threshold) / ratio) / envelope

        if self.data.ndim > 1:
            gains = gains[:, np.newaxis]
        res = (self.data * gains * makeup_gain).astype(self.data.dtype)

        if preview:
            self._plot_side_by_side(self.data, res)

        self.data = res
        


In [4]:
# WAV modification functions
    
def gaussian_envelope(data, rate, f0, sigma):
    """Scale ``data`` by a Gaussian window over time.

    Args:
        data: Samples, 1-D or ``(frames, channels)``.
        rate: Sample rate in Hz; sample ``i`` is taken to occur at ``i / rate`` seconds.
        f0: Time in seconds at which the window peaks (value 1.0).
        sigma: Standard deviation of the window, in seconds.

    Returns:
        Array with the same shape as ``data``, each frame multiplied by
        ``exp(-(t - f0)**2 / (2 * sigma**2))``.
    """
    data = np.asarray(data)
    # arange/rate puts sample i at exactly i/rate; linspace would stretch the
    # axis so that the last sample lands on len(data)/rate instead.
    t = np.arange(len(data)) / rate
    envelope = np.exp(-((t - f0) ** 2) / (2 * sigma ** 2))
    if data.ndim > 1:
        # Add trailing axes so the window broadcasts across channels.
        envelope = envelope.reshape((-1,) + (1,) * (data.ndim - 1))
    return data * envelope


In [13]:
# Input and output folders for this run; these override the PATH / WRITE_PATH
# values assigned earlier in the notebook.
PATH = "./raw"
WRITE_PATH = "./raw_modified"

# os.listdir() returns entries in arbitrary order, so sort to make lst[0]
# (and therefore the loaded clip) the same on every run.
lst = sorted(f for f in os.listdir(PATH) if f.endswith(".wav"))
if not lst:
    raise FileNotFoundError(f"No .wav files found in {PATH!r}")

wav = WAV(os.path.join(PATH, lst[0]))

# Amplify
