# Echo Cancellation

Maximilian Hebeis, Artur Jakubowski, Weronika Niezorawska, Julia Plichta

For echo cancellation, we wrote a class called `EchoCancelling`, which inherits from the `Buffering` class in `buffer.py`. `cancel_echo.py` adds two command-line parameters, `-x` and `-y`, which set the parameter _α_ needed for the echo cancellation and the delay between speaker and microphone (in seconds), respectively.

First we need to measure the delay between speaker and microphone. We could do it by simply measuring the distance between speaker and microphone manually. However, to get a more exact estimate, we wrote a little script doing the measuring automatically. This also helps in case we want to employ echo cancellation on different machines with different distances between speaker and microphone.

In [None]:
import sounddevice as sd
import soundfile as sf
import numpy as np
import pyaudio
from scipy.signal import correlate

def _peak_normalize(signal):
    """Scale *signal* so its largest absolute sample is 1; silence is returned unchanged."""
    peak = np.max(np.abs(signal))
    return signal / peak if peak > 0 else signal


def play_and_record(filename, duration, rate=44100, chunksize=1024):
    """Play an audio file while recording the microphone and estimate the echo delay.

    The file is played through sounddevice (non-blocking) while PyAudio
    records one mono float32 channel. The recording is written to
    "recorded.wav" and cross-correlated with the played signal; the
    position of the correlation peak gives the speaker-to-microphone delay.

    Args:
        filename: Path of the audio file to play. It is played back at
            ``rate`` regardless of the sample rate stored in the file.
        duration: Recording length in seconds.
        rate: Sample rate used for both playback and recording.
        chunksize: Number of frames read from the microphone per call.

    Returns:
        The estimated delay in seconds (also printed).

    Raises:
        ValueError: If the audio file is empty or ``duration`` is too short
            to record a single chunk.
    """
    # Load audio file
    audio_data, _ = sf.read(filename, dtype=np.float32)

    # Multi-channel files are returned as (frames, channels). The recording
    # is mono, so down-mix the reference before correlating with it.
    reference = audio_data.mean(axis=1) if audio_data.ndim > 1 else audio_data
    if reference.size == 0:
        raise ValueError(f"{filename!r} contains no audio frames")

    # Initialize PyAudio for recording; the nested try/finally blocks make
    # sure the stream and the PyAudio instance are released even on errors.
    p = pyaudio.PyAudio()
    try:
        stream_in = p.open(format=pyaudio.paFloat32,
                           channels=1,
                           rate=rate,
                           input=True,
                           frames_per_buffer=chunksize)
        try:
            print("Playing...")
            # Initialize SoundDevice for playback
            sd.play(audio_data, samplerate=rate, blocking=False)

            # Record simultaneously
            print("Recording...")
            frames = [stream_in.read(chunksize)
                      for _ in range(int(rate / chunksize * duration))]

            print("Finished playing and recording.")
        finally:
            # Stop and close the recording stream
            stream_in.stop_stream()
            stream_in.close()
    finally:
        # Terminate PyAudio
        p.terminate()

    recorded_data = np.frombuffer(b''.join(frames), dtype=np.float32)
    if recorded_data.size == 0:
        raise ValueError("duration is too short to record a single chunk")

    sf.write("recorded.wav", recorded_data, rate)

    reference = _peak_normalize(reference)
    recorded_data = _peak_normalize(recorded_data)

    correlation = correlate(recorded_data, reference, mode='full')

    print(f"{len(recorded_data)}, {len(reference)}, {len(correlation)}")

    # In 'full' mode the lags run from -(len(reference) - 1) up to
    # len(recorded_data) - 1, so zero lag sits at index len(reference) - 1
    # (not at the middle of the correlation array).
    zero_lag_index = len(reference) - 1

    # The microphone cannot hear the signal before it is played, so only
    # non-negative lags are searched for the peak.
    delay_index = zero_lag_index + int(np.argmax(correlation[zero_lag_index:]))

    print(delay_index)

    # Calculate delay in seconds
    delay_seconds = (delay_index - zero_lag_index) / rate

    print(f"Delay between speaker and microphone: {delay_seconds} seconds")

    return delay_seconds

# Example usage: play 'song.wav', record the microphone for about 5 seconds,
# and print the estimated speaker-to-microphone delay.
play_and_record('song.wav', duration=5)

![title](imgs/echo01.jpeg)
![title](imgs/echo02.jpeg)

The delay in our test case amounted to approximately 0.016 seconds, which could also be confirmed by manually checking the delay between the beginning of the played song and the subsequent recording of the playback.

Our main code overrides the internal `_record_io_and_play(self, indata, outdata, frames, time, status)` helper function, which InterCom uses as its handler method when the microphone is the input source. Our overriding function does the same thing as the one from `buffer.py`, but passes the input chunk that is sent to the peer InterCom instance through `cancel_out_echo(self, indata)` before sending it.

`cancel_out_echo(self, indata)` takes the current input chunk, subtracts from it the chunk that was played earlier (in our case, 0.016 seconds earlier) scaled by our _α_ value, and then returns the corrected input chunk.

In [None]:
#!/usr/bin/env python
# PYTHON_ARGCOMPLETE_OK

import logging

import numpy as np
import sounddevice as sd

import buffer
import minimal

# alpha is parsed as a float so that fractional attenuation factors (e.g. 0.5)
# can be given on the command line; whole numbers are still accepted.
minimal.parser.add_argument("-x", "--alpha", type=float, default=3, help="Alpha parameter for echo cancellation")
minimal.parser.add_argument("-y", "--delay", type=float, default=0.0165, help="Delay between speaker and microphone")


class EchoCancelling(buffer.Buffering):
    """Buffering InterCom variant that subtracts a scaled played chunk from the microphone input."""

    def cancel_out_echo(self, indata):
        """Subtract ``alpha`` times a buffered chunk from ``indata``.

        Args:
            indata: The recorded chunk, shaped
                ``(frames_per_chunk, NUMBER_OF_CHANNELS)``. It is modified
                in place.

        Returns:
            ``indata`` after the subtraction.
        """
        args = minimal.args

        # Speaker-to-microphone delay expressed in whole chunks (truncated).
        chunk_delay = int((args.frames_per_second * args.delay) / args.frames_per_chunk)

        # NOTE(review): this index looks chunk_delay slots *ahead* of
        # played_chunk_number. Whether that slot holds the chunk emitted
        # `delay` seconds ago depends on how buffer.Buffering advances
        # played_chunk_number -- confirm against buffer.py.
        slot = (self.played_chunk_number + chunk_delay) % self.cells_in_buffer

        # Buffered chunks were observed to be flat (e.g. (2048,) instead of
        # (1024, 2)), so restore the (frames, channels) layout first.
        echo = self._buffer[slot].reshape(args.frames_per_chunk, self.NUMBER_OF_CHANNELS)

        # Do the arithmetic in floating point: this supports a fractional
        # alpha and avoids silent wrap-around on integer sample buffers.
        corrected = indata.astype(np.float64) - args.alpha * echo.astype(np.float64)
        if np.issubdtype(indata.dtype, np.integer):
            limits = np.iinfo(indata.dtype)
            # Saturate at the sample range instead of overflowing.
            corrected = np.clip(np.rint(corrected), limits.min, limits.max)

        indata[...] = corrected.astype(indata.dtype)

        return indata

    def _record_io_and_play(self, indata, outdata, frames, time, status):
        """Send the echo-corrected input chunk, then play the next buffered chunk.

        The parameters follow the handler signature used by the parent
        class; ``frames``, ``time`` and ``status`` are not used here.
        """
        self.chunk_number = (self.chunk_number + 1) % self.CHUNK_NUMBERS
        echo_corr_indata = self.cancel_out_echo(indata)
        packed_chunk = self.pack(self.chunk_number, echo_corr_indata)
        self.send(packed_chunk)
        chunk = self.unbuffer_next_chunk()
        self.play_chunk(outdata, chunk)

# argcomplete is optional: it only adds <tab> completion to the argparse CLI.
try:
    import argcomplete
except ImportError:
    logging.warning("Unable to import argcomplete (optional)")

if __name__ == "__main__":
    cli = minimal.parser
    cli.description = __doc__

    # If the optional import above failed, the resulting NameError is caught
    # here as well, so the script keeps going without completion.
    try:
        argcomplete.autocomplete(cli)
    except Exception:
        logging.warning("argcomplete not working :-/")

    # Only the recognised options are kept; unknown ones are ignored.
    minimal.args, _ = cli.parse_known_args()

    if minimal.args.list_devices:
        print("Available devices:")
        print(sd.query_devices())
        quit()

    intercom = EchoCancelling()

    try:
        intercom.run()
    except KeyboardInterrupt:
        # The text is passed positionally, i.e. as ArgumentParser.exit()'s
        # `status` argument.
        cli.exit("\nSIGINT received")
    finally:
        # Runs on normal termination and on Ctrl-C alike.
        intercom.print_final_averages()