# LPCNet
[![Generic badge](https://img.shields.io/badge/GitHub-LPCNet-9cf.svg)][github]
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)][notebook]  
LPCNet inference demo  

[github]:https://github.com/tarepan/LPCNet
[notebook]:https://colab.research.google.com/github/tarepan/LPCNet/blob/master/LPCNet.ipynb

## Setup

In [None]:
!apt install autoconf automake libtool
!git clone https://github.com/tarepan/LPCNet.git
%cd LPCNet

## Inference

### Setup

Build

In [None]:
# Step 1 - Env
# Set the compiler flags as an environment variable so the build commands below inherit them
# (presumably picked up by `./configure`/`make`, as is usual for autotools builds - confirm in the repo).
%env CFLAGS=-Ofast -g -march=native
# Echo the variable back to check that shell commands can see it.
!echo $CFLAGS

# Step 2 - Build
!./autogen.sh    # Latest model download & `autoreconf`
!./configure     # Run the generated configure script
# Compile; the demo cells below run `./lpcnet_demo`, which is expected to be produced here.
!make

Input preparation (wav file => pcm blob file)

In [None]:
import librosa
import numpy as np


# ========= Change this wave path =========
# Input audio file for the demo; it is loaded as 16 kHz mono further down in this cell.
p = "../test_02.wav"
# =========================================


def clamp_int16(x: int) -> int:
  """Saturate `x` to the signed 16-bit integer range.

  `numpy.short` is a 16-bit signed integer, i.e. -32768 <= x <= 32767.
  A value below/above that range is replaced by the nearest bound;
  a value inside the range is returned unchanged.
  """
  if x < -32768:
    return -32768
  if x > 32767:
    return 32767
  return x

def audio_sample_fp32_to_int16(sample: float) -> int:
  """Quantize one float sample: [-1, 1) to [-2^15, 2^15).

  The sample is scaled by 2^15, rounded to an integer, and then saturated,
  so a scaled value outside the int16 range (e.g. `sample == 1.0`) becomes the nearest bound.
  """
  scaled = 2**15 * sample
  quantized = round(scaled)
  return clamp_int16(quantized)

def audio_fp32_to_int16(audio):
  """Convert a float waveform to 16-bit signed PCM samples.

  Every sample is scaled by 2^15, rounded to the nearest integer (ties go to the even
  value, like Python's `round`), and saturated to [-32768, 32767]. The work is done with
  array operations instead of a per-sample Python loop, which matters for long recordings.

  Args:
    audio: Array-like of float samples, nominally within [-1, 1).

  Returns:
    `numpy.ndarray` of dtype `int16` with the same shape as `audio`.

  Raises:
    ValueError: If `audio` contains NaN or infinite samples, which cannot be quantized.
  """
  # Scaling by a power of two is exact, so widening to float64 first does not change any value.
  scaled = np.asarray(audio, dtype=np.float64) * 2**15
  if not np.all(np.isfinite(scaled)):
    raise ValueError("audio contains NaN or infinite samples")
  # Saturate before the cast so out-of-range values cannot wrap around in int16.
  return np.clip(np.round(scaled), -32768, 32767).astype(np.int16)


# `input.pcm` should be 16bit/16kHz PCM
# Load the file at a 16 kHz sample rate, down-mixed to a single channel.
audio_float, sr = librosa.load(p, sr=16000, mono=True)
# Quantize the float samples to int16.
audio_pcm = audio_fp32_to_int16(audio_float)
# Quick visual sanity check of the converted samples.
print(audio_pcm)
# Write the samples as a raw binary dump (no header) for `lpcnet_demo` to read.
audio_pcm.tofile("./input.pcm")

### Demo - Speech Compression
wave -> (compression) -> codes -> (decompression) -> wave

In [None]:
# Round-trip the prepared audio through the codec with the `lpcnet_demo` binary in the current directory.

# Encode `input.pcm` (16bit/16kHz PCM, machine endian)
#   to `compressed.bin` (8 bytes per 40-ms packet, raw, no header)
!./lpcnet_demo -encode input.pcm compressed.bin

# Decode `compressed.bin` to `output.pcm` (16bit/16kHz PCM)
!./lpcnet_demo -decode compressed.bin output.pcm


from IPython.display import Audio, display

# Read both raw int16 streams back (original first, then the decoded result).
i, o = (
  np.fromfile(pcm_path, dtype=np.int16)
  for pcm_path in ("./input.pcm", "./output.pcm")
)

# Show one audio player per stream so the two can be compared by ear.
for caption, samples in (("Before:", i), ("After:", o)):
  print(caption)
  display(Audio(samples, rate=16000))

### Demo - Speech Synthesis
wave -> (analysis) -> uncompressed_feature -> (synthesis) -> wave

In [None]:
import time


# Analysis: extract uncompressed features from `input.pcm` into `uncompressed.bin`
!./lpcnet_demo -features  input.pcm uncompressed.bin

# Synthesis
# Only the synthesis command is timed (wall clock); the measurement includes launching the process.
t_start = time.perf_counter()
!./lpcnet_demo -synthesis uncompressed.bin output_resynth.pcm
t_end = time.perf_counter()
# Elapsed synthesis time in seconds, used for the RTF report at the end of this cell.
t_sec = t_end - t_start


from IPython.display import Audio, display

# Read both raw int16 streams back (original first, then the re-synthesized result).
i, o = (
  np.fromfile(pcm_path, dtype=np.int16)
  for pcm_path in ("./input.pcm", "./output_resynth.pcm")
)

# Show one audio player per stream so the two can be compared by ear.
for caption, samples in (("Before:", i), ("After:", o)):
  print(caption)
  display(Audio(samples, rate=16000))


# Real-time factor (RTF) = synthesis time / duration of the synthesized audio.
sr=16000
audio_length_sec = o.shape[0] / sr
print(f"time: {round(t_sec, 2)} sec for {round(audio_length_sec, 2)} sec audio")
if audio_length_sec > 0:
  print(f"RTF: {round(t_sec/audio_length_sec, 2)}")
else:
  # An empty `output_resynth.pcm` would otherwise end the cell with ZeroDivisionError.
  print("RTF: n/a (synthesized audio is empty)")

## Appendix - `clamp_int16` tests

In [None]:
def clamp_int16(x: int) -> int:
  """Saturate `x` to the signed 16-bit range [-32768, 32767].

  Re-declared in this cell (same logic as the helper in the input-preparation cell),
  so the checks below exercise this copy.
  """
  if x < -32768:
    return -32768
  if x > 32767:
    return 32767
  return x

# Boundary checks around both ends of the int16 range, written as (input, expected) pairs.
print("test start...")
for given, expected in (
  (-32769, -32768),
  (-32768, -32768),
  (-32767, -32767),
  (     0,      0),
  (     1,      1),
  ( 32766,  32766),
  ( 32767,  32767),
  ( 32768,  32767),
):
  assert clamp_int16(given) == expected, "clamp_int16 work wrong"
print("test finished.")