# Въведение в линейното предсказване и речевите сигнали

#### Import required libraries

In [None]:
import numpy as np
import scipy.io.wavfile as wavfile
from scipy.linalg import toeplitz
import matplotlib.pyplot as plt
from IPython.display import Audio

#### Load and Visualize a Speech Signal

##### **Step 1. Load the Speech Signal**
Load the .wav file using scipy.io.wavfile.read(). If the signal is stereo, it's converted to mono by averaging the channels. <br>
***Tip:*** To open a WAV file you need to access its directory correctly: `speech_samples/FILE_NAME.wav`

##### **Step 2. Analyze Signal Properties**
Calculate the duration of the audio signal and print out key properties such as the sampling rate and the total number of samples. This information provides insights into the characteristics of the audio.

##### **Step 3. Plot the Waveform**
Create a time array corresponding to the signal samples to visualize amplitude variations over time. The waveform visually represents the audio characteristics.

##### **Step 4. Playback**
Playback the speech signal directly within the Jupyter notebook using the `IPython.display.Audio` class, allowing for listening to the speech without needing external players.

In [None]:
# Load the speech signal (wavfile.read returns the sampling rate in Hz and
# the samples as a NumPy array; multi-channel files come back 2-D)
file_path = 'speech_samples/speech_test_01.wav'  # Ensure the path is correct
sampling_rate, signal = wavfile.read(file_path)

# Convert stereo to mono if necessary by averaging the channels
if signal.ndim > 1:
    signal = np.mean(signal, axis=1)

# Analyze signal properties
num_samples = len(signal)
duration = num_samples / sampling_rate
print(f'Sampling Rate: {sampling_rate} Hz')
print(f'Total Samples: {num_samples}')
print(f'Duration: {duration:.2f} seconds')

# Plot the waveform.
# Sample n is taken at t = n / sampling_rate, so build the axis from the
# sample index. (np.linspace(0, duration, N) would include the endpoint and
# stretch the spacing to duration / (N - 1) instead of 1 / sampling_rate.)
time = np.arange(num_samples) / sampling_rate
plt.figure(figsize=(12, 4))
plt.plot(time, signal)
plt.title('Speech Signal Waveform')
plt.xlabel('Time (seconds)')
plt.ylabel('Amplitude')
plt.grid()
plt.show()

# Playback the signal (must stay the last expression so the notebook
# renders the audio widget)
Audio(data=signal, rate=sampling_rate)

#### Compare LPC and FFT Spectra

##### **Step 1. Load the Speech Signal**
Load the .wav file using scipy.io.wavfile.read(). If the signal is stereo, it's converted to mono by averaging the channels. <br>
***Tip:*** To open a WAV file you need to access its directory correctly: `speech_samples/FILE_NAME.wav`

##### **Step 2. Select a Frame**
Select a small frame of 256 samples from the speech signal for analysis. This frame represents a short segment of the audio to focus on.

##### **Step 3. FFT Spectrum Calculation**
Compute the FFT of the selected frame using `np.fft.fft()`. Calculate the magnitude of the FFT and keep the first `frame_size // 2` = 128 frequency bins, which correspond to the non-negative half of the spectrum (0 Hz up to just below the Nyquist frequency).

##### **Step 4. LPC Spectrum Calculation**
Using the autocorrelation method, compute the LPC coefficients. Solve a linear system to derive these coefficients. Calculate the frequency response of the LPC model over the same frequency range as the FFT and compute the magnitude.

##### **Step 5. Plot**
Plot the FFT and LPC spectra in two stacked subplots of the same figure for visual comparison, allowing for an easy assessment of the differences and similarities between the two methods.

##### **Step 6. Playback**
Playback the selected speech signal segment directly within the Jupyter notebook using the `IPython.display.Audio` class.

In [None]:
# Load the speech signal
file_path = 'speech_samples/speech_test_02.wav'  # Ensure the path is correct
sampling_rate, signal = wavfile.read(file_path)

# Convert stereo to mono if necessary by averaging the channels
if signal.ndim > 1:
    signal = np.mean(signal, axis=1)

# Select a frame for analysis
frame_start = 1000  # Starting index for the frame
frame_size = 256    # Number of samples in the frame
lpc_order = 12      # Number of predictor coefficients a_1..a_p

# Cast to float64 before any arithmetic: integer PCM data (e.g. int16)
# keeps its dtype through np.correlate, so the autocorrelation sums would
# silently overflow.
frame = signal[frame_start:frame_start + frame_size].astype(np.float64)
if len(frame) <= lpc_order:
    raise ValueError(
        f'Frame has {len(frame)} samples; need more than {lpc_order} '
        'for LPC analysis. Check frame_start against the signal length.'
    )

# FFT Spectrum Calculation
fft_spectrum = np.fft.fft(frame, n=frame_size)
fft_magnitude = np.abs(fft_spectrum)[:frame_size // 2]  # Non-negative half of the spectrum
frequencies = np.fft.fftfreq(frame_size, d=1/sampling_rate)[:frame_size // 2]

# LPC Spectrum Calculation
# Auto-correlation: in 'full' mode the zero-lag term sits at index len(frame) - 1
r = np.correlate(frame, frame, mode='full')
zero_lag = len(frame) - 1
r = r[zero_lag:zero_lag + lpc_order + 1]  # Lags 0..lpc_order

# Solve the Toeplitz normal equations R a = -r directly.
# a[0] is a_1, ..., a[lpc_order - 1] is a_p (there is no leading 1 in `a`).
a = np.linalg.solve(toeplitz(r[:-1]), -r[1:])

# Calculate LPC frequency response H(w) = 1 / (1 + sum_{k=1..p} a_k e^{-jkw}).
# Coefficient a_k lives at a[k - 1], so lag k must be paired with a[k - 1];
# the outer product builds every e^{-jkw} term at once.
w = np.linspace(0, np.pi, 1024)
lags = np.arange(1, lpc_order + 1)
H_lpc = 1 / (1 + np.exp(-1j * np.outer(w, lags)) @ a)

# Plot both spectra
plt.figure(figsize=(12, 6))

# Plot FFT Spectrum
plt.subplot(2, 1, 1)
plt.plot(frequencies, 20 * np.log10(fft_magnitude), label='FFT Spectrum')
plt.title('FFT Spectrum')
plt.xlabel('Frequency (Hz)')
plt.ylabel('Magnitude (dB)')
plt.grid()
plt.legend()

# Plot LPC Spectrum (w = 0..pi maps linearly onto 0..sampling_rate / 2)
plt.subplot(2, 1, 2)
lpc_frequencies = np.linspace(0, sampling_rate / 2, len(H_lpc))
plt.plot(lpc_frequencies, 20 * np.log10(np.abs(H_lpc)), label='LPC Spectrum', color='orange')
plt.title('LPC Spectrum')
plt.xlabel('Frequency (Hz)')
plt.ylabel('Magnitude (dB)')
plt.grid()
plt.legend()

plt.tight_layout()
plt.show()

# Playback the selected frame (must stay the last expression so the
# notebook renders the audio widget)
Audio(data=frame, rate=sampling_rate)

#### Formant Extraction Using LPC Coefficients

##### **Step 1. Load the Speech Signal**
We load the .wav file using scipy.io.wavfile.read(). If the signal is stereo, it's converted to mono by averaging the channels.<br>
***Tip:*** To open a WAV file you need to access its directory correctly: `speech_samples/FILE_NAME.wav`

##### **Step 2. Select a Frame for Analysis**
A specific frame of 256 samples is extracted from the speech signal, starting at index 1000. This frame will be analyzed for LPC coefficients.

##### **Step 3. Compute LPC Coefficients**
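The autocorrelation function of the selected frame is computed. The LPC coefficients are then obtained by solving the resulting Toeplitz system of normal equations built from the autocorrelation values. The code solves it directly with `np.linalg.solve`; the Levinson-Durbin recursion is the classic, more efficient way to solve the same system.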

##### **Step 4. Calculate Roots of the LPC Polynomial**
The roots of the LPC polynomial are computed using the `np.roots()` function. These roots are the poles of the all-pole vocal tract model; complex-conjugate pole pairs correspond to its resonances, i.e. the formants.

##### **Step 5. Calculate Frequencies and Damping**
The frequencies of the formants are determined by calculating the angle of the roots and converting it to Hertz. The damping factors are derived from the absolute values of the roots.

##### **Step 6. Filter Non-Positive Frequencies**
Any non-positive formant frequencies are filtered out to ensure that only valid frequencies are retained for analysis.

##### **Step 7. Plot Formants in the Complex Plane**
The valid formant frequencies are visualized in the complex plane using a scatter plot. The real and imaginary parts of the roots are plotted to show their distribution.

##### **Step 8. Playback**
The selected frame of the speech signal is played back in the Jupyter Notebook for auditory confirmation.

In [None]:
# Load the speech signal
file_path = 'speech_samples/speech_test_02.wav'  # Ensure the path is correct
sampling_rate, signal = wavfile.read(file_path)

# Convert stereo to mono if necessary by averaging the channels
if signal.ndim > 1:
    signal = np.mean(signal, axis=1)

# Select a frame for analysis
frame_start = 1000  # Starting index for the frame
frame_size = 256    # Number of samples in the frame
lpc_order = 12      # Number of predictor coefficients a_1..a_p

# Cast to float64 before any arithmetic: integer PCM data (e.g. int16)
# keeps its dtype through np.correlate, so the autocorrelation sums would
# silently overflow.
frame = signal[frame_start:frame_start + frame_size].astype(np.float64)
if len(frame) <= lpc_order:
    raise ValueError(
        f'Frame has {len(frame)} samples; need more than {lpc_order} '
        'for LPC analysis. Check frame_start against the signal length.'
    )

# Compute LPC coefficients
# Auto-correlation: in 'full' mode the zero-lag term sits at index len(frame) - 1
r = np.correlate(frame, frame, mode='full')
zero_lag = len(frame) - 1
r = r[zero_lag:zero_lag + lpc_order + 1]  # Lags 0..lpc_order

# Solve the Toeplitz normal equations R a = -r directly
a = np.linalg.solve(toeplitz(r[:-1]), -r[1:])

# Compute the LPC polynomial roots; `a` has no leading coefficient, so the
# 1 of A(z) = 1 + a_1 z^-1 + ... + a_p z^-p is prepended here
roots = np.roots(np.hstack(([1], a)))

# Calculate the frequencies and damping of the formants
frequencies = np.angle(roots) * (sampling_rate / (2 * np.pi))  # Root angle converted to Hz
dampings = np.abs(roots)  # Root magnitude (pole radius)

# Keep only roots with a positive frequency: this drops real roots at angle 0
# and the negative-frequency half of each complex-conjugate pair
valid_indices = frequencies > 0

# Sort by ascending frequency so the printed list reads lowest formant first
ascending = np.argsort(frequencies[valid_indices])
formant_frequencies = frequencies[valid_indices][ascending]
formant_dampings = dampings[valid_indices][ascending]
formant_roots = roots[valid_indices][ascending]

# Display formant frequencies
print("Formant Frequencies (Hz):", formant_frequencies)

# Plot formants in the complex plane
plt.figure(figsize=(8, 6))
plt.scatter(np.real(formant_roots), np.imag(formant_roots), marker='o', color='red')
plt.title('Formant Frequencies in Complex Plane')
plt.xlabel('Real Part')
plt.ylabel('Imaginary Part')
plt.axhline(0, color='black', linewidth=0.5, linestyle='--')
plt.axvline(0, color='black', linewidth=0.5, linestyle='--')
plt.xlim(-1, 1)  # Adjust limits for better visualization
plt.ylim(-1, 1)  # Adjust limits for better visualization
plt.grid()
plt.show()

# Playback the selected frame (must stay the last expression so the
# notebook renders the audio widget)
Audio(data=frame, rate=sampling_rate)
