# Analiza i przetwarzanie dźwięku - Projekt 1


In [5]:
import wave
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import Audio
import plotly.graph_objs as go
from plotly.subplots import make_subplots

In [42]:
def read_wave(path):
    with wave.open(path, 'rb') as wav_file:
        frame_rate = wav_file.getframerate()
        n_samples = wav_file.getnframes()
        samples = wav_file.readframes(n_samples)
        audio = np.frombuffer(samples, dtype=np.int16).astype(np.int32)
    audio_time  = n_samples/frame_rate #in seconds
    display(Audio(data=audio, rate=frame_rate))
    return audio, frame_rate, audio_time

def split_to_frames(audio, frame_rate, percent_frame_size=0.1, percent_hop_length=0.5):
    # default frame_size is 10% of the audio and default frame overlap is 50% overlap
    
    # naming convention: n_ - number of frames, N_ - number of samples in a frame
    # convention is consistent with "Cechy sygnalu audio w dziedzinie czasu.pdf"
    frame_size = int(percent_frame_size * frame_rate)
    hop_length = int(percent_hop_length*percent_frame_size * frame_rate)
    frames = []
    for i in range(0, len(audio), hop_length):
        frame = audio[i:i+frame_size]
        if len(frame) == frame_size:
            frames.append(frame)
    frames = np.stack(frames)
    n_ = frames.shape[0]
    N_ = frames.shape[1]
    return frames, n_, N_


def plot_audio(audio,audio_time):
    times = np.linspace(0, audio_time, num=audio.shape[0])
    fig = make_subplots(rows=1, cols=1)
    fig.add_trace(
        go.Scatter(x=times, y=audio, mode='lines'),
    )

    fig.update_layout(
        title="Audio Waveform",
        xaxis_title="Time (s)",
        yaxis_title="Amplitude"
    )
    fig.show()

In [43]:
# cechy sygnału audio w dziedzinie czasu na poziomie klipu

def get_avg_amplitue(audio):
    return np.mean(np.abs(audio))


def get_vstd(audio):
    # vstd - volume standard deviation normalized by the maximum value
    return np.std(audio)/np.max(np.abs(audio))


def get_vdr(audio):
    # vdr - volume dynamic range 
    return (np.max(audio) - np.min(audio))/np.max(audio)

In [44]:
path = 'recordings/4_10/Znormalizowane/zdanie_2.wav'
audio, frame_rate, audio_time = read_wave(path)
frames, n_, N_ = split_to_frames(audio, frame_rate, percent_frame_size=0.01, percent_hop_length=0.3)

In [19]:
plot_audio(audio,audio_time)

In [41]:
print(f"Avarege amplitude: {np.format_float_positional(get_avg_amplitue(audio), precision=1)}")
print(f"VSTD: {np.format_float_positional(get_vstd(audio),precision=4)}")
print(f"VDR: {np.format_float_positional(get_vdr(audio),precision=4)}")

Avarege amplitude: 1275.6
VSTD: 0.1156
VDR: 1.7744


## Frequency Spectrum

In [None]:
plt.figure(figsize=(10, 6))
plt.specgram(audio, Fs=frame_rate)
plt.title('Singal spectrogram')
plt.ylabel('Frequency (Hz)')
plt.xlabel('Time (s)')
plt.xlim(0, audio_time)
plt.colorbar()
plt.show()