In [1]:
import wave
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import wavfile
import os
import librosa

# NOTE: no scipy-based loading code follows in this cell; .wav files are loaded with librosa in wav_to_pcm_librosa.


In [2]:
def wav_to_pcm_librosa(wav_file, pcm_file, sample_rate=44100):
    """Convert an audio file to headerless, peak-normalized 16-bit mono PCM.

    The audio is loaded with ``librosa.load`` (resampled to ``sample_rate`` and
    downmixed to mono), scaled so that its largest absolute sample maps to
    32767, and written to ``pcm_file`` as raw ``int16`` samples.

    Args:
        wav_file: Path of the audio file to read.
        pcm_file: Path of the raw PCM file to write.
        sample_rate: Target sample rate in Hz handed to ``librosa.load``.
            The output has no header, so whoever reads the PCM file must be
            told this same rate.

    Returns:
        None. The samples are written to ``pcm_file`` and a confirmation line
        is printed.
    """
    # Resample to the requested rate and downmix to mono.
    data, _ = librosa.load(wav_file, sr=sample_rate, mono=True)

    # Peak-normalize to the int16 range. Silent input has a zero peak and empty
    # input has no peak at all; dividing in either case would yield NaN (or
    # raise), so such input is written as all-zero / zero-length PCM instead.
    peak = np.max(np.abs(data)) if data.size else 0.0
    if peak > 0:
        pcm_data = (data / peak * 32767).astype(np.int16)
    else:
        pcm_data = np.zeros(data.shape, dtype=np.int16)

    # Raw dump: no header, native byte order.
    pcm_data.tofile(pcm_file)
    print(f"PCM file saved as {pcm_file}")

In [3]:
# Convert .wav files to .pcm here, e.g. wav_to_pcm_librosa("input.wav", "output.pcm", sample_rate=16000)

In [4]:
def Get_Spectogram_Values(pcm_path, sample_rate=16000):
    """Compute a mel spectrogram from a raw 16-bit PCM file.

    The file is read as headerless ``int16`` samples, scaled to floats by
    dividing by the int16 maximum (32767), and passed to
    ``librosa.feature.melspectrogram``.

    Args:
        pcm_path: Path to the raw PCM file.
        sample_rate: Sample rate in Hz of the PCM data. Raw PCM has no header,
            so this must match the rate the file was written with. Note that
            ``wav_to_pcm_librosa`` defaults to 44100, not 16000.

    Returns:
        The transpose of the array returned by
        ``librosa.feature.melspectrogram``, i.e. one row per frame and one
        column per mel band. Take ``.mean(axis=0)`` of the result for a single
        per-band average vector.
    """
    with open(pcm_path, 'rb') as pcmfile:
        pcm_data = np.frombuffer(pcmfile.read(), dtype=np.int16)

    # Scale integer samples to floats in roughly [-1, 1].
    audio_data = pcm_data.astype(np.float32) / np.iinfo(np.int16).max

    # Transpose so that frames run along axis 0 and mel bands along axis 1.
    return librosa.feature.melspectrogram(y=audio_data, sr=sample_rate).T

In [5]:
def plot_mel_features(mel_spec):
    """Show mel features side by side as a line plot and as a heatmap.

    Args:
        mel_spec: Either a 1-D vector with one value per mel band, or a 2-D
            array with one row per frame and one column per mel band (the
            layout returned by ``Get_Spectogram_Values``).

    Returns:
        None. The figure is displayed with ``plt.show()``.

    NOTE(review): the axis and colorbar labels say "Energy (dB)", but nothing
    in this file converts the spectrogram to decibels before plotting; confirm
    the units of the data being passed in.
    """
    # Promote a 1-D vector to a single-row image. A 2-D spectrogram is left
    # as is; wrapping it in a list would add a third axis that imshow rejects.
    features = np.atleast_2d(np.asarray(mel_spec))

    fig, (line_ax, heat_ax) = plt.subplots(1, 2, figsize=(12, 6))

    # Line plot: transposing puts the mel band index on the x axis and draws
    # one line per frame (a single line for 1-D input).
    line_ax.plot(features.T)
    line_ax.set_title('Mel Spectrogram Features - Line Plot')
    line_ax.set_xlabel('Mel Frequency Band Index')
    line_ax.set_ylabel('Energy (dB)')

    # Heatmap: mel bands along x, frames along y.
    image = heat_ax.imshow(features, aspect='auto', cmap='viridis')
    fig.colorbar(image, ax=heat_ax, label='Energy (dB)')
    heat_ax.set_title('Mel Spectrogram Features - Heatmap')
    heat_ax.set_xlabel('Mel Frequency Band Index')
    if features.shape[0] > 1:
        heat_ax.set_ylabel('Frame Index')

    fig.tight_layout()
    plt.show()

In [6]:
def Get_and_Plot(pcm_path):
    """Compute the mel-spectrogram features of a PCM file and plot them.

    Args:
        pcm_path: Path to the raw PCM file; forwarded unchanged to
            ``Get_Spectogram_Values``.

    Returns:
        None. The features are handed to ``plot_mel_features`` for display.
    """
    plot_mel_features(Get_Spectogram_Values(pcm_path))