# Use internal microphone to plot waveforms (Part 3:  add spectrogram)
This script should make a continuous line plot of a sound that is picked up by the computer's microphone, a spectrum to show power, and a spectrogram to show how frequencies change in time.  It derives from https://www.youtube.com/watch?v=aQKX3mrDFoY

This is a follow-on example from My_first_audio notebook and My_second_audio notebook.

Note that if the speaker volume is too high, the graph will look broken (values exceeding +-128 will wrap)

This uses the Python module "pyaudio", which in turn requires the "portaudio" library, as well as numpy and scipy (for the FFT)

First, as always, we import the needed packages; in this case pyaudio to grab sound from the microphone (either internal or external), struct to convert the digital sound from packed binary to integer, numpy for array handling, scipy for fft (making spectrum) and matplotlib to plot

In [1]:
# import standard stuff
import numpy as np
import time

# import audio package
import pyaudio

# import struct to convert audio binary to integers
import struct

# import graphing modules
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
from tkinter import TclError

# import modules for spectrum calculations
from scipy.fftpack import fft
from matplotlib.mlab import window_hanning,specgram
from matplotlib.colors import LogNorm

This "backend" will allow plots to come up outside the jupyter browser (as popups)

In [2]:
# IPython magic: select the Tk backend so figures open in separate pop-up windows
%matplotlib tk

Here we define a segment of sound to process.  This essentially takes a continuous time-series and cuts it into a set of finite-length signals.  CHUNK is the number of samples per frame; here we use 1024 * 4, or 4096.  The format is 16-bit integer, the channel count is 1 because the machine's internal mic is mono (not stereo), and the rate is the fairly standard 44.1 kHz

In [3]:
# Capture settings shared by the audio stream and the plots.
RATE = 44100                 # sampling rate in samples per second (44.1 kHz)
CHANNELS = 1                 # single (mono) channel
FORMAT = pyaudio.paInt16     # PyAudio sample-format constant for 16-bit integers
CHUNK = 4 * 1024             # samples read per frame (4096)
SAMPLES_PER_FRAME = 10       # not used by any live code in the cells shown

Next, we create a stream using the variables defined above.  Here I'm not sure why we're using integer 16, only later to change to integer 8

In [4]:
# create the PyAudio interface object "p"
p = pyaudio.PyAudio()

# Open the microphone stream "stream" from "p".
# Only input is requested: this notebook reads from the stream and never
# writes to it, so asking for an output (playback) stream as well would just
# add a requirement for a working output device.
stream = p.open(
    format=FORMAT,
    channels=CHANNELS,
    rate=RATE,
    input=True,
    frames_per_buffer=CHUNK,
)

This next code block is where we define all the graph parameters.  We will have three graphs on a page, the first one (waveform) will extend across the top row; the next two will be below.

In [5]:
# Figure "fig" laid out on a 2x2 grid:
#   top row    -> ax1, stretched across both columns
#   bottom row -> ax2 (left column) and ax3 (right column)
fig = plt.figure(constrained_layout=True)
gs = GridSpec(nrows=2, ncols=2, figure=fig)

ax1 = fig.add_subplot(gs[0, :])
ax2 = fig.add_subplot(gs[1, 0])
ax3 = fig.add_subplot(gs[1, 1])

# Waveform panel (ax1): one point per sample of a CHUNK.
# Sample k is taken at k/RATE seconds, so the time axis is built with a
# spacing of exactly 1/RATE (a linspace that includes the end point would
# stretch the spacing by CHUNK/(CHUNK-1)).
x_time = np.arange(CHUNK) / RATE

# Random values are only a placeholder; the capture loop replaces the y-data.
line, = ax1.plot(x_time, np.random.rand(CHUNK), '-', lw=1)
ax1.set_title('Audio Waveform')
ax1.set_xlabel('time (seconds)')
ax1.set_ylabel('amplitude')
ax1.set_xlim(0, CHUNK / RATE)
ax1.set_ylim(-150, 150)

# Spectrum panel (ax2): one point per FFT bin of a CHUNK-long frame.
# Bin k of a CHUNK-point FFT sits at k*RATE/CHUNK Hz, so the axis uses that
# spacing exactly (a linspace from 0 to RATE that includes the end point
# would place every bin slightly too high).
x_freq = np.arange(CHUNK) * RATE / CHUNK

# Random values are only a placeholder; the capture loop replaces the y-data.
line_fft, = ax2.semilogx(x_freq, np.random.rand(CHUNK), '-', lw=1)
ax2.set_title('Audio Spectrum')
ax2.set_xlabel('frequency (cycles/second [Hz])')
ax2.set_ylabel('power')
# show from 20 Hz up to half the sampling rate
ax2.set_xlim(20, RATE / 2)
ax2.set_ylim(0, 0.25)

# Spectrogram panel (ax3).
# The ramp below is not audio; it is a stand-in signal so that a first
# spectrogram can be drawn before any sound has been read.
# (An alternative based on matplotlib.mlab.specgram plus imshow was tried
# earlier and is not used.)
x_spec = np.linspace(0, CHUNK, RATE)
Pxx, freqs, bins, im = ax3.specgram(x_spec, Fs=RATE, NFFT=2048, noverlap=256)
ax3.set(
    title='Real Time Spectrogram',
    xlabel='Time (s)',
    ylabel='Frequency (Hz)',
    ylim=(0, 1000),
)

# open the figure window without blocking, so later cells can still run
plt.show(block=False)

In [6]:
# Inspect the sizes produced by the placeholder specgram call: the input
# length, the number of time bins, the number of frequency rows, and the
# total number of spectrogram cells (the input length is shown twice).
len(x_spec),len(bins),len(freqs),Pxx.size,(len(x_spec))

(44100, 24, 1025, 24600, 44100)

In [7]:
# progress message printed before the capture loop is entered
print('stream started')

stream started


In [8]:
# wall-clock reference and frame counter; together they give the average
# frame rate that is reported when the capture loop ends
start_time = time.time()
frame_count = 0

In [9]:
# Capture/plot loop: read one CHUNK of audio, refresh the waveform, spectrum
# and spectrogram, and repeat until the figure window is closed.
while True:
    # Read one CHUNK of samples.  The result is raw bytes; with the 16-bit
    # sample format each sample occupies two bytes.
    data = stream.read(CHUNK, exception_on_overflow=False)

    # Decode every two-byte pair as one signed 16-bit sample, then divide by
    # 256 to bring the values into the 8-bit range (-128 to 128) that the
    # waveform axis limits and the spectrum normalisation below are built
    # around.  Decoding whole samples (instead of keeping a single byte of
    # each one) means loud sounds no longer wrap around, and it avoids
    # building an int8 array from byte values above 127, which recent NumPy
    # versions refuse to do.
    data_np = np.frombuffer(data, dtype=np.int16) / 256.0

    # first we plot the raw data as a waveform
    line.set_ydata(data_np)

    # second we plot the spectrum
    y_fft = fft(data_np)
    line_fft.set_ydata(np.abs(y_fft) * 2 / (256 * CHUNK))

    # third we plot the spectrogram.  Each specgram call adds a new image to
    # the axes, so the images from earlier frames are removed first;
    # otherwise they pile up and every redraw gets slower.
    for old_image in list(ax3.images):
        old_image.remove()
    Pxx, freqs, bins, im = ax3.specgram(data_np, NFFT=128, Fs=RATE, noverlap=0)
    ax3.set_xlim(0, CHUNK / RATE)
    ax3.set_ylim(0, 10000)

    try:
        fig.canvas.draw()
        fig.canvas.flush_events()
        frame_count += 1

    except TclError:
        # Raised once the Tk window has been closed: report and leave the loop.
        frame_rate = frame_count / (time.time() - start_time)

        # Actually stop and release the audio stream before saying so.
        # The stream-creation cell must be run again before restarting.
        stream.stop_stream()
        stream.close()
        p.terminate()

        print('stream stopped')
        print('average frame rate = {:0f} FPS'.format(frame_rate))
        break


  Z = 10. * np.log10(spec)
  dv = (np.float64(self.norm.vmax) -
  np.float64(self.norm.vmin))
  a_min = np.float64(newmin)
  a_max = np.float64(newmax)
  dtype = np.min_scalar_type(value)
  data = np.array(a, copy=False, subok=subok)


stream stopped
average frame rate = 1.712777 FPS
