In [1]:
# Notebook-wide setup: page layout, numpy print options and plotting backend.
# `display` is imported from the public `IPython.display` module; importing it
# from `IPython.core.display` is a deprecated path.
from IPython.display import display, HTML
# Stretch the notebook container to the full browser width.
display(HTML("<style>.container { width:100% !important; }</style>"))
import numpy as np
np.set_printoptions(suppress=True)
import matplotlib
# The backend is selected before pyplot is imported.
matplotlib.use('TkAgg')
from matplotlib import pyplot as plt
from collections.abc import Iterable

In [2]:
# --- Audio framing configuration -------------------------------------------
SAMPLE_RATE = 16000                               # passed to the audio stream as its rate
SAMPLE_WIDTH = 1.0 / SAMPLE_RATE                  # reciprocal of the sample rate
FRAME_DURATION = 0.01                             # length of one analysis frame
FRAME_WIDTH = int(FRAME_DURATION * SAMPLE_RATE)   # number of samples in one frame
MIN_FREQ = 1.0 / FRAME_DURATION                   # per-index frequency step used by FREQ_BY_NUM

def FREQ_BY_NUM(i):
    """Map an index ``i`` to a frequency by scaling it with ``MIN_FREQ``."""
    return i * MIN_FREQ

In [3]:
def FFT(frame):
    """Return the magnitude spectrum of a real-valued frame.

    Parameters
    ----------
    frame : array_like
        One frame of real-valued samples.

    Returns
    -------
    numpy.ndarray
        Non-negative magnitudes of the one-sided FFT (``len(frame)//2 + 1``
        values).
    """
    # Take the modulus of the full complex coefficients. Using only the real
    # part would drop every sine-phase component, so a pure sine tone could
    # yield an (almost) all-zero "spectrum".
    return np.abs(np.fft.rfft(frame))

def CUTFFT(fft, max_freq):
    """Keep only the leading ``int(max_freq * FRAME_DURATION)`` entries of ``fft``."""
    n_bins = int(max_freq * FRAME_DURATION)
    return fft[:n_bins]

def SIGNAL_MEAN(signal):
    """Return the arithmetic mean of ``signal`` as computed by ``np.mean``."""
    arithmetic_mean = np.mean(signal)
    return arithmetic_mean

def SIGNAL_GEOM_MEAN(signal):
    """Return the geometric mean of ``signal`` computed in the log domain.

    A small offset (1e-11) is added to every element before taking the
    logarithm so that zero entries do not produce ``-inf``.
    """
    log_values = np.log(1e-11 + signal)
    mean_log = np.sum(log_values) / len(signal)
    return np.exp(mean_log)

def FFT_MAX_FREQ(fft):
    """Return the frequency (via ``FREQ_BY_NUM``) of the largest entry of ``fft``."""
    peak_index = np.argmax(fft)
    return FREQ_BY_NUM(peak_index)

def SFM(fft):
    """Return 10*log10 of the geometric-to-arithmetic mean ratio of ``fft``.

    Small offsets (1e-11) guard both the division and the logarithm against
    zero arguments.
    """
    flatness = SIGNAL_GEOM_MEAN(fft) / (1e-11 + SIGNAL_MEAN(fft))
    return 10 * np.log10(1e-11 + flatness)
    
def ENERGY(signal):
    """Return the sum of squared real parts of ``signal`` scaled by ``SAMPLE_WIDTH``."""
    squared = signal.real ** 2
    return SAMPLE_WIDTH * np.sum(squared)

def IDENTITY(fft):
    """Pass-through feature: hand back ``fft`` itself, unchanged."""
    unchanged = fft
    return unchanged

In [4]:
class Characteristic:
    """A named per-frame feature together with its history and tunable state.

    Parameters
    ----------
    name : str
        Label of the feature.
    frame_func : callable, optional
        Computes the feature from the raw frame. Takes precedence over
        ``frame_fft_func`` when given.
    frame_fft_func : callable, optional
        Computes the feature from the frame's spectrum; used only when
        ``frame_func`` is None.
    update_kwargs : callable, optional
        Called as ``update_kwargs(self.kwargs, regime_counts)`` from
        ``update_params``.
    **kwargs
        Arbitrary extra state, stored as ``self.kwargs``.
    """

    def __init__(self, name, frame_func=None, frame_fft_func=None, update_kwargs=None, **kwargs):
        self.name = name
        self.func = frame_func
        self.fft_func = frame_fft_func
        self.upd_kw_func = update_kwargs
        self.kwargs = kwargs
        self.value = []  # one computed feature value per frame, in arrival order

    def calc_frame(self, frame, frame_fft):
        """Compute the feature for one frame from ``frame`` or from ``frame_fft``."""
        if self.func is None:
            return self.fft_func(frame_fft)
        return self.func(frame)

    def add_frame(self, frame, frame_fft):
        """Compute the feature, append it to ``value`` and store it under ``kwargs['last']``."""
        latest = self.calc_frame(frame, frame_fft)
        self.value.append(latest)
        self.kwargs['last'] = latest

    def update_params(self, regime_counts):
        """Run the optional ``update_kwargs`` hook on ``kwargs``; no-op without a hook."""
        updater = self.upd_kw_func
        if updater is None:
            return
        updater(self.kwargs, regime_counts)

In [5]:
def _update_energy_floor(kw, rg):
    """Maintain the Energy noise floor ``kw['min']``.

    * First call: the floor is initialised with the current value.
    * While the silence counter is positive: the floor becomes the running
      average of itself and the current value.
    * Otherwise: the floor is left untouched. Overwriting it with the
      current value here would make the threshold ``last + d_thresh``,
      which the current value can never exceed.
    """
    if 'min' not in kw:
        kw['min'] = kw['last']
    elif rg['silence'] > 0:
        n_silence = rg['silence']
        kw['min'] = (kw['min'] * n_silence + kw['last']) / (n_silence + 1)


def _init_floor_once(kw, rg):
    """Set ``kw['min']`` from the first observed value and keep it afterwards."""
    if 'min' not in kw:
        kw['min'] = kw['last']


def _sticky_margin(kw):
    """Reuse an existing ``d_thresh``; fall back to ``min_thresh`` the first time."""
    return kw['d_thresh'] if 'd_thresh' in kw else kw['min_thresh']


def _primary_margin(kw):
    """Use the configured ``min_thresh`` as the decision margin."""
    return kw['min_thresh']


def _floor_plus_margin(kw):
    """Decision threshold: floor (``min``) plus margin (``d_thresh``)."""
    return kw['d_thresh'] + kw['min']


# One dict per characteristic; every key other than name / frame_func /
# frame_fft_func / update_kwargs ends up in Characteristic.kwargs.
char_params = (
#     dict(name="FFT", frame_fft_func=IDENTITY),
    dict(name="Energy",
         frame_func=ENERGY,
         min_thresh=40,
         update_kwargs=_update_energy_floor,
         d_thresh_func=_sticky_margin,
         thresh_func=_floor_plus_margin),
    dict(name="MaxFreq",
         frame_fft_func=FFT_MAX_FREQ,
         min_thresh=185,
         update_kwargs=_init_floor_once,
         d_thresh_func=_primary_margin,
         thresh_func=_floor_plus_margin),
    dict(name="SFM",
         frame_fft_func=SFM,
         min_thresh=5,
         d_thresh_func=_primary_margin,
         update_kwargs=_init_floor_once,
         thresh_func=_floor_plus_margin))
char = [Characteristic(**kw) for kw in char_params]

In [6]:
from collections import defaultdict

# Number of successive frames that must be exceeded before switching into a
# regime; unknown regimes default to 0.
regime_thresholds = defaultdict(int)
regime_thresholds.update({'voice': 5, 'silence': 10})


class Silence:
    """Frame-by-frame voice / silence regime tracker.

    Each call to ``detect`` lets every characteristic vote on whether its
    latest value exceeds its threshold. More than one vote makes the frame
    voice-like, otherwise silence-like. The regime only switches after a
    run of successive like frames longer than the configured threshold.

    Parameters
    ----------
    chars : list, optional
        Characteristic-like objects (``kwargs``, ``add_frame``,
        ``update_params``). When omitted, the module-level ``char`` list is
        looked up at call time, so re-running the cell that builds ``char``
        takes effect for new instances.
    regime_thresholds : mapping, optional
        Run lengths per regime name (keys ``'voice'`` and ``'silence'``).

    Attributes
    ----------
    regime : str
        Current regime: ``'none'`` initially, then ``'voice'`` or ``'silence'``.
    values : list of float
        One entry per processed frame: 1.0 for voice, 0.0 otherwise.
    counts : defaultdict(int)
        Frame counters; replaced by a fresh dict on every regime change.
    """

    def __init__(self, chars=None, regime_thresholds=regime_thresholds):
        self.chars = char if chars is None else chars
        self.counts = defaultdict(int)
        self.regime = 'none'
        self.values = []
        self.thr = regime_thresholds

    def feed(self, frame, fft_frame):
        """Push one frame (and its spectrum) into every characteristic."""
        for c in self.chars:
            c.add_frame(frame, fft_frame)

    def set_regime(self, regime):
        """Record the frame's label and move to ``regime``.

        Returns ``{previous_regime: frames_spent_in_it}`` when the regime
        changes, otherwise ``None``.
        """
        self.values.append(1. if regime == 'voice' else 0.)
        if self.regime == regime:
            self.counts[regime] += 1
            return None
        resp = {self.regime: self.counts[self.regime]}
        # A regime change starts all counters from scratch.
        self.counts = defaultdict(int)
        self.counts[regime] += 1
        self.regime = regime
        return resp

    def _count_votes(self):
        """Refresh each characteristic's threshold and count those exceeded."""
        votes = 0
        for c in self.chars:
            v = c.kwargs.get('last', None)
            dtf = c.kwargs.get('d_thresh_func', None)
            if v is None or dtf is None:
                continue
            c.kwargs['d_thresh'] = dtf(c.kwargs)
            tf = c.kwargs.get('thresh_func', None)
            if tf is None:
                continue
            c.kwargs['threshold'] = tf(c.kwargs)
            if v > c.kwargs['threshold']:
                votes += 1
        return votes

    def _advance(self, target, opposite):
        """Account for one frame that looks like ``target``."""
        # A contrary frame interrupts any pending run toward the opposite
        # regime, so only successive frames can trigger a switch.
        self.counts['pre_' + opposite] = 0
        pending = 'pre_' + target
        if self.regime != target:
            self.counts[pending] += 1
        if self.counts[pending] > self.thr[target]:
            self.set_regime(target)
        else:
            self.set_regime(self.regime)

    def detect(self):
        """Classify the most recently fed frame and update the regime."""
        for c in self.chars:
            c.update_params(self.counts)
        if self._count_votes() > 1:
            self._advance('voice', 'silence')
        else:
            self._advance('silence', 'voice')

In [7]:
class RTPlot:
    """Live matplotlib view of every characteristic plus the voice/silence track.

    Parameters
    ----------
    silence : object
        Detector exposing ``chars`` (objects with ``name``, ``value`` and
        ``kwargs``) and ``values`` (per-frame 0/1 labels).
    frame_duration : float
        Time covered by one frame; converts between times and frame indices.
    max_duration : float
        Length of the trailing time window shown in the plot.
    """

    def __init__(self, silence, frame_duration=FRAME_DURATION, max_duration=10):
        self.silence = silence
        self.frame_duration = frame_duration
        self.max_duration = max_duration
        # One axis per characteristic plus a final one for the 0/1 track.
        self.fig, self.ax = plt.subplots(len(silence.chars) + 1, 1, sharex=True)
        self.fig.show()

    def _frame_index(self, t):
        """Convert a non-negative time to a frame index.

        The quotient is floored with a small tolerance because values such
        as 0.29 / 0.01 evaluate to 28.999999999999996, which plain ``int()``
        would truncate to 28.
        """
        return int(t / self.frame_duration + 1e-9)

    def plot(self, current_time):
        """Redraw all axes for the window ending at ``current_time``."""
        start_time = max(0., current_time - self.max_duration)
        frame_start = self._frame_index(start_time)
        frame_end = self._frame_index(current_time)
        for i, v in enumerate(self.silence.chars):
            self.ax[i].cla()
            value = v.value[frame_start:frame_end]
            if len(value) == 0: continue
            if isinstance(value[0], Iterable):
                # Vector-valued feature: draw as an image, time along x.
                extent = [start_time, current_time, 0, 1]
                self.ax[i].imshow(np.swapaxes(np.array(value), 0, 1), cmap=plt.get_cmap('hot'), aspect='auto', origin='lower', interpolation=None, extent=extent, vmin=0)
            else:
                # Scalar feature: line plot with its current threshold in red.
                x = [start_time + self.frame_duration*j for j in range(len(value))]
                self.ax[i].plot(x, value, 'b-')
                thr = v.kwargs.get('threshold', None)
                if thr is not None: self.ax[i].plot([x[0], x[-1]], [thr, thr], 'r-')
            self.ax[i].set_xlabel("time, s")
            self.ax[i].set_ylabel(v.name)
        self.ax[-1].cla()
        self.ax[-1].set_xlabel('voice (=1), silence(=0)')
        value = self.silence.values[frame_start:frame_end]
        self.ax[-1].plot([start_time + self.frame_duration*j for j in range(len(value))], value, 'g-')
        self.fig.canvas.draw()
        self.fig.canvas.flush_events()

In [8]:
import pyaudio
import time


# --- Run configuration ------------------------------------------------------
DURATION = 30          # total capture time
PLOT_EVERY = 1         # time between plot refreshes
ESTIMATION_TIME = 0.3
MAX_FREQ = 4000        # spectrum is truncated at this frequency before analysis
VOICE_FREQS = (100, 1500)

silence = Silence(chars=char)
plot = RTPlot(silence, frame_duration=FRAME_DURATION, max_duration=10)

# Refresh interval in frames; at least 1 so the modulo below cannot divide by 0.
plot_stride = max(1, int(PLOT_EVERY/FRAME_DURATION))

p = pyaudio.PyAudio()
try:
    stream = p.open(format=pyaudio.paInt16, channels=1, rate=SAMPLE_RATE, input=True, frames_per_buffer=FRAME_WIDTH)
    try:
        # Read one frame at a time, extract features, classify, plot periodically.
        for i in range(int(DURATION/FRAME_DURATION)):
            current_time = i * FRAME_DURATION
            rawdata = stream.read(FRAME_WIDTH)
            data = np.array(np.frombuffer(rawdata, dtype=np.int16), dtype='float32')
            fft = FFT(data)
            signal = CUTFFT(fft, MAX_FREQ)

            silence.feed(data, signal)
            silence.detect()

            if i % plot_stride == 0:
                plot.plot(current_time)
    finally:
        # Release the input stream even if reading or plotting failed.
        stream.stop_stream()
        stream.close()
finally:
    p.terminate()