# <center> <span style="color:GREEN"> NUMBER SYSTEM USING ANN </span> </center>

In [None]:
from IPython.display import clear_output
import numpy as np                        # linear algebra library
import pandas as pd                       # data frames processing
import matplotlib.pyplot as plt           # visualization library
import seaborn as sn                      # visualization library
import math
import keras
from sys import byteorder
from array import array
from struct import pack




# Audio processing libraries
import scipy.io.wavfile as wav
import IPython.display as ipd
import librosa
import librosa.display
import scipy.signal as signal
import noisereduce as nr
from IPython.display import Audio, IFrame, display
from scipy.signal import hilbert
import sounddevice as sd
from scipy.io.wavfile import write
import wavio as wv
import soundfile as sf
import malaya_speech
import webrtcvad
from webrtcvad import Vad
from malaya_speech import Pipeline
import speech_recognition as spreg
import pyaudio
import wave
import threading
from python_speech_features import mfcc

#simulation library
import pygame



In [None]:
# Load the saved Keras model from 'numbers_ANN.h5' (path is relative to the
# working directory). pred_func() calls model.predict() on it.
model = keras.models.load_model('numbers_ANN.h5')   # loading model

In [None]:
# Class names indexed by the argmax of the model output (see pred_func).
# The order is alphabetical; presumably it matches the label encoding used
# when the model was trained -- TODO confirm before reordering.
classes = ["eight","five","four","nine","one","seven","six","three","two","zero"]   # output classes

In [None]:
# Sample rate read as a global by audio_preprocessing() and MFCCs().
sr=16000

In [None]:
def audio_preprocessing(wave):
    """Clean a sampled signal and bring it to a fixed length.

    Steps, in order:
        1. Noise reduction with ``nr.reduce_noise`` (``stationary=True``).
        2. Silence removal: the signal is cut into frames (frame length 30,
           presumably milliseconds -- confirm against malaya_speech), each
           frame is classified by the WebRTC voice-activity detector, and
           the frames are recombined without the silent ones.
        3. Length normalisation: the result is cut to at most 19999 samples
           and zero-padded to ``1*sr + 4000`` samples.

    The module-level ``sr`` is used as the sample rate throughout.

    Input:  array of audio samples (not a file name).
    Output: 1-D array of ``1*sr + 4000`` samples.
    """
    denoised = nr.reduce_noise(y=wave, sr=sr, stationary=True)
    vad = malaya_speech.vad.webrtc()

    # The detector is fed an integer copy of the signal at 16 kHz ...
    int_samples = malaya_speech.astype.float_to_int(
        malaya_speech.resample(denoised, sr, 16000)
    )
    # ... while the frames that are kept come from the float signal.
    float_frames = malaya_speech.generator.frames(denoised, 30, sr)
    int_frames = list(
        malaya_speech.generator.frames(int_samples, 30, 16000, append_ending_trail=False)
    )
    labelled = [
        (float_frames[idx], vad(frame)) for idx, frame in enumerate(int_frames)
    ]
    voiced = malaya_speech.combine.without_silent(labelled)

    # Cut over-long signals, then pad with zeros up to the fixed length.
    if len(voiced) >= 19999:
        voiced = voiced[:19999]
    padding = np.zeros((1 * sr + 4000) - voiced.shape[0])
    return np.concatenate((voiced, padding))

In [None]:
def MFCCs(signal):
    """Extract MFCC features from a sampled signal.

    Calls ``mfcc`` with the module-level sample rate ``sr``, a 256-sample
    Hamming window, a step of half a window, 13 cepstral coefficients and
    26 filters, then returns the transposed feature matrix.
    """
    window = 256
    settings = dict(
        winlen=window / sr,
        winstep=window / (2 * sr),
        numcep=13,
        nfilt=26,
        nfft=window,
        lowfreq=0,
        highfreq=sr / 2,
        preemph=0.97,
        ceplifter=22,
        appendEnergy=True,
        winfunc=np.hamming,
    )
    features = mfcc(signal, sr, **settings)
    return np.transpose(features)

In [None]:
# Recording parameters shared by is_silent(), record() and record_to_file().
THRESHOLD = 1200          # silence threshold: a chunk whose max sample is below this is silent
CHUNK_SIZE = 1024         # frame size: frames read per stream.read() call and per buffer
FORMAT = pyaudio.paInt16  # sample format passed to PyAudio (16-bit integers)
RATE = 16000              # sample rate used for recording and written to the wav header

def is_silent(snd_data):
    """Tell whether a chunk of samples is below the silence threshold.

    Only the largest sample value is compared against THRESHOLD; large
    negative samples do not count.
    """
    loudest = max(snd_data)
    return loudest < THRESHOLD

def normalize(snd_data):
    """Scale the samples so that the loudest one has magnitude 16384.

    Args:
        snd_data: sequence of integer samples (re-iterable, e.g. an
            ``array('h')`` or a list).

    Returns:
        A new ``array('h')`` with every sample multiplied by
        ``16384 / max(|sample|)`` and truncated to an integer. Empty or
        all-zero input has no peak to scale to and is returned as an
        unscaled copy instead of raising.
    """
    MAXIMUM = 16384
    peak = max((abs(i) for i in snd_data), default=0)
    if peak == 0:
        # Nothing to scale: avoids ValueError (empty) / ZeroDivisionError (all zeros).
        return array('h', snd_data)

    times = float(MAXIMUM) / peak
    return array('h', (int(i * times) for i in snd_data))


def record():
    """
    Record from the microphone until speech has been followed by silence.

    Chunks of CHUNK_SIZE frames are read and appended to the recording
    (leading silence included; nothing is trimmed or padded). Recording
    starts counting once a non-silent chunk has been seen and stops as
    soon as more than two silent chunks have been read after that point
    (the count is cumulative, not necessarily consecutive).

    Returns:
        (sample_width, samples): the sample size in bytes for FORMAT and
        the recorded data as an array of signed shorts, volume-normalized
        with normalize().
    """
    p = pyaudio.PyAudio()
    try:
        sample_width = p.get_sample_size(FORMAT)
        # NOTE(review): output=True is kept from the original; only input is read here.
        stream = p.open(format=FORMAT, channels=1, rate=RATE,
                        input=True, output=True,
                        frames_per_buffer=CHUNK_SIZE)
        try:
            num_silent = 0
            snd_started = False
            r = array('h')

            while True:
                # little endian, signed short
                snd_data = array('h', stream.read(CHUNK_SIZE))
                if byteorder == 'big':
                    snd_data.byteswap()
                r.extend(snd_data)

                silent = is_silent(snd_data)

                if silent and snd_started:
                    num_silent += 1
                elif not silent and not snd_started:
                    snd_started = True

                if snd_started and num_silent > 2:
                    break
        finally:
            # Release the audio device even if reading fails part-way.
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()

    return sample_width, normalize(r)

def record_to_file(path):
    """Record from the microphone and write the result to a wav file.

    Args:
        path: destination file; written as mono audio at RATE with the
            sample width reported by record().
    """
    sample_width, data = record()
    # Little-endian signed shorts; a repeat count avoids building a format
    # string with one character per sample.
    frames = pack('<%dh' % len(data), *data)

    # The context manager closes the file even if writing fails.
    with wave.open(path, 'wb') as wf:
        wf.setnchannels(1)
        wf.setsampwidth(sample_width)
        wf.setframerate(RATE)
        wf.writeframes(frames)


In [None]:
def play_audio(sound_file):
    """Play a sound file and block until playback has finished.

    Args:
        sound_file: path of the audio file; it is read as float32 samples
            and played at the sample rate stored in the file.
    """
    samples, rate = sf.read(sound_file, dtype='float32')
    sd.play(samples, rate)
    sd.wait()  # Wait until file is done playing

In [None]:
def plot_audio(sound_file):
    """Plot the waveform of a sound file against time in seconds.

    Args:
        sound_file: path of the audio file; it is read as float32 samples.
    """
    samples, rate = sf.read(sound_file, dtype='float32')
    count = len(samples)
    # Sample k is taken at k / rate seconds, so the axis runs from 0 to
    # (count - 1) / rate. The previous `len(samples - 1)` subtracted 1 from
    # every sample instead of from the length, stretching the axis by one
    # sample period.
    time = np.linspace(0, (count - 1) / rate, count)
    plt.plot(time, samples)
    plt.title("Voice Signal")
    plt.xlabel("Time [seconds]")
    plt.ylabel("Voice amplitude")
    plt.show()

In [None]:
def pred_func(sound_file):
    """Predict the spoken number contained in a sound file.

    The file is read, passed through audio_preprocessing() and MFCCs(),
    flattened to a single row and given to the model; the class with the
    highest score is printed and returned.

    Args:
        sound_file: path of the audio file to classify.

    Returns:
        (output, flag): the predicted class name and the value 1, which
        the caller stores as the "new prediction available" flag.

    The global ``flag`` is intentionally NOT set here. Setting it before
    returning raised the flag while the caller had not yet stored the
    prediction, so a reader polling ``flag`` could pick up a stale result
    or count the same prediction twice. The caller publishes both values
    from the returned tuple.
    """
    # NOTE(review): the file's own sample rate is not used; the
    # preprocessing reads the module-level `sr`, so the file is presumably
    # expected to be recorded at that rate -- confirm for external files.
    samples, _file_sr = sf.read(sound_file, dtype='float32')
    signal = audio_preprocessing(samples)
    audio = MFCCs(signal)

    # Flatten the feature matrix into one row for the model.
    scores = model.predict(audio.reshape(1, -1))
    output = classes[np.argmax(scores)]
    print('**** predicted output is: ', output, '****')
    return output, 1

In [None]:
def run_func():
    """Background worker: record, predict, publish, forever.

    Each round prompts the user, records one utterance to 'demo.wav',
    runs pred_func() on it and stores the result in the globals ``o_p``
    (predicted class) and ``flag``. The loop never exits.
    """
    global o_p, flag
    sound_file = "demo.wav"
    while True:
        print("speak now")
        record_to_file(sound_file)
        o_p, flag = pred_func(sound_file)


In [None]:
# Shared state written by run_func() and read by the display loop. It is
# defined before the thread starts so the reader never meets an unset name.
flag = 0      # becomes 1 when a new prediction is waiting in o_p
o_p = None    # most recent predicted class name

# run_func() never returns, so the thread is a daemon: it must not keep the
# interpreter alive on shutdown.
game = threading.Thread(target=run_func, daemon=True)   # initialize thread to run in the background
game.start()    # recording starts here: speak after the "speak now" prompt

## Implementation of Numbers simulation inside notebook

In [None]:
# initialize variables
import time   # only the polling loop below needs it

temp = ''     # digits recognised so far, shown as one growing string
text_t = ['0','1','2','3','4','5','6','7','8','9']  #output classes
# Spoken class name -> index into text_t (replaces the if/elif chain).
word_to_index = {word: index for index, word in enumerate(
    ['zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine'])}

# `flag` is created by the recognition thread on its first prediction.
# Give it a default so the loop does not raise NameError when started
# earlier, without discarding a prediction that is already pending.
globals().setdefault('flag', 0)

# infinite loop
while True:
    if flag != 1:
        # Nothing new yet: sleep briefly instead of spinning at full CPU,
        # which would also starve the recording thread.
        time.sleep(0.05)
        continue

    u = word_to_index.get(o_p)
    flag = 0
    if u is None:
        # Not one of the ten number words; nothing to append.
        continue

    temp = temp + text_t[u]
    clear_output()

    # Redraw the accumulated digits as large text on an otherwise empty axis.
    fig = plt.figure(figsize=(20,2))
    ax = fig.add_subplot()
    fig.subplots_adjust(top=0.85)
    ax.axis([0, 10, 0, 10])
    ax.text(0,1 , temp , fontsize=100)
    plt.title(str(temp))
    plt.show()
        
        