# Profiling of code

Sometimes it will be more proper with lprun and the other one just testing and plotting the performance according to the size of the documents.

In [2]:
%load_ext autoreload
%autoreload 2
%load_ext line_profiler
from pyramidman.speech_commands import SpeechCommandsHandler
from pyramidman.Seshat import Transcriber
from pyramidman.unwrapper import unwrap
from pyramidman.meeting_facilitator import MeetingFacilitator
from pyramidman.email import EmailConfig
from pyramidman.noisereduce_optimized import reduce_noise_optimized
from pyramidman.listener import listen
from pyramidman.basic_audio_IO import record_audio

Fee pyramidman.Seshat




## Initialize models and load data

In [3]:
email_config = EmailConfig()

meeting_facilitator = MeetingFacilitator("my_meeting")
meeting_facilitator.set_automatic_default_transcriber()
meeting_facilitator.set_email_config(email_config)
meeting_facilitator.set_default_speech_command_handler()

Calibrating microphone for 1 seconds.
Calibrated energy threshold:  1210.4821236134335


In [4]:
meeting_facilitator.transcriber.set_automatic_noise_filter(n_fft=2048*1,
                                   win_length=2048*1,
                                   hop_length=512*2)

## 1. Data adquisition thread

Settings: 48.000 rate and 2048 chunks.

From playing around with the read function:
- With no data preprocessing, 98.4 of the time is invested in waiting for the chunck. 1.2 for reading the buffer.
- With data preprocessing, then 70% is waiting, and 30 is to process the chunks

In [5]:
def listen_profiling(audio_filter = lambda x:x):
    with meeting_facilitator.transcriber.microphone as source:
        r = meeting_facilitator.transcriber.recognizer
        audio = listen(r, source, timeout = 0, phrase_time_limit=5, chunk_preprocessing= audio_filter)

In [6]:
%lprun -T ../profiling/profiling_time_listening -f listen listen_profiling() 


*** Profile printout saved to text file '../profiling/profiling_time_listening'. 


In [7]:
%lprun -T ../profiling/profiling_time_listening_noise_reduce_f -f reduce_noise_optimized listen_profiling(meeting_facilitator.transcriber.audio_filter) 


*** Profile printout saved to text file '../profiling/profiling_time_listening_noise_reduce_f'. 


In [8]:
%lprun -T ../profiling/profiling_time_listening_noise_reduce -f listen listen_profiling(meeting_facilitator.transcriber.audio_filter) 


*** Profile printout saved to text file '../profiling/profiling_time_listening_noise_reduce'. 


In [9]:
%lprun -T ../profiling/profiling_time_listening_noise_reduce -f listen listen_profiling(meeting_facilitator.transcriber.audio_filter) 


*** Profile printout saved to text file '../profiling/profiling_time_listening_noise_reduce'. 


## 2. Transcription

In [11]:
filepath  = "../audios/standard/english.wav"
filepath  = "../audios/standard/sep.wav"

In [12]:
record_audio(meeting_facilitator.transcriber.audio_params, filename = filepath, seconds = 10)

Recording
Finished recording


In [80]:
meeting_facilitator.transcriber.transcribe(filepath, True)

{'sentence': 'hello my friend that',
 'words': [{'word': 'hello', 'start_time ': 0.0, 'duration': 5.42},
  {'word': 'my', 'start_time ': 5.44, 'duration': 0.34},
  {'word': 'friend', 'start_time ': 5.8, 'duration': 1.86},
  {'word': 'that', 'start_time ': 8.14, 'duration': 0.3}],
 'confidence': -17.595313706775393}

In [14]:
from pyramidman.deepspeech_tools import transcribe

In [15]:
%lprun -T ../profiling/transcriber -f transcribe  meeting_facilitator.transcriber.transcribe(filepath)


*** Profile printout saved to text file '../profiling/transcriber'. 


In [16]:
%lprun -T ../profiling/transcriber -f meeting_facilitator.transcriber.transcribe  meeting_facilitator.transcriber.transcribe(filepath)


*** Profile printout saved to text file '../profiling/transcriber'. 


## 3. Command processing

In [37]:
from multiprocessing import Process
import os

from pyramidman.basic_audio_IO import play_audio
from pyramidman.audio_parameters import AudioParameters

def info(title):
    print(title)
    print('module name:', __name__)
    print('parent process:', os.getppid())
    print('process id:', os.getpid())
    audio_params = AudioParameters()
    audio_params.set_sysdefault_microphone_index()
    audio_params.set_default_input_parameters()
    filepath  = "../audios/standard/sep.wav"
    play_audio(audio_params, filepath)
    # re = self.transcriber(audio)

def f(name):
    info('function f')
    print('hello', name)


info('main line')
p = Process(target=f, args=('bob',))
p.start()
p.join()

main line
module name: __main__
parent process: 3661
process id: 24721


KeyboardInterrupt: 

In [34]:
unwrap(audio_params)

NameError: name 'audio_params' is not defined

In [39]:
from pyramidman.subprocess import call_subprocess

In [76]:
output, error, return_code = call_subprocess(f"python ../scripts/transcriber.py --audio {filepath}")

In [77]:
output

'{"sentence": "hello my friend that", "words": [{"word": "hello", "start_time ": 0.0, "duration": 5.42}, {"word": "my", "start_time ": 5.44, "duration": 0.34}, {"word": "friend", "start_time ": 5.8, "duration": 1.86}, {"word": "that", "start_time ": 8.14, "duration": 0.3}], "confidence": -17.595313706775393}\n'

In [78]:
json.loads(output)

{'sentence': 'hello my friend that',
 'words': [{'word': 'hello', 'start_time ': 0.0, 'duration': 5.42},
  {'word': 'my', 'start_time ': 5.44, 'duration': 0.34},
  {'word': 'friend', 'start_time ': 5.8, 'duration': 1.86},
  {'word': 'that', 'start_time ': 8.14, 'duration': 0.3}],
 'confidence': -17.595313706775393}

In [75]:
import json
import pickle

In [74]:
meeting_facilitator.transcriber.transcribe(filepath, 0)

{'sentence': 'hello my friend that',
 'words': [{'word': 'hello', 'start_time ': 0.0, 'duration': 5.42},
  {'word': 'my', 'start_time ': 5.44, 'duration': 0.34},
  {'word': 'friend', 'start_time ': 5.8, 'duration': 1.86},
  {'word': 'that', 'start_time ': 8.14, 'duration': 0.3}],
 'characters': <Swig Object of type 'Metadata *' at 0x7f92f93ce270>,
 'confidence': -17.595313706775393}