# Script generator

Takes separated audio channels and generates a time-stamped, word-level script (one CSV file per audio file).

In [None]:
import wave
import json
import numpy as np

from vosk import Model, KaldiRecognizer, SetLogLevel
from utils import Word as custom_Word

from utils import io_utils

In [None]:
# Load the Vosk speech-recognition model once; the transcription cell reuses
# this single instance for every audio file.
MODEL_DIR = 'models/vosk-model-en-us-0.22'
model = Model(MODEL_DIR)

In [None]:
# Transcribe every audio file found under each project's
# 'audio/wav/mix/ch0_inv_ch1_added' media folder with Vosk and write one CSV
# per audio file into the project's 'script/full/csv' media folder.
# Each CSV starts with the header row word,t_start,t_stop,conf followed by one
# row per recognised word (row contents come from utils.Word.to_numpy_array()).
proj_tree = io_utils.ProjectTree('../projects')

for project in proj_tree.get_projects():
    audio_src = project.get_media('audio/wav/mix/ch0_inv_ch1_added')
    script_csv_dst = project.make_media('script/full/csv')
    #script_txt_dst = project.make_media('script/full/txt')

    print(project.get_name() + ':')

    for audio_src_path, file_name in zip(audio_src.get_file_paths(), audio_src.get_file_names()):
        print('\tGenerating script for ' + audio_src_path + '...')

        results = []

        # The context manager closes the WAV handle even if recognition raises;
        # previously the file was never closed.
        with wave.open(audio_src_path) as wav:
            # NOTE(review): Vosk's reference examples expect mono 16-bit PCM
            # input — confirm the upstream mixing step guarantees that format.
            rec = KaldiRecognizer(model, wav.getframerate())
            rec.SetWords(True)

            # Feed the audio in chunks of 4000 frames; AcceptWaveform() returns
            # true when the recogniser has a finished utterance to report.
            while True:
                data = wav.readframes(4000)
                if not data:
                    break
                if rec.AcceptWaveform(data):
                    results.append(json.loads(rec.Result()))

        # Flush whatever the recogniser is still buffering after the last chunk.
        results.append(json.loads(rec.FinalResult()))

        # Collect all rows first and stack once: calling np.vstack per word
        # re-copies the whole array every time (quadratic in the word count).
        rows = [np.array(['word', 't_start', 't_stop', 'conf'])]

        for sentence in results:
            # Silent stretches yield {'text': ''} with no 'result' key, so look
            # the key up directly instead of relying on the dict's length.
            for obj in sentence.get('result', []):
                rows.append(custom_Word.Word(obj).to_numpy_array())

        # np.vstack always yields a 2-D array, so a file with no recognised
        # words still gets a proper one-line header. The old 1-D array made
        # np.savetxt write each header field on its own line.
        word_array = np.vstack(rows)

        np.savetxt(script_csv_dst.get_root() + '/' + file_name + '.csv', word_array, fmt='%s', delimiter=',')