In [5]:
class Timestamp:
    """Plain record describing one timed word or merged segment.

    Every constructor argument is stored unchanged on the instance under the
    same name; no validation or conversion is performed.
    """

    def __init__(
        self,
        start=0.0,
        end=0.0,
        word='word',
        isInclude=False,
        feature=None,
        label=None,
    ):
        # Span boundaries (presumably seconds -- confirm against callers).
        self.start, self.end = start, end
        # Text attached to the span and whether it should be kept.
        self.word = word
        self.isInclude = isInclude
        # Optional payloads supplied by the caller.
        self.feature, self.label = feature, label

In [6]:
"""
/*+----------------------------------------------------------------------
 ||
 ||  Class Timecode
 ||
 ||         Author:  Zhong Ming Tan
 ||
 ||        Purpose:  To make a robust timecode object and improve the existing python timecode class
 ||
 ||  Inherits From:  None
 ||
 ||     Interfaces:  None
 ||
 |+-----------------------------------------------------------------------
 ||
 ||      Constants:  None
 ||
 |+-----------------------------------------------------------------------
 ||
 ||   Constructors:  Timecode(fps=30, hours=0, minutes=0, seconds=0, frames=0)
 ||
 ||  Class Methods:  None
 ||
 ||  Inst. Methods:  set_fps(float), set_by_seconds(int), set_by_frames(int), set_by_timecode(str),
 ||                  ceil_frames(), floor_frames(), round_frames(), get_fps():float, get_seconds():float,
 ||                  get_timecode():string, get_timecode_ffmpeg():string, get_time():array, get_frames():int
 ||
 ++-----------------------------------------------------------------------*/
"""
class Timecode:
    """Hours / minutes / seconds / frames timecode bound to a frame rate.

    The four components are stored as separate integers in ``hours``,
    ``minutes``, ``seconds`` and ``frames``; ``framerate`` is kept as a float.
    """

    def __init__ (self, fps=30,hours=0,minutes=0,seconds=0,frames=0):
        """Store the frame rate as a float and each component as an int."""
        self.framerate = float(fps)
        self.hours = int(hours)
        self.minutes = int(minutes)
        self.seconds = int(seconds)
        self.frames = int(frames)

    def set_fps(self,fps):
        """Replace the frame rate; the stored components are not rescaled."""
        self.framerate = float(fps)

    def set_by_seconds(self, input):
        """Set the timecode from a number of seconds.

        ``input`` is truncated to a whole number of seconds before being
        converted to frames, so any fractional part is discarded.
        """
        self.set_by_frames(round(int(input) * self.framerate))

    def set_by_frames(self,frames):
        """Set all four components from an absolute frame count."""
        total_seconds = frames / self.framerate
        whole_seconds = int(total_seconds)
        self.hours = int(total_seconds / 3600)
        self.minutes = int(total_seconds / 60 % 60)
        self.seconds = int(total_seconds % 60)
        # The leftover fraction of a second is converted back to frames.
        self.frames = round((total_seconds - whole_seconds) * self.framerate)

    def set_by_timecode(self,timecode):
        """Parse a ``HH:MM:SS:FF`` string into the four components.

        Raises:
            IndexError: if fewer than four ``:``-separated fields are given.
            ValueError: if a field is not an integer.
        """
        fields = timecode.split(':')
        self.hours = int(fields[0])
        self.minutes = int(fields[1])
        self.seconds = int(fields[2])
        self.frames = int(fields[3])

    def get_fps(self):
        """Return the frame rate as a float."""
        return self.framerate

    def get_seconds(self):
        """Return the timecode as seconds, frames included as a fraction."""
        total_seconds = ((self.hours*3600) + (self.minutes*60) + (self.seconds))
        total_seconds = total_seconds + (self.frames/self.framerate)
        return total_seconds

    def get_time(self):
        """Return ``[hours, minutes, seconds, frames]``."""
        return [self.hours, self.minutes, self.seconds, self.frames]

    def get_timecode(self):
        """Return the timecode formatted as zero-padded ``HH:MM:SS:FF``."""
        return '{h:02d}:{m:02d}:{s:02d}:{f:02d}'.format(
            h=self.hours, m=self.minutes, s=self.seconds, f=self.frames)

    def get_timecode_ffmpeg(self):
        """Return ``HH:MM:S.ffffff`` with the frames folded into the seconds."""
        fractional_seconds = float(self.seconds + (self.frames / self.framerate))
        return '{h:02d}:{m:02d}:{s:f}'.format(
            h=self.hours, m=self.minutes, s=fractional_seconds)

    def get_frames(self):
        """Return the absolute frame count, truncated to an int."""
        total_seconds = int((self.hours*3600) + (self.minutes*60) + (self.seconds))
        total_frames = int((total_seconds * self.framerate) + self.frames)
        return total_frames

    def ceil_frames(self):
        """Advance to the next whole second when a partial second is present."""
        if self.frames > 0:
            # set_by_seconds truncates, so adding one second lands exactly on
            # the following whole second with zero frames.
            self.set_by_seconds(self.get_seconds() + 1)

    def floor_frames(self):
        """Drop any partial second by resetting the frame component to zero."""
        if self.frames > 0:
            self.frames = 0

    def round_frames(self):
        """Round up to the next second when past the half-second mark.

        The previous implementation called a non-existent ``ceil_seconds``
        method and therefore raised ``AttributeError`` whenever rounding up
        was required.
        """
        threshold = self.framerate / 2
        if self.frames > threshold:
            self.ceil_frames()
        # NOTE(review): frame counts at or below the threshold are left
        # untouched rather than floored -- confirm that this is intended.


In [4]:
# Start Auto Edit Library

# os system processing library
import shutil
import json
from glob import glob
from shutil import move, rmtree, copyfile

# mathematic operation
import time
import math
import numpy as np
import pandas as pd

# audio related
import librosa
import librosa.display

# display library
import tqdm
from tqdm.notebook import tqdm

# Make numpy values easier to read.
np.set_printoptions(precision=3, suppress=True)


class FeatureExtraction:
    """Compute mel-spectrogram, MFCC (plus deltas) and RMS features with librosa.

    Typical use: load audio with :meth:`loadFile` or :meth:`load_y_sr`, then
    call :meth:`melspectrogram`, :meth:`extractmfcc` and :meth:`extractrmse`.
    Results are stored on the instance (``S``, ``log_S``, ``mfcc``,
    ``delta_mfcc``, ``delta2_mfcc``, ``M``, ``rmse``).

    The plotting methods rely on a ``plt`` name being available in the
    enclosing namespace (presumably ``matplotlib.pyplot`` -- it is not
    imported by this class).
    """

    def __init__(self, n_mels=128):
        """Create an empty extractor using ``n_mels`` mel bands."""
        self.n_mels = n_mels
        self.y = None
        self.sr = 11025
        self.S = None
        self.log_S = None
        self.mfcc = None
        self.delta_mfcc = None
        self.delta2_mfcc = None
        self.M = None
        self.rmse = None
        self.foldername = None
        self.filename = None

    def loadFile(self, foldernname):
        """Load the audio file at path ``foldernname`` through ``librosa.load``.

        Besides the samples and sample rate, the path is remembered in
        ``foldername`` and the file's base name (without extension) in
        ``filename`` so that plots are titled and saved under the real name
        instead of ``None``.
        """
        import os

        self.foldername = foldernname
        # Legacy misspelled attribute kept for any code that still reads it.
        self.foldernname = foldernname
        self.filename = os.path.splitext(os.path.basename(str(foldernname)))[0]
        self.y, self.sr = librosa.load(foldernname)

    def load_y_sr(self, y, sr):
        """Use already-decoded samples ``y`` with sample rate ``sr``."""
        self.y = y
        self.sr = sr

    def melspectrogram(self):
        """Compute the mel spectrogram into ``S`` and its dB form into ``log_S``."""
        # ``y`` is passed by keyword: recent librosa releases reject it as a
        # positional argument, and older ones accept the keyword as well.
        self.S = librosa.feature.melspectrogram(y=self.y, sr=self.sr, n_mels=self.n_mels)
        # NOTE(review): melspectrogram returns a power spectrogram by default,
        # for which librosa documents power_to_db; amplitude_to_db is kept so
        # that features stay comparable with previously extracted data.
        self.log_S = librosa.amplitude_to_db(self.S)

    def plotmelspectrogram(self, save=True):
        """Plot ``log_S``; when ``save`` is true write it to ``./mel/`` and close the figure."""
        import os

        fig = plt.figure(figsize=(12, 4))
        librosa.display.specshow(self.log_S, sr=self.sr, x_axis='time', y_axis='mel')
        plt.title(f'mel Power Spectrogram ({self.filename})')
        plt.colorbar(format='%+02.0f dB')
        plt.tight_layout()
        # The output directory is created whether or not the figure is saved.
        os.makedirs('mel', exist_ok=True)
        if save:
            fig.savefig(f'./mel/{self.filename}-mel.png', dpi=fig.dpi)
            print(f'Saved to ./mel/{self.filename}-mel.png')
            plt.close('all')

    def extractmfcc(self, n_mfcc=13):
        """Compute ``n_mfcc`` MFCCs from ``log_S`` plus first and second deltas.

        ``M`` holds the three matrices stacked vertically.
        """
        self.mfcc = librosa.feature.mfcc(S=self.log_S, n_mfcc=n_mfcc)
        self.delta_mfcc = librosa.feature.delta(self.mfcc, mode='nearest')
        self.delta2_mfcc = librosa.feature.delta(self.mfcc, order=2, mode='nearest')
        self.M = np.vstack([self.mfcc, self.delta_mfcc, self.delta2_mfcc])

    def plotmfcc(self,save=False):
        """Plot MFCC, delta and delta-delta; optionally save under ``./mfcc/``."""
        import os

        fig = plt.figure(figsize=(12, 6))
        panels = (
            # (row, data, y-axis label, extra specshow keyword arguments)
            (1, self.mfcc, 'MFCC', {}),
            # Raw strings: ``\D`` is not a valid escape sequence.
            (2, self.delta_mfcc, r'MFCC-$\Delta$', {}),
            (3, self.delta2_mfcc, r'MFCC-$\Delta^2$', {'sr': self.sr, 'x_axis': 'time'}),
        )
        for row, data, ylabel, kwargs in panels:
            plt.subplot(3, 1, row)
            librosa.display.specshow(data, **kwargs)
            plt.title(f'mel Power Spectrogram ({self.filename})')
            plt.ylabel(ylabel)
            plt.colorbar()

        plt.tight_layout()
        # The output directory is created whether or not the figure is saved.
        os.makedirs('mfcc', exist_ok=True)
        if save:
            fig.savefig(f'./mfcc/{self.filename}-mfcc.png', dpi=fig.dpi)
            print(f'Saved to ./mfcc/{self.filename}-mfcc.png')
            plt.close('all')

    def extractrmse(self):
        """Compute the RMS energy of ``y`` into ``rmse``."""
        self.rmse = librosa.feature.rms(y=self.y)

ModuleNotFoundError: No module named 'librosa'

In [None]:
# Start Auto Edit Library

# os system processing library
import shutil
import json
from glob import glob
from shutil import move, rmtree, copyfile

# mathematic operation
import time
import math
import numpy as np
import pandas as pd

# audio related
import librosa

# display library
import tqdm
from tqdm.notebook import tqdm

# Make numpy values easier to read.
np.set_printoptions(precision=3, suppress=True)

# developed library
import importlib

import module.FeatureExtraction

importlib.reload(module.FeatureExtraction)

from module.FeatureExtraction import FeatureExtraction

class Dataset:
    """Build, store and reload an 80-feature / 2-label audio dataset.

    Each sample is summarised as the mean and variance of every MFCC,
    delta-MFCC and delta-delta-MFCC row plus the mean and variance of the RMS
    energy, as produced by ``FeatureExtraction``.  ``X`` holds the feature
    rows (80 columns) and ``Y`` the one-hot labels (2 columns).
    """

    # One-hot target for each recognised label directory name.
    _LABEL_TO_TARGET = {'success': [0, 1], 'stuttered': [1, 0]}

    def __init__(self,):
        """Start with empty feature/label matrices and empty bookkeeping lists."""
        self.X = np.empty(shape=(0,80))
        self.Y = np.empty(shape=(0,2))
        self.DATASET = None
        # Matrix read by load_dataset(); stays None until a file is loaded.
        self.DATA = None
        self.PATH_ARRAY = []
        self.failed_file = []
        self.unexpected_label = []
        self.processed_counter = 0
        print("Object created!")

    def create_dataset(self,dataset_path,output_path):
        """Extract features for every labelled file and save them to ``output_path``."""
        self.DATASET_PATH = dataset_path
        self.OUTPUT_PATH = output_path
        self.__process_dataset()
        self.__write_to_file()

    @staticmethod
    def _summarise(features):
        """Flatten extracted features into the mean/variance feature vector."""
        vector = []
        for bank in (features.mfcc, features.delta_mfcc, features.delta2_mfcc):
            for coefficient in bank:
                vector.append(np.mean(coefficient))
                vector.append(np.var(coefficient))
        vector.append(np.mean(features.rmse))
        vector.append(np.var(features.rmse))
        return vector

    def _features_from_file(self, path):
        """Extract the feature vector of one audio file.

        A ``ValueError`` raised during extraction is recorded in
        ``failed_file`` (as the file path) and re-raised.
        """
        features = FeatureExtraction()
        try:
            features.loadFile(path)
            features.melspectrogram()
            features.extractmfcc()
            features.extractrmse()
        except ValueError:
            self.failed_file.append(path)
            raise
        return self._summarise(features)

    def get_feature_by_audio(self,y,sr):
        """Return the feature vector for in-memory samples ``y`` at rate ``sr``.

        Raises:
            ValueError: if feature extraction fails.  The exception is also
                appended to ``failed_file`` and printed; previously the code
                carried on with half-initialised features and crashed later
                with an unrelated error.
        """
        features = FeatureExtraction()
        try:
            features.load_y_sr(y,sr)
            features.melspectrogram()
            features.extractmfcc()
            features.extractrmse()
        except ValueError as error:
            self.failed_file.append(error)
            print(error)
            raise
        return self._summarise(features)

    def get_feature_by_file(self,audio):
        """Return the feature vector for the audio file at path ``audio``.

        Raises:
            ValueError: if feature extraction fails; the path is recorded in
                ``failed_file`` first.
        """
        print("Extacting feature:", audio)
        return self._features_from_file(audio)

    def __process_dataset(self):
        """Walk ``DATASET_PATH`` and extract features for every labelled file.

        Files directly inside the root are ignored; the name of the directory
        containing a file is its label.  Files under an unrecognised label are
        listed in ``unexpected_label`` and files whose extraction raises
        ``ValueError`` in ``failed_file``; neither contributes a row, so ``X``
        and ``Y`` always have the same number of rows.
        """
        starttime = time.time()
        root = os.path.normpath(self.DATASET_PATH)
        rows = []
        targets = []

        for dirpath, _dirnames, filenames in os.walk(self.DATASET_PATH):
            if os.path.normpath(dirpath) == root:
                continue
            label = os.path.basename(os.path.normpath(dirpath))
            print("Processing:", label)
            target = self._LABEL_TO_TARGET.get(label)

            for file in filenames:
                file_path = os.path.join(dirpath, file)
                if target is None:
                    self.unexpected_label.append(file_path)
                    print("Fail ", self.processed_counter, file_path,' label=',label)
                    continue
                try:
                    feature_vector = self._features_from_file(file_path)
                except ValueError:
                    # Already recorded in failed_file; skip this sample.
                    continue
                rows.append(feature_vector)
                targets.append(target)
                self.processed_counter += 1
                print("Done ", self.processed_counter, file_path,' label=',label)

        # Stack once at the end instead of re-allocating per sample.
        if rows:
            self.X = np.vstack((self.X, rows))
            self.Y = np.vstack((self.Y, targets))

        for path in self.unexpected_label:
            print("unexpected_label ", path, " !")

        for path in self.failed_file:
            print("fail ", path, " !")

        print('Time taken = {} seconds'.format(time.time() - starttime))
        self.DATASET = np.hstack((self.X,self.Y))

    def load_dataset(self,dataset_path):
        """Load a previously saved dataset into ``DATA``, ``X`` and ``Y``.

        ``.csv`` files are read as comma separated, ``.gz`` files with
        ``np.loadtxt`` defaults.  Returns ``None`` when the file is missing
        or was loaded, and ``False`` when the extension is unsupported.
        """
        self.DATASET_PATH = dataset_path

        if os.path.exists(self.DATASET_PATH):
            print("Dataset exist!")
        else:
            print('Not found ',self.DATASET_PATH)
            return

        self.FILE_NAME, self.FILE_TYPE = os.path.splitext(self.DATASET_PATH)

        print("Loading ", self.DATASET_PATH)
        if self.FILE_TYPE == '.csv':
            print('Detect as .csv file')
            data = np.genfromtxt(self.DATASET_PATH, delimiter=',')
        elif self.FILE_TYPE == '.gz':
            print('Detect as .gz file')
            data = np.loadtxt(self.DATASET_PATH)
        else:
            print("Only support .gz and .csv file")
            return False

        # A file holding a single sample is read back as a 1-D array; force
        # two dimensions so the column slicing below keeps working.
        self.DATA = np.atleast_2d(data)
        self.X = self.DATA[:, 0:80]
        self.Y = self.DATA[:, 80:]

    def convert_to_csv(self,output_file):
        """Write the loaded (or freshly created) dataset as CSV to ``output_file``.

        Raises:
            ValueError: if no dataset has been loaded or created yet.
        """
        data = self.DATA if self.DATA is not None else self.DATASET
        if data is None:
            raise ValueError('No dataset loaded or created; nothing to convert')
        if os.path.exists(output_file):
            os.remove(output_file)
        np.savetxt(output_file, data, delimiter=',')
        print('Converted to',output_file)

    def __write_to_file(self):
        """Save ``DATASET`` to ``OUTPUT_PATH``, replacing any existing file."""
        if os.path.exists(self.OUTPUT_PATH):
            os.remove(self.OUTPUT_PATH)

        np.savetxt(self.OUTPUT_PATH, self.DATASET)
        print('Saved to',self.OUTPUT_PATH)

    def get_x(self):
        """Return the feature matrix ``X``."""
        return self.X

    def get_y(self):
        """Return the label matrix ``Y``."""
        return self.Y

In [None]:

import vosk
import json
import pandas as pd
import numpy as np
import math

class VoskProcess:
    """Thin wrapper around a vosk model that transcribes audio to timed words."""

    def __init__(self,vosk_path='vosk-model-small-en-us-0.15'):
        """Load the vosk model stored at ``vosk_path`` (vosk logging is silenced)."""
        print('Loading vosk...')
        vosk.SetLogLevel(-1)
        self.VOSK_PATH = vosk_path
        self.vosk_model = vosk.Model(self.VOSK_PATH)
        print('Loaded vosk!')

    def extract_words(self,res):
        """Return the ``result`` list of a vosk JSON response, or ``[]`` if absent."""
        jres = json.loads(res)
        return jres.get('result', []) if isinstance(jres, dict) else []

    def transcribe_words(self,recognizer, bytes):
        """Feed PCM ``bytes`` to ``recognizer`` in 4000-byte chunks.

        Returns the concatenated word entries of every intermediate result
        followed by those of the final result.
        """
        results = []

        chunk_size = 4000
        for start in range(0, len(bytes), chunk_size):
            data = bytes[start:start + chunk_size]
            if recognizer.AcceptWaveform(data):
                results += self.extract_words(recognizer.Result())
        results += self.extract_words(recognizer.FinalResult())

        return results

    @staticmethod
    def _to_pcm16(audio):
        """Convert float samples to 16-bit PCM bytes.

        Samples are scaled by 32768 and clipped to the int16 range first; a
        sample of exactly +1.0 would otherwise overflow when cast to int16.
        """
        scaled = np.asarray(audio) * 32768
        return np.clip(scaled, -32768, 32767).astype(np.int16).tobytes()

    def transcribe(self,audioData, sample_rate=16000):
        """Transcribe ``audioData`` and return the words as a DataFrame.

        Args:
            audioData: float samples (assumed to lie in [-1, 1] -- confirm
                against the loader in use).
            sample_rate: rate handed to the recognizer; defaults to the
                previously hard-coded 16000.

        Returns:
            DataFrame built from the recognised word records, sorted by
            ``start`` when that column exists.  When nothing is recognised the
            frame is empty and returned unsorted instead of raising KeyError.
        """
        print('Creating recognizer ...')
        self.recognizer = vosk.KaldiRecognizer(self.vosk_model, sample_rate)
        print('Created recognizer')
        self.audioData = audioData
        pcm = self._to_pcm16(self.audioData)
        print('Transcribing...')
        res = self.transcribe_words(self.recognizer, pcm)
        df = pd.DataFrame.from_records(res)
        if 'start' in df.columns:
            df = df.sort_values('start')
        print('Completed transcribe')
        self.df = df
        return self.df

In [None]:
# Start Auto Edit Library

# os system processing library
import subprocess
import shutil
import json
from glob import glob
from shutil import move, rmtree, copyfile
import os

# mathematic operation
import time
import math
import numpy as np
import pandas as pd

# audio related
import librosa
import soundfile as sf
import vosk

# display library
from tqdm import tqdm
from tqdm.notebook import tqdm

# machine learning library
import tensorflow as tf

# Make numpy values easier to read.
np.set_printoptions(precision=3, suppress=True)

# developed library
import importlib

import module.Timecode
import module.FeatureExtraction
import module.Dataset
import module.Timestamp

importlib.reload(module.Timecode)
importlib.reload(module.FeatureExtraction)
importlib.reload(module.Dataset)
importlib.reload(module.Timestamp)

from module.Timecode import Timecode
from module.FeatureExtraction import FeatureExtraction
from module.Dataset import Dataset
from module.Timestamp import Timestamp


class AutoEdit:
    """Cut silent or filler-word sections out of a media file with ffmpeg.

    Two families of operations are provided:

    * volume based: ``extract_audio`` -> ``load_audio`` ->
      ``calc_has_loud_audio`` -> ``calc_should_include_frame`` ->
      ``calc_timecode`` followed by one of the export helpers;
    * speech based: ``vosk_process`` -> ``feature_process`` ->
      ``generate_complex_filter``.

    Generated ffmpeg commands are written to ``run.sh`` in the current working
    directory and executed with ``bash``.
    """

    # Words that are dropped when the model's rounded prediction equals 1.
    _FILLER_WORDS = frozenset(("i'm", 'um', 'm', 'ah', 'huh', 'hm'))

    def __init__(self, file=None, ba='160000', ac='1', ar='16000',output_format='.wav', fps = 30.0, 
                 st = 0.04, fm = 4, lt = 2.00, verbose = False, isRender = True,
                 log=False, mono = True, 
                 model='Model/mymodel_78_18.h5'):
        """Store the configuration; no file is touched here.

        Args:
            file: input media path, or ``None``.
            ba, ac, ar: audio bitrate, channel count and sample rate passed to
                ffmpeg.
            output_format: extension of the extracted audio file.
            fps: frame rate used to split the audio into frames.
            st: silence threshold (fraction of the peak volume).
            fm: number of frames of margin kept around loud frames.
            lt: loudness threshold (fraction of the peak volume).
            verbose: print extra progress / status messages.
            isRender: when false, no ``run.sh`` script is written.
            log: stored but not used by the code in this class.
            mono: passed to ``librosa.load``.
            model: path of the Keras model loaded by ``feature_process``.
        """
        self.MODEL_PATH = model
        self.INPUT_FILE = file
        self.AUDIO_OUTPUT_FORMAT = output_format
        if file is not None:
            # splitext removes only the final extension, so dotted paths such
            # as './clips/my.video.mp4' keep their directory and full stem.
            self.FILENAME = os.path.splitext(file)[0]
            self.AUDIO_OUTPUT = f'{self.FILENAME}{self.AUDIO_OUTPUT_FORMAT}'
        else:
            self.FILENAME = None
            self.AUDIO_OUTPUT = None

        # Parameters handed to ffmpeg when converting the file.
        self.BITRATE_AUDIO = ba
        self.AUDIO_CHANEL = ac
        self.AUDIO_RATE = ar
        self.FRAME_RATE = fps

        self.FRAME_MARGIN = fm
        self.SILENT_THRESHOLD = st
        self.LOUDNESS_THRESHOLD = lt

        self.VERBOSE = verbose

        self.audioData = None
        self.sampleRate = None

        self.audioSampleCount = None
        self.maxAudioVolume = None
        self.samplesPerFrame = None
        self.audioFrameCount = None
        self.hasLoudAudio = None

        self.chunks = None
        self.shouldIncludeFrame = None
        self.timecodeList = None
        self.chunks_path = 'chunks.txt'
        self.log = log
        self.isMono = mono
        self.VOSK_PATH = 'vosk-model-small-en-us-0.15'
        self.isRender = isRender

    def extract_audio(self):
        """Extract the audio track of ``INPUT_FILE`` to ``AUDIO_OUTPUT`` via ffmpeg.

        Does nothing except print a message when no input file is configured.
        """
        if self.INPUT_FILE is None:
            print("No input file!")
            return

        cmd = ['ffmpeg', '-y']
        if not self.VERBOSE:
            # Placed before the output path; options after the last output
            # are trailing options for ffmpeg.
            cmd.extend(['-nostats', '-loglevel', '0'])
        cmd.extend(['-i', str(self.INPUT_FILE), '-acodec', 'pcm_s16le',
                    '-b:a', str(self.BITRATE_AUDIO), '-ac', str(self.AUDIO_CHANEL),
                    '-ar', str(self.AUDIO_RATE), '-vn', f'{self.AUDIO_OUTPUT}'])
        subprocess.call(cmd)

    def get_max_volume(self,s):
        """Return the largest absolute sample value of ``s`` as a float."""
        maxv = float(np.max(s))
        minv = float(np.min(s))
        return max(maxv, -minv)

    def load_audio(self):
        """Load ``AUDIO_OUTPUT`` with librosa and derive the frame geometry."""
        self.audioData, self.sampleRate = librosa.load(
            f'{self.AUDIO_OUTPUT}', mono=self.isMono, sr=self.sampleRate)

        # NOTE(review): shape[0] is used as the sample count, which assumes a
        # 1-D (mono) array -- confirm the behaviour wanted for mono=False.
        self.audioSampleCount = self.audioData.shape[0]
        self.maxAudioVolume = self.get_max_volume(self.audioData)
        self.samplesPerFrame = self.sampleRate / self.FRAME_RATE
        self.audioFrameCount = int(math.ceil(self.audioSampleCount / self.samplesPerFrame))

    def get_shape(self):
        """Return the shape of the loaded audio array."""
        return self.audioData.shape

    def calc_has_loud_audio(self):
        """Classify every frame as silent (0), audible (1) or loud (2).

        A frame's level is its peak divided by the peak of the whole clip.  A
        completely silent clip (peak 0) yields level 0 for every frame instead
        of a division by zero.
        """
        self.hasLoudAudio = np.zeros((self.audioFrameCount))
        peak = self.maxAudioVolume

        for i in range(self.audioFrameCount):
            start = int(i * self.samplesPerFrame)
            end = min(int((i + 1) * self.samplesPerFrame), self.audioSampleCount)
            frame = self.audioData[start:end]
            level = self.get_max_volume(frame) / peak if peak else 0.0

            if level >= self.LOUDNESS_THRESHOLD:
                self.hasLoudAudio[i] = 2
            elif level >= self.SILENT_THRESHOLD:
                self.hasLoudAudio[i] = 1

    def calc_should_include_frame(self):
        """Build ``shouldIncludeFrame`` and the ``[start, end, flag]`` chunk list.

        A frame is included when any frame within ``FRAME_MARGIN`` frames of
        it is audible.  Consecutive frames with the same flag form one chunk
        covering frames ``start`` (inclusive) to ``end`` (exclusive).
        """
        frame_count = self.audioFrameCount
        self.shouldIncludeFrame = np.zeros((frame_count))
        if frame_count == 0:
            self.chunks = []
            return

        chunks = [[0,0,0]]  # sentinel giving the first real chunk its start
        for i in range(frame_count):
            start = int(max(0, i - self.FRAME_MARGIN))
            end = int(min(frame_count, i + 1 + self.FRAME_MARGIN))
            self.shouldIncludeFrame[i] = min(1, np.max(self.hasLoudAudio[start:end]))

            if i >= 1 and self.shouldIncludeFrame[i] != self.shouldIncludeFrame[i-1]:
                chunks.append([chunks[-1][1], i, self.shouldIncludeFrame[i-1]])

        # The trailing chunk takes the flag of the last frame; using the
        # frame before it mislabels the chunk when the flag flips on the
        # final frame.
        chunks.append([chunks[-1][1], frame_count, self.shouldIncludeFrame[-1]])
        self.chunks = chunks[1:]

    def calc_timecode(self):
        """Convert every chunk into ``[start Timecode, end Timecode, flag]``."""
        self.timecodeList = []

        for first_frame, last_frame, isInclude in self.chunks:
            startTime = Timecode(fps=self.FRAME_RATE)
            endTime = Timecode(fps=self.FRAME_RATE)
            startTime.set_by_frames(first_frame)
            endTime.set_by_frames(last_frame)
            self.timecodeList.append([startTime, endTime, isInclude])

    def execute(self):
        """Run ``run.sh`` with bash when it exists.

        Returns:
            The script's exit status, or ``None`` when ``run.sh`` is missing.
        """
        print('Executing command...')
        output = None
        if os.path.exists('run.sh'):
            output = subprocess.call(['bash', './run.sh'])
        if self.VERBOSE:
            if output == 0:
                print("Complex filter command success")
            else:
                print("Complex filter command failed!")
        return output

    def write_to_bat(self,command):
        """Write ``command`` to ``run.sh`` and return that file name.

        Returns ``None`` without writing anything when rendering is disabled.
        """
        if not self.isRender:
            return None
        filename = 'run.sh'
        # Mode 'w' truncates any previous script.
        with open(filename, 'w') as script:
            script.write(command)
        return filename

    def produce_concat_file(self):
        """Write the ffmpeg concat list (``chunks_path``) for included chunks."""
        with open(self.chunks_path, 'w') as f:
            for startTime, endTime, flag in self.timecodeList:
                if float(flag) < 1:
                    continue
                f.write(f'file {self.INPUT_FILE}\n'
                        f'inpoint {startTime.get_seconds()}\n'
                        f'outpoint {endTime.get_seconds()}\n')

    def concat_way(self):
        """Render ``<FILENAME>_CONCATED.mp4`` with ffmpeg's concat demuxer."""
        concat = ['ffmpeg','-y','-f','concat','-safe','0','-i', f'{self.chunks_path}',
                 '-async','1','-framerate', f'{self.FRAME_RATE}','-b:a', f'{self.BITRATE_AUDIO}',
                 '-c:v', 'copy', '-ar', f'{self.AUDIO_RATE}', '-ac', f'{self.AUDIO_CHANEL}',
                 '-c:a','aac','-movflags','+faststart',f'{self.FILENAME}_CONCATED.mp4']
        subprocess.call(concat)

    def select_filter(self):
        """Render ``<FILENAME>_FILTERED.mp4`` using select/aselect filters."""
        import shlex

        between = [
            f'between(t,{start.get_seconds()},{end.get_seconds()})'
            for start, end, flag in self.timecodeList if flag > 0
        ]
        betweens = '+'.join(between)
        slt = f'"select=\'{betweens}\',setpts=N/FRAME_RATE/TB"'
        aslt = f'"aselect=\'{betweens}\',asetpts=N/SR/TB"'

        # File names are shell-quoted because the command is run by bash.
        parts = ['ffmpeg', '-y', '-i', shlex.quote(f'{self.INPUT_FILE}'),
                 '-vf', slt, '-af', aslt,
                 shlex.quote(f'{self.FILENAME}_FILTERED.mp4')]
        self.write_to_bat(' '.join(parts))
        output = self.execute()
        if self.VERBOSE:
            if output == 0:
                print("Select filter command success")
            else:
                print("Select filter command failed!")

    @staticmethod
    def _trim_concat_graph(spans):
        """Return the double-quoted filter_complex graph for ``(start, end)`` spans."""
        trim = []
        for number, (start, end) in enumerate(spans):
            trim.append(
                f'[0:v]trim=start={start}:end={end},setpts=PTS-STARTPTS[v{number}]')
            trim.append(
                f'[0:a]atrim=start={start}:end={end},asetpts=PTS-STARTPTS[a{number}]')

        graph = ';'.join(trim) + ';'
        graph += ''.join(f' [v{number}] [a{number}]' for number in range(len(spans)))
        graph += f'concat=n={len(spans)}:v=1:a=1 [out]'
        return '"' + graph + '"'

    def _render_command(self, spans, suffix):
        """Return the full ffmpeg command rendering ``spans`` to ``<FILENAME>_<suffix>.mp4``."""
        import shlex

        source = shlex.quote(f'{self.INPUT_FILE}')
        target = shlex.quote(f'{self.FILENAME}_{suffix}.mp4')
        return (f'ffmpeg -y -i {source} -filter_complex '
                + self._trim_concat_graph(spans)
                + f' -map "[out]" {target}')

    def _included_spans(self):
        """Return ``(start, end)`` seconds of every included timecode chunk."""
        return [(start.get_seconds(), end.get_seconds())
                for start, end, flag in self.timecodeList if flag > 0]

    def remove_silence(self):
        """Write (but do not run) the command producing ``<FILENAME>_SILENCE.mp4``."""
        self.write_to_bat(self._render_command(self._included_spans(), 'SILENCE'))

    def fliter_complex(self):
        """Write and run the command producing ``<FILENAME>_COMPLEX.mp4``."""
        self.write_to_bat(self._render_command(self._included_spans(), 'COMPLEX'))
        # Report the real exit status rather than a hard-coded failure.
        output = self.execute()
        if self.VERBOSE:
            if output == 0:
                print("Complex filter command success")
            else:
                print("Complex filter command failed!")

    def post_process(self):
        """Delete the concat list and the extracted audio; return the COMPLEX output name."""
        for leftover in (f'{self.chunks_path}', f'{self.AUDIO_OUTPUT}'):
            if os.path.exists(leftover):
                os.remove(leftover)
                if self.VERBOSE:
                    print(f"Removed {leftover}")
        return f'{self.FILENAME}_COMPLEX.mp4'

    def export_complex(self):
        """Run the volume-based pipeline and render with the trim/concat graph."""
        self.pbar = tqdm(total=7)
        try:
            print("Start processing...")
            for step in (self.extract_audio, self.load_audio, self.calc_has_loud_audio,
                         self.calc_should_include_frame, self.calc_timecode):
                step()
                self.update_mypbar()

            print(f'Exporting {self.FILENAME}_COMPLEX.mp4 ...')
            self.fliter_complex()
            self.update_mypbar()
            print(f'Exported {self.FILENAME}_COMPLEX.mp4 successfully!')

            self.post_process()
            self.update_mypbar()
        finally:
            # Close the bar even when a step raises.
            self.pbar.close()

    def export_fast(self):
        """Run the volume-based pipeline and render with the concat demuxer.

        Failures are reported on stdout (details only when verbose) and the
        expected output name is returned either way.
        """
        try:
            self.extract_audio()
            self.load_audio()
            self.calc_has_loud_audio()
            self.calc_should_include_frame()
            self.calc_timecode()
            self.produce_concat_file()
            self.concat_way()
            self.post_process()
            if self.VERBOSE:
                print(f'Exported {self.FILENAME}_CONCATED.mp4 successfully!')
        except Exception as error:  # best effort; KeyboardInterrupt still propagates
            print('Failed to export fast!')
            if self.VERBOSE:
                print(error)

        return f'{self.FILENAME}_CONCATED.mp4'

    def update_mypbar(self):
        """Advance the progress bar by one step and redraw it."""
        self.pbar.update(1)
        time.sleep(0.01)
        self.pbar.refresh()

    def export_good(self):
        """Run the volume-based pipeline and render with select/aselect filters.

        Failures are reported on stdout (details only when verbose) and the
        expected output name is returned either way.
        """
        self.pbar = tqdm(total=7)
        try:
            print("Start processing...")
            for step in (self.extract_audio, self.load_audio, self.calc_has_loud_audio,
                         self.calc_should_include_frame, self.calc_timecode):
                step()
                self.update_mypbar()

            print(f'Exporting {self.FILENAME}_FILTERED.mp4 ...')
            self.select_filter()
            self.update_mypbar()
            print(f'Exported {self.FILENAME}_FILTERED.mp4 successfully!')

            self.post_process()
            self.update_mypbar()
        except Exception as error:  # best effort; KeyboardInterrupt still propagates
            print(f'Failed to export {self.FILENAME}_FILTERED.mp4 !')
            if self.VERBOSE:
                print(error)
        finally:
            self.pbar.close()
        return f'{self.FILENAME}_FILTERED.mp4'

    def extract_words(self,res):
        """Return the ``result`` list of a vosk JSON response, or ``[]`` if absent."""
        jres = json.loads(res)
        return jres.get('result', []) if isinstance(jres, dict) else []

    def transcribe_words(self,recognizer, bytes):
        """Feed PCM ``bytes`` to ``recognizer`` in 4000-byte chunks and collect the words."""
        results = []

        chunk_size = 4000
        for start in range(0, len(bytes), chunk_size):
            data = bytes[start:start + chunk_size]
            if recognizer.AcceptWaveform(data):
                results += self.extract_words(recognizer.Result())
        results += self.extract_words(recognizer.FinalResult())

        return results

    @staticmethod
    def _to_pcm16(audio):
        """Convert float samples to 16-bit PCM bytes, clipping to the int16 range."""
        scaled = np.asarray(audio) * 32768
        return np.clip(scaled, -32768, 32767).astype(np.int16).tobytes()

    def vosk_process(self):
        """Transcribe the loaded audio with vosk and store the words in ``self.df``.

        The frame is sorted by ``start`` when that column exists; when nothing
        is recognised it is stored empty instead of raising KeyError.
        """
        print('Loading vosk...')
        vosk.SetLogLevel(-1)
        pcm = self._to_pcm16(self.audioData)
        vosk_model = vosk.Model(self.VOSK_PATH)
        # Use the rate the audio was actually loaded at; fall back to the
        # previously hard-coded 16000 when it is unknown.
        recognizer = vosk.KaldiRecognizer(vosk_model, self.sampleRate or 16000)
        print('Transcribing...')
        res = self.transcribe_words(recognizer, pcm)
        df = pd.DataFrame.from_records(res)
        if 'start' in df.columns:
            df = df.sort_values('start')
        print('Completed transcribe')
        self.df = df

    def feature_process(self):
        """Classify every transcribed word and build the list of spans to render.

        Sets ``predictions`` (model output per word), ``include_list`` (kept
        words as ``Timestamp`` objects) and ``render_list`` (kept words merged
        into contiguous spans).  The extracted features are also written to
        ``<FILENAME>_feature.csv``.
        """
        df = self.df
        if len(df.index) == 0:
            # Nothing was transcribed, so there is nothing to classify or merge.
            self.predictions = np.empty(shape=(0,))
            self.include_list = []
            self.render_list = []
            return

        model = tf.keras.models.load_model(self.MODEL_PATH)
        feature_file = f'{self.FILENAME}_feature.csv'

        if os.path.exists(feature_file):
            os.remove(feature_file)

        print("Extracting feature...")
        ds = Dataset()
        rows = []
        for i in tqdm(df.index[:]):
            start_index = max(0, int(df['start'][i] * self.sampleRate))
            end_index = min(int(df['end'][i] * self.sampleRate), self.audioSampleCount)
            # NOTE(review): 11025 is passed as the sample rate although the
            # audio was loaded at self.sampleRate -- left unchanged because the
            # trained model may depend on it; confirm.
            rows.append(ds.get_feature_by_audio(self.audioData[start_index:end_index], 11025))
        features = np.array(rows, dtype=float).reshape(-1, 80)
        print(f'Saved features to {feature_file}')
        np.savetxt(feature_file, features, delimiter=',')

        print(f'Load feature from {feature_file}')
        # ndmin=2 keeps a single word as one row rather than a flat vector.
        features = np.loadtxt(feature_file, delimiter=',', ndmin=2)

        print('Predicting...')
        predictions = model.predict(x=features, batch_size=84,verbose=0)
        print("Finish predict!")

        self.predictions = predictions
        include_list = []
        # Prediction rows follow the iteration order above, so they are
        # addressed by position; the index labels of the sorted frame need not
        # be 0..n-1 in order.
        for position, i in enumerate(tqdm(df.index[:])):
            isInclude = True
            predict = np.round(predictions[position])
            word = df['word'][i]
            if word in self._FILLER_WORDS:
                # NOTE(review): this comparison is only unambiguous for a
                # single-output model -- confirm the model's output shape.
                if predict == 1:
                    isInclude = False
            if isInclude:
                include_list.append(
                    Timestamp(df['start'][i], df['end'][i], word=word, label=predict))

        self.include_list = include_list
        if not include_list:
            self.render_list = []
            return

        render_list = []
        start = include_list[0].start
        word = ""
        for i, ts in tqdm(enumerate(include_list)):
            # A gap between this word and the previous one closes the span.
            if i >= 1 and ts.start != include_list[i-1].end:
                render_list.append(Timestamp(start, include_list[i-1].end, word=word))
                word = ''
                start = ts.start
            word = word + ts.word + " "

        # Close the final span from where it started, not from the last word,
        # so a multi-word final span is not truncated.
        render_list.append(Timestamp(start, include_list[-1].end, word))
        self.render_list = render_list

    def generate_complex_filter(self):
        """Build the trim/concat ffmpeg command for ``render_list``.

        With ``isRender`` set the full command is written to ``run.sh``;
        otherwise only the ``-filter_complex ... -map "[out]"`` fragment is
        built.  Either way the string is stored in ``self.filter``.
        """
        print('Generating complex filter...')
        spans = [(ts.start, ts.end) for ts in self.render_list]

        if self.isRender:
            command = self._render_command(spans, 'COMPLEX')
            self.write_to_bat(command)
            print('Complete complex filter...')
        else:
            command = '-filter_complex ' + self._trim_concat_graph(spans) + ' -map "[out]"'
        self.filter = command
        
        
