# Spartan Stuttering Lab

In [126]:
import os

import openai

# Read the API key from the environment instead of hard-coding it in the
# notebook, so the secret never ends up in saved or shared copies.
# If OPENAI_API_KEY is unset this assigns None; set the variable before
# launching the notebook.
openai.api_key = os.environ.get("OPENAI_API_KEY")

In [127]:
import os
from datetime import datetime

import librosa
import numpy as np
import soundfile as sf

class ssl:
    """Split a long recording into chunks and transcribe them with Whisper.

    Typical use is ``load`` -> ``split_audio`` -> ``save_audio_chunks`` ->
    ``transcribe_batch`` -> ``save_output``, or simply ``run_all``.

    NOTE(review): the class name is the same as the standard-library ``ssl``
    module; it is kept unchanged here so existing callers keep working.
    """

    lab_name = 'Spartan Stuttering Lab'

    def __init__(self):
        # Source recording and its decoded samples.
        self.path = None
        self.sr = None
        self.y = None
        self.duration = None
        self.total_time = None
        # Chunking parameters (set by split_audio).
        self.step = None
        self.contour = None
        self.buffer = None
        # In-memory chunks and the files they were written to.
        self.audio_chunks = []
        self.chunk_paths = []

    def load(self, path, duration=None, sr=None):
        """Load the audio file at *path* with ``librosa.load``.

        Args:
            path: Path of the audio file.
            duration: Forwarded to ``librosa.load`` as ``duration``.
            sr: Forwarded to ``librosa.load`` as ``sr``.

        A missing file is reported with a printed message rather than an
        exception; in that case ``self.y`` is reset to ``None`` so audio from
        an earlier successful load is not paired with the new path.
        """
        self.path = path
        self.sr = sr
        self.duration = duration
        try:
            self.y, self.sr = librosa.load(self.path, sr=self.sr, duration=self.duration)
        except FileNotFoundError:
            self.y = None
            print(f"\nError: The file {self.path} was not found.")
        else:
            print('\nload: File is loaded\n')

    def split_audio(self, contour=2, buffer=5):
        """Cut the loaded samples into consecutive chunks.

        Args:
            contour: Chunk length in minutes (may be fractional).
            buffer: Extra seconds appended to every chunk so that speech at a
                chunk boundary is also present in the chunk before it.

        Raises:
            RuntimeError: If no audio has been loaded.
            ValueError: If *contour* yields a chunk length of zero samples
                or less.
        """
        if self.y is None or self.sr is None:
            raise RuntimeError('split_audio: no audio is loaded; call load() first')

        self.contour = contour
        self.buffer = buffer

        self.total_time = len(self.y) / self.sr
        # Sample counts must be integers for range() and slicing, even when
        # contour/buffer are fractional or the sample rate is a float.
        self.step = int(self.sr * contour * 60)
        if self.step <= 0:
            raise ValueError('split_audio: contour must give a positive chunk length')
        overlap = int(self.sr * self.buffer)

        # Rebuild the list from scratch so calling this twice does not
        # duplicate chunks.
        self.audio_chunks = [
            self.y[start: start + self.step + overlap]
            for start in range(0, len(self.y), self.step)
        ]
        print('split_audio: audio is sucessfully splitted\n')

    def save_audio_chunks(self, path_new=None):
        """Write every chunk to disk and remember the resulting file paths.

        Args:
            path_new: Target directory. When omitted, a directory named
                ``chunk_files - <source name without extension>`` is created
                next to the source file.
        """
        source_dir, source_name = os.path.split(self.path)
        if path_new:
            self.path_new = path_new
        else:
            stem = os.path.splitext(source_name)[0]
            self.path_new = os.path.join(source_dir, 'chunk_files - ' + stem)

        os.makedirs(self.path_new, exist_ok=True)

        # Start clean so a repeated call does not list every chunk twice.
        self.chunk_paths = []
        for i, chunk in enumerate(self.audio_chunks):
            self.chunk_name = f'{i}___{source_name}'
            chunk_path = os.path.join(self.path_new, self.chunk_name)
            sf.write(chunk_path, chunk, self.sr)
            self.chunk_paths.append(chunk_path)
        print('save_audio_chunks : chunked audio files are successfully saved\n')

    def transcribe(self, audioArray):
        """Transcribe one audio file with the ``whisper-1`` model.

        Args:
            audioArray: Path of the audio file to send (despite the name, it
                is opened as a file path).

        Returns:
            The value returned by ``openai.Audio.transcribe`` on success, or
            the caught exception object when anything goes wrong.
        """
        try:
            # The context manager guarantees the file handle is closed.
            with open(audioArray, "rb") as audio_file:
                # NOTE(review): `timesteps` is kept from the original call but
                # is not a transcription parameter I can confirm - verify it
                # against the API reference.
                return openai.Audio.transcribe(
                    file=audio_file,
                    model="whisper-1",
                    response_format="text",
                    language="en",
                    timesteps=True,
                    # user="messi"
                )
        except Exception as e:
            # Repeated ten times on purpose so the failure stands out among
            # the progress lines.
            print(f'Error transcribing {audioArray}: {e} \n' * 10)
            print('\n\n')
            return e

    def transcribe_batch(self):
        """Transcribe every saved chunk and assemble ``self.final_text``.

        Each chunk's text is preceded by a ``<start> - <end> mins`` heading
        derived from ``self.contour``. A chunk whose transcription failed is
        represented by a bracketed error note instead of aborting the batch.
        """
        self.text_list = []
        total = len(self.chunk_paths)

        for i, chunk_path in enumerate(self.chunk_paths):
            self.transcript = self.transcribe(chunk_path)
            self.text_list.append(self.transcript)
            progress = round(100 * (i + 1) / total, 1)
            print(f'                {i+1}/{total} is completed - Progress : {progress} % ')

        sections = []
        for i, result in enumerate(self.text_list):
            if isinstance(result, Exception):
                # transcribe() hands back the exception on failure.
                body = f'[transcription failed: {result}]'
            else:
                body = str(result).strip()
            start = i * self.contour
            sections.append(f'\n\n{start} - {start + self.contour} mins \n\n {body}')
        self.final_text = ''.join(sections)
        print('transcribe_batch: batch is sucessfully transcribed \n')

    def save_output(self, path_save=None):
        """Write ``self.final_text`` to a text file.

        Args:
            path_save: Destination; must end in ``.txt``. When omitted, the
                source path with its extension replaced by ``.txt`` is used.
        """
        if path_save is not None:
            if not path_save.endswith('.txt'):
                print('save output : wrong path provided. A text file is expected')
                return
            self.path_save = path_save
        else:
            # Only the final extension is swapped, so "a.b.wav" -> "a.b.txt".
            self.path_save = os.path.splitext(self.path)[0] + '.txt'

        with open(self.path_save, 'w', encoding='utf-8') as f:
            f.write(self.final_text)
        print(f'save_output: the output is successfully saved in \n {self.path_save} \n')

    def run_all(self, path, contour=2, buffer=5):
        """Run the full pipeline on *path*: load, split, save, transcribe, write.

        Args:
            path: Path of the audio file.
            contour: Chunk length in minutes, forwarded to ``split_audio``.
            buffer: Overlap in seconds, forwarded to ``split_audio``.
        """
        self.path = path
        self.load(path, duration=None)
        self.split_audio(contour=contour, buffer=buffer)
        self.save_audio_chunks()
        self.transcribe_batch()
        self.save_output()

In [124]:
%time

path = 'EDITED 2.wav'

messi = ssl()

messi.load(path, duration = None)
messi.split_audio(contour = 2)
messi.save_audio_chunks()
messi.transcribe_batch()
messi.save_output()

CPU times: user 4 μs, sys: 12 μs, total: 16 μs
Wall time: 42 μs

load: File is loaded

split_audio: audio is sucessfully splitted

save_audio_chunks : chunked audio files are successfully saved

                1/1 is completed - Progress : 100.0 % 
transcribe_batch: batch is sucessfully transcribed 

save_output: the output is successfully saved in 
 /Users/sivaguganjayachandran/Documents/Spartan stuttering lab/103/240418_1454/240418_1454_EDITED 2.txt 



In [130]:
# Disabled helper, kept inside a string literal so it does not run.
# When enabled it looks through each sub-directory of `path_1`, collects the
# files whose names end in 'EDITED.wav', and leaves the sorted list of their
# paths in `path_list`.
'''
import os 
path_1 = ''

dir_path = [x for x in os.listdir(path_1) if os.path.isdir(path_1 + '/' + x)]
path_list = []

for i in dir_path:
    for j in  os.listdir(path_1 + '/' + i):
        if j[-10:] == 'EDITED.wav':
            path_list.append(path_1 + '/' + i + '/' + j)

path_list = sorted(path_list)
path_list
'''

"\nimport os \npath_1 = ''\n\ndir_path = [x for x in os.listdir(path_1) if os.path.isdir(path_1 + '/' + x)]\npath_list = []\n\nfor i in dir_path:\n    for j in  os.listdir(path_1 + '/' + i):\n        if j[-10:] == 'EDITED.wav':\n            path_list.append(path_1 + '/' + i + '/' + j)\n\npath_list = sorted(path_list)\npath_list\n"