## Working page leafthrough

In [None]:
import gradio as gr

def paginate(page, max_page, page_change):
    """Return the page reached by moving ``page_change`` pages away from ``page``.

    The lower bound (1) is applied first and the upper bound (``max_page``)
    second, so when ``max_page`` is below 1 the result is ``max_page``.
    """
    at_least_one = max(1, page + page_change)  # never below the first page
    return min(at_least_one, max_page)  # never beyond the last page

def paginate_go(page, max_page):
    """Validate the page typed by the user and clamp it to ``[1, max_page]``.

    Args:
        page: Value of the page field; anything ``int()`` accepts.
        max_page: Highest valid page number.

    Returns:
        The clamped page, or None when ``page`` cannot be converted to an
        integer (a message is printed in that case).
    """
    try:
        page = int(page)
    except (TypeError, ValueError):
        # TypeError covers None, which is what an emptied number field sends;
        # ValueError covers non-numeric text.
        print(f'Invalid page number: {page}')
        return None
    return paginate(page, max_page, 0)
def create_gradio_app():
    """Build the pagination demo: page and max-page fields plus Go/Next/Previous buttons.

    Each button writes the clamped page back into ``page_input``; the label is
    refreshed from ``page_input``'s ``change`` event.

    Returns:
        The ``gr.Blocks`` app (not launched).
    """
    with gr.Blocks() as app:
        with gr.Row():
            page_input = gr.Number(label="Page Number", value=1)
            max_page = gr.Number(label="Max Page", value=10)  # Assuming max 10 pages for testing
            go_button = gr.Button("Go to Page")
            next_button = gr.Button("Next")
            prev_button = gr.Button("Previous")
            current_page_label = gr.Label("Current Page: 1")

        def update_page_label(page):
            # A handler changes a component through its return value, so the new
            # text is returned and mapped to `current_page_label` by the listener below.
            return f"Current Page: {page}"

        go_button.click(fn=paginate_go, inputs=[page_input, max_page], outputs=page_input)
        next_button.click(fn=lambda x, y: paginate(x, y, 1), inputs=[page_input, max_page], outputs=page_input)
        prev_button.click(fn=lambda x, y: paginate(x, y, -1), inputs=[page_input, max_page], outputs=page_input)
        # The listener needs explicit inputs/outputs, otherwise the handler receives
        # no page and its result is written nowhere.
        page_input.change(fn=update_page_label, inputs=page_input, outputs=current_page_label)

    return app

# Build the pagination demo and start the Gradio server for this cell.
app = create_gradio_app()
app.launch()

## Working: going through audio files across different pages

In [None]:
import gradio as gr
import math
import os

# Items per page, used to compute the page count. The name says "images",
# but this cell pages through audio files.
IMAGES_TO_SHOW = 1  # Set to 1 since we're showing one audio file per page
loaded_audios = []  # Global variable to store the loaded audios

def paginate(page, max_page, page_change):
    """Return ``page + page_change`` limited to 1 from below, then to ``max_page`` from above."""
    target = page + page_change
    if target < 1:
        target = 1
    return min(target, max_page)

def get_audio_for_page(all_audios, page):
    """Return the entry of ``all_audios`` shown on the 1-based ``page``.

    Args:
        all_audios: Sequence of loaded audio entries (may be empty or None).
        page: 1-based page number; a whole-number float such as ``2.0`` is accepted.

    Returns:
        The matching entry, or None when nothing is loaded, the page is missing,
        not a whole number, or out of range.
    """
    if not all_audios or page is None:
        return None
    try:
        page_number = int(page)
    except (TypeError, ValueError, OverflowError):
        return None
    if page_number != page:
        # Fractional pages (1.5) or non-numeric values do not designate an entry.
        return None
    if 1 <= page_number <= len(all_audios):
        # Index with the int: a float passes the range check but cannot index a list.
        return all_audios[page_number - 1]
    return None  # Return None if the page is out of range

def update_audio_display(page):
    """Return the display values for ``page``: audio, file name, page and label text.

    The page count comes from the module-level ``loaded_audios`` list; the file
    name is empty when ``get_audio_for_page`` finds no audio for the page.
    """
    page_count = math.ceil(len(loaded_audios) / IMAGES_TO_SHOW)
    audio = get_audio_for_page(loaded_audios, page)
    if audio:
        name = os.path.basename(audio)
    else:
        name = ""
    return audio, name, page, f"Current Page: {page}/{page_count}"

def handle_pagination(page, delta):
    """Move ``delta`` pages from ``page`` (clamped by ``paginate``) and return the refreshed display values."""
    page_count = math.ceil(len(loaded_audios) / IMAGES_TO_SHOW)
    return update_audio_display(paginate(page, page_count, delta))

def handle_audio_load(audios):
    """Store the uploaded files in the module-level ``loaded_audios`` and show the first one.

    Args:
        audios: Value of the file component; may be None (for instance when the
            component is emptied).

    Returns:
        The display values produced by ``update_audio_display(1)``.
    """
    global loaded_audios
    # Always keep a list: every other handler calls len() on this global.
    loaded_audios = audios or []
    return update_audio_display(1)  # Display first audio

def create_gradio_app():
    """Build the audio browser: a multi-file loader, a player and page navigation.

    Returns:
        The ``gr.Blocks`` app (not launched).
    """
    with gr.Blocks() as app:
        with gr.Row():
            audio_loader = gr.File(label="Load Audio Files", file_count='multiple')
            audio_player = gr.Audio(label="Audio Player")
            audio_name_box = gr.Textbox(label="Audio File Name", interactive=True)

        with gr.Row():
            page_input = gr.Number(label="Page Number", value=1, visible=True)
            current_page_label = gr.Label("Current Page: 1/X")
            go_button = gr.Button("Go to Page")
            prev_button = gr.Button("Previous")
            next_button = gr.Button("Next")

        # Every handler refreshes the same four components, in the order
        # returned by update_audio_display.
        display_outputs = [audio_player, audio_name_box, page_input, current_page_label]

        audio_loader.change(fn=handle_audio_load, inputs=[audio_loader], outputs=display_outputs)
        next_button.click(fn=lambda page: handle_pagination(page, 1), inputs=[page_input], outputs=display_outputs)
        prev_button.click(fn=lambda page: handle_pagination(page, -1), inputs=[page_input], outputs=display_outputs)
        # A zero delta sends the typed page through the same clamping as Next/Previous,
        # so an out-of-range number lands on the nearest valid page instead of
        # blanking the player.
        go_button.click(fn=lambda page: handle_pagination(page, 0), inputs=[page_input], outputs=display_outputs)

    return app

# Build the audio browser and start the Gradio server for this cell.
app = create_gradio_app()
app.launch()


## Display audio files per page

In [None]:
import gradio as gr
import math
import os

# Items per page, used to compute the page count. The name says "images",
# but this cell pages through audio files.
IMAGES_TO_SHOW = 1  # Set to 1 since we're showing one audio file per page
loaded_audios = []  # Global variable to store the loaded audios

def paginate(page, max_page, page_change):
    """Return ``page + page_change`` limited to 1 from below, then to ``max_page`` from above."""
    target = page + page_change
    if target < 1:
        target = 1
    return min(target, max_page)

def get_audio_for_page(all_audios, page):
    """Return the entry of ``all_audios`` shown on the 1-based ``page``.

    Args:
        all_audios: Sequence of loaded audio entries (may be empty or None).
        page: 1-based page number; a whole-number float such as ``2.0`` is accepted.

    Returns:
        The matching entry, or None when nothing is loaded, the page is missing,
        not a whole number, or out of range.
    """
    if not all_audios or page is None:
        return None
    try:
        page_number = int(page)
    except (TypeError, ValueError, OverflowError):
        return None
    if page_number != page:
        # Fractional pages (1.5) or non-numeric values do not designate an entry.
        return None
    if 1 <= page_number <= len(all_audios):
        # Index with the int: a float passes the range check but cannot index a list.
        return all_audios[page_number - 1]
    return None  # Return None if the page is out of range

def update_audio_display(page):
    """Return the display values for ``page``: audio, file name, page and label text.

    The page count comes from the module-level ``loaded_audios`` list; the file
    name is empty when ``get_audio_for_page`` finds no audio for the page.
    """
    page_count = math.ceil(len(loaded_audios) / IMAGES_TO_SHOW)
    audio = get_audio_for_page(loaded_audios, page)
    if audio:
        name = os.path.basename(audio)
    else:
        name = ""
    return audio, name, page, f"Current Page: {page}/{page_count}"

def handle_pagination(page, delta):
    """Move ``delta`` pages from ``page`` (clamped by ``paginate``) and return the refreshed display values."""
    page_count = math.ceil(len(loaded_audios) / IMAGES_TO_SHOW)
    return update_audio_display(paginate(page, page_count, delta))

def handle_audio_load(audios):
    """Store the uploaded files in the module-level ``loaded_audios`` and show the first one.

    Args:
        audios: Value of the file component; may be None (for instance when the
            component is emptied).

    Returns:
        The display values produced by ``update_audio_display(1)``.
    """
    global loaded_audios
    # Always keep a list: every other handler calls len() on this global.
    loaded_audios = audios or []
    return update_audio_display(1)  # Display first audio

def create_gradio_app():
    """Build the audio browser: a multi-file loader, a player and page navigation.

    Returns:
        The ``gr.Blocks`` app (not launched).
    """
    with gr.Blocks() as app:
        with gr.Row():
            audio_loader = gr.File(label="Load Audio Files", file_count='multiple')
            audio_player = gr.Audio(label="Audio Player")
            audio_name_box = gr.Textbox(label="Audio File Name", interactive=True)

        with gr.Row():
            page_input = gr.Number(label="Page Number", value=1, visible=True)
            current_page_label = gr.Label("Current Page: 1/X")
            go_button = gr.Button("Go to Page")
            prev_button = gr.Button("Previous")
            next_button = gr.Button("Next")

        # Every handler refreshes the same four components, in the order
        # returned by update_audio_display.
        display_outputs = [audio_player, audio_name_box, page_input, current_page_label]

        audio_loader.change(fn=handle_audio_load, inputs=[audio_loader], outputs=display_outputs)
        next_button.click(fn=lambda page: handle_pagination(page, 1), inputs=[page_input], outputs=display_outputs)
        prev_button.click(fn=lambda page: handle_pagination(page, -1), inputs=[page_input], outputs=display_outputs)
        # A zero delta sends the typed page through the same clamping as Next/Previous,
        # so an out-of-range number lands on the nearest valid page instead of
        # blanking the player.
        go_button.click(fn=lambda page: handle_pagination(page, 0), inputs=[page_input], outputs=display_outputs)

    return app

# Build the audio browser and start the Gradio server for this cell.
app = create_gradio_app()
app.launch()



## Simplified script for managing one audio per page

In [None]:
import gradio as gr
import os

def create_gradio_app():
    """Build the one-audio-per-page browser, keeping the loaded files in a closure.

    Returns:
        The ``gr.Blocks`` app (not launched).
    """
    loaded_audios = []  # Stores the loaded audio files

    def change_audio(index):
        """Return (audio, file name, 1-based page, label text) for the 0-based ``index``.

        Placeholder values are returned when ``index`` is out of range.
        """
        if 0 <= index < len(loaded_audios):
            audio_file = loaded_audios[index]
            audio_name = os.path.basename(audio_file)
            current_page_label = f"Current Audio: {index + 1}/{len(loaded_audios)}"
            return audio_file, audio_name, index + 1, current_page_label
        return None, "", 1, "Audio not available"

    def show_page(page, delta=0):
        """Show the audio ``delta`` pages away from the 1-based ``page``, staying inside the list."""
        if not loaded_audios:
            return change_audio(-1)  # nothing loaded: placeholder values
        # The number field may send a float or None; int() makes the value usable as a list index.
        target = int(page or 1) - 1 + delta
        # Clamp so Next on the last clip / Previous on the first keeps the current clip
        # instead of falling into the "Audio not available" branch.
        target = max(0, min(target, len(loaded_audios) - 1))
        return change_audio(target)

    def next_audio(index):
        """Advance one clip from the 1-based page ``index``."""
        return show_page(index, 1)

    def prev_audio(index):
        """Go back one clip from the 1-based page ``index``."""
        return show_page(index, -1)

    def load_audios(audios):
        """Replace the loaded files and show the first one."""
        nonlocal loaded_audios
        # The file component may send None; keep a list so len() keeps working.
        loaded_audios = audios or []
        return change_audio(0)

    with gr.Blocks() as app:
        audio_loader = gr.File(label="Load Audio Files", file_count='multiple')
        audio_player = gr.Audio(label="Audio Player")
        audio_name_box = gr.Textbox(label="Audio File Name", interactive=True)
        page_input = gr.Number(label="Go to page:", value=1, visible=True)
        current_page_label = gr.Label("Current Audio: 1/X")
        next_button = gr.Button("Next")
        prev_button = gr.Button("Previous")
        go_button = gr.Button("Go to Page")

        # Same four components for every handler, in the order change_audio returns them.
        display_outputs = [audio_player, audio_name_box, page_input, current_page_label]

        audio_loader.change(fn=load_audios, inputs=[audio_loader], outputs=display_outputs)
        next_button.click(fn=lambda index: next_audio(index), inputs=[page_input], outputs=display_outputs)
        prev_button.click(fn=lambda index: prev_audio(index), inputs=[page_input], outputs=display_outputs)
        go_button.click(fn=lambda index: show_page(index), inputs=[page_input], outputs=display_outputs)

    return app

# Build the simplified audio browser and start the Gradio server for this cell.
app = create_gradio_app()
app.launch()

## Inputs and Outputs in Gradio

### Basics of inputs and outputs

In [None]:
import gradio as gr

def change_textbox2():
    """Return the fixed follow-up question used as the second value of ``change_textboxes``."""
    follow_up = 'So what does a fine person like you do in a place such as this?'
    return follow_up

def change_textbox3():
    """Return the fixed closing remark used as the third value of ``change_textboxes``."""
    remark = 'Yeah, I know. I love coding so much that I am still there at 10PM doing stuff like this instead of playing Elden Ring.'
    return remark

# change_textboxes() is the function called by go_button in the interface. It can itself call other functions,
# which hand back values or new components that we then return to the UI.

def change_textboxes(initial_text):
    """Return the three textbox values: a greeting built from ``initial_text`` and the two helper texts."""
    greeting = f'Oh so your name is {initial_text}! Nice to meet you!'
    return greeting, change_textbox2(), change_textbox3()

# Layout: the text typed in textbox1 is passed to change_textboxes, whose three
# return values are written to textbox1, textbox2 and textbox3 in that order.
with gr.Blocks() as demo:
    textbox1 = gr.Textbox(label='What is your name?')
    textbox2 = gr.Textbox()
    textbox3 = gr.Textbox()
    go_button = gr.Button('Do your magic')
    go_button.click(fn=change_textboxes, inputs=[textbox1], outputs=[textbox1, textbox2, textbox3])


demo.launch()



### Update the visibility of components

The function that you call through a button can also send new gradio components that will automatically get mapped to the outputs defined in the UI (go_button.click), and thus replace them / update them. That is how you change the visibility of an element in Gradio.

In [None]:
import gradio as gr

def change_textboxes(initial_text):
    """Return the greeting plus two replacement textboxes.

    The two textboxes are visible and filled only when ``initial_text`` contains
    something other than whitespace; otherwise they are empty and hidden.
    """
    has_name = bool(initial_text.strip())
    if has_name:
        question = 'So what does a fine person like you do in a place such as this?'
        remark = 'Yeah, I know. I love coding so much that I am still there at 10PM doing stuff like this instead of playing Elden Ring.'
    else:
        question = remark = ''
    greeting = f'Oh so your name is {initial_text}! Nice to meet you!'
    # Returning new components replaces the properties (value, visibility) of the mapped outputs.
    return (
        greeting,
        gr.Textbox(value=question, visible=has_name),
        gr.Textbox(value=remark, visible=has_name),
    )

# Layout: textbox2 and textbox3 start hidden; the click handler returns
# replacement components for them, which is how their visibility changes.
with gr.Blocks() as demo:
    textbox1 = gr.Textbox(label='What is your name?')
    go_button = gr.Button('Do your magic')

    textbox2 = gr.Textbox(visible=False)  # Initially not visible
    textbox3 = gr.Textbox(visible=False)  # Initially not visible

    go_button.click(fn=change_textboxes, inputs=textbox1, outputs=[textbox1, textbox2, textbox3])

demo.launch()

# Whisper and transcription

In [None]:
import whisper
import json
import os
import torchaudio


def transcribe_audio(audio_path, whisper_model):
    """Transcribe one audio file with Whisper.

    The model named ``whisper_model`` is loaded on every call.

    Returns:
        A one-entry dict. The key is ``audio_path`` with its extension removed
        (the directory part is kept); the value is what ``model.transcribe`` returned.
    """
    key = os.path.splitext(audio_path)[0]
    transcription = whisper.load_model(whisper_model).transcribe(audio_path)
    return {key: transcription}




# Model and clip used for this one-off transcription test.
whisper_model = 'medium.en'
# The accented letter had been replaced by U+FFFD (the Unicode replacement character);
# it is restored to "é", as in the same path used by the path-splitting cell further down.
audio_file =  r"/home/maelys/AI_PROJECTS/SOUND/TOOLS/MRQ/ai-voice-cloning/training/train_mark_ultimate_8600/audio/Buté_our_paths_soon_diverged_2_blood_guzzling.wav"

transcription = transcribe_audio(audio_file, whisper_model)

json_file_path = "json_test.json"
json_object = json.dumps(transcription, indent=4)

# Explicit encoding so the written file does not depend on the platform default.
with open(json_file_path, "w", encoding="utf-8") as outfile:
    outfile.write(json_object)



It seems I don't get exactly the same tokens or values as the MRQ ai-voice-cloning tool, although they are close. However, as I'm primarily building this tool to help me prepare datasets that I will then feed to MRQ's tool, I find it hazardous to risk botching the transcription of thousands and thousands of audios, only to be compelled to do everything again in a few months. What I'm going to do, then, is give the user of my tool a choice. In the transcription tab, "Transcribe here" will launch and perform the transcription internally with my tool. "Transcribe with MRQ" will make a textbox appear, describing the exact process to transcribe the audios through MRQ's tool, and then invite the user to go to the transcription check tab. In this last tab, the user will then simply point to the folder where the whisper.json has been created, whether internally or through MRQ.

## Transcribe panel

In [None]:
import json
import os

import gradio as gr
import whisper

def transcribe_audios(input, model, export_folder, extensions=('.mp3', '.wav', '.flac', '.ogg', '.m4a')):
    """Transcribe every audio file of a folder and write the results to ``whisper.json``.

    Args:
        input: Folder whose files are transcribed.
        model: Name of the Whisper model to load.
        export_folder: Folder receiving ``whisper.json``; created when missing.
        extensions: File extensions (lower case, with the dot) that are
            transcribed. Pass None to attempt every directory entry.

    The JSON maps each file name (with its extension) to the result of
    ``model.transcribe`` for that file.
    """
    whisper_model = whisper.load_model(model)
    if export_folder:
        os.makedirs(export_folder, exist_ok=True)
    export_path = os.path.join(export_folder, 'whisper.json')

    transcriptions = {}

    for audio in sorted(os.listdir(input)):
        if extensions is not None and os.path.splitext(audio)[1].lower() not in extensions:
            # Skip anything that is not audio, e.g. a whisper.json left by a
            # previous run when the export folder is the input folder.
            continue

        # Add to the transcriptions dictionary
        transcriptions[audio] = whisper_model.transcribe(os.path.join(input, audio))

    # Write to a JSON file
    with open(export_path, 'w', encoding='utf-8') as json_file:
        json.dump(transcriptions, json_file, indent=4)
    
    


def internal_transcriber(input, model, export_path):
    """Run ``transcribe_audios`` and return a visible Markdown component confirming the export folder."""
    transcribe_audios(input, model, export_path)
    # Same text as a triple-quoted block, spelled out line by line.
    confirmation = (
        "\n"
        "    \n"
        f"        >> Your audios have been retranscribed in **{export_path}**\n"
        "\n"
        "    "
    )
    return gr.Markdown(value=confirmation, visible=True)

def choose_transcriber(transcriber_choice):
    """Return two groups with opposite visibility: (internal transcriber, MRQ instructions).

    The internal group is the visible one only when the choice is 'This tool'.
    """
    use_internal = transcriber_choice == 'This tool'
    return gr.Group(visible=use_internal), gr.Group(visible=not use_internal)


# Transcription panel: a radio button selects which of the two groups is shown.
with gr.Blocks() as demo:
    choice_radio = gr.Radio(label='Which tool do you want to use for transcribing?', choices=['This tool', 'MRQ ai-voice-cloning'])

   
    # Both groups start hidden; choose_transcriber returns their new visibility.
    internal_transcriber_group = gr.Group(visible=False)
    mrq_tool_group = gr.Group(visible=False)


    # Inputs of the built-in transcriber.
    with internal_transcriber_group:
        input_folder = gr.Textbox(label='Path to the folder you want to transcribe')
        model_choice = gr.Dropdown(label='Which Whisper model do you want to use?', 
                                            choices=["tiny", "tiny.en", "base", "base.en", "small", "small.en", 
                                                    "medium", "medium.en",
                                                    "large", "large-v1", "large-v2", ])
        export_path = gr.Textbox(label='Path to the folder you want to export your transcribed json')
        info_textbox = gr.Markdown(visible=False)
        submit_button = gr.Button('Transcribe')
    
    # Static instructions for transcribing with MRQ's tool instead.
    with mrq_tool_group:
        instructions_text = """
      
                ># Hey there!
     
                >So you chose to use MRQ's ai-voice-cloning tool for your retranscription! Good choice, that tool is pure magic.

                >Here's how to do this:

                >>1. Go to MRQ ai-voice-cloning repo: [MRQ ai-voice-cloning](https://git.ecker.tech/mrq/ai-voice-cloning)
                2. Clone the repo, install the tool (you have all instructions on the git page)
                3. Put the voices you want to transcribe in a dedicated folder, inside the "voices" folder
                4. Launch the interface (start.bat or start.sh depending on your OS)
                5. Go to the "Training" tab
                6. Choose your voice in "Dataset Source"
                7. Click on Transcribe and Process
                8. The "whisper.json" is written in the "training" folder, so go get the path
                9. You're ready to go to the "Checkout Transcription Tab" here, point to the "whisper.json" file produced by MRQ ai-voice-cloning tool!
                
    """
        

        
        mrq_textbox = gr.Markdown(value = instructions_text)


    # Event wiring: the radio toggles the groups; the button runs the transcription
    # and writes the returned Markdown component into info_textbox.
    choice_radio.change(fn=choose_transcriber, inputs=[choice_radio], outputs=[internal_transcriber_group, mrq_tool_group])
    submit_button.click(fn=internal_transcriber, inputs=[input_folder, model_choice, export_path], outputs=[info_textbox])
    


demo.launch()

In [None]:
import os

# Scratch cell: extract a file name without its directory or extension.
my_path = "/home/maelys/AI_PROJECTS/SOUND/TOOLS/MRQ/ai-voice-cloning/training/train_mark_ultimate_8600/audio/Buté_our_paths_soon_diverged_2_blood_guzzling.wav"

# Split the path and the filename
path_components = os.path.split(my_path)

# Extract the filename
filename_with_extension = path_components[1]

# Split the filename and the extension
filename_without_extension, _ = os.path.splitext(filename_with_extension)

print(filename_without_extension)

## Experimenting with json manipulation

### Basic json display 

In [None]:
import os
import json

def get_text(audio_folder, json_file):
    """Return the transcription text of every ``.mp3``/``.wav`` file found in ``audio_folder``.

    Args:
        audio_folder: Folder listed with ``os.listdir``; only names whose
            extension is exactly ``.mp3`` or ``.wav`` are kept.
        json_file: Path of a JSON file mapping each audio file name to a dict
            holding a ``'text'`` entry.

    Returns:
        The ``'text'`` values in directory-listing order. The list is also printed.

    Raises:
        KeyError: If a listed audio file has no entry in the JSON.
    """
    extensions = ['.mp3', '.wav']
    audios = [audio for audio in os.listdir(audio_folder) if os.path.splitext(audio)[1] in extensions]
    with open(json_file, 'r', encoding='utf-8') as file:
        json_data = json.load(file)

    # Build the list once and reuse it for both the print and the return value.
    texts = [json_data[audio]['text'] for audio in audios]
    print(texts)
    return texts


# Inputs for get_text: the folder listed for audio files and the JSON file that is read.
audio_folder = r'/home/maelys/AI_PROJECTS/SOUND/DATA_CENTER/Tests/'
json_file = r'/home/maelys/AI_PROJECTS/SOUND/DATA_CENTER/Tests/whisper.json'


# An empty list is falsy, so 'ok' is printed only when at least one text was read.
if texts := get_text(audio_folder, json_file):
    print('ok')
    # for text in texts:
    #     print(text)

### Change json interactively 

In [None]:
import os
import json
import gradio as gr



def change_json(json_file_path, audio_folder, json_textbox):
    """Overwrite the transcription text of the first audio clip in the folder's JSON file.

    Args:
        json_file_path: Despite the name, the *folder* searched for the first
            file ending in ``.json``.
        audio_folder: Folder listed for ``.mp3``/``.wav`` clips; only the first
            one found is edited.
        json_textbox: New text stored under ``json_data[<clip>]['text']``.

    Returns:
        A short status message naming the clip and the JSON file that were updated.

    Raises:
        FileNotFoundError: If no ``.json`` file or no audio clip is found.
        KeyError: If the clip has no entry in the JSON.
    """
    print(f'current text in json text box is {json_textbox}')
    # new_filepath = os.path.join(audio_folder, 'coucou.json')

    json_file = next((name for name in os.listdir(json_file_path) if name.endswith('.json')), None)
    if json_file is None:
        # Fail with a clear message instead of letting os.path.join choke on None.
        raise FileNotFoundError(f'No .json file found in {json_file_path}')

    extensions = ['.mp3', '.wav']
    clips = [audio for audio in os.listdir(audio_folder) if os.path.splitext(audio)[1] in extensions]
    if not clips:
        raise FileNotFoundError(f'No .mp3 or .wav file found in {audio_folder}')
    clip = clips[0]

    with open(os.path.join(json_file_path, json_file), 'r+', encoding='utf-8') as file:
        json_data = json.load(file)
        print (f"old key is {json_data[clip]['text']}")
        json_data[clip]['text'] = json_textbox
        print (f"new key is {json_data[clip]['text']}")

        # Rewrite in place: rewind, dump, then cut whatever is left of the old content.
        file.seek(0)
        json.dump(json_data, file, indent=4)
        file.truncate()

    return f'Saved new text for {clip} in {json_file}'


    


def load_stuff(json_file_path, audio_folder):
    """Return the transcription text stored for the first audio clip of ``audio_folder``.

    Args:
        json_file_path: Despite the name, the *folder* searched for the first
            file ending in ``.json``.
        audio_folder: Folder listed for ``.mp3``/``.wav`` clips; only the first
            one found is used.

    Returns:
        ``json_data[<clip>]['text']`` for that clip.

    Raises:
        FileNotFoundError: If no ``.json`` file or no audio clip is found.
        KeyError: If the clip has no entry in the JSON.
    """
    print(json_file_path)

    json_file = next((name for name in os.listdir(json_file_path) if name.endswith('.json')), None)
    if json_file is None:
        # Fail with a clear message instead of letting os.path.join choke on None.
        raise FileNotFoundError(f'No .json file found in {json_file_path}')

    extensions = ['.mp3', '.wav']
    clips = [audio for audio in os.listdir(audio_folder) if os.path.splitext(audio)[1] in extensions]
    if not clips:
        raise FileNotFoundError(f'No .mp3 or .wav file found in {audio_folder}')

    with open(os.path.join(json_file_path, json_file), 'r', encoding='utf-8') as file:
        json_data = json.load(file)

    # Debug dump of the whole JSON content.
    for key, value in json_data.items():
        print(key, value)

    return json_data[clips[0]]['text']

# Layout: two folder paths, an editable textbox for the transcription, and two buttons.
with gr.Blocks() as demo:
    json_file = gr.Textbox(label='json path')
    audio_folder = gr.Textbox(label='audios') # We have to keep loading a path instead of an audio, because Gradio changes the names of the audio it loads when using gr.File... It deletes the commas etc, which we have in our titles. Thus the json can't find the key.
    json_textbox = gr.Textbox(interactive=True)
    
    submit_btn = gr.Button('Submit')
    save_json = gr.Button('Save Json')
    console_output = gr.Textbox()
    


    # 'Submit' loads the stored text into json_textbox; 'Save Json' writes the edited
    # text back and sends the handler's return value to console_output.
    submit_btn.click(fn=load_stuff, inputs = [json_file, audio_folder], outputs = [json_textbox])
    save_json.click(fn=change_json, inputs = [json_file, audio_folder, json_textbox], outputs=[console_output])


demo.launch()

## Generate box rows dynamically

However, this only works if there's a button that then creates another interface, which sounds a bit cumbersome.

In [None]:
import gradio as gr 

# Number of textboxes created in the layout below.
total_amount = 10

def show_stuff(*args):
    """Return one replacement textbox per received value.

    Only the first ``amount_to_show`` (2) textboxes are visible; each one's
    value is its position in the argument list, as a string.
    """
    amount_to_show = 2
    return [
        gr.Textbox(visible=position < amount_to_show, value=str(position))  # Ensuring value is a string
        for position in range(len(args))
    ]
        

def show_one(textbox):
    """Print the type of the received value and return a visible textbox holding 'coucou'."""
    received_type = type(textbox)
    print(received_type)
    replacement = gr.Textbox(visible=True, value='coucou')
    return replacement
    

# Layout: create total_amount textboxes up front, then let the button replace all of them.
with gr.Blocks() as demo:

    textboxes = []
    for i in range(total_amount):
        textbox = gr.Textbox(value=i, visible=True)
        textboxes.append(textbox)


    #amount_to_show = gr.Number(label='Amount ot boxes to show')


    show_btn = gr.Button('Show')

    # The same list is both input and output: show_stuff receives every current
    # value and returns one replacement component per textbox.
    show_btn.click(fn=show_stuff, inputs=textboxes, outputs=textboxes)


demo.launch()

## Drafting the checkout tab

In [None]:
import gradio as gr


def create_interface():
    """Draft of the checkout tab: load the clips of a folder and page through them.

    The JSON and ebook inputs are part of the layout but are not read yet.

    Returns:
        The ``gr.Blocks`` interface (not launched).
    """
    import os  # this cell's import list only brings in gradio

    loaded_audios = []

    def change_audio(index):
        """Return (audio path, file name, 1-based page, label text) for the 0-based ``index``.

        Placeholder values are returned when ``index`` is out of range.
        """
        if 0 <= index < len(loaded_audios):
            audio_file = loaded_audios[index]
            audio_name = os.path.basename(audio_file)
            current_page_label_label = f"Current Audio: {index + 1}/{len(loaded_audios)}"
            return audio_file, audio_name, index + 1, current_page_label_label
        return None, "", 1, "Audio not available"

    def handle_pagination(page, delta):
        """Show the clip ``delta`` pages away from the 1-based ``page``, staying inside the list."""
        if not loaded_audios:
            return change_audio(-1)  # nothing loaded: placeholder values
        # We adjust for zero-based indexing, and the delta determines which way we move.
        # int() because the number field may send a float; `or 1` covers an emptied field.
        new_index = int(page or 1) - 1 + delta
        # Clamp so that moving past either end keeps the nearest valid clip.
        new_index = max(0, min(new_index, len(loaded_audios) - 1))
        return change_audio(new_index)

    def load_audios(audio_path, json_file, epub_file):
        """Replace the loaded clips with the .mp3/.wav files of ``audio_path`` and show the first."""
        nonlocal loaded_audios
        extensions = ['.mp3', '.wav']
        # Rebuild the list from scratch: appending to the previous one would
        # duplicate every clip each time 'Load' is pressed.
        loaded_audios = [
            os.path.join(audio_path, audio)
            for audio in sorted(os.listdir(audio_path))
            if os.path.splitext(audio)[1] in extensions
        ]

        return change_audio(0)

    with gr.Blocks() as interface:
        audios = gr.Textbox(label='Write the folder to your audios')
        json_file = gr.File(label='Your whisper.json')
        ebook_file = gr.File(label='Your epub or ebook for reference (Optional)')
        submit_button = gr.Button('Load') # This is supposed to load the audios, but also the json file and the ebook if they exist

        process_group = gr.Group(visible=True)

        with process_group:
            with gr.Row():
                audio_player = gr.Audio()
                audio_name_box = gr.Textbox(label='Audio File Name', interactive=True)

            with gr.Row():
                previous_audio_btn = gr.Button('Previous')
                delete_audio = gr.Button('Delete from dataset')
                next_audio_btn = gr.Button('Next')

            with gr.Row(equal_height=True):
                json_reference = gr.Textbox(scale=20, interactive=True)
                save_json_button = gr.Button('Save JSON')

            with gr.Row():
                epub_reference = gr.Textbox()

            current_page_label = gr.Label('Current page : 1/X')
            page_input = gr.Number(label='Enter page', value=1)
            go_button = gr.Button('Go to page')

            # Same four components for every handler, in the order change_audio returns them.
            display_outputs = [audio_player, audio_name_box, page_input, current_page_label]

            submit_button.click(fn=load_audios, inputs = [audios, json_file, ebook_file], outputs = display_outputs)
            next_audio_btn.click(fn=lambda index: handle_pagination(index, 1), inputs=[page_input], outputs=display_outputs)
            previous_audio_btn.click(fn=lambda index: handle_pagination(index, -1), inputs=[page_input], outputs=display_outputs)
            go_button.click(fn=lambda index: handle_pagination(index, 0), inputs=[page_input], outputs=display_outputs)

    return interface


# Build the draft checkout tab and start the Gradio server for this cell.
app = create_interface()
app.launch()
    




## checkout tab, simpler interface (only load json and infer from there)

In [None]:
import gradio as gr
import os
import json

def create_interface():
    """Checkout tab driven only by the transcription JSON.

    The user gives one folder. The first ``.json`` file found in it is loaded,
    its keys are treated as audio file names, and each clip is looked up in the
    folder's ``audios`` sub-folder. The transcription text can be edited and
    saved back to the JSON.

    Returns:
        The ``gr.Blocks`` interface (not launched).
    """
    json_data = None  # Content of the loaded JSON, shared by the handlers below

    def placeholder_view():
        """Values shown when there is no entry to display (same order as change_audio)."""
        return None, "", 1, "Audio not available", "", ""

    def get_json(path):
        """Return the name of the first ``.json`` file in ``path``, or None."""
        return next(
            (file for file in os.listdir(path) if file.endswith('.json')), None)

    def get_json_path(json_folder):
        """Return the full path of the folder's JSON file, or report its absence in the UI."""
        json_file = get_json(json_folder)
        if json_file is None:
            # Clear message instead of the TypeError os.path.join raises on None.
            raise gr.Error(f'No .json file found in {json_folder}')
        return os.path.join(json_folder, json_file)

    def save_json(json_folder, text, audio_name):
        """Store ``text`` as the transcription of ``audio_name`` and rewrite the JSON file."""
        nonlocal json_data
        json_file_path = get_json_path(json_folder)

        with open(json_file_path, 'r+', encoding='utf-8') as file:
            json_data = json.load(file)
            if audio_name not in json_data:
                # The name box is editable, so the key may no longer match; nothing is written.
                raise gr.Error(f'"{audio_name}" is not an entry of the JSON file.')
            json_data[audio_name]['text'] = text
            # Rewrite in place: rewind, dump, then cut whatever is left of the old content.
            file.seek(0)
            json.dump(json_data, file, indent=4)
            file.truncate()

        return '>> *The JSON was saved.*'

    def change_audio(index, json_folder):
        """Return the six display values for the 0-based ``index``.

        Order: audio path, file name, 1-based page, label text, transcription
        text, info message (always emptied). Placeholder values are returned
        when nothing is loaded or ``index`` is out of range.
        """
        entry_amount = len(json_data) if json_data else 0

        if index is None or not 0 <= index < entry_amount:
            return placeholder_view()

        index = int(index)  # the number field may send a float, which cannot index a list
        audio_file = list(json_data.keys())[index]
        audio_name = os.path.basename(audio_file)
        audio_path = os.path.join(json_folder, 'audios', audio_file)
        # Look the text up with the actual key, not its basename.
        audio_text = json_data[audio_file]['text']
        current_page_label_label = f"Current Audio: {index + 1}/{entry_amount}"

        return audio_path, audio_name, index + 1, current_page_label_label, audio_text, ""

    def handle_pagination(page, json_folder, delta):
        """Move ``delta`` entries from the 1-based ``page``; stay in place when that leaves the range."""
        if not json_data:
            return placeholder_view()  # nothing loaded yet
        # We adjust for zero-based indexing, and the delta determines which way we move
        current_index = int(page or 1) - 1
        new_index = current_index + delta
        if 0 <= new_index < len(json_data):
            return change_audio(new_index, json_folder)
        # Out of bounds: show the current entry again instead of moving.
        return change_audio(current_index, json_folder)

    def load_and_init(json_folder):
        """Load the folder's JSON into the shared ``json_data`` and show its first entry."""
        nonlocal json_data # Rebinds the shared json_data so that all the other functions can access it
        json_path = get_json_path(json_folder)

        with open(json_path, 'r', encoding='utf-8') as file:
            json_data = json.load(file)

        return change_audio(0, json_folder)

    with gr.Blocks() as interface:
        json_folder = gr.Textbox(label='Path to your retranscription json and clips')
        submit_button = gr.Button('Load') # This is supposed to load the audios, but also the json file and the ebook if they exist

        process_group = gr.Group(visible=True)

        with process_group:
            with gr.Row():
                audio_player = gr.Audio()
                audio_name_box = gr.Textbox(label='Audio File Name', interactive=True)

            with gr.Row():
                previous_audio_btn = gr.Button('Previous')
                delete_audio = gr.Button('Delete from dataset')
                next_audio_btn = gr.Button('Next')

            with gr.Row(equal_height=True):
                json_reference = gr.Textbox(label='JSON reference', scale=20, interactive=True)
                save_json_button = gr.Button('Save JSON')

            with gr.Row():
                epub_reference = gr.Textbox(label='EPUB reference')

            current_page_label = gr.Label('Current page : 1/X')
            page_input = gr.Number(label='Enter page', value=1)
            go_button = gr.Button('Go to page')
            info_textbox = gr.Markdown(visible=True)

            # Six components, matching the six values returned by change_audio.
            # 'Load' uses the full list too, so the handler's sixth value has a target.
            display_outputs = [audio_player, audio_name_box, page_input, current_page_label, json_reference, info_textbox]

            submit_button.click(fn=load_and_init,
                                inputs=[json_folder],
                                outputs=display_outputs)

            next_audio_btn.click(fn=lambda index, json_folder: handle_pagination(index, json_folder, 1),
                                 inputs=[page_input, json_folder],
                                 outputs=display_outputs)

            previous_audio_btn.click(fn=lambda index, json_folder: handle_pagination(index, json_folder, -1),
                                     inputs=[page_input, json_folder],
                                     outputs=display_outputs)

            # Zero delta: jump to the typed page, with the same None/float handling as above.
            go_button.click(fn=lambda index, json_folder: handle_pagination(index, json_folder, 0),
                            inputs=[page_input, json_folder],
                            outputs=display_outputs)

            save_json_button.click(fn=save_json, inputs=[json_folder, json_reference, audio_name_box], outputs=info_textbox)

    return interface

# Build the JSON-driven checkout tab and start the Gradio server for this cell.
app = create_interface()
app.launch()

## Analyze json to get the max number of segments

In [None]:
import json
import math

def calculate_average_segments(json_data):
    """Return the average length of the "segments" lists, rounded up to a whole number.

    Only the values of ``json_data`` that contain a "segments" key are counted;
    0 is returned when none does.
    """
    segment_counts = [
        len(entry["segments"])
        for entry in json_data.values()
        if "segments" in entry
    ]
    if not segment_counts:
        return 0
    # Round up to the nearest whole number
    return math.ceil(sum(segment_counts) / len(segment_counts))


def find_max_segments(json_data):
    """Return the length of the longest "segments" list among the values of ``json_data``.

    Values without a "segments" key are ignored; 0 is returned when none has one.
    """
    return max(
        (len(entry["segments"]) for entry in json_data.values() if "segments" in entry),
        default=0,
    )

# JSON file analysed by this cell.
json_file = r'/home/maelys/AI_PROJECTS/SOUND/DATA_CENTER/scripts/Voice_cloning_data_management/transcribed_jsons/Mark_Noble/Mark_Noble_Master_jsons_official/Regis_Master.json'
with open(json_file, 'r') as file:
    json_data = json.load(file)

# Assuming 'json_data' is your loaded JSON data from the file
# Use the function to calculate the average
average_segments = calculate_average_segments(json_data)
# Length of the longest "segments" list across all entries.
max_segments = find_max_segments(json_data)

print(f'Average number of entries in "segments", rounded up: {average_segments}')
print(f'Maximum number of entries in a segment: {max_segments}')


## Checkout tab - implementing segments (working)

In [None]:
import gradio as gr
import os
import json

def create_interface():
    """Build the Gradio "checkout" UI for reviewing transcribed audios one by one.

    The folder typed by the user must contain a ``.json`` file (the first one
    found is used) shaped like
    ``{audio_name: {'text': ..., 'segments': [{'text', 'start', 'end'}, ...]}}``.
    Audio files are resolved as ``<folder>/audios/<audio_name>``.

    Every navigation handler returns exactly one value per wired output
    (6 fixed components + 3 textboxes per segment row), including on failure,
    so Gradio never receives fewer values than it expects.

    Returns:
        gr.Blocks: the assembled (not yet launched) interface.
    """
    MAX_SEGMENTS = 10  # segment rows laid out in the UI; further segments are not shown
    json_data = None   # loaded JSON, shared by all handlers below

    def get_json(path):
        """Return the name of the first ``.json`` file in ``path``, or None."""
        if not path or not os.path.isdir(path):
            return None
        return next((file for file in os.listdir(path) if file.endswith('.json')), None)

    def to_index(page):
        """Convert the 1-based page field to a 0-based index (0 if blank/invalid)."""
        try:
            return int(page) - 1
        except (TypeError, ValueError):
            return 0

    def to_number(raw):
        """Turn a timecode typed in a textbox back into a number when possible.

        The segment start/end values are displayed through ``str()``; writing
        the raw textbox content back would store them as strings in the JSON.
        """
        text = str(raw).strip()
        for cast in (int, float):
            try:
                return cast(text)
            except ValueError:
                continue
        return raw

    def create_segment_group(segments):
        """Return 3 textbox updates (text, start, end) for each of the MAX_SEGMENTS rows."""
        group = []
        for i in range(MAX_SEGMENTS):
            visible = i < len(segments)

            text = segments[i]['text'] if visible else ''
            start = segments[i]['start'] if visible else ''
            end = segments[i]['end'] if visible else ''

            group.extend([
                gr.Textbox(visible=visible, value=text, label=f'Segment {i+1} Text', interactive=True, scale=50),
                gr.Textbox(visible=visible, value=str(start), label=f'Segment {i+1} Start', interactive=True),
                gr.Textbox(visible=visible, value=str(end), label=f'Segment {i+1} End', interactive=True),
            ])
        return group

    def empty_state(message=""):
        """Full-size return value used when there is nothing to display."""
        return (None, "", 1, "Audio not available", "", message, *create_segment_group([]))

    def save_json(json_folder, text, audio_name, *all_segment_boxes):
        """Write the edited text and segment values of ``audio_name`` back to the JSON file."""
        nonlocal json_data
        json_file = get_json(json_folder)
        if not json_file:
            return '>> *JSON file not found in the provided folder.*'
        json_file_path = os.path.join(json_folder, json_file)

        with open(json_file_path, 'r+') as file:
            json_data = json.load(file)
            if audio_name not in json_data:
                return f'>> *"{audio_name}" is not in the JSON; nothing was saved.*'

            json_data[audio_name]['text'] = text
            segments = json_data[audio_name]['segments']

            # Boxes arrive as (text, start, end) triples in row order. Index
            # them by position: filtering out empty values first would shift
            # every following value as soon as one segment text is blank.
            editable = min(len(segments), len(all_segment_boxes) // 3)
            for i in range(editable):
                seg_text, seg_start, seg_end = all_segment_boxes[i * 3:i * 3 + 3]
                segments[i]['text'] = seg_text
                segments[i]['start'] = to_number(seg_start)
                segments[i]['end'] = to_number(seg_end)

            file.seek(0)
            json.dump(json_data, file, indent=4)
            file.truncate()

        return '>> *The JSON was saved.*'

    def change_audio(index, json_folder):
        """Return the output values that display the audio at 0-based ``index``.

        ``index`` is clamped to the valid range.
        """
        if not json_data:
            return empty_state()

        audio_amount = len(json_data)
        index = max(0, min(int(index), audio_amount - 1))

        audio_file = list(json_data.keys())[index]
        audio_name = os.path.basename(audio_file)
        audio_path = os.path.join(json_folder or '', 'audios', audio_file)
        entry = json_data[audio_file]
        current_page_label_label = f"Current Audio: {index + 1}/{audio_amount}"

        # Build the rows exactly once: one value per segment textbox.
        segment_group = create_segment_group(entry.get('segments', []))

        return audio_path, audio_name, index + 1, current_page_label_label, entry.get('text', ''), "", *segment_group

    def handle_pagination(page, json_folder, delta):
        """Move ``delta`` entries away from the 1-based ``page``; stay put at either end."""
        if not json_data:
            return empty_state('>> *Load a JSON folder first.*')

        current_index = to_index(page)
        new_index = current_index + delta
        if 0 <= new_index < len(json_data):
            return change_audio(new_index, json_folder)
        # Out of bounds: redisplay the current entry
        return change_audio(current_index, json_folder)

    def load_and_init(json_folder):
        """Load the folder's JSON file and display its first entry."""
        nonlocal json_data
        json_file = get_json(json_folder)
        if not json_file:
            print("JSON file not found in the provided folder.")
            return empty_state('>> *JSON file not found in the provided folder.*')

        json_file_path = os.path.join(json_folder, json_file)

        try:
            with open(json_file_path, 'r') as file:
                loaded = json.load(file)
        except Exception as e:
            print(f"Failed to load JSON data: {e}")
            return empty_state(f'>> *Failed to load JSON data: {e}*')

        # Expected structure: {'audio_name': {'text': '...', 'segments': [{'text': '...', 'start': ..., 'end': ...}, ...]}}
        if not isinstance(loaded, dict) or not loaded:
            print("JSON data is empty.")
            return empty_state('>> *JSON data is empty.*')

        json_data = loaded
        return change_audio(0, json_folder)

    with gr.Blocks() as interface:
        json_folder = gr.Textbox(label='Path to your retranscription json and clips')
        submit_button = gr.Button('Load') # This is supposed to load the audios, but also the json file and the ebook if they exist
        all_segment_boxes = []

        with gr.Row():
            audio_player = gr.Audio()
            audio_name_box = gr.Textbox(label='Audio File Name', interactive=True)

        with gr.Row():
            previous_audio_btn = gr.Button('Previous')
            delete_audio = gr.Button('Delete from dataset')  # no handler is attached in this cell
            next_audio_btn = gr.Button('Next')

        with gr.Row(equal_height=True):
            json_reference = gr.Textbox(label='JSON reference', scale=20, interactive=True)
            save_json_button = gr.Button('Save JSON')

        # Hidden rows that the handlers reveal and fill per audio
        for _ in range(MAX_SEGMENTS):
            with gr.Row():
                seg_textbox = gr.Textbox(visible=False, scale=50)
                seg_start = gr.Textbox(visible=False)
                seg_end = gr.Textbox(visible=False)

            all_segment_boxes.extend((seg_textbox, seg_start, seg_end))

        with gr.Row():
            epub_reference = gr.Textbox(label='EPUB reference')

        current_page_label = gr.Label('Current page : 1/X')
        page_input = gr.Number(label='Enter page', value=1)
        go_button = gr.Button('Go to page')
        info_textbox = gr.Markdown(visible=True)

        # Every navigation handler refreshes the same set of components
        page_outputs = [audio_player, audio_name_box, page_input, current_page_label,
                        json_reference, info_textbox, *all_segment_boxes]

        submit_button.click(fn=load_and_init, inputs=json_folder, outputs=page_outputs)

        next_audio_btn.click(fn=lambda index, json_folder: handle_pagination(index, json_folder, 1),
                             inputs=[page_input, json_folder],
                             outputs=page_outputs)

        previous_audio_btn.click(fn=lambda index, json_folder: handle_pagination(index, json_folder, -1),
                                 inputs=[page_input, json_folder],
                                 outputs=page_outputs)

        go_button.click(fn=lambda index, json_folder: change_audio(to_index(index), json_folder),
                        inputs=[page_input, json_folder],
                        outputs=page_outputs)

        save_json_button.click(fn=save_json, inputs=[json_folder, json_reference, audio_name_box, *all_segment_boxes], outputs=info_textbox)

    return interface

app = create_interface()
app.launch()

## Testing the display of segments and timecodes

In [None]:
import gradio as gr


# def display_stuff_no_loop():
#     seg_textbox1 = gr.Textbox(visible=True, label='Segment1')
#     seg_start1 = gr.Number(visible=True, value=3.5)
#     seg_end1 = gr.Number(visible=True, value=6.5)

#     seg_textbox2 = gr.Textbox(visible=False)
#     seg_start2 = gr.Number(visible=False)
#     seg_end2 = gr.Number(visible=False)

#     return seg_textbox1, seg_start1, seg_end1, seg_textbox2, seg_start2, seg_end2

 
# def display_stuff_loop():
#     amount_to_show = 1
#     segment1 = []
#     segment2 = []

#     for i in range (0,2):
#         if i < 1:
#             textbox = gr.Textbox(visible=True, value=f'Segment{i}')
#             start = gr.Number(visible=True, value=3.5)
#             end = gr.Number(visible=True, value=6.5)
#             segment1.extend((textbox, start, end))
#         else: 
#             textbox = gr.Textbox(visible=False)
#             start = gr.Number(visible=False)
#             end = gr.Number(visible=False)
#             segment2.extend((textbox, start, end))

#     return segment1[0], segment1[1], segment1[2], segment2[0], segment2[1], segment2[2]


def create_segment(visible, start, end, text, index):
    """
    Build the three components of one segment: a Textbox and two Numbers.

    The Textbox always carries ``text``; the Numbers carry ``start`` / ``end``
    when ``visible`` is true and 0 otherwise. ``index`` is accepted but not used.
    Returns the tuple (textbox, start component, end component).
    """
    start_value = start if visible else 0
    end_value = end if visible else 0

    return (
        gr.Textbox(visible=visible, value=text, interactive=True),
        gr.Number(visible=visible, value=start_value),
        gr.Number(visible=visible, value=end_value),
    )

def display_stuff_loop_modular():
    """Create five segments (the first three visible) and return their
    components flattened into one tuple, followed by the string 'hello'."""
    shown_count = 3
    segment_count = 5
    components = []

    for position in range(segment_count):
        textbox, start_box, end_box = create_segment(
            visible=position < shown_count,
            start=position + 1,
            end=position + 3,
            text=f'this is {position}',
            index=position,
        )
        components += [textbox, start_box, end_box]

    print(components)
    # One return value per component, then the console text
    return (*components, 'hello')



with gr.Blocks() as demo3:
    # Five segments, each made of a text box plus start/end number fields.
    # elem_id is the component's id in the page's HTML, so each component gets
    # its own value instead of sharing one id per segment.
    all_components = []
    for segment_number in range(1, 6):
        all_components.extend([
            gr.Textbox(elem_id=f'Segment{segment_number}_text'),
            gr.Number(elem_id=f'Segment{segment_number}_start'),
            gr.Number(elem_id=f'Segment{segment_number}_end'),
        ])

    console_output = gr.TextArea()

    submit_btn = gr.Button('Submit')

    # display_stuff_loop_modular returns one value per segment component plus the console text
    submit_btn.click(fn=display_stuff_loop_modular, outputs=[*all_components, console_output])


demo3.launch()





In [None]:
import gradio as gr

def make_stuff():
    """Return three visible, pre-filled Textboxes followed by the string 'hello'."""
    text_box = gr.Textbox(value='text', scale=50, visible=True)
    start_box = gr.Textbox(value='2.6', visible=True)
    end_box = gr.Textbox(value='7.8', visible=True)

    return text_box, start_box, end_box, 'hello'

with gr.Blocks() as demo:
    # Three hidden textboxes in one row; make_stuff supplies their new state on click.
    with gr.Row():
        textbox1 = gr.Textbox(visible=False)
        textbox2 = gr.Textbox(visible=False)
        textbox3 = gr.Textbox(visible=False)

    console_output = gr.Textbox(visible=True)
    textboxes = [textbox1, textbox2, textbox3]

    submit_btn = gr.Button('Submit')
    submit_btn.click(fn=make_stuff, outputs=textboxes + [console_output])

demo.launch()


## Delete audio

In [None]:
import gradio as gr
import os
import json

def create_interface():
    """Build the Gradio "checkout" UI with support for deleting entries.

    The folder typed by the user must contain a ``.json`` file (the first one
    found is used) shaped like
    ``{audio_name: {'text': ..., 'segments': [{'text', 'start', 'end'}, ...]}}``.
    Audio files are resolved as ``<folder>/audios/<audio_name>``.

    Every navigation handler returns exactly one value per wired output
    (6 fixed components + 3 textboxes per segment row), including on failure
    and once the last entry has been deleted, so Gradio never receives fewer
    values than it expects.

    Returns:
        gr.Blocks: the assembled (not yet launched) interface.
    """
    MAX_SEGMENTS = 10  # segment rows laid out in the UI; further segments are not shown
    json_data = None   # loaded JSON, shared by all handlers below

    def get_json(path):
        """Return the name of the first ``.json`` file in ``path``, or None."""
        if not path or not os.path.isdir(path):
            return None
        return next((file for file in os.listdir(path) if file.endswith('.json')), None)

    def to_index(page):
        """Convert the 1-based page field to a 0-based index (0 if blank/invalid)."""
        try:
            return int(page) - 1
        except (TypeError, ValueError):
            return 0

    def to_number(raw):
        """Turn a timecode typed in a textbox back into a number when possible.

        The segment start/end values are displayed through ``str()``; writing
        the raw textbox content back would store them as strings in the JSON.
        """
        text = str(raw).strip()
        for cast in (int, float):
            try:
                return cast(text)
            except ValueError:
                continue
        return raw

    def create_segment_group(segments):
        """Return 3 textbox updates (text, start, end) for each of the MAX_SEGMENTS rows."""
        group = []
        for i in range(MAX_SEGMENTS):
            visible = i < len(segments)

            text = segments[i]['text'] if visible else ''
            start = segments[i]['start'] if visible else ''
            end = segments[i]['end'] if visible else ''

            group.extend([
                gr.Textbox(visible=visible, value=text, label=f'Segment {i+1} Text', interactive=True, scale=50),
                gr.Textbox(visible=visible, value=str(start), label=f'Segment {i+1} Start', interactive=True),
                gr.Textbox(visible=visible, value=str(end), label=f'Segment {i+1} End', interactive=True),
            ])
        return group

    def empty_state(message=""):
        """Full-size return value used when there is nothing to display."""
        return (None, "", 1, "Audio not available", "", message, *create_segment_group([]))

    def delete_entry(json_folder, index, audio_name):
        """Remove ``audio_name`` from the JSON file and show the entry now at that page."""
        nonlocal json_data
        json_file = get_json(json_folder)
        if not json_file:
            return empty_state('>> *JSON file not found in the provided folder.*')
        json_file_path = os.path.join(json_folder, json_file)

        with open(json_file_path, 'r+') as file:
            json_data = json.load(file)

            if audio_name in json_data:
                json_data.pop(audio_name)

            file.seek(0)
            json.dump(json_data, file, indent=4)
            file.truncate()

        # The following entry slides into the deleted one's position;
        # change_audio clamps the index and copes with an emptied dataset.
        return change_audio(to_index(index), json_folder)

    def save_json(json_folder, text, audio_name, *all_segment_boxes):
        """Write the edited text and segment values of ``audio_name`` back to the JSON file."""
        nonlocal json_data
        json_file = get_json(json_folder)
        if not json_file:
            return '>> *JSON file not found in the provided folder.*'
        json_file_path = os.path.join(json_folder, json_file)

        with open(json_file_path, 'r+') as file:
            json_data = json.load(file)
            if audio_name not in json_data:
                return f'>> *"{audio_name}" is not in the JSON; nothing was saved.*'

            json_data[audio_name]['text'] = text
            segments = json_data[audio_name]['segments']

            # Boxes arrive as (text, start, end) triples in row order. Index
            # them by position: filtering out empty values first would shift
            # every following value as soon as one segment text is blank.
            editable = min(len(segments), len(all_segment_boxes) // 3)
            for i in range(editable):
                seg_text, seg_start, seg_end = all_segment_boxes[i * 3:i * 3 + 3]
                segments[i]['text'] = seg_text
                segments[i]['start'] = to_number(seg_start)
                segments[i]['end'] = to_number(seg_end)

            file.seek(0)
            json.dump(json_data, file, indent=4)
            file.truncate()

        return '>> *The JSON was saved.*'

    def change_audio(index, json_folder):
        """Return the output values that display the audio at 0-based ``index``.

        ``index`` is clamped to the valid range.
        """
        if not json_data:
            return empty_state()

        audio_amount = len(json_data)
        index = max(0, min(int(index), audio_amount - 1))

        audio_file = list(json_data.keys())[index]
        audio_name = os.path.basename(audio_file)
        audio_path = os.path.join(json_folder or '', 'audios', audio_file)
        entry = json_data[audio_file]
        current_page_label_label = f"Current Audio: {index + 1}/{audio_amount}"

        # Build the rows exactly once: one value per segment textbox.
        segment_group = create_segment_group(entry.get('segments', []))

        return audio_path, audio_name, index + 1, current_page_label_label, entry.get('text', ''), "", *segment_group

    def handle_pagination(page, json_folder, delta):
        """Move ``delta`` entries away from the 1-based ``page``; stay put at either end."""
        if not json_data:
            return empty_state('>> *Load a JSON folder first.*')

        current_index = to_index(page)
        new_index = current_index + delta
        if 0 <= new_index < len(json_data):
            return change_audio(new_index, json_folder)
        # Out of bounds: redisplay the current entry
        return change_audio(current_index, json_folder)

    def load_and_init(json_folder):
        """Load the folder's JSON file and display its first entry."""
        nonlocal json_data
        json_file = get_json(json_folder)
        if not json_file:
            print("JSON file not found in the provided folder.")
            return empty_state('>> *JSON file not found in the provided folder.*')

        json_file_path = os.path.join(json_folder, json_file)

        try:
            with open(json_file_path, 'r') as file:
                loaded = json.load(file)
        except Exception as e:
            print(f"Failed to load JSON data: {e}")
            return empty_state(f'>> *Failed to load JSON data: {e}*')

        # Expected structure: {'audio_name': {'text': '...', 'segments': [{'text': '...', 'start': ..., 'end': ...}, ...]}}
        if not isinstance(loaded, dict) or not loaded:
            print("JSON data is empty.")
            return empty_state('>> *JSON data is empty.*')

        json_data = loaded
        return change_audio(0, json_folder)

    with gr.Blocks() as interface:
        json_folder = gr.Textbox(label='Path to your retranscription json and clips')
        submit_button = gr.Button('Load') # This is supposed to load the audios, but also the json file and the ebook if they exist
        all_segment_boxes = []

        with gr.Row():
            audio_player = gr.Audio()
            audio_name_box = gr.Textbox(label='Audio File Name', interactive=True)

        with gr.Row():
            previous_audio_btn = gr.Button('Previous')
            delete_audio = gr.Button('Delete from dataset')
            next_audio_btn = gr.Button('Next')

        segment_group = gr.Group()
        with segment_group:
            with gr.Row(equal_height=True):
                json_reference = gr.Textbox(label='JSON reference', scale=20, interactive=True)
                save_json_button = gr.Button('Save JSON')

            # Hidden rows that the handlers reveal and fill per audio
            for _ in range(MAX_SEGMENTS):
                with gr.Row():
                    seg_textbox = gr.Textbox(visible=False, scale=50)
                    seg_start = gr.Textbox(visible=False)
                    seg_end = gr.Textbox(visible=False)

                all_segment_boxes.extend((seg_textbox, seg_start, seg_end))

        with gr.Row():
            epub_reference = gr.Textbox(label='EPUB reference')

        current_page_label = gr.Label('Current page : 1/X')
        page_input = gr.Number(label='Enter page', value=1)
        go_button = gr.Button('Go to page')
        info_textbox = gr.Markdown(visible=True)

        # Every navigation handler refreshes the same set of components
        page_outputs = [audio_player, audio_name_box, page_input, current_page_label,
                        json_reference, info_textbox, *all_segment_boxes]

        submit_button.click(fn=load_and_init, inputs=json_folder, outputs=page_outputs)

        next_audio_btn.click(fn=lambda index, json_folder: handle_pagination(index, json_folder, 1),
                             inputs=[page_input, json_folder],
                             outputs=page_outputs)

        previous_audio_btn.click(fn=lambda index, json_folder: handle_pagination(index, json_folder, -1),
                                 inputs=[page_input, json_folder],
                                 outputs=page_outputs)

        go_button.click(fn=lambda index, json_folder: change_audio(to_index(index), json_folder),
                        inputs=[page_input, json_folder],
                        outputs=page_outputs)

        save_json_button.click(fn=save_json, inputs=[json_folder, json_reference, audio_name_box, *all_segment_boxes], outputs=info_textbox)
        delete_audio.click(fn=delete_entry, inputs=[json_folder, page_input, audio_name_box], outputs=page_outputs)

    return interface

app = create_interface()
app.launch()

## Experimenting with waveform options

In [None]:
import gradio as gr


# Minimal demo: one Audio component created with type='filepath' and the
# 'show_controls' waveform option switched on.
with gr.Blocks() as demo:
    audio = gr.Audio(type='filepath', waveform_options={'show_controls': True})



demo.launch()

## Experimenting with progress bar

In [None]:
import gradio as gr
import time
def my_function(x, progress=gr.Progress()):
    """Return ``x`` unchanged after a dummy ten-step job that drives the progress tracker.

    Each step prints two messages with a one-second sleep in between.
    """
    progress(0, desc="Starting...")

    steps = range(10)
    for _step in progress.tqdm(steps):
        print('Im doing this')
        time.sleep(1)
        print('now I am doing that')

    return x
gr.Interface(my_function, gr.Textbox(), gr.Textbox()).queue().launch()

In [None]:
import whisper
import json
import os
from tqdm import tqdm
import threading
import time

# Transcribe function wrapped for threading
def transcribe_audio(audio_path, whisper_model, callback):
    """Transcribe ``audio_path`` with the named Whisper model and hand the result to ``callback``.

    ``callback`` receives ``{<file name without extension>: <transcribe result>}``
    on success, or ``{"error": <message>}`` if anything raised along the way.
    """
    try:
        file_name = os.path.basename(audio_path)
        audio_name, _extension = os.path.splitext(file_name)
        result = whisper.load_model(whisper_model).transcribe(audio_path)
    except Exception as e:
        transcription = {"error": str(e)}
    else:
        transcription = {audio_name: result}

    callback(transcription)

# Callback function to save the transcription
def save_transcription(transcription):
    """Write ``transcription`` as 4-space-indented JSON to ``json_test.json``
    in the current working directory, replacing any previous content."""
    serialized = json.dumps(transcription, indent=4)
    target = "json_test.json"
    with open(target, "w") as handle:
        handle.write(serialized)

# Wrapper function to handle transcription and progress bar
def transcribe_with_progress(audio_file, whisper_model):
    """Run the transcription in a background thread while ticking a tqdm bar.

    The bar is a placeholder: it advances by one every 0.1 s for as long as
    the worker thread is alive, independently of the real transcription progress.
    """
    worker = threading.Thread(
        target=transcribe_audio,
        args=(audio_file, whisper_model, save_transcription),
    )
    worker.start()

    progress_bar = tqdm(total=100, desc="Transcribing")

    while True:
        if not worker.is_alive():
            break
        time.sleep(0.1)         # wait a little before the next tick
        progress_bar.update(1)  # simulated progress step

    progress_bar.close()  # make sure the bar is closed once the worker is done

# Inputs for the threaded transcription test.
# NOTE(review): absolute, machine-specific path containing an undecodable
# character - confirm the file name on disk before relying on this cell.
whisper_model = 'tiny.en'
audio_file =  r"/home/maelys/AI_PROJECTS/SOUND/TOOLS/MRQ/ai-voice-cloning/training/train_mark_ultimate_8600/audio/But�_our_paths_soon_diverged_2_blood_guzzling.wav"


# Start the transcription thread and tick the progress bar until it finishes
transcribe_with_progress(audio_file, whisper_model)

## Updating Split panel with feedback

In [9]:
import gradio as gr
from dataclasses import dataclass
from pydub import AudioSegment
import tqdm
import os
import whisper
import shutil
import re
import subprocess

@dataclass
class AudioProcess_Config():
    """Class to store the necessary variables to processing the audio"""
    filepath: str            # audio file to load and to scan for silences
    input_format :str        # extension of the input file (see define_process_config)
    export_format: str       # format/extension used for the exported segments
    output_folder: str       # folder receiving the segments and the 'temp' working folder
    usable_folder: str       # <output_folder>/Usable_Audios (see define_process_config)
    not_usable_folder: str   # <output_folder>/Not_Usable_Audios (see define_process_config)
    time_threshold: float    # passed as `d=` to ffmpeg's silencedetect filter
    prefix : str             # file-name prefix of the exported segments
    # Read by AudioProcessor.transcribe_segment; defaulted so existing
    # constructor calls keep working without passing it.
    whisper_model: str = 'tiny.en'
    
class AudioProcessor():
    ''' Class to process the audios'''

    # Segments longer than this many milliseconds are split again
    MAX_SEGMENT_MS = 11000
    # Whisper model used when the config does not define `whisper_model`
    DEFAULT_WHISPER_MODEL = 'tiny.en'
    # A decimal number as printed by ffmpeg, optional sign/exponent included
    _NUMBER = r"([-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?)"

    def __init__(self, config):
        """Keep ``config`` and load the audio found at ``config.filepath``."""
        self.config = config
        self.audio = AudioSegment.from_file(config.filepath)

    def detect_silences(self, decibel="-23dB", audio_path = None):
        '''Function to detect silences in an audio

        Runs ffmpeg's ``silencedetect`` filter (noise level ``decibel``,
        minimum duration ``config.time_threshold``) on ``audio_path``, or on
        ``config.filepath`` when no path is given.

        Returns:
            list[tuple[float, float]]: (silence_start, silence_end) pairs as
            reported by ffmpeg; an empty list when no silence was reported.
        '''
        audio_path = audio_path or self.config.filepath

        # Executing ffmpeg to detect silences (it logs to stderr, merged into stdout here)
        command = ["ffmpeg","-i",audio_path,"-af",f"silencedetect=n={decibel}:d={self.config.time_threshold}","-f","null","-"]
        completed = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)

        # The log may echo file metadata in any encoding: never fail on decoding
        output = completed.stdout.decode("utf-8", errors="replace")

        # Read the labelled values directly rather than relying on start/end
        # lines strictly alternating in the log.
        silence_starts = [float(value) for value in re.findall(r"silence_start:\s*" + self._NUMBER, output)]
        silence_ends = [float(value) for value in re.findall(r"silence_end:\s*" + self._NUMBER, output)]

        # zip() drops a trailing start that has no matching end
        return list(zip(silence_starts, silence_ends))

    def extract_midpoints(self, list):
        ''' Function to extract the midpoints where the audio must be sliced '''
        return [(start + end) / 2 for start, end in list]

    def process_segment(self, start_point, end_point):
        '''Extracts and exports a segment of the audio

        ``start_point`` / ``end_point`` are multiplied by 1000 to index the
        loaded audio. The slice is written to ``<output_folder>/temp`` and
        overwritten on every call.

        Returns:
            tuple: (path of the temporary file, ``len()`` of the slice).
        '''
        segment = self.audio[start_point * 1000 : end_point * 1000]
        temp_segment_folder = os.path.join(self.config.output_folder, 'temp')
        os.makedirs(temp_segment_folder, exist_ok=True)

        temp_segment_name = f'temp_segment.{self.config.export_format}'
        temp_segment_path = os.path.join(temp_segment_folder, temp_segment_name)
        segment.export(temp_segment_path, format=self.config.export_format)

        return temp_segment_path, len(segment)

    def transcribe_segment(self, segment_path):
        '''Transcribes the audio segment

        Returns the first 150 characters of the transcription with spaces and
        the characters ``?!,;."`` replaced by underscores.
        '''
        # The config may not carry a `whisper_model` attribute: fall back to the default
        model_name = getattr(self.config, 'whisper_model', self.DEFAULT_WHISPER_MODEL)
        model = whisper.load_model(model_name)
        transcription = model.transcribe(segment_path)
        transcription_text = transcription['text']
        return re.sub(r'[ ?!,;."]', "_", transcription_text[:150])

    def export_segment(self, segment, counter):
        """Exports the processed segment with a formatted name.

        The file is named ``<prefix>_<counter padded to 4 digits>.<export_format>``
        inside ``config.output_folder``; its path is returned.
        """
        padded_index = str(counter).zfill(4)
        segment_path = os.path.join(self.config.output_folder, f"{self.config.prefix}_{padded_index}.{self.config.export_format}")
        segment.export(segment_path, format=self.config.export_format)
        print(f"Saved {segment_path}")
        return segment_path

    def split_audio(self, start_point, end_point, counter):
        """Export the audio between two points, splitting it again if it is too long.

        Args:
            start_point, end_point: bounds within the full audio.
            counter: number given to the next exported file.

        Returns:
            int: the counter to use for the next export.
        """
        segment_path, segment_length = self.process_segment(start_point, end_point)

        if segment_length > self.MAX_SEGMENT_MS:
            # Silences found in the temp file are relative to start_point
            return self.handle_long_segment(segment_path, counter, start_point)

        self.export_segment(self.audio[start_point*1000:end_point*1000], counter)
        return counter + 1

    def handle_long_segment(self, segment_path, counter, offset=0.0):
        """Split a too-long segment at the midpoints of the silences it contains.

        Args:
            segment_path: temporary file holding the long segment.
            counter: number given to the next exported file.
            offset: position of the segment's start within the full audio;
                added to the midpoints, which are relative to the temp file.

        Returns:
            int: the counter to use for the next export (unchanged when the
            segment could not be split).

        Only the audio between consecutive midpoints is exported.
        """
        new_silence_periods = self.detect_silences("-23dB", segment_path)
        # Ensure new_silence_periods is a list of tuples before proceeding
        if isinstance(new_silence_periods, str) or not all(isinstance(item, tuple) and len(item) == 2 for item in new_silence_periods):
            print('Received message:', new_silence_periods)
            return counter  # Skip processing if data is invalid

        # Check if there are no valid silence periods to process
        if not new_silence_periods:
            print('No silence detected')
            return counter

        new_midpoints = self.extract_midpoints(new_silence_periods)
        for start_midpoint, end_midpoint in zip(new_midpoints, new_midpoints[1:]):
            counter = self.split_audio(offset + start_midpoint, offset + end_midpoint, counter)

        # Always hand the counter back so the caller keeps numbering files
        return counter

    def clean_up(self):
        """Delete the ``<output_folder>/temp`` working folder if it exists."""
        temp_segment_folder = os.path.join(self.config.output_folder, 'temp')
        if os.path.exists(temp_segment_folder):
            shutil.rmtree(temp_segment_folder)
            print("Temporary folder deleted:", temp_segment_folder)







def define_process_config(filepath, time_threshold, output_folder):
    """Build the AudioProcess_Config used to split one audio file.

    Args:
        filepath: path of the audio file to split.
        time_threshold: minimum silence duration, stored as-is in the config.
        output_folder: folder that will receive the exported segments.

    Returns:
        AudioProcess_Config: the export format equals the input format (the
        lower-cased file extension) and the prefix is the file name without
        its extension.
    """
    usable_folder = os.path.join(output_folder, 'Usable_Audios')
    not_usable_folder = os.path.join(output_folder, 'Not_Usable_Audios')

    # splitext only looks at the last path component, so a dot in a directory
    # name is never mistaken for the file extension.
    file_stem, extension = os.path.splitext(os.path.basename(filepath))
    input_format = extension.lstrip('.').lower()

    return AudioProcess_Config(
        filepath=filepath,
        input_format=input_format,
        export_format=input_format,
        output_folder=output_folder,
        usable_folder=usable_folder,
        not_usable_folder=not_usable_folder,
        time_threshold=time_threshold,
        prefix=file_stem
    )

    
def split_main(files, time_threshold, output_folder):
    """Split every file in ``files`` at the midpoints of its detected silences.

    Args:
        files: paths of the audio files to split (may be None or empty).
        time_threshold: minimum silence duration used for the detection.
        output_folder: folder receiving the exported segments (created if missing).

    Returns:
        str: one status line per file, suitable for display in a textbox.

    NOTE(review): the audio after the last silence midpoint is not exported -
    confirm whether that tail should be kept.
    """
    if not files:
        return 'No files were provided.'

    report = []

    for file in files:
        process_config = define_process_config(file, time_threshold, output_folder)
        os.makedirs(process_config.output_folder, exist_ok=True)

        ap = AudioProcessor(process_config)
        counter = 1

        silence_list = ap.detect_silences()
        # detect_silences may answer with a message string rather than an
        # empty list when nothing was found; neither can be split on.
        if not silence_list or isinstance(silence_list, str):
            print('No silences detected')
            report.append(f'{file}: no silences detected')
            continue

        try:
            start_point = 0
            for end_point in ap.extract_midpoints(silence_list):
                counter = ap.split_audio(start_point, end_point, counter)
                start_point = end_point
        finally:
            # Remove the temporary folder even if a segment failed to export
            ap.clean_up()

        # split_audio is expected to hand back the next counter; tolerate a
        # missing value when reporting.
        if isinstance(counter, int):
            report.append(f'{file}: exported {counter - 1} segment(s)')
        else:
            report.append(f'{file}: split finished')

    return '\n'.join(report)


def create_split_audio_interface():
    """Build the Gradio Blocks UI used to split audio files on silences.

    Layout: a single row with two columns. The first column holds the file
    picker, the silence-duration number field, the output-folder text field
    and the "Split" button. The second column holds the 'Progress' text box
    and the 'Console output' text area. Clicking "Split" calls split_main
    with the three inputs and routes its return value to 'Progress'.

    Returns:
        The gr.Blocks object; launching it is left to the caller.
    """
    with gr.Blocks() as interface:
        with gr.Row():
            with gr.Column():
                files_input = gr.File(
                    file_count="multiple",
                    type="filepath",
                    label="Choose the files to segment",
                )
                silence_float = gr.Number(
                    label='Silence duration',
                    info='Minimum duration of a silence to be defined as a split point',
                )
                output_folder = gr.Textbox(
                    label='Output Folder',
                    info='Type the path where you want to output the segmented audios',
                )
                split_btn = gr.Button("Split")

            with gr.Column():
                out = gr.Textbox(label='Progress')
                # Shown in the UI but not connected to any event in this function.
                gr.TextArea(label='Console output')

            split_btn.click(
                fn=split_main,
                inputs=[files_input, silence_float, output_folder],
                outputs=out,
            )

    return interface

# Build the split-audio interface, then call queue() and launch() on it, in that order.
app= create_split_audio_interface()
app.queue()
app.launch()


Running on local URL:  http://127.0.0.1:7869

To create a public link, set `share=True` in `launch()`.




Saved /home/maelys/AI_PROJECTS/SOUND/DATA_CENTER/Tests/split_test/02 - Epitaph for a Spy Penguin Modern Classics_0001.mp3
Saved /home/maelys/AI_PROJECTS/SOUND/DATA_CENTER/Tests/split_test/02 - Epitaph for a Spy Penguin Modern Classics_0002.mp3
Saved /home/maelys/AI_PROJECTS/SOUND/DATA_CENTER/Tests/split_test/02 - Epitaph for a Spy Penguin Modern Classics_0003.mp3
Saved /home/maelys/AI_PROJECTS/SOUND/DATA_CENTER/Tests/split_test/02 - Epitaph for a Spy Penguin Modern Classics_0004.mp3
Saved /home/maelys/AI_PROJECTS/SOUND/DATA_CENTER/Tests/split_test/02 - Epitaph for a Spy Penguin Modern Classics_0005.mp3
Saved /home/maelys/AI_PROJECTS/SOUND/DATA_CENTER/Tests/split_test/02 - Epitaph for a Spy Penguin Modern Classics_0006.mp3
Saved /home/maelys/AI_PROJECTS/SOUND/DATA_CENTER/Tests/split_test/02 - Epitaph for a Spy Penguin Modern Classics_0007.mp3
Saved /home/maelys/AI_PROJECTS/SOUND/DATA_CENTER/Tests/split_test/02 - Epitaph for a Spy Penguin Modern Classics_0008.mp3
Saved /home/maelys/AI_PR