# Demo (in gradio)

In [1]:
# imports
import os
import json
from dotenv import load_dotenv
from typing import Optional

from summarize_media.get_media.youtube import get_youtube
from summarize_media.pre_processing.convert_audio_format import convert_to_wav
from summarize_media.transcribe.transcribe_cloud import get_transcribe_cloud
from summarize_media.host_files.host_files import upload_file_to_0x0
from summarize_media.post_processing.reformat_output import reformat
from summarize_media.summarize_transcription.summarize import get_summarization

load_dotenv()

True

In [2]:
# Hard coded for demo purposes
# Directory passed to get_youtube() as the second argument when fetching a video's audio.
media_output_path = "data/media"
# Directory pipeline() falls back to for saved transcripts when no output_path is given.
transcript_output_path = "data/transcript"
# Threshold for the converted audio: files smaller than this are handed to the
# transcription call as an open file, larger ones are uploaded to the file host first.
local_size_limit = 40 # in MB

In [3]:
def get_name(file_path: str):
    """Return the file name of ``file_path`` with its directory and last extension removed.

    Only the final extension is stripped, so ``"a/b/clip.tar.gz"`` yields ``"clip.tar"``.
    """
    base_name = os.path.basename(file_path)
    stem = os.path.splitext(base_name)[0]
    return stem

def pipeline(url: str, 
             sampling_rate: int = 16000, 
             save_results = False, 
             output_path: Optional[str] = None,
             cloud_transcribe_model_name: str = "large-v3",
             cloud_transcribe_kwargs: Optional[dict] = None):
    """Fetch a YouTube video's audio, transcribe it in the cloud and summarize it.

    Args:
        url: Address of the video, passed to ``get_youtube``.
        sampling_rate: Sample rate passed to ``convert_to_wav``.
        save_results: When true, the transcript segments (``<name>.json``) and
            the reformatted transcript (``<name>.txt``) are written to disk.
        output_path: Directory for the saved files; falls back to the
            module-level ``transcript_output_path`` when empty or ``None``.
        cloud_transcribe_model_name: Forwarded to ``get_transcribe_cloud`` as
            ``model_name``.
        cloud_transcribe_kwargs: Extra keyword arguments forwarded to
            ``get_transcribe_cloud``.

    Returns:
        A ``(summary_txt, transcript_txt)`` tuple.
    """
    # Fetching Audio from youtube and converting to wav
    downloaded_path = get_youtube(url, media_output_path)
    name = get_name(downloaded_path)

    wav_path = convert_to_wav(downloaded_path, sample_rate=sampling_rate)

    size = os.path.getsize(wav_path)
    print(f"Converted audio size: {(size / 1024) /1024:.1f} MB")

    cloud_transcribe_kwargs = cloud_transcribe_kwargs if cloud_transcribe_kwargs else {}

    # Directly supply the file for inference or upload the file depending on size.
    # The handle is tracked separately so it is always closed once transcription
    # has finished (or failed).
    audio_file = None
    try:
        if size < local_size_limit * 1024 * 1024:
            print("Upload file directly to inference endpoint")
            audio_file = open(wav_path, "rb")
            audio_source = audio_file
        else:
            print("Exceeded size limit, uploading to file host before performing inference")
            # NOTE(review): 3600 is presumably the hosting expiry in seconds and the
            # return value the hosted URL -- confirm against upload_file_to_0x0.
            audio_source = upload_file_to_0x0(wav_path, 3600)
            print("Uploaded File")

        # Transcribing and reformatting the audio
        print("Starting transcription process")
        transcript = get_transcribe_cloud(audio_source, 
                                          model_name=cloud_transcribe_model_name, 
                                          **cloud_transcribe_kwargs)
    finally:
        if audio_file is not None:
            audio_file.close()

    segments = transcript["segments"]
    transcript_txt = reformat(segments)
    summary_txt = get_summarization(transcript_txt)

    output_path = output_path if output_path else transcript_output_path
    if save_results and output_path:
        # Create the target directory on demand so saving works on a fresh checkout.
        os.makedirs(output_path, exist_ok=True)
        transcript_json_path = os.path.join(output_path, name + ".json")
        transcript_txt_path = os.path.join(output_path, name + ".txt")

        with open(transcript_json_path, "w", encoding="utf-8") as file:
            json.dump(segments, file)

        # Explicit UTF-8 so non-ASCII transcripts save regardless of the platform default.
        with open(transcript_txt_path, "w", encoding="utf-8") as file:
            file.write(transcript_txt)

    return summary_txt, transcript_txt


In [4]:
def pipeline_local_file(file_path: str, 
                        sampling_rate: int = 16000, 
                        save_results = False, 
                        output_path: Optional[str] = None,
                        cloud_transcribe_model_name: str = "large-v2",
                        cloud_transcribe_kwargs: Optional[dict] = None):
    """Transcribe and summarize a media file that is already on disk.

    Mirrors ``pipeline`` but starts from a local file instead of a YouTube URL.

    Args:
        file_path: Path of the media file; its base name (without extension)
            names the saved outputs.
        sampling_rate: Sample rate passed to ``convert_to_wav``.
        save_results: When true, the transcript segments (``<name>.json``) and
            the reformatted transcript (``<name>.txt``) are written to disk.
        output_path: Directory for the saved files; falls back to the
            module-level ``transcript_output_path`` when empty or ``None``.
        cloud_transcribe_model_name: Forwarded to ``get_transcribe_cloud`` as
            ``model_name``.
            NOTE(review): defaults to "large-v2" while ``pipeline`` defaults to
            "large-v3" -- confirm the difference is intentional.
        cloud_transcribe_kwargs: Extra keyword arguments forwarded to
            ``get_transcribe_cloud``.

    Returns:
        A ``(summary_txt, transcript_txt)`` tuple.
    """
    name = get_name(file_path)
    wav_path = convert_to_wav(file_path, sample_rate=sampling_rate)
    cloud_transcribe_kwargs = cloud_transcribe_kwargs if cloud_transcribe_kwargs else {}

    size = os.path.getsize(wav_path)
    print(f"Converted audio size: {(size / 1024) /1024:.1f} MB")

    # Directly supply the file for inference or upload the file depending on size.
    # The handle is tracked separately so it is always closed once transcription
    # has finished (or failed).
    audio_file = None
    try:
        if size < local_size_limit * 1024 * 1024:
            print("Upload file directly to inference endpoint")
            audio_file = open(wav_path, "rb")
            audio_source = audio_file
        else:
            print("Exceeded size limit, uploading to file host before performing inference")
            # NOTE(review): 3600 is presumably the hosting expiry in seconds and the
            # return value the hosted URL -- confirm against upload_file_to_0x0.
            audio_source = upload_file_to_0x0(wav_path, 3600)
            print("Uploaded File")

        # Transcribing and reformatting the audio
        print("Start Transcription")
        transcript = get_transcribe_cloud(audio_source, 
                                          model_name=cloud_transcribe_model_name, 
                                          **cloud_transcribe_kwargs)
    finally:
        if audio_file is not None:
            audio_file.close()

    # Same handling as pipeline(): reformat() and the saved JSON work on the
    # "segments" list, not on the whole transcription result.
    segments = transcript["segments"]
    transcript_txt = reformat(segments)
    summary_txt = get_summarization(transcript_txt)

    # Resolve the output directory before building any path, so the default
    # output_path=None no longer crashes in os.path.join.
    output_path = output_path if output_path else transcript_output_path
    if save_results and output_path:
        # Create the target directory on demand so saving works on a fresh checkout.
        os.makedirs(output_path, exist_ok=True)
        transcript_json_path = os.path.join(output_path, name + ".json")
        transcript_txt_path = os.path.join(output_path, name + ".txt")

        with open(transcript_json_path, "w", encoding="utf-8") as file:
            json.dump(segments, file)

        # Explicit UTF-8 so non-ASCII transcripts save regardless of the platform default.
        with open(transcript_txt_path, "w", encoding="utf-8") as file:
            file.write(transcript_txt)

    return summary_txt, transcript_txt

## Gradio Demo

In [5]:
def demo_pipeline(url: str, sampling_rate: int = 16000):
    """UI-facing wrapper around ``pipeline`` that never raises.

    Args:
        url: Video address typed into the demo.
        sampling_rate: Audio sample rate; coerced to ``int`` because UI widgets
            may deliver numeric values as floats.

    Returns:
        ``(summary, transcript)`` on success. On any failure, the exception
        message and the fixed text ``"Failed to get transcription"``, so the
        error is shown in the UI instead of breaking the request.
    """
    import traceback

    try:
        summary, transcript = pipeline(url, int(sampling_rate))
        return summary, transcript
    except Exception as e:
        # Keep the full traceback in the notebook/server log; the UI only gets the message.
        traceback.print_exc()
        return str(e), "Failed to get transcription"

In [6]:
import gradio as gr

# Build the demo UI: a URL box and a sample-rate slider feed demo_pipeline, whose
# two return values fill the summary and transcript components.
with gr.Blocks() as demo:
    gr.Markdown(
    """
    # Media Summarizer
    
    Summarizes and transcribes a youtube video.
    
    Feedback: [Google form](https://forms.gle/RT66bre2X9B9s4d79) or dm me (twitter(X):shivvor2 or discord: shz2__)
    
    Wait time is ~5-10 mins (my internet is slow) and there are issues for super long (3+ hour) videos (file size cap for hosting service im using)
    
    """
    )
    # Named url_input rather than `input` so the builtin input() is not shadowed.
    url_input = gr.Textbox(label = "url", placeholder = "Please input a valid youtube url")
    sampling_rate = gr.Slider(value=16000, minimum=8000, maximum=24000, step=100, label = "Audio Sample Rate")
    out_summary = gr.Markdown(label = "summary")
    out_transcript = gr.Textbox(label = "transcript")
    start_btn = gr.Button("Un-NotebookLM's your podcast")
    # Both components are passed as inputs; previously the slider was created
    # but never wired up, so changing it had no effect on the run.
    start_btn.click(fn = demo_pipeline, 
                    inputs = [url_input, sampling_rate], 
                    outputs = [out_summary, out_transcript]
                    )

# share = True additionally exposes the app through a temporary public URL.
demo.launch(share = True)


  from .autonotebook import tqdm as notebook_tqdm
INFO:httpx:HTTP Request: GET https://api.gradio.app/gradio-messaging/en "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET http://127.0.0.1:7860/gradio_api/startup-events "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: HEAD http://127.0.0.1:7860/ "HTTP/1.1 200 OK"


* Running on local URL:  http://127.0.0.1:7860


INFO:httpx:HTTP Request: GET https://api.gradio.app/v3/tunnel-request "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"


* Running on public URL: https://eaf28e749992caa370.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


INFO:httpx:HTTP Request: HEAD https://eaf28e749992caa370.gradio.live "HTTP/1.1 200 OK"




Title: Why Can't We Make Simple Software? - Peter van Hardenberg
Upload date: 27/11/24 09:00:40
Time length: 0:41:33
 ↳ |████████████████████████████████████████████| 100.0%

INFO:summarize_media.host_files.host_files:Uploading file: Why Can't We Make Simple Software? - Peter van Hardenberg.wav


Converted audio size: 152.2 MB
Exceeded size limit, uploading to file host before performing inference


INFO:summarize_media.host_files.host_files:Successfully uploaded to https://0x0.st/XRnJ.wav


Uploaded File
Starting transcription process


INFO:httpx:HTTP Request: POST https://api.replicate.com/v1/predictions "HTTP/1.1 201 Created"
INFO:httpx:HTTP Request: GET https://api.replicate.com/v1/models/victor-upmeet/whisperx/versions/84d2ad2d6194fe98a17d2b60bef1c7f910c46b2f6fd38996ca457afd9c8abfcb "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
