In [2]:
!pip install pyngrok
!pip install streamlit
!pip install pytube
!pip install openai-whisper
!pip install transformers torch

Collecting pyngrok
  Using cached pyngrok-7.1.2-py3-none-any.whl (22 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.1.2
Collecting streamlit
  Using cached streamlit-1.31.1-py2.py3-none-any.whl (8.4 MB)
Collecting validators<1,>=0.2 (from streamlit)
  Using cached validators-0.22.0-py3-none-any.whl (26 kB)
Collecting gitpython!=3.1.19,<4,>=3.0.7 (from streamlit)
  Using cached GitPython-3.1.42-py3-none-any.whl (195 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Using cached pydeck-0.8.1b0-py2.py3-none-any.whl (4.8 MB)
Collecting watchdog>=2.1.5 (from streamlit)
  Using cached watchdog-4.0.0-py3-none-manylinux2014_x86_64.whl (82 kB)
Collecting gitdb<5,>=4.0.1 (from gitpython!=3.1.19,<4,>=3.0.7->streamlit)
  Using cached gitdb-4.0.11-py3-none-any.whl (62 kB)
Collecting smmap<6,>=3.0.1 (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit)
  Using cached smmap-5.0.1-py3-none-any.whl (24 kB)
Installing collected packages: watchdog, validators, sm

In [3]:
import os
from getpass import getpass

from pyngrok import ngrok

# SECURITY: never commit an ngrok authtoken to the notebook. The token is read
# from the NGROK_AUTHTOKEN environment variable when present, otherwise it is
# prompted for interactively (getpass does not echo the value into the output).
# NOTE(review): the token that was previously hardcoded in this cell has been
# published with the notebook and should be revoked in the ngrok dashboard.
ngrok_token = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: ")
ngrok.set_auth_token(ngrok_token)

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [4]:
%%writefile app.py
import os
import textwrap
import uuid

import streamlit as st
import torch
import whisper
from pytube import YouTube
from transformers import T5ForConditionalGeneration, T5Tokenizer
from transformers import pipeline

# Name of the T5 checkpoint used for summarization, and the device all models
# run on. Fall back to CPU so the app still starts on a runtime without a GPU.
model_name = 't5-base'
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"


@st.cache_resource
def _load_models(t5_name, device):
    """Load the T5 tokenizer/model and the Whisper model once per server process.

    Streamlit re-executes this script on every widget interaction; caching the
    loaded objects avoids reloading the weights on each rerun.

    Args:
        t5_name: Checkpoint name passed to the T5 ``from_pretrained`` loaders.
        device: Device string the T5 and Whisper models are placed on.

    Returns:
        A ``(tokenizer, t5_model, whisper_model)`` tuple.
    """
    t5_tokenizer = T5Tokenizer.from_pretrained(t5_name)
    t5_model = T5ForConditionalGeneration.from_pretrained(t5_name).to(device)
    # Load Whisper model for audio transcription
    asr_model = whisper.load_model("base", device=device)
    return t5_tokenizer, t5_model, asr_model


# Module-level names used by the functions below.
tokenizer, model, whisper_model = _load_models(model_name, DEVICE)

def download_audio(youtube_url):
    """Download the audio-only stream of a YouTube video into the working directory.

    Args:
        youtube_url: URL of the video; surrounding whitespace is ignored.

    Returns:
        The file path returned by pytube's ``download`` call for the saved audio.

    Raises:
        ValueError: If ``youtube_url`` is empty or blank, or if pytube reports
            no audio-only stream for the video.
    """
    if not youtube_url or not youtube_url.strip():
        raise ValueError("A YouTube video URL is required.")

    yt = YouTube(youtube_url.strip())
    audio_stream = yt.streams.get_audio_only()
    if audio_stream is None:
        raise ValueError("No audio-only stream is available for this video.")

    # A unique file name per call keeps concurrent sessions from overwriting
    # (or deleting) each other's download.
    # NOTE(review): the '.mp3' extension is kept from the original code; the
    # downloaded stream may use a different container — confirm if it matters.
    filename = f"downloaded_audio_{uuid.uuid4().hex}.mp3"
    return audio_stream.download(output_path='.', filename=filename)

def transcribe_audio(audio_file_path):
    """Transcribe an audio file with the module-level Whisper model.

    Args:
        audio_file_path: Path of the audio file handed to ``whisper_model.transcribe``.

    Returns:
        The value stored under the ``"text"`` key of the transcription result.
    """
    transcription = whisper_model.transcribe(audio_file_path)
    text = transcription["text"]
    return text

def summarize_text(text, chunk_size=1000, max_length=150, min_length=30):
    """Summarize ``text`` chunk by chunk with the module-level T5 model.

    The text is split into chunks of at most ``chunk_size`` characters on
    whitespace boundaries (so words are not cut in half), each chunk is
    summarized independently, and the partial summaries are joined with spaces.

    Args:
        text: The text to summarize.
        chunk_size: Maximum number of characters per chunk; must be positive
            (``textwrap.wrap`` raises ``ValueError`` otherwise).
        max_length: ``max_length`` passed to the summarization pipeline.
        min_length: ``min_length`` passed to the summarization pipeline.

    Returns:
        The concatenated summaries, or an empty string when ``text`` is empty
        or contains only whitespace.
    """
    # Nothing to summarize: skip building the pipeline entirely.
    if not text or not text.strip():
        return ''

    # Split the text into smaller chunks on word boundaries
    chunks = textwrap.wrap(text, width=chunk_size)

    # Run the pipeline on whatever device the model already lives on instead of
    # assuming GPU 0 is available.
    summarization_pipeline = pipeline(
        "summarization", model=model, tokenizer=tokenizer, device=model.device
    )

    # Summarize each chunk; truncation guards against a chunk that tokenizes to
    # more tokens than the model accepts.
    summaries = [
        summarization_pipeline(
            chunk,
            max_length=max_length,
            min_length=min_length,
            do_sample=False,
            truncation=True,
        )[0]['summary_text']
        for chunk in chunks
    ]

    # Concatenate the summaries
    return ' '.join(summaries)


def set_background_and_text_styles(url):
    """Inject page-wide CSS into the Streamlit app.

    The stylesheet sets ``url`` as a cover-sized background image on ``.stApp``
    and forces black text on white backgrounds for headings, paragraphs, text
    inputs, buttons and text areas.

    Args:
        url: Image URL interpolated verbatim into the ``background-image`` rule.
    """
    stylesheet = f"""
        <style>
        .stApp {{
            background-image: url("{url}");
            background-size: cover;
        }}
        /* Additional styles */
        h1, h2, h3, h4, h5, h6, p, .stTextInput>div>div>input, .stButton>button {{
            color: #000000;  /* Sets text to black */
        }}
        .stTextInput>div>div>input, .stButton>button {{
            background-color: #fff;  /* Background to white */
            color: #000000;  /* Text to black */
            caret-color: #000000;  /* Cursor color to black */
        }}
        textarea {{
            background-color: #FFFFFF !important;  /* White background */
            color: #000000 !important;  /* Black text color */
        }}
        </style>
        """
    # Raw HTML must be explicitly allowed for the <style> tag to be rendered.
    st.markdown(stylesheet, unsafe_allow_html=True)

# Set the background image and text styles
set_background_and_text_styles('https://images.unsplash.com/photo-1620712943543-bcc4688e7485?q=80&w=2730&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D')

# Streamlit UI
st.title('YouTube Video Summarizer')
youtube_url = st.text_input('Enter the YouTube video URL:', '')

if st.button('Generate Transcript'):
    if not youtube_url.strip():
        st.warning('Please enter a YouTube video URL first.')
    else:
        audio_file_path = None
        try:
            with st.spinner('Downloading audio and transcribing... Please wait.'):
                audio_file_path = download_audio(youtube_url.strip())
                # Save transcription to session state so it survives reruns
                st.session_state.transcription = transcribe_audio(audio_file_path)
                # A summary of a previous video no longer matches this transcript.
                if 'summary' in st.session_state:
                    del st.session_state['summary']
        except Exception as exc:
            # UI boundary: show the failure to the user instead of crashing the page.
            st.error(f'Could not transcribe this video: {exc}')
        finally:
            # Cleanup: remove the downloaded audio file even if transcription failed
            if audio_file_path and os.path.exists(audio_file_path):
                os.remove(audio_file_path)

# Render from session state (not from inside the button branches): a button is
# only True on the rerun triggered by its own click, so results drawn inside
# the branch would vanish on the next interaction.
if 'transcription' in st.session_state and st.session_state.transcription:
    st.write('Transcription:')
    st.text_area("Transcript", st.session_state.transcription, height=300)

    if st.button('Generate Summary'):
        with st.spinner('Generating summary... Please wait.'):
            st.session_state.summary = summarize_text(st.session_state.transcription)

    if 'summary' in st.session_state and st.session_state.summary:
        st.write('Summary:')
        st.text_area("Summary", st.session_state.summary, height=150)



Overwriting app.py


In [5]:
from pyngrok import ngrok

# Kill existing ngrok tunnels (if any)
ngrok.kill()

# Create a new ngrok tunnel on port 8501
public_url = ngrok.connect(8501)
print(public_url)

# Run the Streamlit app
!streamlit run app.py


NgrokTunnel: "https://e373-34-143-225-16.ngrok-free.app" -> "http://localhost:8501"

Collecting usage statistics. To deactivate, set browser.gatherUsageStats to False.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.143.225.16:8501[0m
[0m
2024-02-25 04:25:12.035000: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-02-25 04:25:12.035065: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-02-25 04:25:12.036547: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been regis



[34m  Stopping...[0m
[34m  Stopping...[0m
Traceback (most recent call last):
  File "/usr/lib/python3.10/weakref.py", line 667, in _exitfunc
    f()
  File "/usr/lib/python3.10/weakref.py", line 591, in __call__
    return info.func(*info.args, **(info.kwargs or {}))
  File "/usr/local/lib/python3.10/dist-packages/urllib3/connectionpool.py", line 1181, in _close_pool_connections
    conn.close()
  File "/usr/local/lib/python3.10/dist-packages/urllib3/connection.py", line 272, in close
    super().close()
  File "/usr/local/lib/python3.10/dist-packages/streamlit/web/bootstrap.py", line 69, in signal_handler
    server.stop()
  File "/usr/local/lib/python3.10/dist-packages/streamlit/web/server/server.py", line 399, in stop
    self._runtime.stop()
  File "/usr/local/lib/python3.10/dist-packages/streamlit/runtime/runtime.py", line 311, in stop
    async_objs.eventloop.call_soon_threadsafe(stop_on_eventloop)
  File "/usr/lib/python3.10/asyncio/base_events.py", line 798, in call_soon_th