In [1]:
import gradio as gr
import os
import json
import qdrant_client
from typing import cast
from glob import glob
from pathlib import Path
from src.preprocess import (
    VideoIngestionPipeline,
    AppConfig,
    RunConfig,
    get_yt_id_from_url,
)
from llama_index.core.base.response.schema import RESPONSE_TYPE, Response
from llama_index.core.schema import QueryBundle
from llama_index.core.schema import ImageNode
from src.video_to_index import video_to_index, chat

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Tunable settings, named here so they are easy to spot and change.
SECONDS_PER_FRAME = 5  # forwarded to RunConfig(seconds_per_frame=...)
QDRANT_DB_PATH = "qdrant_mm_db"  # on-disk path of the local Qdrant store

# Run configuration handed to the ingestion pipeline's run() call.
run_config = RunConfig(seconds_per_frame=SECONDS_PER_FRAME)
# Local Qdrant client shared by the indexing code in this notebook.
db_client = qdrant_client.QdrantClient(path=QDRANT_DB_PATH)

In [3]:
# Handle to the Gradio app. Initialised to None so the UI cell can test `demo`
# and close a previously launched instance before building a new one.
demo = None

In [4]:
def chat_response_with_context(response: Response):
    """Render a chat response as answer text followed by its image sources.

    Args:
        response: Query response. Its string form is used as the answer text
            and ``response.source_nodes`` is scanned for image nodes.

    Returns:
        The answer text, a blank line, a ``<sub>``-wrapped "Sources: " label,
        and the file names of the image source nodes, each wrapped in
        ``<sub>`` and separated by ", ". Source nodes that are not
        ``ImageNode`` instances are not listed.
    """
    # Only image nodes are cited; show just the file name, not the full path.
    image_sources = [
        f"<sub>{Path(scored.node.image_path).name}</sub>"
        for scored in response.source_nodes
        if isinstance(scored.node, ImageNode)
    ]
    # Join the entries rather than appending ", " after each one, which left a
    # dangling separator after the last source.
    return str(response) + "\n\n" + "<sub>Sources: </sub>" + ", ".join(image_sources)

In [None]:
from src.preprocess import VideoMetadata


def ingest_and_indexing(input_path_or_url: str, save_dir: Path):
    """Ingest a video, or reuse a previous ingestion, and build its index.

    Args:
        input_path_or_url: Local file path or URL passed to the ingestion
            pipeline when no cached metadata is available.
        save_dir: Directory holding (or receiving) the ingestion artefacts.
            Cached metadata is looked up in ``<save_dir>/metadata/*.json``.

    Returns:
        A ``(video_metadata, index)`` tuple, where ``index`` is whatever
        ``video_to_index`` returns for this video.
    """
    # Path.glob only interprets the "*.json" pattern, so glob metacharacters
    # in the directory name (e.g. an upload called "clip [v1]") cannot break
    # the lookup. A missing directory simply yields no matches.
    metadata_path = next(iter(Path(save_dir, "metadata").glob("*.json")), None)

    if metadata_path is not None:
        # Reuse the metadata written by an earlier ingestion. The context
        # manager closes the file handle.
        with open(metadata_path, "r", encoding="utf-8") as metadata_fp:
            video_metadata = cast(VideoMetadata, json.load(metadata_fp))
    else:
        # Either nothing was ingested yet, or a previous run left the
        # directory behind without metadata; run the pipeline instead of
        # failing with an IndexError.
        # NOTE(review): assumes the pipeline tolerates an existing save_dir.
        vid_pipe = VideoIngestionPipeline(AppConfig(save_dir=save_dir))
        video_metadata = vid_pipe.run(input_path_or_url, run_config)

    # TODO: load if index are saved.
    # Remove this video's existing collections before indexing again
    # (presumably so stale vectors are not duplicated; confirm against
    # video_to_index).
    db_client.delete_collection(f"{video_metadata['id']}_text_collection")
    db_client.delete_collection(f"{video_metadata['id']}_image_collection")
    index = video_to_index(save_dir, db_client, video_metadata["id"])
    return video_metadata, index


# Re-running this cell: shut down the previously launched app first.
if demo:
    demo.close()

try:
    with gr.Blocks(fill_height=True, theme="ParityError/Anime") as demo:
        # Per-session state shared between the loaders and the chat callback.
        index_state = gr.State(None)
        video_metadata_state = gr.State(None)
        with gr.Row(variant="panel"):
            # Left column: the two ways of loading a video.
            with gr.Column(scale=3):
                with gr.Tab("Youtube video"):
                    yt_video_output = gr.Video(sources=["upload"], height="50%")
                    url_input = gr.Textbox(
                        label="Copy & paste a Youtube URL",
                        show_copy_button=True,
                        placeholder="Youtube video URL",
                    )
                    fetch_btn = gr.Button("Fetch and build index", variant="primary")

                with gr.Tab("Your video"):
                    video_output = gr.Video(sources=["upload"], height="50%")
                    video_upload = gr.UploadButton(
                        label="Select video", file_types=["video"], size="sm"
                    )

            # Right column: the multimodal chat.
            with gr.Column(scale=7):

                def send_message(message, history, index, video_metadata):
                    """Answer one chat message against the loaded video.

                    Args:
                        message: Multimodal chat message; ``message["text"]``
                            is the query and ``message["files"]`` the
                            attachments.
                        history: Chat history supplied by the chat interface.
                        index: Current value of ``index_state``.
                        video_metadata: Current value of
                            ``video_metadata_state``.

                    Returns:
                        The response text with its image sources appended.
                    """
                    text = message["text"]

                    # Only the first attached file is used as the query image.
                    file = None
                    if len(message["files"]):
                        file = message["files"][0]["path"]

                    response = chat(
                        QueryBundle(query_str=text, image_path=file),
                        history,
                        index,
                        video_metadata,
                    )

                    return chat_response_with_context(response)

                # render=False: the chatbot is rendered by the ChatInterface below.
                bot = gr.Chatbot(height="500px", render=False)
                itf = gr.ChatInterface(
                    send_message,
                    chatbot=bot,
                    title="Youtube QA",
                    description="Paste a youtube link to start chat",
                    multimodal=True,
                    additional_inputs=[index_state, video_metadata_state],
                )

        def on_url_submit(youtube_url: str):
            """Ingest the video behind ``youtube_url`` and reset the chat.

            Returns a component-to-value mapping that updates the video
            player, both state holders, and the chatbot (with a greeting).
            """
            youtube_id = get_yt_id_from_url(youtube_url)
            save_dir = Path(f"data/{youtube_id}")
            video_metadata, index = ingest_and_indexing(youtube_url, save_dir)
            video_title = video_metadata["title"]

            return {
                yt_video_output: video_metadata["video_file"],
                index_state: index,
                video_metadata_state: video_metadata,
                bot: [
                    [
                        None,
                        f"🎉Loaded the video! If you have any question about {video_title}, feel free to ask.",
                    ]
                ],
            }

        fetch_btn.click(
            on_url_submit,
            inputs=[url_input],
            outputs=[yt_video_output, index_state, video_metadata_state, bot],
        )

        def on_video_upload(filepath: str):
            """Ingest an uploaded video file.

            The artefacts are stored under ``data/<file stem>``. Returns a
            component-to-value mapping that updates the video player and both
            state holders.
            """
            save_dir = Path(f"data/{Path(filepath).stem}")
            video_metadata, index = ingest_and_indexing(filepath, save_dir)

            return {
                video_output: video_metadata["video_file"],
                index_state: index,
                video_metadata_state: video_metadata,
            }

        video_upload.upload(
            on_video_upload,
            inputs=[video_upload],
            outputs=[video_output, index_state, video_metadata_state],
        )

    demo.launch(share=False, server_port=8000)
except Exception:
    # If gr.Blocks(...) itself failed, `demo` was never rebound and may still
    # be None; calling close() on it would raise AttributeError and hide the
    # real error.
    if demo is not None:
        demo.close()
    # Bare raise re-raises the active exception with its original traceback.
    raise

Running on local URL:  http://127.0.0.1:8000

To create a public link, set `share=True` in `launch()`.


Parsing nodes: 100%|██████████| 48/48 [00:00<00:00, 4053.77it/s]
Generating embeddings: 100%|██████████| 1/1 [00:00<00:00,  1.77it/s]
Generating image embeddings: 100%|██████████| 47/47 [00:14<00:00,  3.26it/s]


Retrieving using text...
Retrieving using image...
Query:
Give the timestamp of this scene.
 Assistant:
 The scene is at 00:15.
Query:
What is this scene about?
 Assistant:
 The scene is about Casey Neistat talking about how he got started on YouTube and how he became successful. He talks about how he was told that he couldn't do it, but he didn't listen and he kept going. He encourages others to do the same and to never give up on their dreams.
Retrieving using text...
Retrieving using image...
Query:
How about the timestamp of this scene?
 Assistant:
 The first image is a screenshot of a man sitting on a couch, wearing sunglasses and a black shirt. He is sitting in front of a bookshelf. The other images are also screenshots from the video. The first image is at the timestamp of 01:05, and the second image is at the timestamp of 01:15. The video transcription shows that the man in the video is talking about how he doesn't listen to anyone and how he does what he wants. He also talks a

Parsing nodes: 100%|██████████| 48/48 [00:00<00:00, 3804.36it/s]
Generating embeddings: 100%|██████████| 1/1 [00:01<00:00,  1.62s/it]
Generating image embeddings: 100%|██████████| 47/47 [00:14<00:00,  3.15it/s]


Retrieving using text...
Retrieving using image...
Query:
What is the timestamp of this scene?
 Assistant:
 The scene is at 03:10.
Retrieving using text...
Retrieving using image...
Query:
In which timestamp does this scene appear?
 Assistant:
 The scene appears at 00:14 in the video.
Retrieving using text...
Retrieving using image...
Query:
Who is this man?
 Assistant:
 The man in the video is Casey Neistat. He is a popular YouTuber who is known for his vlogs and his work with the Beme app. He is also the co-founder of the multimedia company 368.
[youtube] Extracting URL: https://www.youtube.com/watch?v=PznJqxon4zE
[youtube] PznJqxon4zE: Downloading webpage
[youtube] PznJqxon4zE: Downloading ios player API JSON
[youtube] PznJqxon4zE: Downloading android player API JSON
[youtube] PznJqxon4zE: Downloading m3u8 information
[info] PznJqxon4zE: Downloading 1 format(s): 18
[download] Destination: data/PznJqxon4zE/video/Steve Jobs passion in work.mp4
[download] 100% of    3.97MiB in 00:00:00

                                                            

Moviepy - Done writing frames data/PznJqxon4zE/img/steve-jobs-passion-in-work-frame-%04d.png.


Parsing nodes: 100%|██████████| 20/20 [00:00<00:00, 1002.86it/s]
Generating embeddings: 100%|██████████| 1/1 [00:02<00:00,  2.05s/it]
Generating image embeddings: 100%|██████████| 19/19 [00:05<00:00,  3.38it/s]


Query:
Who are in the videos?
 Assistant:
 Steve Jobs.
Query:
Who are in the videos?
 Assistant:
 Steve Jobs.
steve-jobs-passion-in-work /private/var/folders/8r/8d2_blbs1md53x1nyd_6f9m00000gp/T/gradio/66e57a1aaec0cbe1f4320844295265346f658507/steve-jobs-passion-in-work.mp4
<moviepy.audio.io.AudioFileClip.AudioFileClip object at 0x1eb283d10>
MoviePy - Writing audio in data/steve-jobs-passion-in-work/audio/steve-jobs-passion-in-work.wav


                                                                      

MoviePy - Done.


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Moviepy - Writing frames data/steve-jobs-passion-in-work/img/steve-jobs-passion-in-work-frame-%04d.png.


                                                            

Moviepy - Done writing frames data/steve-jobs-passion-in-work/img/steve-jobs-passion-in-work-frame-%04d.png.


Parsing nodes: 100%|██████████| 20/20 [00:00<00:00, 783.14it/s]
Generating embeddings: 100%|██████████| 1/1 [00:08<00:00,  8.05s/it]
Generating image embeddings: 100%|██████████| 19/19 [00:05<00:00,  3.31it/s]


Query:
Summarize the video
 Assistant:
 Steve Jobs talked about the importance of passion in work. He said that if you don't love what you do, you'll never be successful. He also said that you need to be a good talent scout and surround yourself with great people.
Query:
What did Steve Jobs wear in the talk?
 Assistant:
 Steve Jobs wore a black turtleneck, glasses, and dark pants.
Query:
What is the color of his chair?
 Assistant:
 The color of the chair is red.
Query:
Did he sit next to anybody?
 Assistant:
 Yes, there was a woman sitting next to him.
Query:
Who is she/he?
 Assistant:
 The person in the video is Steve Jobs, the co-founder of Apple Inc.
Query:
In which timestamp did Steve Jobs mention about "talent scout"?
 Assistant:
 Steve Jobs mentioned about "talent scout" at 59.2.
