In [None]:
import functools
import json

import gradio as gr
import ollama
from IPython.display import Markdown, display
from qdrant_client import QdrantClient
from sentence_transformers import SentenceTransformer

# Name of the sentence-transformers model used to embed incoming questions.
_EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"


@functools.lru_cache(maxsize=1)
def _get_embedding_model():
    """Return the shared SentenceTransformer, constructing it on first use."""
    return SentenceTransformer(_EMBEDDING_MODEL_NAME)


# The chunk retrieval function
def grab_relevant_chunks(
    question,
    top_k=1,
    collection_name="subtitle_chunks",
    qdrant_host="localhost",
    qdrant_port=6333
):
    """Return the payloads of the Qdrant points most similar to ``question``.

    Args:
        question: Text to embed and search with.
        top_k: Maximum number of hits to request from Qdrant.
        collection_name: Qdrant collection to search.
        qdrant_host: Host name of the Qdrant server.
        qdrant_port: Port of the Qdrant server.

    Returns:
        A list with one payload per hit, in the order Qdrant returned them.
    """
    # Reuse one model instance across calls: constructing the
    # SentenceTransformer for every question repeats the model load each time.
    # NOTE(review): presumably this must be the same model the collection was
    # indexed with -- confirm against the indexing code.
    query_embedding = _get_embedding_model().encode(question)

    # Connect to Qdrant
    client = QdrantClient(host=qdrant_host, port=qdrant_port)

    # Perform similarity search
    # NOTE(review): newer qdrant-client releases appear to deprecate `search`
    # in favour of `query_points` -- confirm against the installed version.
    search_results = client.search(
        collection_name=collection_name,
        query_vector=query_embedding,
        limit=top_k,
    )

    # Only the stored payloads are handed back to the caller
    return [hit.payload for hit in search_results]

# Prompt Generation and Response
def generate_response(question):
    """Answer ``question`` by asking the LLM to describe the retrieved segments.

    The payloads returned by ``grab_relevant_chunks`` are serialised to JSON,
    embedded in an instruction prompt, and sent as a single user message to
    the ``llama3.1:8b`` model through ``ollama.chat``.

    Returns:
        The content of the message in the model's reply.
    """
    retrieved = grab_relevant_chunks(question)
    segments_json = json.dumps(retrieved)

    llm_prompt = f"""I have a list of video segments that were retrieved in response to the user question: "{question}"
    Each segment includes a start and end timestamp, the video number it belongs to, and a short transcript (text).
    Please do the following for each segment:

    1. Give me the start and end timestamps and the video the segment came from.
    2. Summarize the main idea or key point explained in the segment in one sentence.
    3. Highlight any explanation, definition, or key information related to the user's question.
    4. If relevant, rephrase technical descriptions into simpler or more understandable terms.

    Here is the list of segments:
    ```json
    {segments_json}
    ```"""

    # Single-turn conversation: the whole prompt goes in one user message.
    conversation = [{'role': 'user', 'content': llm_prompt}]
    reply = ollama.chat(model='llama3.1:8b', messages=conversation)

    return reply['message']['content']

# Gradio Interface
def chatbot_interface(user_input):
    """Gradio callback: return the generated answer for ``user_input``."""
    answer = generate_response(user_input)
    return answer

# Create the output component.
# chatbot_interface returns a single string, so the answer is rendered with a
# Markdown component. A gr.Chatbot output expects a list of chat messages
# rather than a plain string, and handing it the raw string fails while Gradio
# post-processes the callback result.
answer_output = gr.Markdown(label="Answer")

# Launch the app with Gradio
# live=True is intentionally not set: in live mode the callback re-runs on
# every input change, which would trigger a vector search and an LLM call for
# each partial question. Without it the pipeline runs once per submit.
iface = gr.Interface(
    fn=chatbot_interface,
    inputs=gr.Textbox(label="Ask a Question"),
    outputs=answer_output,
    title="Video-Based Q&A Bot",
    description="Ask any question about video content and get answers based on the video segments."
)

iface.launch()

Traceback (most recent call last):
  File "/home/rahav/.cache/pypoetry/virtualenvs/llm-engineering-sRZUHTNH-py3.11/lib/python3.11/site-packages/gradio/queueing.py", line 625, in process_events
    response = await route_utils.call_process_api(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/rahav/.cache/pypoetry/virtualenvs/llm-engineering-sRZUHTNH-py3.11/lib/python3.11/site-packages/gradio/route_utils.py", line 322, in call_process_api
    output = await app.get_blocks().process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/rahav/.cache/pypoetry/virtualenvs/llm-engineering-sRZUHTNH-py3.11/lib/python3.11/site-packages/gradio/blocks.py", line 2156, in process_api
    data = await self.postprocess_data(block_fn, result["prediction"], state)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/rahav/.cache/pypoetry/virtualenvs/llm-engineering-sRZUHTNH-py3.11/lib/python3.11/site-packages/gradio/blocks.py", line 1