In [None]:
import os

# Limit CUDA device visibility for this process to device "0".
os.environ.update({"CUDA_VISIBLE_DEVICES": "0"})

In [None]:
from aana.api.sdk import AanaSDK, get_deployment

# Single SDK instance for the notebook; the deployments defined in later cells
# are registered on it, and `get_deployment` is used to look them up by name.
aana_sdk = AanaSDK(port=8000)

In [None]:
from aana.deployments.whisper_deployment import (
    WhisperComputeType,
    WhisperConfig,
    WhisperDeployment,
    WhisperModelSize,
)

# Model settings for the medium Whisper model in float16; the dumped config is
# what gets handed to the deployment as `user_config`.
whisper_user_config = WhisperConfig(
    model_size=WhisperModelSize.MEDIUM,
    compute_type=WhisperComputeType.FLOAT16,
).model_dump()

# One replica that requests a quarter of a GPU.
whisper_deployment_medium = WhisperDeployment.options(
    num_replicas=1,
    max_concurrent_queries=1000,
    ray_actor_options={"num_gpus": 0.25},
    user_config=whisper_user_config,
)

# Register under the name that later cells pass to `get_deployment`.
aana_sdk.register_deployment("whisper_deployment_medium", whisper_deployment_medium)

In [None]:
from aana.deployments.hf_blip2_deployment import HFBlip2Config, HFBlip2Deployment
from aana.models.core.dtype import Dtype

# BLIP-2 captioning model settings; the dumped config is passed as `user_config`.
blip2_user_config = HFBlip2Config(
    model="Salesforce/blip2-opt-2.7b",
    dtype=Dtype.FLOAT16,
    batch_size=2,
    num_processing_threads=2,
).model_dump()

# One replica that requests a quarter of a GPU.
blip2_deployment = HFBlip2Deployment.options(
    num_replicas=1,
    max_concurrent_queries=1000,
    ray_actor_options={"num_gpus": 0.25},
    user_config=blip2_user_config,
)

# Register under the name that later cells pass to `get_deployment`.
aana_sdk.register_deployment("blip2_deployment", blip2_deployment)

In [None]:
from aana.deployments.vllm_deployment import VLLMConfig, VLLMDeployment
from aana.models.pydantic.sampling_params import SamplingParams

# Sampling defaults stored in the deployment config (temperature 0.0).
llama2_default_sampling = SamplingParams(
    temperature=0.0, top_p=1.0, top_k=-1, max_tokens=1024
)

# AWQ-quantised Llama-2 7B chat model; the dumped config is passed as `user_config`.
llama2_user_config = VLLMConfig(
    model="TheBloke/Llama-2-7b-Chat-AWQ",
    dtype="auto",
    quantization="awq",
    gpu_memory_reserved=13000,
    enforce_eager=True,
    default_sampling_params=llama2_default_sampling,
    chat_template="llama2",
).model_dump()

# One replica that requests a quarter of a GPU.
vllm_llama2_7b_chat_deployment = VLLMDeployment.options(
    num_replicas=1,
    max_concurrent_queries=1000,
    ray_actor_options={"num_gpus": 0.25},
    user_config=llama2_user_config,
)

# Register under the name that later cells pass to `get_deployment`.
aana_sdk.register_deployment(
    "vllm_llama2_7b_chat_deployment", vllm_llama2_7b_chat_deployment
)

In [None]:
from aana.models.pydantic.video_input import VideoInput
from aana.utils.video import download_video, extract_audio

# Source video for the indexing walkthrough.
url = "https://www.youtube.com/watch?v=UQuIVsNzqDk"

# Wrap the URL in a VideoInput, download the video, then extract its audio.
video_input = VideoInput(url=url)
video = download_video(video_input=video_input)
audio = extract_audio(video=video)

In [None]:
from aana.models.pydantic.whisper_params import WhisperParams

whisper_params = WhisperParams()

whisper_output = await get_deployment("whisper_deployment_medium").transcribe.remote(
    audio=audio, params=whisper_params
)

In [None]:
from aana.models.pydantic.video_params import VideoParams
from aana.utils.video import generate_frames_decord

video_params = VideoParams()

timestamps = []
frame_ids = []
captions = []

for frames_dict in generate_frames_decord(
    video=video, params=video_params, batch_size=4
):
    captions_dict = await get_deployment("blip2_deployment").generate_batch.remote(
        images=frames_dict["frames"]
    )

    timestamps.extend(frames_dict["timestamps"])
    frame_ids.extend(frames_dict["frame_ids"])
    captions.extend(captions_dict["captions"])

In [None]:
from aana.utils.video import generate_combined_timeline

# Combine the Whisper segments with the frame captions and their timestamps.
timeline_dict = generate_combined_timeline(
    transcription_segments=whisper_output["segments"],
    captions=captions,
    caption_timestamps=timestamps,
)

# Only the "timeline" entry of the result is used by the following cells.
timeline = timeline_dict["timeline"]

In [None]:
# One text entry per timeline segment: the audio transcript on the first line,
# the visual caption on the second.
timeline_sentences = []
for segment in timeline:
    transcript = segment["audio_transcript"]
    caption = segment["visual_caption"]
    timeline_sentences.append(f"{transcript}\n{caption}")

In [None]:
timeline_embeddings_dict = await get_deployment(
    "mxbai_embed_large_v1_deployment"
).embed_batch.remote(sentences=timeline_sentences)

In [None]:
# Inspect the shape of the embeddings returned for the timeline sentences.
timeline_embeddings_dict["embedding"].shape

In [None]:
from haystack_integrations.document_stores.qdrant import QdrantDocumentStore

# Store settings gathered in one place and forwarded unchanged as keyword arguments.
# `recreate_index=True` is set, and the index lives at a fixed local path.
qdrant_options = {
    "path": "/tmp/qdrant_index",
    "recreate_index": True,
    "embedding_dim": 1024,
    "return_embedding": True,
    "wait_result_from_api": True,
}

document_store = QdrantDocumentStore(**qdrant_options)

In [None]:
from haystack.components.writers import DocumentWriter
from haystack.document_stores.types import DuplicatePolicy

# Writer component bound to the Qdrant store, configured with the OVERWRITE
# duplicate policy.
doc_writer = DocumentWriter(
    document_store=document_store,
    policy=DuplicatePolicy.OVERWRITE,
)

In [None]:
# Display the writer component.
doc_writer

In [None]:
from haystack import Document

timeline_embeddings = timeline_embeddings_dict["embedding"]

# zip() silently stops at the shorter input, which would drop timeline segments
# without any error; fail loudly if the embedding count does not match.
if len(timeline_embeddings) != len(timeline_sentences):
    raise ValueError(
        f"Got {len(timeline_embeddings)} embeddings for "
        f"{len(timeline_sentences)} timeline sentences"
    )

# Pair every sentence with its embedding. Rows are converted with `.tolist()`,
# the same conversion the query cell applies, so each Document carries a plain
# list of floats rather than an array row.
timeline_docs = [
    Document(content=sentence, embedding=embedding.tolist())
    for sentence, embedding in zip(timeline_sentences, timeline_embeddings)
]

In [None]:
# Number of documents built from the timeline.
len(timeline_docs)

In [None]:
# Write with the same OVERWRITE policy that `doc_writer` is configured with.
# Without an explicit policy the store's default applies, which (per the Haystack
# document store API) rejects documents whose ids already exist, so re-running
# this cell against the same store would fail.
document_store.write_documents(timeline_docs, policy=DuplicatePolicy.OVERWRITE)

In [None]:
# Question used both for retrieval and in the LLM prompt below.
query = "Why green screen is not a perfect solution?"

In [None]:
query_embedding_dict = await get_deployment(
    "mxbai_embed_large_v1_deployment"
).embed_batch.remote(sentences=[query])

In [None]:
# First (and only) row of the query batch, converted with `.tolist()`.
query_vector = query_embedding_dict["embedding"][0].tolist()

# Fetch the 5 best-matching timeline documents for the query embedding.
retrieved_docs = document_store.query_by_embedding(query_vector, top_k=5)

In [None]:
# Display the embedding computed for the query.
query_embedding_dict["embedding"][0]

In [None]:
from haystack.components.builders import PromptBuilder

# Prompt template wrapped in `[INST] ... [/INST]` markers. It loops over
# `documents`, rendering each document's `content` as context, and then
# inserts `question`.
# NOTE(review): the template starts with a literal `<s>`; confirm the serving
# side does not prepend another beginning-of-sequence token.
template = """
<s>[INST] Given the following information, answer the question factually from the content you have.

If there is no information in the collection, say that and politely refuse to answer the question.

Context:
{% for document in documents %}
    {{ document.content }}
{% endfor %}

Question: {{question}}
[/INST]
"""

# Builder that renders the template above when run.
prompt_builder = PromptBuilder(template=template)

In [None]:
# Render the prompt from the retrieved documents and the query; the rendered
# text is read from the "prompt" key in the generation cell.
prompt_dict = prompt_builder.run(documents=retrieved_docs, question=query)

In [None]:
sampling_params = SamplingParams()

answer = await get_deployment("vllm_llama2_7b_chat_deployment").generate.remote(
    prompt=prompt_dict["prompt"], sampling_params=sampling_params
)

In [None]:
# Show the generated answer text.
print(answer["text"])

In [None]:
from haystack.components.embedders import SentenceTransformersDocumentEmbedder

In [None]:
# Local (in-process) Haystack embedder for the same model name used by the
# embedding deployment; the "summary" meta field is listed for embedding.
doc_embedder = SentenceTransformersDocumentEmbedder(
    model="mixedbread-ai/mxbai-embed-large-v1",
    meta_fields_to_embed=["summary"],
)

In [None]:
# Warm up the embedder before the first `run` call.
doc_embedder.warm_up()

In [None]:
# Embed a single sample document; the result is read via its "documents" key below.
docs = doc_embedder.run(documents=[Document(content="Hello, World!")])

In [None]:
# Display the embedder output.
docs

In [None]:
# Check which Python type the embedder stores in `Document.embedding`.
type(docs["documents"][0].embedding)

In [None]:
# NOTE(review): `SentenceTransformersDeploymentEmbedder` is not imported or defined
# in any cell visible in this notebook; on a fresh kernel this cell would raise
# NameError — confirm where it should be imported from.
doc_embedder_with_deployment = SentenceTransformersDeploymentEmbedder(
    "mxbai_embed_large_v1_deployment"
)

In [None]:
# NOTE(review): the result is not awaited. If `arun` is a coroutine function (as the
# name suggests), this cell only creates a coroutine object — confirm and add `await`.
doc_embedder_with_deployment.arun(documents=[Document(content="Hello, World!")])

In [None]:
import json

import requests

# Request body for the video indexing endpoint.
data = {"video_input": {"url": "https://www.youtube.com/watch?v=UQuIVsNzqDk"}}
# Alternative video:
# data = {"video_input": {"url": "https://www.youtube.com/watch?v=33BZfufw8cI"}}

# The context manager releases the streamed connection even if parsing fails
# part-way through the response.
with requests.post(
    "http://127.0.0.1:8000/index_video",
    data={"body": json.dumps(data)},
    stream=True,
) as response:
    # Surface HTTP errors directly instead of trying to parse an error body as JSON.
    response.raise_for_status()
    # Assumes the server emits exactly one JSON document per streamed chunk —
    # TODO confirm against the endpoint implementation.
    for chunk in response.iter_content(chunk_size=None):
        print(json.loads(chunk))

In [None]:
import json

import requests

# Request body for the chat endpoint.
data = {"query": "What is an alternative to green screen?"}
# Alternative query:
# data = {"query": "Why green screen is not a perfect solution?"}

# Accumulates the streamed answer; holds the partial text if streaming is interrupted.
text = ""

# The context manager releases the streamed connection even if parsing fails
# part-way through the response.
with requests.post(
    "http://127.0.0.1:8000/chat",
    data={"body": json.dumps(data)},
    stream=True,
) as response:
    # Surface HTTP errors directly instead of trying to parse an error body as JSON.
    response.raise_for_status()
    # Assumes the server emits exactly one JSON document per streamed chunk —
    # TODO confirm against the endpoint implementation.
    for chunk in response.iter_content(chunk_size=None):
        # Decode each chunk once and reuse the piece for both accumulation and display.
        piece = json.loads(chunk)["text"]
        text += piece
        print(piece, end="")