### > Setup ffmpeg

In [1]:
%%sh
sudo apt-get update && sudo apt upgrade -y

sudo apt-get install ffmpeg -y

pip install -U sagemaker

sudo: apt-get: command not found
sudo: apt-get: command not found


Collecting sagemaker
  Downloading sagemaker-2.214.1-py3-none-any.whl.metadata (14 kB)
Downloading sagemaker-2.214.1-py3-none-any.whl (1.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m17.7 MB/s[0m eta [36m0:00:00[0m:00:01[0m
[?25hInstalling collected packages: sagemaker
  Attempting uninstall: sagemaker
    Found existing installation: sagemaker 2.214.0
    Uninstalling sagemaker-2.214.0:
      Successfully uninstalled sagemaker-2.214.0
Successfully installed sagemaker-2.214.1


### > Initialize parameters

In [19]:
import boto3
import sagemaker
from sagemaker.utils import name_from_base
import shutil
import os

sagemaker_session = sagemaker.Session()

bucket = sagemaker_session.default_bucket()
region = sagemaker_session._region_name

prefix = "multi-modal-search"

s3_client = boto3.client('s3')

video_dir = 'videos' # BigBuckBunny.mp4' #TearsOfSteel.mp4'  #

# load the host and index_name for opensearch
%store -r host
%store -r index_name

print(f"Opensearch hosting url: {host}")
print(f"Opensearch index name: {index_name}")

Opensearch hosting url: 1fk47z0pze7baugjoq9g.us-east-1.aoss.amazonaws.com
Opensearch index name: mm-search-2024-03-31-15-48-41-887


### > download and setup videos

In [3]:
if os.path.exists(video_dir):
    # Remove existing folder
    shutil.rmtree(video_dir)
    
# Create new folder
os.makedirs(video_dir)

In [4]:
videos=[
    # “Shooting in motion for a football player”
    "https://d286ovwbajn9p7.cloudfront.net/AdobeStock_161973588.mov",
    # “Race to the Cloud”
    "https://d286ovwbajn9p7.cloudfront.net/RaceToTheCloud_444_PR_M_040623-FINAL_264.mp4",
    # "Picchu"
    "https://d286ovwbajn9p7.cloudfront.net/aws-picchu-edit-rendered.mp4",
    # "AWS España Ya disponible"
    "https://d286ovwbajn9p7.cloudfront.net/AWS_Espa%C3%B1a_Ya%20disponible.mp4",
    # "Christmas fire"
    "https://d286ovwbajn9p7.cloudfront.net/AdobeStock_384597062.mov",
    # "westiepoo"
    "https://d2yqlwoly7fl0b.cloudfront.net/super-slomo/samples/westiepoo.mov",
]

In [5]:
for v in videos:
    print(v)
    !cd {video_dir} && wget {v} --no-check-certificate

https://d286ovwbajn9p7.cloudfront.net/AdobeStock_161973588.mov
--2024-03-30 23:49:55--  https://d286ovwbajn9p7.cloudfront.net/AdobeStock_161973588.mov
Resolving d286ovwbajn9p7.cloudfront.net (d286ovwbajn9p7.cloudfront.net)... 3.162.93.55, 3.162.93.167, 3.162.93.143, ...
Connecting to d286ovwbajn9p7.cloudfront.net (d286ovwbajn9p7.cloudfront.net)|3.162.93.55|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 21564633 (21M) [video/quicktime]
Saving to: ‘AdobeStock_161973588.mov’


2024-03-30 23:49:55 (103 MB/s) - ‘AdobeStock_161973588.mov’ saved [21564633/21564633]

https://d286ovwbajn9p7.cloudfront.net/RaceToTheCloud_444_PR_M_040623-FINAL_264.mp4
--2024-03-30 23:49:55--  https://d286ovwbajn9p7.cloudfront.net/RaceToTheCloud_444_PR_M_040623-FINAL_264.mp4
Resolving d286ovwbajn9p7.cloudfront.net (d286ovwbajn9p7.cloudfront.net)... 3.162.93.218, 3.162.93.143, 3.162.93.167, ...
Connecting to d286ovwbajn9p7.cloudfront.net (d286ovwbajn9p7.cloudfront.net)|3.162.93.218|:443..

In [6]:
# !cd {video_dir} && curl https://gist.githubusercontent.com/jsturgis/3b19447b304616f18657/raw/a8c1f60074542d28fa8da4fe58c3788610803a65/gistfile1.txt | grep -o 'http[^"]*.mp4' | xargs -n 1 curl -O

### > upload videos to S3

In [20]:
!rm -rf `find -type d -name .ipynb_checkpoints`

In [21]:
s3_video_dir=f"s3://{bucket}/{prefix}/videos/"

!aws s3 sync {video_dir} {s3_video_dir}

In [22]:
from sagemaker.pytorch.processing import PyTorchProcessor
from sagemaker.processing import ProcessingInput, ProcessingOutput
from sagemaker import get_execution_role

#Initialize the PyTorchProcessor
pytorch_processor = PyTorchProcessor(
    framework_version='2.0',
    py_version="py310",
    role=get_execution_role(),
    instance_type='ml.g4dn.4xlarge',
    instance_count=1,
    base_job_name=name_from_base(prefix)
)

#Run the processing job
pytorch_processor.run(
    code='video_ingestion.py',
    source_dir='code_dir',
    arguments=["--host", host, "--index_name", index_name, 
              "--prefix", prefix, "--bucket", bucket, "--region", region],
    inputs=[
        ProcessingInput(
            input_name='videos',
            source=s3_video_dir,
            destination='/opt/ml/processing/input/videos'
        )
    ]
)

INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker.processing:Uploaded code_dir to s3://sagemaker-us-east-1-421307757250/multi-modal-search-2024-03-31-21-40-31--2024-03-31-21-40-31-576/source/sourcedir.tar.gz
INFO:sagemaker.processing:runproc.sh uploaded to s3://sagemaker-us-east-1-421307757250/multi-modal-search-2024-03-31-21-40-31--2024-03-31-21-40-31-576/source/runproc.sh
INFO:sagemaker:Creating processing-job with name multi-modal-search-2024-03-31-21-40-31--2024-03-31-21-40-31-576


[34mCollecting opencv-python-headless (from -r requirements.txt (line 1))
  Downloading opencv_python_headless-4.9.0.80-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (20 kB)[0m
[34mCollecting opensearch-py (from -r requirements.txt (line 2))
  Downloading opensearch_py-2.5.0-py2.py3-none-any.whl.metadata (6.8 kB)[0m
[34mCollecting ipywidgets (from -r requirements.txt (line 3))
  Downloading ipywidgets-8.1.2-py3-none-any.whl.metadata (2.4 kB)[0m
[34mCollecting boto3==1.34.62 (from -r requirements.txt (line 4))
  Downloading boto3-1.34.62-py3-none-any.whl.metadata (6.6 kB)[0m
[34mCollecting botocore<1.35.0,>=1.34.62 (from boto3==1.34.62->-r requirements.txt (line 4))
  Downloading botocore-1.34.74-py3-none-any.whl.metadata (5.7 kB)[0m
[34mCollecting s3transfer<0.11.0,>=0.10.0 (from boto3==1.34.62->-r requirements.txt (line 4))
  Downloading s3transfer-0.10.1-py3-none-any.whl.metadata (1.7 kB)[0m
[34mCollecting widgetsnbextension~=4.0.10 (from ipywidgets->

[34mupload frames to s3 bucket...[0m
[34mIndexed 22 documents[0m
[34m22 record succeeded, [] failed ...[0m
[34mprocess file: /opt/ml/processing/input/videos/AWS_España_Ya disponible.mp4...[0m
[34mframe rate is 23.976023976023978 per second[0m
[34mnumber of frames: 27[0m
[34mupload frames to s3 bucket...[0m
[34mIndexed 44 documents[0m
[34m44 record succeeded, [] failed ...[0m
[34mprocess file: /opt/ml/processing/input/videos/westiepoo.mov...[0m
[34mframe rate is 25.0 per second[0m
[34mnumber of frames: 1[0m
[34mupload frames to s3 bucket...[0m
[34mIndexed 2 documents[0m
[34m2 record succeeded, [] failed ...[0m
[34mprocess file: /opt/ml/processing/input/videos/AdobeStock_384597062.mov...[0m
[34mframe rate is 59.96002664890073 per second[0m
[34mnumber of frames: 5[0m
[34mupload frames to s3 bucket...[0m
[34mIndexed 10 documents[0m
[34m10 record succeeded, [] failed ...[0m
[34mprocess file: /opt/ml/processing/input/videos/RaceToTheCloud_444_PR_M_0

### > Local ingestion testing

In [None]:
!rm -rf `find -type d -name .ipynb_checkpoints`

In [None]:
try:
    entries = os.scandir("dog")#video_dir)
except FileNotFoundError as e:
    print(f"Could not scan folder: {e}")
    raise
except OSError as e: 
    print(f"Could not scan folder: {e}")
    raise

### > Process videos

The processing include following steps:
1. extract key frames from video
2. upload to s3 and build the index
3. ingest index to opensearch

In [None]:
from code_dir.helper import extract_key_frames, upload_frames
from code_dir.opensearch_util import bulk_index_ingestion
from tqdm import tqdm

for entry in tqdm(entries):
    try:
        if entry.is_file() and entry.path.endswith((".mp4", ".mkv", ".mov")): 

            print(f"process file: {entry.path}...")
            # extract key frames from video
            output_dir, fps = extract_key_frames(entry.path)
            print(f"frame rate is {fps} per second")

            print(f"number of frames: {len(os.listdir(output_dir))}")
            # upload fames to s3
            frames = upload_frames(output_dir, bucket, prefix, entry.path.split('/')[-1], fps)
            print(f"upload frames to s3 bucket...")

            # ingest the index file into opensearch
            sucess, failed = bulk_index_ingestion(host, index_name, frames)
            
    except OSError as e: 
        print(f"Could not read entry {entry.path}: {e}")

In [None]:
import os
import random
from IPython.display import display, Image

# Get a list of all image files in the folder
image_files = [f for f in os.listdir(output_dir) if f.endswith(('.jpg', '.png', '.gif'))]

# Display the sampled images
for image_file in image_files:
    image_path = os.path.join(output_dir, image_file)
    display(Image(filename=image_path))

### > Test Multi-modal text search

In [16]:
# build opensearch query
query = {
    "size": 10,
    "query":{
        "knn": {
        "multimodal_vector": {
            "vector": [],
            "k": 5
        }
        }
    },
    "_source": ["video", 
                "bucket", 
                "video_key" , 
                "frame_key", 
                "caption", 
                "timestamp", 
                "embedding_type"]

    }

In [17]:
from code_dir.helper import opensearch_query, render_search_result, get_embedding, encode_image_to_base64
from IPython.display import display, clear_output


query["query"]["knn"]["multimodal_vector"]["vector"] = get_embedding(text_description="Show me a race car driver")
        
results = opensearch_query(query,
                            host=host,
                           index_name=index_name)

display(render_search_result(results))

INFO:botocore.credentials:Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole
INFO:opensearch:POST https://1fk47z0pze7baugjoq9g.us-east-1.aoss.amazonaws.com:443/mm-search-2024-03-30-23--index/_search [status:200 request:0.218s]


HBox(children=(VBox(children=(Label(value='From video: RaceToTh\neCloud_444_PR_M_0406\n23-FINAL_264.mp4 at\nti…

### > Test MM mage Search

In [18]:
image_base64 = encode_image_to_base64("test-image.jpg")

query["query"]["knn"]["multimodal_vector"]["vector"] = get_embedding(image_base64=image_base64)
        
results = opensearch_query(query, 
                           host = host,
                           index_name=index_name)

display(render_search_result(results))

INFO:opensearch:POST https://1fk47z0pze7baugjoq9g.us-east-1.aoss.amazonaws.com:443/mm-search-2024-03-30-23--index/_search [status:200 request:0.141s]


HBox(children=(VBox(children=(Label(value='From video:\nwestiepoo.mov at\ntime: 0.00\nScore: 68.36%'), Image(v…

### > Interactive UI

Have fun.....!

Here are some sample prompts:

- cute dog running on grass
- flying squiral
- a bunny flying a kite
- Game of thrones

In [None]:
import ipywidgets as ipw
class ChatUX:
    """ A chat UX using IPWidgets
    """
    def __init__(self, qa):
        self.qa = qa
        self.name = None
        self.b=None
        self.out = ipw.Output()
        self.session_id = None

    def start_chat(self):
        print("Let's chat!")
        display(self.out)
        self.chat(None)

    def chat(self, _):
        if self.name is None:
            prompt = ""
        else:
            prompt = self.name.value
        if 'q' == prompt or 'quit' == prompt or 'Q' == prompt:
            print("Thank you , that was a nice chat !!")
            return
        elif len(prompt) > 0:
            with self.out:
                thinking = ipw.Label(value=f"Thinking...")
                display(thinking)
                try:
                    query["query"]["knn"]["multimodal_vector"]["vector"] = get_embedding(text_description=prompt)
                    results = opensearch_query(query,
                                               host=host,
                                               index_name=index_name)

                    output_results = render_search_result(results)
                except Exception as e:
                    print(e)
                    output_results = "No answer"

                thinking.value=""
                print("AI:")
                display(output_results)
                self.name.disabled = True
                self.b.disabled = True
                self.name = None

        if self.name is None:
            with self.out:
                self.name = ipw.Text(description="You: ", placeholder='q to quit')
                self.b = ipw.Button(description="Send")
                self.b.on_click(self.chat)
                display(ipw.Box(children=(self.name, self.b)))

In [None]:
agent_runtime_client = boto3.client('bedrock-runtime')

chat = ChatUX(agent_runtime_client)
chat.start_chat()