In [1]:
# installs
!pip install pinnacledb
!pip install opencv-python
!pip install git+https://github.com/openai/CLIP.git

Collecting git+https://github.com/openai/CLIP.git
  Cloning https://github.com/openai/CLIP.git to /private/var/folders/jt/hrc4w0jj3fdcz0hfhg15fq0m0000gn/T/pip-req-build-n6zpftbz
  Running command git clone --filter=blob:none --quiet https://github.com/openai/CLIP.git /private/var/folders/jt/hrc4w0jj3fdcz0hfhg15fq0m0000gn/T/pip-req-build-n6zpftbz
  Resolved https://github.com/openai/CLIP.git to commit a1d071733d7111c9c014f024669f959182114e33
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone


In [2]:
import cv2
import requests
import os
import numpy as np
from tqdm import tqdm

import pymongo

import pinnacledb
from pinnacledb import pinnacle
import glob
from PIL import Image
from pinnacledb.ext.pillow.image import pil_image as i
from pinnacledb.container.document import Document as D
from pinnacledb.db.mongodb.query import Collection
from pinnacledb.ext.torch.tensor import tensor
from pinnacledb.ext.torch.model import TorchModel
import torch

# Create a pinnacle`db` instance

In [3]:
import os

# Connection string for the datastore.
# The MONGODB_URI environment variable wins when it is set; otherwise the
# mongomock test URI below is used as the default.
# To point at a specific deployment, uncomment one of the alternatives.

mongodb_uri = os.getenv("MONGODB_URI","mongomock://test")
# mongodb_uri = "mongodb://localhost:27017"
# mongodb_uri = "mongodb://pinnacle:pinnacle@mongodb:27017/documents"
# mongodb_uri = "mongodb://<user>:<pass>@<mongo_cluster>/<database>"
# mongodb_uri = "mongodb+srv://<username>:<password>@<atlas_cluster>/<database>"

# Wrap the connection string in a pinnacledb handle; later cells use `db`
# for every insert, model registration and query.
from pinnacledb import pinnacle
db = pinnacle(mongodb_uri)

INFO:numexpr.utils:NumExpr defaulting to 8 threads.


# Sample video URL

In [45]:
url = 'https://github.com/SuperDuperDB/pinnacledb/assets/138251983/99f35f54-d4b0-40e6-a22d-41043d7bd384'

In [92]:
class Video2Images:
    """Download a video and write visually distinct frames to disk as JPEGs.

    Calling an instance with a directory downloads ``self.url`` into that
    directory and extracts frames into its ``frames`` sub-folder.
    """

    def __init__(self, url):
        """Remember the URL of the video this instance will download."""
        self.url = url

    def download_video(self, url, output_file, timeout=60):
        """Fetch ``url`` and write the response body to ``output_file``.

        Args:
            url: Address of the video to download.
            output_file: Local path the bytes are written to.
            timeout: Seconds passed to ``requests.get`` so a stalled
                connection cannot hang the notebook indefinitely.

        Returns:
            True when the server answered with HTTP 200 and the file was
            written, False otherwise (a message is printed in that case).
        """
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            print(f"Failed to download video from {url}")
            return False
        with open(output_file, 'wb') as f:
            f.write(response.content)
        return True

    def save_frames_from_video(self, video_file, output_folder, threshold=10, min_frame_interval=300):
        """Write frames that differ noticeably from their predecessor as JPEGs.

        A frame is saved as ``{output_folder}/frame_{timestamp}.jpg`` when the
        mean absolute grayscale difference to the previous non-black frame
        exceeds ``threshold``. ``timestamp`` is ``frame_index // fps``, so
        several qualifying frames within the same second overwrite each other
        and only the last one is kept.

        Args:
            video_file: Path of the video to read.
            output_folder: Directory the JPEGs are written to (created if
                missing).
            threshold: Minimum mean absolute pixel difference for a frame to
                count as changed.
            min_frame_interval: Number of leading frames that are never saved.
                Despite the name this is a warm-up count, not a spacing
                between saved frames.

        Returns:
            None. Problems opening the video or reading its frame rate are
            reported with a printed message.
        """
        cap = cv2.VideoCapture(video_file)
        try:
            if not cap.isOpened():
                print("Error: Could not open video file.")
                return

            fps = cap.get(cv2.CAP_PROP_FPS)
            # `not (fps > 0)` also rejects NaN; a zero rate would otherwise
            # raise ZeroDivisionError when the timestamp is computed below.
            if not (fps > 0):
                print("Error: Could not determine the video frame rate.")
                return

            os.makedirs(output_folder, exist_ok=True)

            prev_frame = None
            frame_count = 0

            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                # Timestamp uses the index of the current frame, i.e. the
                # counter value before it is incremented.
                current_timestamp = frame_count // fps
                frame_count += 1

                # Completely black frames are ignored and do not become the
                # comparison baseline.
                if not np.any(gray_frame):
                    continue

                if prev_frame is not None:
                    mean_diff = np.mean(cv2.absdiff(gray_frame, prev_frame))
                    if mean_diff > threshold and frame_count > min_frame_interval:
                        frame_filename = f"{output_folder}/frame_{current_timestamp}.jpg"
                        cv2.imwrite(frame_filename, frame)

                prev_frame = gray_frame
        finally:
            # Always free the capture handle, even if decoding raises.
            # No display windows are opened here, so there is nothing else
            # to tear down.
            cap.release()

    def __call__(self, path, name='landscape.mp4'):
        """Download the video into ``path`` and extract its frames.

        Args:
            path: Directory that receives the video file and the ``frames``
                sub-folder; it is created when missing.
            name: File name used for the downloaded video.

        Returns:
            The path of the ``frames`` sub-folder. When the download fails no
            frames are extracted, but the path is still returned.
        """
        if path:
            os.makedirs(path, exist_ok=True)

        video_path = os.path.join(path, name)
        frame_path = os.path.join(path, 'frames')

        # Use the URL given to the constructor, not a notebook global, and
        # only decode when the download actually succeeded.
        if self.download_video(self.url, video_path):
            self.save_frames_from_video(video_path, frame_path)
        return frame_path

# Download videos and extract frames

In [95]:
# Create the download directory from Python instead of `!mkdir` so that
# re-running the cell does not report an error when the directory already
# exists and the cell does not rely on a shell command.
os.makedirs('videos', exist_ok=True)
video2images = Video2Images(url)
frames_path = video2images('videos')

In [96]:
def batch(iterable, n=1):
    """Yield consecutive slices of ``iterable`` with at most ``n`` items each.

    ``iterable`` must support ``len()`` and slicing. The final slice is
    shorter when the length is not a multiple of ``n``.
    """
    total = len(iterable)
    for start in range(0, total, n):
        stop = start + n
        if stop > total:
            # Clamp the last chunk to the end of the sequence.
            stop = total
        yield iterable[start:stop]

In [97]:
frames_path = 'videos/frames'

## Save the frames in a collection

In [99]:
# Sort the file list so frames are inserted in a reproducible order.
frames = sorted(glob.glob(f'{frames_path}/*.jpg'))

for frame_files in tqdm(batch(frames, 100)):
    _batch = []
    for frame_file in frame_files:
        # File names look like `frame_<timestamp>.jpg`; keep the text between
        # the last underscore and the first dot as the timestamp (a string).
        current_timestamp = os.path.split(frame_file)[-1].split('.')[0].split('_')[-1]

        frame_bgr = cv2.imread(frame_file)
        if frame_bgr is None:
            # cv2.imread signals an unreadable file by returning None
            # instead of raising; skip it rather than crash on conversion.
            print(f"Skipping unreadable frame file: {frame_file}")
            continue

        # OpenCV loads images as BGR, PIL expects RGB.
        frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)

        doc = {'image': i(Image.fromarray(frame_rgb)), 'current_timestamp': current_timestamp}
        _batch.append(D(doc))

    # Nothing to insert when every file in this chunk was unreadable.
    if _batch:
        db.execute(Collection('video_frames').insert_many(_batch, encoders=[i], refresh=False))

1it [00:47, 47.04s/it]


# Create CLIP model

In [57]:
import clip

In [100]:
# Load the ResNet-50 CLIP checkpoint on CPU; `preprocess` is the image
# transform returned alongside the model.
model, preprocess = clip.load("RN50", device='cpu')

# Take the embedding width from the loaded model instead of hard-coding 512:
# the RN50 checkpoint produces 1024-dimensional embeddings, so a fixed 512
# would mislabel the vectors stored through this encoder.
t = tensor(torch.float, shape=(model.visual.output_dim,))

In [101]:
# Image side of CLIP: inputs are passed through the `preprocess` transform
# returned by `clip.load`, then through `model.visual`; outputs are encoded
# with the tensor encoder `t`.
visual_model = TorchModel(
    identifier='clip_image',
    preprocess=preprocess,
    object=model.visual,
    encoder=t,
)
# Text side of CLIP: wraps the full model but uses `encode_text` as the
# forward method. The `[0]` picks the first row of the tokenizer output
# (presumably the single row produced for one input string; verify against
# the `clip.tokenize` documentation).
text_model = TorchModel(
    identifier='clip_text',
    object=model,
    preprocess=lambda x: clip.tokenize(x)[0],
    forward_method='encode_text',
    encoder=t
)

## Create VectorIndex with an indexing and compatible listener

In [102]:
from pinnacledb.container.vector_index import VectorIndex
from pinnacledb.container.listener import Listener
from pinnacledb.ext.openai.model import OpenAIEmbedding
from pinnacledb.db.mongodb.query import Collection

# Register a vector index named 'VideoSearchIndex' with two listeners:
# - indexing_listener: runs `visual_model` on the 'image' key of the documents
#   selected from the 'video_frames' collection.
# - compatible_listener: pairs `text_model` with the 'text' key. It has no
#   select and is marked inactive, so it is not run over stored documents
#   (presumably it is only used to embed query text; confirm against the
#   pinnacledb documentation).
db.add(
    VectorIndex(
        identifier='VideoSearchIndex',
        indexing_listener=Listener(
            model=visual_model,
            key='image',
            select=Collection(name='video_frames').find(),
        ),
        compatible_listener=Listener(
            model=text_model,
            key='text',
            select=None,
            active=False
        )
    )
)

INFO:root:Adding model clip_image to db
INFO:root:Done.


#######1.1
TorchModel(identifier='clip_image', object=<Artifact artifact=ModifiedResNet(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu1): ReLU(inplace=True)
  (conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu2): ReLU(inplace=True)
  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu3): ReLU(inplace=True)
  (avgpool): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu1):

75it [00:02, 27.15it/s]


###########XXX
[<PIL.PngImagePlugin.PngImageFile image mode=RGB size=1280x720 at 0x7FF051BD0790>, <PIL.PngImagePlugin.PngImageFile image mode=RGB size=1280x720 at 0x7FF0914BBAF0>, <PIL.PngImagePlugin.PngImageFile image mode=RGB size=1280x720 at 0x7FF0914BB280>, <PIL.PngImagePlugin.PngImageFile image mode=RGB size=1280x720 at 0x7FF0914BB430>, <PIL.PngImagePlugin.PngImageFile image mode=RGB size=1280x720 at 0x7FF0914BB4C0>, <PIL.PngImagePlugin.PngImageFile image mode=RGB size=1280x720 at 0x7FF01C05C160>, <PIL.PngImagePlugin.PngImageFile image mode=RGB size=1280x720 at 0x7FF0A7E62220>, <PIL.PngImagePlugin.PngImageFile image mode=RGB size=1280x720 at 0x7FEFF5C69610>, <PIL.PngImagePlugin.PngImageFile image mode=RGB size=1280x720 at 0x7FEFF5C69400>, <PIL.PngImagePlugin.PngImageFile image mode=RGB size=1280x720 at 0x7FF0A0C2E040>, <PIL.PngImagePlugin.PngImageFile image mode=RGB size=1280x720 at 0x7FF0A0C2ED00>, <PIL.PngImagePlugin.PngImageFile image mode=RGB size=1280x720 at 0x7FEFF4238F40>, 

100%|████████████████████████████████████████████████████| 75/75 [00:44<00:00,  1.68it/s]


###########outputs
[tensor([-0.0325, -0.0202,  0.0233,  ...,  0.0084, -0.0203,  0.0428]), tensor([-0.0051, -0.0202,  0.0352,  ...,  0.0167, -0.0065, -0.0068]), tensor([0.0692, 0.0115, 0.0058,  ..., 0.0519, 0.0278, 0.0108]), tensor([-0.0279,  0.0086, -0.0165,  ...,  0.0220,  0.0109,  0.0686]), tensor([-0.0040, -0.0226,  0.0029,  ..., -0.0376, -0.0153,  0.0273]), tensor([-0.0229,  0.0098,  0.0103,  ..., -0.0540, -0.0069,  0.0118]), tensor([-0.0282,  0.0093, -0.0458,  ...,  0.0229,  0.0358,  0.0531]), tensor([ 0.0596, -0.0220,  0.0006,  ...,  0.0774, -0.0010,  0.0149]), tensor([ 0.0378, -0.0002,  0.0132,  ...,  0.0725,  0.0244,  0.0331]), tensor([ 0.0028,  0.0080, -0.0077,  ..., -0.0146,  0.0166, -0.0742]), tensor([-0.0086,  0.0158, -0.0008,  ..., -0.0065,  0.0007, -0.0453]), tensor([ 0.0166, -0.0101,  0.0235,  ...,  0.0379,  0.0079,  0.0069]), tensor([ 0.0477,  0.0155, -0.0019,  ..., -0.0293,  0.0310, -0.0750]), tensor([-0.0543, -0.0306, -0.0666,  ..., -0.0223, -0.0108, -0.0091]), tensor

INFO:root:loading hashes: 'VideoSearchIndex'
Loading vectors into vector-table...: 75it [00:02, 33.06it/s]


[]

# Test vector search by querying a text against the saved frames.
## We will get the timestamp of the resultant frame and start the video from this timestamp.

In [116]:
# Ask the 'VideoSearchIndex' vector index for the single frame document
# closest to the text query.
out = db.execute(
    Collection('video_frames').like(D({'text': 'cave'}), vector_index='VideoSearchIndex', n=1).find({})
)
result = [c for c in out]
if not result:
    # Same exception type a bare `result[0]` would raise, but with a message
    # that points at the likely cause.
    raise IndexError(
        "Vector search returned no frames; check that 'video_frames' was populated and indexed."
    )
search_timestamp = result[0]['current_timestamp']


#######1.1
TorchModel(identifier='clip_text', object=<Artifact artifact=CLIP(
  (visual): ModifiedResNet(
    (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu1): ReLU(inplace=True)
    (conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu2): ReLU(inplace=True)
    (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu3): ReLU(inplace=True)
    (avgpool): AvgPool2d(kernel_size=2, stride=2, padding=0)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=T

# Start the video from the resultant timestamp

In [123]:
from IPython.display import display, HTML

# The timestamp was stored as a string parsed from the frame file name.
# Convert it to a number so only a numeric literal is interpolated into the
# script below; a non-numeric value fails here instead of producing broken
# JavaScript.
start_seconds = float(search_timestamp)

# The element id lets the script address this player directly rather than
# whichever <video> happens to come first in the page.
video_html = f"""
<video id="search-result-video" width="640" height="480" controls>
  <source src="./videos/landscape.mp4" type="video/mp4">
</video>
<script>
var video = document.getElementById('search-result-video');
video.currentTime = {start_seconds};
video.play();
</script>
"""

display(HTML(video_html))