# Project Week 1: ActivityNet Video Data Preparation and Indexing

In this example we will use the ActivityNet dataset https://github.com/activitynet/ActivityNet. 

 - Select the 10 videos with more moments.
 - Download these videos onto your computer.
 - Extract the frames for every video.
 - Read the textual descriptions of each video.
 - Index the video data in OpenSearch.

 In this week, you will index the video data and make it searchable with OpenSearch. You should refer to the OpenSearch tutorial laboratory.

## Select videos
Download the `activity_net.v1-3.min.json` file containing the list of videos. The file is in the github repository of ActivityNet.
Parse this file and select the 10 videos with more moments.

In [4]:
import json
from pprint import pprint
import subprocess

with open('activity_net.v1-3.min.json', 'r') as json_data:
    data = json.load(json_data)
    video_annotations = [
    {
        "video_id": vid,
        "num_moments": len(details["annotations"]),
        "url": details["url"],
        "duration": details["duration"]
    }
    for vid, details in data["database"].items()
    ]
    top_videos = sorted(video_annotations, key=lambda x: x["num_moments"], reverse=True)[:11]
    
    print(top_videos)
    #for video in top_videos:
        #subprocess.run(["yt-dlp", "-f", "best", video['url']])



[{'video_id': 'o1WPnnvs00I', 'num_moments': 23, 'url': 'https://www.youtube.com/watch?v=o1WPnnvs00I', 'duration': 229.86}, {'video_id': 'oGwn4NUeoy8', 'num_moments': 23, 'url': 'https://www.youtube.com/watch?v=oGwn4NUeoy8', 'duration': 153.09}, {'video_id': 'VEDRmPt_-Ms', 'num_moments': 20, 'url': 'https://www.youtube.com/watch?v=VEDRmPt_-Ms', 'duration': 232.07999999999998}, {'video_id': 'qF3EbR8y8go', 'num_moments': 19, 'url': 'https://www.youtube.com/watch?v=qF3EbR8y8go', 'duration': 204.1}, {'video_id': 'DLJqhYP-C0k', 'num_moments': 18, 'url': 'https://www.youtube.com/watch?v=DLJqhYP-C0k', 'duration': 186.968}, {'video_id': 't6f_O8a4sSg', 'num_moments': 18, 'url': 'https://www.youtube.com/watch?v=t6f_O8a4sSg', 'duration': 218.52}, {'video_id': '6gyD-Mte2ZM', 'num_moments': 18, 'url': 'https://www.youtube.com/watch?v=6gyD-Mte2ZM', 'duration': 188.245}, {'video_id': 'jBvGvVw3R-Q', 'num_moments': 18, 'url': 'https://www.youtube.com/watch?v=jBvGvVw3R-Q', 'duration': 218.62}, {'video_id

In [5]:
with open('captions_dataset/train.json', 'r') as captions_data:
    captions = json.load(captions_data)

# Add sentences to top_videos
for video in top_videos:
    video_id = "v_" + video["video_id"]
    video["sentences"] = captions.get(video_id, {}).get("sentences", [])

print(top_videos)

[{'video_id': 'o1WPnnvs00I', 'num_moments': 23, 'url': 'https://www.youtube.com/watch?v=o1WPnnvs00I', 'duration': 229.86, 'sentences': ['A man is playing a flute in front of a microphone.', ' A few other men are shown playing guitars as they sit.', ' The group plays for the audience, occasionally zooming in on individuals.', ' One man is playing drums while the others are on flute and guitar.', ' The lights move fluidly as they crescendo, and they screen goes black.']}, {'video_id': 'oGwn4NUeoy8', 'num_moments': 23, 'url': 'https://www.youtube.com/watch?v=oGwn4NUeoy8', 'duration': 153.09, 'sentences': ['A small group of people are seen on a stage getting their instruments ready.', ' A woman begins playing the drums while another plays piano and the others watch.', ' The two continue to play their instruments and others on the side watch.']}, {'video_id': 'VEDRmPt_-Ms', 'num_moments': 20, 'url': 'https://www.youtube.com/watch?v=VEDRmPt_-Ms', 'duration': 232.07999999999998, 'sentences': 

## Video frame extraction

PyAV is a wrapper library providing you access to `ffmpeg`, a command-line video processing tool. In the example below, you will be able to extract frames from the a video shot.

In [6]:
import av
import av.datasets
import os

video_folder = "./video"
output_folder = "./frames"

for video in os.listdir(video_folder):
    # File name without extension
    filename = os.path.splitext(video)[0]
    if not os.path.isdir(output_folder + "/" + filename):
        os.makedirs(output_folder + "/" + filename)
    with av.open(video_folder + "/" + video) as container:
        stream = container.streams.video[0]
        # We want 1 frame per second
        fps = stream.average_rate
        interval = int(fps)
        #print(fps)
        cpt = 0
        for i,frame in enumerate(container.decode(stream)):
            if i % interval == 0:
                frame.to_image().save(output_folder+"/"+filename+"/"+str(cpt)+".jpg", quality=80)
                cpt += 1

## Video metadata

Process the video metadata provided in the `json` file and index the video data in OpenSearch.

In [12]:
#print(top_videos)

from opensearchpy import OpenSearch
import pprint as pp


host = 'api.novasearch.org'
port = 443

user = 'user01' # Add your user name here.
password = 'erasmus+2025' # Add your user password here. For testing only. Don't store credentials in code. 
index_name = user 

client = OpenSearch(
    hosts = [{'host': host, 'port': port}],
    http_compress = True, # enables gzip compression for request bodies
    http_auth = (user, password),
    use_ssl = True,
    url_prefix = 'opensearch_v2',
    verify_certs = False,
    ssl_assert_hostname = False,
    ssl_show_warn = False
)

index_body = {
   "settings":{
      "index":{
         "number_of_replicas":0,
         "number_of_shards":4,
         "refresh_interval":"-1",
         "knn":"true"
      }
   },
   "mappings": {
    "properties": {
      "url": {
        "type": "text"
      },
      "num_moments": {
        "type": "integer"
      },
      "video_id": {
        "type": "keyword"
      },
      "CaptionText": {
        "type": "text"
      },
      "length": {
        "type": "float"
      }
    }
  }
}

if client.indices.exists(index=index_name):
    print("Index already existed. Nothing to be done.")
else:        
    response = client.indices.create(index_name, body=index_body)
    print('\nCreating index:')
    print(response)

Index already existed. Nothing to be done.


In [16]:
video_annotations = []
for video in top_videos:
    data = {
        "url": video['url'],
        "num_moments": video['num_moments'],
        "id":  video['video_id'],
        "captions":  video['sentences'],
        "length": video['duration']
    }
    video_annotations.append(data)


for video in video_annotations:
    print("Video: ",video)
    client.index(index=index_name, body=video)

Video:  {'url': 'https://www.youtube.com/watch?v=o1WPnnvs00I', 'num_moments': 23, 'id': 'o1WPnnvs00I', 'captions': ['A man is playing a flute in front of a microphone.', ' A few other men are shown playing guitars as they sit.', ' The group plays for the audience, occasionally zooming in on individuals.', ' One man is playing drums while the others are on flute and guitar.', ' The lights move fluidly as they crescendo, and they screen goes black.'], 'length': 229.86}
Video:  {'url': 'https://www.youtube.com/watch?v=oGwn4NUeoy8', 'num_moments': 23, 'id': 'oGwn4NUeoy8', 'captions': ['A small group of people are seen on a stage getting their instruments ready.', ' A woman begins playing the drums while another plays piano and the others watch.', ' The two continue to play their instruments and others on the side watch.'], 'length': 153.09}
Video:  {'url': 'https://www.youtube.com/watch?v=VEDRmPt_-Ms', 'num_moments': 20, 'id': 'VEDRmPt_-Ms', 'captions': ['women are in a gymnastics competit

In [15]:
search_query = {
    "query": {
        "match_all": {}  # Returns all documents
    },
    "size": 10  # Limit to 10 results
}

# Execute the search
response = client.search(
    body=search_query,
    index=user
)

# Print the results to verify data was indexed correctly
print(f"Found {response['hits']['total']['value']} documents")
for hit in response['hits']['hits']:
    print(f"Document ID: {hit['_id']}")
    print(f"Source: {hit['_source']}")
    print("-" * 50)

Found 0 documents


## Video captions

The ActivityNetCaptions dataset https://cs.stanford.edu/people/ranjaykrishna/densevid/ dataset provides a textual description of each videos. Index the video captions on a text field of your OpenSearch index.