Mount the drive

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


Get the files from the drive

In [2]:
import json
import os

# the 50 videos in the shared text / video set
set_50 = []

directory_55 = '/content/drive/MyDrive/Project_Dataset_MSVD/ten_class_test_set'
directory = os.fsencode(directory_55)

for file in os.listdir(directory):
     filename = os.fsdecode(file)
     filename = filename.replace('.mp4', '')
     set_50.append(filename)

def read_jsonl(f):
  res = []
  for line in f:
    res.append(json.loads(line))
  return res

base_path = '/content/drive/MyDrive/Project_Dataset_MSVD/'

train_labels_directory = base_path + 'train_annotations/train_annotations.json'
train_labels_file = open(train_labels_directory, "r")

train_labels_json = read_jsonl(train_labels_file)
train_labels_videos = train_labels_json[0]['videos']
train_labels_captions = train_labels_json[0]['sentences']

# 20 labels per video

train_dict = dict()
train_dict_50 = dict()

for video in train_labels_videos:
  video_id = video['video_id']
  train_dict[video_id] = ''
  # only adding videos in the 50 set
  if video_id in set_50:
    train_dict_50[video_id] = ''

for video_caption in train_labels_captions:
  video_id = video_caption['video_id']
  caption = video_caption['caption']
  train_dict[video_id] = train_dict.get(video_id) + ' ' + caption
  # only adding videos in the 50 set
  if video_id in set_50:
    train_dict_50[video_id] = train_dict.get(video_id) + ' ' + caption

Make the dataset a dictionary linking the video to a dictionary with the count of every word

In [3]:
from typing_extensions import Text
import string

train_collection_50 = dict()
train_collection_50_size = 0

def dataset_to_count_dictionary(given_dict):
  return_collection = dict()
  return_collection_size = 0

  for key, value in given_dict.items():
    txt = value.replace(' - ', ' ')
    txt = txt.split(' ')
    txt.remove('')

    count_dict = dict()
    doc_size = 0

    for token in txt:
      count_dict[token] = count_dict.get(token, 0) + 1
      return_collection[token] = return_collection.get(token, 0) + 1
      doc_size += 1
      return_collection_size += 1

    given_dict[key] = [doc_size, count_dict]

  return return_collection, return_collection_size


train_collection, train_collection_size = dataset_to_count_dictionary(train_dict)
train_collection_50, train_collection_size_50 = dataset_to_count_dictionary(train_dict_50)

Now we implement Naive Bayes Query Likelihood with Dirichlet smoothing

In [4]:
import math

# Returns a dictionary of video ids mapped to their 
# query likelihood scores for given query
# from the large dataset
def rank_by_query(query):
  mu = train_collection_size / len(train_dict)
  likelihood_dict = dict()
  query_tokens = query.split(' ')
  
  for key, value in train_dict.items():
    doc_word_count = value[0]
    doc_token_frequencies = value[1]
    doc_likelihood_value = 0
    for query_token in query_tokens:
      numerator = doc_token_frequencies.get(query_token, 1) + (mu * train_collection.get(query_token, 1) / train_collection_size)
      denominator = doc_word_count + mu
      query_probability_estimation = numerator / denominator
      log_query_probability_estimation = math.log(query_probability_estimation + 1)
      doc_likelihood_value += log_query_probability_estimation

    likelihood_dict[key] = doc_likelihood_value


  return likelihood_dict


# Returns a dictionary of video ids mapped to their 
# query likelihood scores for given query
# from the 50 video combined dataset
def rank_by_query_50(query):
  mu = train_collection_size_50 / len(train_dict_50)
  likelihood_dict = dict()
  query_tokens = query.split(' ')
  
  for key, value in train_dict_50.items():
    doc_word_count = value[0]
    doc_token_frequencies = value[1]
    doc_likelihood_value = 0
    for query_token in query_tokens:
      numerator = doc_token_frequencies.get(query_token, 1) + (mu * train_collection_50.get(query_token, 1) / train_collection_size_50)
      denominator = doc_word_count + mu
      query_probability_estimation = numerator / denominator
      log_query_probability_estimation = math.log(query_probability_estimation + 1)
      doc_likelihood_value += log_query_probability_estimation

    likelihood_dict[key] = doc_likelihood_value


  return likelihood_dict

Create a function that for a query returns the 10 video_ids with the highest query likelihood scores

In [5]:
def return_top_query(query):
  likelihood_dict = rank_by_query(query)
  sorted_list = sorted(likelihood_dict.items(), key=lambda x:x[1], reverse=True)
  print('For query: \"', query, '\" the top 10 videos are:')
  for i in range(10):
    print(str(i + 1) + ': ' + str(sorted_list[i][0]) + ' with score:' + str(sorted_list[i][1]))
  print()

def return_top_query_50(query):
  likelihood_dict = rank_by_query_50(query)
  sorted_list = sorted(likelihood_dict.items(), key=lambda x:x[1], reverse=True)
  print('For query: \"', query, '\" the top 10 videos are:')
  for i in range(10):
    print(str(i + 1) + ': ' + str(sorted_list[i][0]) + ' with score:' + str(sorted_list[i][1]))
  print()

Rankings for the Combined Section

In [6]:
# video 189
video189_caption = 'car'
# video 539
video539_caption = 'natural land with mountain'
# video 153
video153_caption = 'cooking chicken'
# video 598
video598_caption = 'software on computer screen'
# video 185
video185_caption = 'women singing'

ranked_dict_video189 = rank_by_query_50(video189_caption)
ranked_dict_video539 = rank_by_query_50(video539_caption)
ranked_dict_video153 = rank_by_query_50(video153_caption)
ranked_dict_video598 = rank_by_query_50(video598_caption)
ranked_dict_video185 = rank_by_query_50(video185_caption)

combined_dict = {
    'video189': ranked_dict_video189,
    'video539': ranked_dict_video539,
    'video153': ranked_dict_video153,
    'video598': ranked_dict_video598,
    'video185': ranked_dict_video185,
}

with open("/content/drive/MyDrive/Project_Dataset_MSVD/query_results_model_text_250.json", 'w') as comb:
     comb.write(json.dumps(combined_dict))



The 5 queries for 50 video model

In [None]:
# 4 out of 10 are relevant
return_top_query_50('soldiers at war')
print()

# 6 out of 10 are relevant
return_top_query_50('car driving')
print()

# 10 out of 10 are relevant
return_top_query_50('people')
print()

# 9 out of 10 are relevant
return_top_query_50('music')
print()

# 4 out of 10 are relevant
return_top_query_50('mountain')
print()

For query: " soldiers at war " the top 10 videos are:
1: video329 with score:0.05991851901396783
2: video152 with score:0.04541888403857017
3: video243 with score:0.029810057330894382
4: video237 with score:0.026984816462516326
5: video402 with score:0.0230534135363573
6: video285 with score:0.021633367419074505
7: video325 with score:0.021574091929236257
8: video207 with score:0.0207733759722459
9: video154 with score:0.019383050027731923
10: video543 with score:0.0181054848070942


For query: " car driving " the top 10 videos are:
1: video251 with score:0.053912726780435405
2: video243 with score:0.05304219247087257
3: video250 with score:0.0367213136692524
4: video149 with score:0.022987770070295212
5: video259 with score:0.021118100667801176
6: video296 with score:0.018736015110691246
7: video325 with score:0.010612346859289799
8: video214 with score:0.010377317646692075
9: video496 with score:0.010344589185247333
10: video227 with score:0.0102476308405432


For query: " people " t

The 5 queries for 7010 video model

In [None]:
# 9 out of 10 are relevant
return_top_query('soldiers at war')
print()

# 10 out of 10 are relevant
return_top_query('car driving')
print()

# 10 out of 10 are relevant
return_top_query('people')
print()

# 9 out of 10 are relevant
return_top_query('music')
print()

# 10 out of 10 are relevant
return_top_query('mountain')
print()

For query: " soldiers at war " the top 10 videos are:
1: video329 with score:0.05783905668536443
2: video5136 with score:0.05779750766526294
3: video5818 with score:0.05570188615399504
4: video2642 with score:0.05531763398660794
5: video6045 with score:0.053389639215551454
6: video611 with score:0.05121364821343706
7: video692 with score:0.050620619999772166
8: video5639 with score:0.04874665526206627
9: video4870 with score:0.04746201644497107
10: video4290 with score:0.04664868504868611


For query: " car driving " the top 10 videos are:
1: video2381 with score:0.10551513331165109
2: video2032 with score:0.10164096347165652
3: video4292 with score:0.09868412217080427
4: video2236 with score:0.09683648184566418
5: video6432 with score:0.09412495832447357
6: video1082 with score:0.0931851837085185
7: video2556 with score:0.09233806696118196
8: video3666 with score:0.09164062533610784
9: video5733 with score:0.09039386677084957
10: video2947 with score:0.08847553923375878


For query: "