## Imports and Word2Vec model setup

In [1]:
!pip install gensim -q
!pip install --upgrade gensim -q

[K     |████████████████████████████████| 24.1 MB 1.4 MB/s 
[?25h

In [2]:
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request

import urllib.parse as p
import re
import os
import pickle
import numpy as np
import gensim.downloader as gensim_api
# Load the pretrained 'word2vec-google-news-300' vectors through gensim's
# downloader API. `wordvec` is the model handed to get_mean_embedding().
# NOTE(review): presumably a large download on the first run — confirm.
wordvec = gensim_api.load('word2vec-google-news-300')



## Configuration constants

In [3]:
# OAuth scopes. Not referenced by the cells shown here, which authenticate
# with an API key only.
SCOPES = ["https://www.googleapis.com/auth/youtube.force-ssl"]

# SECURITY: never commit an API key with the notebook. The key is read from
# the environment instead; any key that was previously hard-coded here should
# be treated as leaked and revoked in the Google Cloud console.
DEVELOPER_KEY = os.environ.get("YOUTUBE_API_KEY")
if not DEVELOPER_KEY:
    raise RuntimeError(
        "Set the YOUTUBE_API_KEY environment variable to a YouTube Data API v3 key "
        "before running this notebook."
    )

# Service name and version passed to googleapiclient's build().
YOUTUBE_API_SERVICE_NAME = 'youtube'
YOUTUBE_API_VERSION = 'v3'

## Set up the YouTube service object

In [4]:
# Build the YouTube API client, authenticating with the developer (API) key.
# This module-level `youtube` object is the one leetcode_title_to_yt_video() uses.
youtube = build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION, developerKey=DEVELOPER_KEY)

## All the helper functions

In [5]:
def get_video_details(youtube, **kwargs):
    """Run a ``videos().list`` request and return its executed response.

    Args:
        youtube: Service object exposing ``videos().list(...).execute()``.
        **kwargs: Extra request parameters (e.g. ``id=...``) forwarded as-is.

    Returns:
        Whatever ``execute()`` returns for the request, which always asks for
        the ``snippet``, ``contentDetails`` and ``statistics`` parts.
    """
    requested_parts = "snippet,contentDetails,statistics"
    request = youtube.videos().list(part=requested_parts, **kwargs)
    return request.execute()

In [6]:
def get_video_title_descr(video_response):
    """Extract the title and description of the first item in a response.

    Args:
        video_response: Mapping with an ``"items"`` list whose entries carry a
            ``"snippet"`` mapping with ``"title"`` and ``"description"`` keys.

    Returns:
        A ``(title, description)`` tuple taken from ``items[0]["snippet"]``.
    """
    first_snippet = video_response.get("items")[0]["snippet"]
    return first_snippet["title"], first_snippet["description"]

In [7]:
def search(youtube, **kwargs):
    """Run a ``search().list`` request and return its executed response.

    Args:
        youtube: Service object exposing ``search().list(...).execute()``.
        **kwargs: Extra request parameters (e.g. ``q=...``, ``maxResults=...``)
            forwarded unchanged.

    Returns:
        Whatever ``execute()`` returns; only the ``snippet`` part is requested.
    """
    request = youtube.search().list(part="snippet", **kwargs)
    return request.execute()

In [8]:
def np_cosine_sim(a, b):
  """Return the cosine similarity between vectors ``a`` and ``b``.

  Args:
    a: First vector (array-like).
    b: Second vector (array-like) of the same length as ``a``.

  Returns:
    ``dot(a, b) / (||a|| * ||b||)``, or ``0.0`` when either vector has zero
    norm. Without that guard the division yields NaN, and NaN scores cannot
    be ranked reliably.
  """
  denominator = np.linalg.norm(a) * np.linalg.norm(b)
  if denominator == 0:
    # An all-zero vector has no direction; treat it as "no similarity".
    return 0.0
  return np.dot(a, b) / denominator

In [32]:
def get_mean_embedding(model, sentence):
  """Average the word vectors of the whitespace-separated words in ``sentence``.

  Args:
    model: Word-vector model supporting ``word in model`` and indexing with a
      list of words (``model[words]``).
    sentence: Text to embed; it is split on whitespace and words the model
      does not contain are ignored.

  Returns:
    The element-wise mean of the known words' vectors, or an all-zero vector
    when no word is known. The zero vector has ``model.vector_size`` entries,
    falling back to 300 when the model has no such attribute.
  """
  known_words = [word for word in sentence.split() if word in model]
  if known_words:
    return np.mean(model[known_words], axis=0)
  # Size the fallback from the model rather than hard-coding 300, so models
  # with another dimensionality still produce a compatible vector.
  return np.zeros(getattr(model, "vector_size", 300))

## Main function to use

In [37]:
def leetcode_title_to_yt_video(query_string, max_results=10, title_weight=0.75):
  """Return the URL of the YouTube video that best matches a LeetCode query.

  The query is sent to the YouTube search endpoint. Each returned video is
  scored by the cosine similarity between the mean word embedding of the
  query and the mean embeddings of the video's title and description.

  Args:
    query_string: Problem title followed by its tags, for example
      "37. Sudoku Solver Array Backtracking Matrix".
    max_results: Number of search results to request and score.
    title_weight: Weight given to the title similarity; the description
      similarity gets ``1 - title_weight``.

  Returns:
    A ``https://www.youtube.com/watch?v=<id>`` URL for the top-scoring video.

  Raises:
    ValueError: If the search yields no video to score.
  """
  # Restrict the search to videos: results may otherwise include channels and
  # playlists, whose "id" object has no "videoId" key.
  response = search(youtube, q=query_string, type="video", maxResults=max_results)
  query_embedding = get_mean_embedding(wordvec, query_string)

  score_by_video_id = {}
  for item in response.get("items") or []:
    video_id = item.get("id", {}).get("videoId")
    if video_id is None:
      continue  # not a video result; nothing to score
    video_response = get_video_details(youtube, id=video_id)
    title, description = get_video_title_descr(video_response)
    title_sim = np_cosine_sim(query_embedding, get_mean_embedding(wordvec, title))
    descr_sim = np_cosine_sim(query_embedding, get_mean_embedding(wordvec, description))
    # Text with no known words (e.g. an empty description) embeds to a zero
    # vector, for which the similarity can come back as NaN. NaN compares
    # false against everything and would corrupt max(), so count it as 0.
    title_sim = np.nan_to_num(title_sim)
    descr_sim = np.nan_to_num(descr_sim)
    score_by_video_id[video_id] = (title_weight * title_sim) + ((1 - title_weight) * descr_sim)

  if not score_by_video_id:
    raise ValueError(f"No YouTube videos found for query: {query_string!r}")

  best_video_id = max(score_by_video_id, key=score_by_video_id.get)
  return f"https://www.youtube.com/watch?v={best_video_id}"

## Example

In [38]:
# The query is meant to be the LeetCode problem title followed by all of its tags.
query_string = "37. Sudoku Solver Array Backtracking Matrix"
# Look up the best-matching video for the query and show its URL.
youtube_video_url = leetcode_title_to_yt_video(query_string)
print(youtube_video_url)

  


https://www.youtube.com/watch?v=eqUwSA0xI-s
