In [1]:
from elasticsearch import Elasticsearch
from sentence_transformers import SentenceTransformer
from openai import OpenAI
from dotenv import load_dotenv
import os
from ast import literal_eval
import json
from tqdm import tqdm

from yt_info.yt_video_data import get_channel_videos, get_video_details
from yt_rag.agent import llm

# Load settings via python-dotenv before any of them are read below.
load_dotenv()

# Runtime settings, all taken from environment variables.
OLLAMA_URL = os.environ.get("LOCAL_OLLAMA_URL")
# YT_CHANNELS is parsed as a Python literal and later indexed by channel name.
# If the variable is unset, literal_eval receives None and raises.
CHANNELS = literal_eval(os.environ.get("YT_CHANNELS"))
YT_API_KEY = os.environ.get("YT_API_KEY")

# OpenAI-style client aimed at OLLAMA_URL; the api_key is the fixed string "ollama".
ollama_client = OpenAI(api_key="ollama", base_url=OLLAMA_URL)
embedding_model = SentenceTransformer("multi-qa-distilbert-cos-v1")

  from tqdm.autonotebook import tqdm, trange


In [13]:
# Fetch the details of one hard-coded video ID, authenticating with YT_API_KEY.
# This performs a live YouTube Data API request and therefore consumes quota.
video = get_video_details("ZSL_Q6Pe-Ao", YT_API_KEY)

HttpError: <HttpError 403 when requesting https://youtube.googleapis.com/youtube/v3/videos?part=snippet&id=ZSL_Q6Pe-Ao&key=<REDACTED_API_KEY>&alt=json returned "The request cannot be completed because you have exceeded your <a href="/youtube/v3/getting-started#quota">quota</a>.". Details: "[{'message': 'The request cannot be completed because you have exceeded your <a href="/youtube/v3/getting-started#quota">quota</a>.', 'domain': 'youtube.quota', 'reason': 'quotaExceeded'}]">

In [15]:
# Echo the fetched record so the notebook renders its repr as the cell output.
video

Video(title="Every Cooking Gadget You'll Ever Need", video_id='ZSL_Q6Pe-Ao', description='This is a distillation of decades of cooking and restaurant experience into one cooking tool and gadget list that every single kitchen in the world will benefit from.\n\nEverything I used in this video: https://bit.ly/JWCookingGadgets\n\nMy Spatula (Supports No Kid Hungry!):  https://bit.ly/JWspatula\n\nGet My Cookbook: https://bit.ly/TextureOverTaste\n\nAdditional Cookbook Options (other stores, international, etc.): https://bit.ly/WeissmanCookbook\n\nThe Ultimate Guide to Picking The Perfect Kitchen Knife: https://bit.ly/JWKnifeGuide\n\nFOLLOW ME:\nInstagram: https://www.instagram.com/joshuaweissman\nTik Tok: https://www.tiktok.com/@flakeysalt\nTwitter: https://twitter.com/therealweissman\nFacebook: https://www.facebook.com/thejoshuaweissman\nSubreddit: https://www.reddit.com/r/JoshuaWeissman/\n---------------------------------------------------------------', is_short=False)

In [None]:
# List the videos of the "Joshua Weissman" channel; its ID is looked up in CHANNELS.
videos = get_channel_videos(channel_id=CHANNELS["Joshua Weissman"],
                            api_key=YT_API_KEY)

In [None]:
# Prompt used to ask the LLM for five user-style questions about one video.
# It is filled in with str.format, so:
#   - {title} and {description} are the only placeholders;
#   - the doubled braces around the JSON example are escapes that render as
#     single literal braces in the final prompt.
# .strip() drops the leading and trailing newlines left by the triple quotes.
# NOTE(review): "Use as fewer words as possible" reads like a typo for
# "as few words"; it is left untouched because this text is sent verbatim to
# the model and any rewording may change the generated questions.
prompt_template = """
You emulate a user of our cooking and recipe assistant application.
Formulate 5 questions this user might ask based on a provided video.
Make the questions specific to this video.
The record should contain the answer to the questions, and the questions should
be complete and not too short. Use as fewer words as possible from the record. 

The record:

title: {title}
description: {description}

Provide the output in parsable JSON without using code blocks:

{{"questions": ["question1", "question2", ..., "question5"]}}
""".strip()

In [None]:
# Render the template for the first video, using its attributes as format fields.
prompt = prompt_template.format(**vars(videos[0]))


In [None]:
# One-off trial call: send the rendered prompt to the model through the Ollama client.
a = llm(prompt, client=ollama_client)

In [None]:
# Inspect the object returned by llm() for the trial prompt.
a

In [None]:
def generate_questions(video):
    """Ask the LLM for questions about one video and return its raw reply.

    The video's instance attributes are used as the format fields of
    ``prompt_template``; the rendered prompt is sent through ``llm`` using
    ``ollama_client``.

    Args:
        video: Object whose attributes supply the fields the template needs
            (``title`` and ``description``).

    Returns:
        The message content of the first choice in the response, unparsed.
        The prompt requests JSON, but nothing here validates that.
    """
    filled_prompt = prompt_template.format(**vars(video))
    completion = llm(filled_prompt, client=ollama_client)
    return completion.choices[0].message.content

In [None]:
# Accumulator for generated questions, keyed by video ID.
# Re-running this cell discards everything collected so far.
results = dict()

In [None]:
# Generate questions for every video that is not already in `results`, so the
# cell can be re-run to resume an interrupted batch without repeating work.
#
# The model's reply is free text that is only *asked* to be JSON. A reply that
# does not parse, or that lacks a "questions" entry, is recorded in
# `failed_video_ids` and skipped instead of aborting the whole run. Skipped
# videos stay absent from `results`, so re-running this cell retries them.
failed_video_ids = []

for video in tqdm(videos):
    video_id = video.video_id
    if video_id in results:
        continue

    questions_raw = generate_questions(video)
    try:
        results[video_id] = json.loads(questions_raw)["questions"]
    except (json.JSONDecodeError, KeyError, TypeError):
        # JSONDecodeError: reply is not valid JSON.
        # KeyError: valid JSON object without a "questions" key.
        # TypeError: reply is None, or the JSON top level is not an object.
        failed_video_ids.append(video_id)

if failed_video_ids:
    print(f"Unparsable LLM output for {len(failed_video_ids)} video(s): {failed_video_ids}")