In [None]:
from elasticsearch import Elasticsearch
from sentence_transformers import SentenceTransformer
from openai import OpenAI
from dotenv import load_dotenv
import os
from ast import literal_eval
import json
from tqdm import tqdm
import pickle

from yt_rag.agent import llm


# Read settings from a local .env file (if any) into the process environment.
load_dotenv()

# Base URL of the local Ollama server; None when LOCAL_OLLAMA_URL is unset.
OLLAMA_URL = os.environ.get("LOCAL_OLLAMA_URL")

# OpenAI-style client pointed at that URL. The api_key value is presumably
# just a placeholder for the local server -- confirm against the deployment.
ollama_client = OpenAI(api_key="ollama", base_url=OLLAMA_URL)

In [None]:
# Load the previously saved video details. Later cells read `video_id` from
# each element and fill the prompt from the element's attributes.
# NOTE(review): pickle can execute arbitrary code while loading -- only open
# files produced by this project.
with open("../data/yt_videos_details.pkl", mode="rb") as f:
    videos = pickle.load(f)


In [None]:
# Prompt template used by generate_questions(). It is filled in with
# str.format(), so `{title}` and `{description}` are placeholders taken from
# the video's attributes, while the doubled braces around the JSON example
# are escapes that render as literal `{` and `}` in the final prompt.
prompt_llm = """
You are a user of a cooking and recipe assistant app.
Generate 5 general questions related to the cooking techniques, ingredients, or recipes discussed in the provided video.
The questions should be clear, concise, and relevant to the video's content.
Avoid being overly specific about particular ingredients or using too many details from the title and description.
Do not mention or reference the video title, description, or any URLs.
Focus on broader cooking themes that would be of interest to a home cook.

The video information:

title: {title}
description: {description}

Output the result as a JSON object without using code blocks:

{{"questions": ["question1", "question2", "question3", "question4", "question5"]}}
""".strip()

In [None]:
def generate_questions(video):
    """Ask the LLM for questions about a single video.

    Args:
        video: Object whose instance attributes (``video.__dict__``) supply
            the placeholders of ``prompt_llm`` -- at least ``title`` and
            ``description``.

    Returns:
        Whatever ``llm`` returns for the filled-in prompt; the calling loop
        treats it as the raw reply text and parses it as JSON.

    Raises:
        KeyError: If an attribute required by the template is missing.
    """
    filled_prompt = prompt_llm.format(**video.__dict__)
    return llm(filled_prompt, client=ollama_client)

In [None]:
# Accumulators for the generation loop, both keyed by video id:
#   results          -> parsed list of questions for each successful video
#   failed_video_ids -> raw LLM reply for each video whose reply was unusable
# Re-running this cell discards everything collected so far.
results, failed_video_ids = {}, {}

In [None]:
def _parse_questions(raw):
    """Pull the list of questions out of a raw LLM reply.

    The reply is expected to contain one JSON object with a "questions" key,
    possibly wrapped in a Markdown code fence or surrounded by extra text.
    Only the span from the first "{" to the last "}" is parsed, so such
    wrappers are skipped without altering the question text itself (blindly
    deleting every "json" or backtick would also edit the questions).

    Args:
        raw: The reply as returned by generate_questions().

    Returns:
        The value stored under the "questions" key.

    Raises:
        ValueError: If the reply is not a string, contains no JSON object,
            is not valid JSON (json.JSONDecodeError is a ValueError), or has
            no "questions" key.
    """
    if not isinstance(raw, str):
        raise ValueError(f"expected a string reply, got {type(raw).__name__}")

    start = raw.find("{")
    end = raw.rfind("}")
    if start == -1 or end < start:
        raise ValueError("no JSON object found in reply")

    # The slice starts with "{", so a successful parse always yields a dict.
    payload = json.loads(raw[start:end + 1])
    if "questions" not in payload:
        raise ValueError('reply has no "questions" key')
    return payload["questions"]


# Generate questions for every video. The cell is safe to re-run: videos that
# already have an entry in `results` are skipped, everything else is retried.
for video in tqdm(videos):
    if video.video_id in results:
        continue

    try:
        questions_raw = generate_questions(video)
    except Exception as err:
        # Best effort: report the failure and move on. Nothing is stored, so
        # the video is simply retried the next time this cell runs.
        print(f"Failed {video.video_id}", err)
        continue

    try:
        questions = _parse_questions(questions_raw)
    except (ValueError, RecursionError):
        # Keep the unusable reply around for later inspection.
        failed_video_ids[video.video_id] = questions_raw
        continue

    results[video.video_id] = questions
    # A retry may succeed after an earlier failure; drop the stale record so
    # `failed_video_ids` only lists videos that still have no questions.
    failed_video_ids.pop(video.video_id, None)
        