In [90]:
# !pip install duckduckgo-search
# !pip install youtube-search
# !pip install youtube-transcript-api
# !pip install pytube
# !pip install opencv-python
# !pip install youtube-transcript-api
# !pip install google-generativeai
# !pip install streamlit

In [2]:
from duckduckgo_search import DDGS
from skimage import io
import matplotlib.pyplot as plt
from PIL import Image

def get_image_of_dish(recipe_name, max_results=10):
    """Search DuckDuckGo images for a dish and return the image URLs.

    Args:
        recipe_name: Search keywords, typically the name of the dish.
        max_results: Upper bound on the number of results requested from the
            search (defaults to 10, the value previously hard-coded).

    Returns:
        A list with the ``'image'`` value of every search result that has one,
        in the order the search returned them.
    """
    with DDGS() as ddgs:
        search_results = ddgs.images(
            recipe_name,
            region="wt-wt",
            safesearch="off",
            size=None,
            type_image=None,
            layout=None,
            license_image=None,
            max_results=max_results,
        )
        # Consume the results while the session is still open; rows without an
        # 'image' entry are skipped instead of aborting the whole lookup.
        return [hit['image'] for hit in search_results if hit.get('image')]

def display_image(image_url):
    """Load the image at ``image_url`` and show it with the axes hidden.

    Any failure while reading or drawing the image is reported with a printed
    message rather than raised, so callers can loop over many URLs.
    """
    try:
        picture = io.imread(image_url)
        plt.imshow(picture)
        plt.axis('off')
        plt.show()
    except Exception as problem:
        print(f"Could not load image {problem}")

In [None]:
# from pytube import YouTube
# import cv2
# import subprocess
# import numpy as np
# import json
# import math
# import pdb

# from youtube_transcript_api import YouTubeTranscriptApi

In [3]:
from youtube_search import YoutubeSearch
from youtube_transcript_api import YouTubeTranscriptApi

def _duration_in_minutes(duration):
    """Convert a 'SS', 'MM:SS' or 'H:MM:SS' duration to minutes (0.0 if unparseable)."""
    try:
        fields = [float(field) for field in str(duration).split(':')]
    except ValueError:
        return 0.0
    total_seconds = 0.0
    for field in fields:
        total_seconds = total_seconds * 60 + field
    return total_seconds / 60


def get_eng_recipe(recipe_name, min_duration = 5, max_recipes = 10):
    '''Search YouTube for a recipe and keep the videos that have English transcripts.

    Args:
        recipe_name: Search query.
        min_duration: Minimum video length in minutes; shorter videos are skipped.
        max_recipes: Stop once this many videos have been collected (default 10).

    Returns:
        A list of the search-result dictionaries, each extended with a
        'transcript' key holding the transcript fetched for that video. Every
        video appears at most once.
    '''
    eng_recipe = []
    results = YoutubeSearch(recipe_name, max_results=100).to_dict()
    for result in results:
        # Stop searching as soon as enough recipes are collected; this also
        # avoids transcript lookups for the remaining search results.
        if len(eng_recipe) >= max_recipes:
            break
        # Compare the full duration so that e.g. '1:02:03' counts as 62 minutes.
        if _duration_in_minutes(result.get('duration')) < min_duration:
            continue
        try:
            transcript_list = YouTubeTranscriptApi.list_transcripts(result['id'])
            has_english = any(
                'english' in transcript.language.lower()
                for transcript in transcript_list
            )
            if has_english:
                result['transcript'] = YouTubeTranscriptApi.get_transcript(
                    result['id'], ['en', 'en-GB', 'en-US'])
                eng_recipe.append(result)
        except Exception:
            # Best effort: videos whose transcripts are disabled or cannot be
            # fetched are skipped. KeyboardInterrupt is no longer swallowed.
            continue
    return eng_recipe

In [4]:
import pathlib
import textwrap
from IPython.display import display
from IPython.display import Markdown

def to_markdown(text):
    """Render ``text`` as a Markdown block quote, turning bullet glyphs into list items.

    Both the real bullet character and its mis-decoded three-character form
    (UTF-8 bytes read as cp1252) are replaced with a Markdown ``*`` item marker.
    Every line, including empty ones, is prefixed with ``'> '``.
    """
    mojibake_bullet = '\u00e2\u20ac\u00a2'  # how U+2022 looks after a wrong decode
    real_bullet = '\u2022'
    for glyph in (mojibake_bullet, real_bullet):
        text = text.replace(glyph, '  *')
    return Markdown(textwrap.indent(text, '> ', predicate=lambda _: True))

In [5]:
import google.generativeai as genai
import os

# The Gemini API key is read from the environment so that it is never stored in
# the notebook. Any key that was previously committed here should be revoked.
gemini_api_key = os.environ.get("GOOGLE_API_KEY") or os.environ.get("GEMINI_API_KEY")
if not gemini_api_key:
    raise RuntimeError(
        "Set the GOOGLE_API_KEY (or GEMINI_API_KEY) environment variable "
        "before running this cell."
    )
genai.configure(api_key=gemini_api_key)

def get_gemini_response(input, prompt):
    """Ask the Gemini model for a completion and return its text.

    Args:
        input: First content part sent to the model (the callers in this
            notebook pass the role description here).
        prompt: Second content part sent to the model.

    Returns:
        The ``text`` attribute of the model's response.
    """
    sampling_settings = dict(
        temperature=0.9,
        top_p=1,
        top_k=1,
        max_output_tokens=2048,
    )
    gemini = genai.GenerativeModel(
        model_name="gemini-1.0-pro",
        generation_config=sampling_settings,
    )
    return gemini.generate_content([input, prompt]).text

In [23]:
import json
import re

def extract_recipe_details(recipe_json):
    """Parse the model's JSON reply into steps, ingredients, servings and utensils.

    The reply may be wrapped in a Markdown code fence (optionally tagged
    ``json``) and may be preceded or followed by prose; only the outermost
    ``{...}`` object is parsed, so the text inside string values is left
    untouched.

    Args:
        recipe_json: Raw text returned by the model.

    Returns:
        A tuple ``(steps, ingredients, servings, utensils)`` read from the keys
        "Step-by-step instructions", "Ingredients", "Servings" and "Utensils".
        Missing keys fall back to ``{}``, ``{}``, ``""`` and ``[]``. If no JSON
        object can be decoded, a message is printed and ``({}, {}, "", [])`` is
        returned.
    """
    text = recipe_json if isinstance(recipe_json, str) else ""

    # Prefer the contents of a fenced code block when the reply has one.
    fenced = re.search(r"```(?:json)?\s*(.*?)```", text, flags=re.IGNORECASE | re.DOTALL)
    if fenced:
        text = fenced.group(1)

    # Keep only the outermost JSON object, dropping any surrounding prose.
    start, end = text.find("{"), text.rfind("}")
    try:
        if start == -1 or end < start:
            raise json.JSONDecodeError("no JSON object found", text, 0)
        # strict=False tolerates raw newlines/tabs inside string values.
        recipe_dict = json.loads(text[start:end + 1], strict=False)
    except json.JSONDecodeError:
        print("Error decoding JSON. Please check the input format.")
        return {}, {}, "", []

    # Extract the details
    steps = recipe_dict.get("Step-by-step instructions", {})
    ingredients = recipe_dict.get("Ingredients", {})
    servings = recipe_dict.get("Servings", "")
    utensils = recipe_dict.get("Utensils", [])

    # Ensure utensils is a list (a dict contributes its keys)
    if isinstance(utensils, (set, dict)):
        utensils = list(utensils)

    return steps, ingredients, servings, utensils

In [31]:

def create_prompt_for_recipe(transcript, recipe_to_cook):
    """Build the role description and prompt used to extract a recipe.

    Args:
        transcript: Transcript of the cooking video, interpolated into the prompt.
        recipe_to_cook: Name of the dish, interpolated into the prompt.

    Returns:
        A tuple ``(model_role, prompt)``. The prompt ends with a JSON example
        showing the expected keys of the reply.
    """
    # Example of the reply structure the model is asked to follow.
    step_examples = {
        "Step 1": "Step 1 goes here",
        "Step 2": "Step 2 goes here",
    }
    ingredient_examples = {
        "Ingredient 1": "Quantity of ingredient 1",
        "Ingredient 2": "Quantity of ingredient 2",
    }
    response_object = {
        "Step-by-step instructions": step_examples,
        "Ingredients": ingredient_examples,
        "Servings": "number of people in integer for which the recipe is for",
        "Utensils": ["utensil 1", "utensil 2", "add more utensils to the list as needed"],
    }

    model_role = "You are a large language model trained to extract step-by-step cooking instructions from a transcript of a YouTube cooking video. Your task is to identify and list all the steps involved in cooking the dish described in the transcript. Each step should be clear, concise, and in the order they appear in the video. If there are any specific measurements, cooking times, or temperatures mentioned, include those details in the corresponding steps."

    # Every line after the first is indented by four spaces, and the prompt
    # ends with a newline followed by four spaces.
    prompt_lines = [
        f"Transcript: {transcript}",
        f"Based on the transcript above of the {recipe_to_cook} recipe, extract the step-by-step cooking instructions for the dish. List each step clearly and concisely, including any specific measurements, cooking times, or temperatures mentioned. Ensure the steps are in the order they appear in the video. Also list down ingredients, servings and utensils used.",
        "Give the output only in JSON format as shown below:",
        json.dumps(response_object),
        "",
    ]
    prompt = "\n    ".join(prompt_lines)

    return model_role, prompt

In [36]:
# Driver cell: search for the recipe, pick one video, and extract its details.
recipe_to_cook = 'aloe vera hair mask'
eng_recipe = get_eng_recipe(recipe_to_cook)
# NOTE(review): index 1 selects the second result, so this raises IndexError
# when fewer than two videos were found.
index_of_recipe_chosen = 1 # int value selected by user
transcript = eng_recipe[index_of_recipe_chosen]['transcript'] # transcript with time stamps
transcript_list = [i['text'] for i in transcript] # has individual element from the transcript
# NOTE(review): transcript_combined is not used by any visible cell; the prompt
# below is built from the timestamped `transcript` instead.
transcript_combined = "".join(f"{i} " for i in transcript_list) # has transcript in the form of a single combined para
model_role, prompt = create_prompt_for_recipe(transcript, recipe_to_cook)
gemini_output = get_gemini_response(model_role, prompt)
steps, ingredients, servings, utensils = extract_recipe_details(gemini_output) 

In [83]:

def create_prompt_for_timestamps(transcript, steps, recipe_to_cook):
    """Build the role description and prompt used to time-stamp recipe steps.

    This function is named ``create_prompt_for_timestamps`` because that is the
    name the notebook calls; under its previous name it replaced the
    two-argument recipe prompt builder.

    Args:
        transcript: Transcript of the cooking video, interpolated into the prompt.
        steps: The previously extracted steps, interpolated into the prompt.
        recipe_to_cook: Name of the dish, interpolated into the prompt.

    Returns:
        A tuple ``(model_role_time, prompt)``. The prompt asks for start times
        in seconds, matching the numeric example it embeds.
    """
    # Example reply: start times are plain numbers of seconds.
    response_object_step_time = {
        "Step-by-step timestamps": {
            "Step 1": 10.2,
            "Step 2": 15.3,
        }
    }
    model_role_time = "You are a large language model trained to extract precise timestamps for each step in a cooking process from a transcript of a YouTube cooking video. Your task is to analyze the transcript and identify the exact moment each step begins. Pay attention to cues in the language that indicate the start of a new step, such as changes in activity or the introduction of new ingredients. Provide the start time of each step in a clear and concise format."

    prompt = f"""Transcript: {transcript}
    Steps from the transcript: {steps}
    Based on the transcript above of the {recipe_to_cook} recipe and the steps extracted, determine the starting time of each step in the video. Report each start time as a number of seconds from the beginning of the video (for example 95.5). Focus on identifying the exact moment when each step begins, considering any verbal cues or changes in activity.
    Provide the output only in JSON format as shown below, with one entry for every step:
    {json.dumps(response_object_step_time, indent=4)}
    """
    return model_role_time, prompt

In [84]:
import json
import re

def extract_timestamps(recipe_json):
    """Parse the model's JSON reply and return the per-step timestamps.

    The reply may be wrapped in a Markdown code fence (optionally tagged
    ``json``) and may be surrounded by prose; only the outermost ``{...}``
    object is parsed, so values are not altered before decoding.

    Args:
        recipe_json: Raw text returned by the model.

    Returns:
        The value stored under "Step-by-step timestamps", or ``{}`` when that
        key is missing. If no JSON object can be decoded, a message is printed
        and ``{}`` is returned.
    """
    text = recipe_json if isinstance(recipe_json, str) else ""

    # Prefer the contents of a fenced code block when the reply has one.
    fenced = re.search(r"```(?:json)?\s*(.*?)```", text, flags=re.IGNORECASE | re.DOTALL)
    if fenced:
        text = fenced.group(1)

    # Keep only the outermost JSON object, dropping any surrounding prose.
    start, end = text.find("{"), text.rfind("}")
    try:
        if start == -1 or end < start:
            raise json.JSONDecodeError("no JSON object found", text, 0)
        # strict=False tolerates raw newlines/tabs inside string values.
        recipe_dict = json.loads(text[start:end + 1], strict=False)
    except json.JSONDecodeError:
        print("Error decoding JSON. Please check the input format.")
        return {}
    return recipe_dict.get("Step-by-step timestamps", {})

In [85]:
# Build the timestamp prompt from the timestamped transcript and the extracted
# steps, send it to Gemini, and parse the reply into a step -> start-time mapping.
model_role_time, time_stamp_prompt = create_prompt_for_timestamps(transcript, steps, recipe_to_cook)
time_gemini_output = get_gemini_response(model_role_time, time_stamp_prompt)
timestamps = extract_timestamps(time_gemini_output)

In [87]:
print(timestamps)

{'Step 1': '22.72', 'Step 2': '28.24', 'Step 3': '45.6', 'Step 4': '51.92', 'Step 5': '56.64', 'Step 6': '69.12', 'Step 7': '74.96', 'Step 8': '149.12', 'Step 9': '181.76', 'Step 10': '194.56', 'Step 11': '200.08', 'Step 12': '207.6', 'Step 13': '225.2', 'Step 14': '284.16', 'Step 15': '296.56', 'Step 16': '310.24', 'Step 17': '332.08', 'Step 18': '350.56', 'Step 19': '357.52', 'Step 20': '390.24'}


In [91]:
steps

{'Step 1': 'Take a few minutes to extract the aloe vera gel before hopping in the shower.',
 'Step 2': 'Use a fresh aloe vera leaf, which is the best way to use it on the hair.',
 'Step 3': 'Cut the aloe leaf into square pieces.',
 'Step 4': 'Carefully remove the spiky size of each piece to allow the latex to come out.',
 'Step 5': 'Soak the aloe pieces in water for about 20 minutes to drain the latex from the gel.',
 'Step 6': 'Cut the aloe pieces right there in the middle and then using a spoon scrape the jelly part.',
 'Step 7': "Scoop the gooey slime gel from the yellow which is what you're gonna apply on the hair.",
 'Step 8': 'Before applying the aloe gel, shampoo your hair for three minutes going in with your fingertips first and crossing motion, and then with your scalp exfoliator.',
 'Step 9': 'Rinse off the shampoo with warm water for one minute.',
 'Step 10': 'Remove the excess water out of the hair.',
 'Step 11': 'Scoop with your hands the gooey stuff again.',
 'Step 12': '

In [94]:
eng_recipe[0].keys()

dict_keys(['id', 'thumbnails', 'title', 'long_desc', 'channel', 'duration', 'views', 'publish_time', 'url_suffix', 'transcript'])

In [None]:
import streamlit as st

def main(video_id="pfinDLGRG8Y", timestamps=(0, 60, 120, 180)):
    """Render a Streamlit page with one "jump to" button per timestamp.

    Args:
        video_id: YouTube video id to embed. Defaults to the example video.
        timestamps: Start times in seconds. May be an iterable of numbers or
            numeric strings, or a mapping whose values are such start times.
            Values are truncated to whole seconds. Defaults to the example
            timestamps 0s, 1min, 2min and 3min.
    """
    st.title("Recipe Helper")

    # Base URL of the YouTube video
    base_youtube_video_url = f"https://www.youtube.com/embed/{video_id}"

    if isinstance(timestamps, dict):
        timestamps = timestamps.values()

    # Create buttons for each timestamp
    for position, raw_timestamp in enumerate(timestamps):
        start_seconds = int(float(raw_timestamp))
        # An explicit key keeps the buttons distinct even if two timestamps
        # truncate to the same second and so share a label.
        if st.button(f"Start video at {start_seconds} seconds", key=f"start-{position}"):
            # Update the video URL to start at the selected timestamp and autoplay
            youtube_video_url = f"{base_youtube_video_url}?start={start_seconds}&autoplay=1"

            # Embed the YouTube video using HTML iframe
            st.components.v1.html(
                f'<iframe width="560" height="315" src="{youtube_video_url}" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>',
                height=315,
            )

if __name__ == "__main__":
    main()

