In [2]:
import os
import googleapiclient.discovery
import googleapiclient.errors
from dotenv import load_dotenv
from openai import OpenAI
import json
import base64
import os

# Load variables from a local .env file (python-dotenv) before the keys below are read.
load_dotenv()


# Raises KeyError right here if the key is not present in the environment.
OPENAI_API_KEY=os.environ["OPENAI_API_KEY"]
# Shared OpenAI client used by get_query_from_image and run_conversation.
client = OpenAI(api_key=OPENAI_API_KEY)
# Chat model used by run_conversation for the tool-calling request.
model = "gpt-3.5-turbo-0125"


In [86]:

# Function to encode the image
def encode_image(image_path):
  """Read the file at ``image_path`` in binary mode and return its base64 text form."""
  with open(image_path, "rb") as handle:
    raw_bytes = handle.read()
  encoded_bytes = base64.b64encode(raw_bytes)
  return encoded_bytes.decode('utf-8')

def _parse_query_reply(reply_text):
  """Return the "query" value from the model's JSON reply.

  The prompt asks for bare JSON, but chat models sometimes wrap the object in
  markdown code fences or add surrounding prose, so only the outermost
  ``{...}`` span is handed to ``json.loads``. When no braces are found the
  text is parsed unchanged, so a malformed reply still raises
  ``json.JSONDecodeError``.
  """
  text = (reply_text or "").strip()
  start, end = text.find("{"), text.rfind("}")
  if start != -1 and end > start:
    text = text[start:end + 1]
  return json.loads(text)["query"]


def get_query_from_image(image_path):
  """Ask the vision model for a YouTube search query describing an image.

  Args:
    image_path: Path to a local image file. The file is read and sent inline
      as a base64 data URL.

  Returns:
    The search query string taken from the "query" key of the model's JSON
    reply.

  Raises:
    OSError: If the image file cannot be opened.
    json.JSONDecodeError: If the reply contains no parseable JSON object.
    KeyError: If the parsed JSON has no "query" key.
  """
  import mimetypes  # local import so this cell does not depend on an edited import cell

  base64_image = encode_image(image_path)

  # Label the data URL with the file's actual image type (PNG, WebP, ...);
  # fall back to JPEG, which is what was previously always sent.
  mime_type = mimetypes.guess_type(image_path)[0]
  if not mime_type or not mime_type.startswith("image/"):
    mime_type = "image/jpeg"

  response = client.chat.completions.create(
    model="gpt-4-vision-preview",
    messages=[
      {
        "role": "user",
        "content": [
          {"type": "text", "text": 'Given the image, come up with a youtube query to search for related videos. Respond in given json format: {"query": "surfing"}'},
          {
            "type": "image_url",
            "image_url": {
              "url": f"data:{mime_type};base64,{base64_image}",
            },
          },
        ],
      }
    ],
    max_tokens=300,
  )
  query = _parse_query_reply(response.choices[0].message.content)
  print("Query from image:", query)
  return query


def get_tool_get_query_from_image():
    """Build the function-tool spec that describes ``get_query_from_image`` to the chat model."""
    image_path_schema = {
        "type": "string",
        "description": "Path to the image to parse.",
    }
    parameters = {
        "type": "object",
        "properties": {"image_path": image_path_schema},
        "required": ["image_path"],
    }
    function_spec = {
        "name": "get_query_from_image",
        "description": "Parse input image and get search query for youtube api.",
        "parameters": parameters,
    }
    return {"type": "function", "function": function_spec}


In [87]:
def youtube_search(query):
    """Search YouTube for videos matching ``query`` and return their watch URLs.

    Args:
        query: Free-text search string.

    Returns:
        A list of up to 5 ``https://www.youtube.com/watch?v=...`` URLs. The
        list is empty when the search yields no videos.

    Raises:
        KeyError: If ``YOUTUBE_API_KEY`` is not set in the environment.
    """
    api_key = os.environ["YOUTUBE_API_KEY"]
    youtube = googleapiclient.discovery.build(
        "youtube", "v3", developerKey=api_key
    )

    # Strict safe search; type="video" keeps channels and playlists out of the
    # results, since those items carry no "videoId".
    request = youtube.search().list(
        part="snippet",
        maxResults=5,
        q=query,
        safeSearch="strict",
        type="video",
    )
    response = request.execute()

    video_links = []
    for item in response.get("items", []):
        # Defensive: skip any result that still lacks a video id.
        video_id = item.get("id", {}).get("videoId")
        if video_id:
            video_links.append(f"https://www.youtube.com/watch?v={video_id}")
    print("Video links:", video_links)
    return video_links

def get_tool_youtube_search():
    """Build the function-tool spec that describes ``youtube_search`` to the chat model."""
    query_schema = {
        "type": "string",
        "description": "The query to search for.",
    }
    parameters = {
        "type": "object",
        "properties": {"query": query_schema},
        "required": ["query"],
    }
    function_spec = {
        "name": "youtube_search",
        "description": "Searches for relevant youtube videos based on the query.",
        "parameters": parameters,
    }
    return {"type": "function", "function": function_spec}

In [88]:

def run_conversation(query, image_path=None, max_tool_rounds=5):
    """Answer a student's query, letting the model call the YouTube/image tools.

    Args:
        query: The student's question.
        image_path: Optional path to a local image. When given, it is appended
            to the user message so the model can pass it to
            ``get_query_from_image``.
        max_tool_rounds: Upper bound on tool-calling rounds. On the last
            allowed round the follow-up request is sent without tools so the
            model has to reply in text and the loop terminates.

    Returns:
        The text content of the model's final message.
    """
    user_message_content = query
    if image_path:
        user_message_content += "\n Image: " + image_path
    messages = [
        {"role": "system", "content": "You are a tutor. You are helping a student with their queries. You have access to the following tools: youtube_search, get_query_from_image. Use them to provide helpful youtube videos."},
        {"role": "user", "content": user_message_content},
    ]
    tools = [get_tool_youtube_search(), get_tool_get_query_from_image()]
    # Every tool advertised in `tools` needs an entry here, otherwise a call
    # to it cannot be dispatched.
    available_functions = {
        "youtube_search": youtube_search,
        "get_query_from_image": get_query_from_image,
    }

    response = client.chat.completions.create(
        model=model,
        messages=messages,
        tools=tools,
        tool_choice="auto",
    )
    response_message = response.choices[0].message
    tool_calls = response_message.tool_calls

    rounds = 0
    while tool_calls:
        rounds += 1
        # The assistant message carrying the tool calls must precede the tool results.
        messages.append(response_message)
        for tool_call in tool_calls:
            function_name = tool_call.function.name
            function_to_call = available_functions.get(function_name)
            if function_to_call is None:
                # Report an unknown tool back to the model instead of crashing.
                function_response = f"Error: unknown tool {function_name!r}"
            else:
                function_args = json.loads(tool_call.function.arguments)
                function_response = function_to_call(**function_args)
            # Tool message content must be a string; youtube_search returns a list.
            if not isinstance(function_response, str):
                function_response = json.dumps(function_response)
            messages.append(
                {
                    "tool_call_id": tool_call.id,
                    "role": "tool",
                    "name": function_name,
                    "content": function_response,
                }
            )

        # Keep the tools available so calls can be chained (image -> query ->
        # search); withhold them once the round budget is used up.
        follow_up_kwargs = {"model": model, "messages": messages}
        if rounds < max_tool_rounds:
            follow_up_kwargs.update(tools=tools, tool_choice="auto")
        response = client.chat.completions.create(**follow_up_kwargs)
        response_message = response.choices[0].message
        tool_calls = response_message.tool_calls
    return response_message.content



In [89]:
run_conversation(
    query="How to create this pasta?",
    image_path="/Users/shravanchaudhary/Documents/projects/personal/llm-tutor/pasta.jpg",
)

Query from image: penne pasta recipe


'I found a video that can help you create a Penne Pasta recipe similar to the image you shared. Here is the video: [Penne Pasta Recipe](https://www.youtube.com/watch?v=z40Cbkeg9eI) \n\nThis recipe should help you recreate a delicious plate of Penne Pasta. Let me know if you have any more questions!'

In [91]:
# Text-only question: pass image_path explicitly so the call matches the
# run_conversation(query, image_path) signature.
print(run_conversation("How to create this pasta?", image_path=None))

Sure, I'd be happy to help! Could you provide me with the name or any specific details about the pasta dish you're interested in creating?


In [93]:
# run_conversation takes no conversation_id argument, so that keyword is dropped.
print(run_conversation("This: ", image_path="/Users/shravanchaudhary/Documents/projects/personal/llm-tutor/pasta.jpg"))

Query from image: penne pasta recipe
Video links: ['https://www.youtube.com/watch?v=vxOzUCYJQ8M']
Here is a video that demonstrates a penne pasta recipe: [Click here to watch the video](https://www.youtube.com/watch?v=vxOzUCYJQ8M)


In [6]:
# NOTE(review): stale cell — run_conversation as defined in this notebook requires a
# `query` argument, so this call raises TypeError on a fresh kernel. The output below
# appears to come from an earlier version of the function — confirm and remove.
x = run_conversation()

In [7]:
x

[{'role': 'user',
  'content': "What's the weather like in San Francisco, Tokyo, and Paris?"},
 ChatCompletionMessage(content=None, role='assistant', function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_hPMRR3wKZhM7jCBfI2i1WVIr', function=Function(arguments='{"location": "San Francisco"}', name='get_current_weather'), type='function'), ChatCompletionMessageToolCall(id='call_2KVX3hUYWxympU3woA3vok0b', function=Function(arguments='{"location": "Tokyo"}', name='get_current_weather'), type='function'), ChatCompletionMessageToolCall(id='call_wfd5RbTOAzCEXYnbgwrZl4jb', function=Function(arguments='{"location": "Paris"}', name='get_current_weather'), type='function')]),
 {'tool_call_id': 'call_hPMRR3wKZhM7jCBfI2i1WVIr',
  'role': 'tool',
  'name': 'get_current_weather',
  'content': '{"location": "San Francisco", "temperature": "72", "unit": null}'},
 {'tool_call_id': 'call_2KVX3hUYWxympU3woA3vok0b',
  'role': 'tool',
  'name': 'get_current_weather',
  'content': '{"location