In [0]:
%pip install -U -q openai mlflow databricks-agents

In [0]:
%pip install databricks-genai

In [0]:
pip install databricks-genai-inference

In [0]:
%pip install geopy

In [0]:
%restart_python

In [0]:
### import 
import pandas as pd
import numpy as np
import mlflow
import json
import requests
import mlflow
from math import radians, sin, cos, sqrt, atan2
from geopy.geocoders import Nominatim
import requests

from openai import OpenAI
from databricks.sdk import WorkspaceClient

from ipywidgets import widgets

In [0]:
# Turn on MLflow autologging for the OpenAI client so LLM calls are traced
mlflow.openai.autolog()

# Set the MLflow tracking URI to the Databricks workspace
mlflow.set_tracking_uri("databricks")

In [0]:
# Pull the store columns used by this notebook from the workspace table into pandas.
raw_data = (
    spark.sql('''select `name`, address, category, lat, lon, open_hours_updated, open_website 
                        from workspace.default.brightint_sanfran_bus''')
    .toPandas()
)
# Work on a deep copy (the pandas default) so raw_data is left untouched by later cells.
data = raw_data.copy()

In [0]:
# Preview the first rows to sanity-check the loaded columns
data.head()

## Keep only stores with live URLs

In [0]:
def check_url_live(url):
    """Report whether a GET request to ``url`` succeeds.

    Issues ``requests.get`` with a 10-second timeout and calls
    ``raise_for_status`` on the response.

    Returns:
        True when neither step raises; False when any
        ``requests.RequestException`` is raised.
    """
    try:
        requests.get(url, timeout=10).raise_for_status()
    except requests.RequestException:
        return False
    else:
        return True

def check_urls_in_dataframe(df, url_column):
    """Add a ``url_live`` column to ``df`` by probing each URL in ``url_column``.

    The column is written onto ``df`` itself (the frame is modified in place)
    and the same frame is returned.
    """
    liveness = df[url_column].apply(check_url_live)
    df['url_live'] = liveness
    return df

# Probe every URL in the 'open_website' column of data. The helper adds the
# 'url_live' column in place and returns its argument, so data_check and data
# refer to the same DataFrame.
data_check = check_urls_in_dataframe(data, 'open_website')
# Show how many URLs were reachable vs. not (NaN counted too)
display(data_check.url_live.value_counts(dropna=False))

In [0]:
# Keep only the rows whose URL responded. .copy() makes the result an
# independent frame, so adding columns to it in later cells does not trigger
# pandas' SettingWithCopyWarning.
data = data[data['url_live'] == True].copy()

## Get stores with prices

In [0]:
def check_grocery_store_content(url):
    """Ask the LLM whether ``url`` lists food items and prices.

    Sends a fixed system prompt plus a user prompt containing ``url`` to the
    ``databricks-meta-llama-3-3-70b-instruct`` endpoint.

    Returns:
        The model's reply text with surrounding whitespace stripped. This is
        a string (the prompt asks for "True"/"False"), not a Python bool.

    NOTE(review): relies on the notebook-global ``client``, which in this
    notebook is created in a later cell than the first call to this function;
    on a fresh top-to-bottom run it would not exist yet -- confirm cell order.
    """
    conversation = [
        {"role": "system", "content": "You are an AI assistant specialized in finding grocery store websites with deals and food prices."},
        {"role": "user", "content": f"Check if the following grocery store URL has food items and prices listed: {url}. if it does return True, otherwise return False"},
    ]
    reply = client.chat.completions.create(
        messages=conversation,
        model="databricks-meta-llama-3-3-70b-instruct",
        max_tokens=2048,
    )
    return reply.choices[0].message.content.strip()

In [0]:
def check_urls_in_dataframe_price(df, url_column):
    """Add a ``has_food_and_prices`` column to ``df`` and return the frame.

    Each value in ``url_column`` is passed to ``check_grocery_store_content``;
    the replies are stored on ``df`` itself (modified in place).
    """
    verdicts = df[url_column].apply(check_grocery_store_content)
    df['has_food_and_prices'] = verdicts
    return df

# Ask the LLM about every URL in the 'open_website' column of data; the helper
# stores the replies in a new 'has_food_and_prices' column.
# NOTE(review): check_grocery_store_content uses the global `client`, which is
# only created in a later cell of this notebook -- confirm cell order before a
# fresh top-to-bottom run.
data = check_urls_in_dataframe_price(data, 'open_website')

## Function Calls

In [0]:
def haversine(lat1, lon1, lat2, lon2):
    """Great-circle distance between two points, in kilometers.

    Args:
        lat1, lon1: Latitude and longitude of the first point, in degrees.
        lat2, lon2: Latitude and longitude of the second point, in degrees.

    Returns:
        The haversine distance in kilometers, using an Earth radius of
        6371.0 km.
    """
    R = 6371.0  # Earth radius in kilometers
    lat1, lon1, lat2, lon2 = map(radians, [lat1, lon1, lat2, lon2])
    dlat, dlon = lat2 - lat1, lon2 - lon1
    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
    # Mathematically 0 <= a <= 1, but floating-point rounding can push it
    # marginally outside that range for (near-)antipodal points, which would
    # make sqrt(1 - a) raise ValueError. Clamp before taking square roots.
    a = min(1.0, max(0.0, a))
    return R * 2 * atan2(sqrt(a), sqrt(1 - a))

def find_closest_stores(user_lat, user_lon, data, top_n=2):
    """Return the stores in ``data`` nearest to the user's position.

    Args:
        user_lat: User latitude in degrees.
        user_lon: User longitude in degrees.
        data: DataFrame with 'name', 'open_website', 'lat' and 'lon' columns.
        top_n: How many stores to return (defaults to 2).

    Returns:
        A list of up to ``top_n`` tuples ``(name, open_website, distance_km)``
        ordered from nearest to farthest.
    """
    # Rows without coordinates would yield NaN distances, and NaN keys make
    # the sort order unreliable, so leave them out.
    located = data.dropna(subset=['lat', 'lon'])
    distances = [
        (name, website, haversine(user_lat, user_lon, lat, lon))
        for name, website, lat, lon in zip(
            located['name'], located['open_website'], located['lat'], located['lon']
        )
    ]
    return sorted(distances, key=lambda entry: entry[2])[:top_n]

In [0]:
# Create a temporary workspace token valid for 2400 seconds (40 minutes)
w = WorkspaceClient()
tmp_token = w.tokens.create(lifetime_seconds=2400).token_value

# OpenAI client pointed at this workspace's /serving-endpoints base URL,
# authenticated with the temporary token
client = OpenAI(
    api_key=tmp_token,
    base_url=f"{w.config.host}/serving-endpoints",
)

# Enable MLflow autologging for OpenAI calls (also called in an earlier cell)
mlflow.openai.autolog()

def build_meal(url1, url2):
    """Ask the LLM for a low-cost healthy meal based on two store websites.

    Args:
        url1: URL of the first grocery store website.
        url2: URL of the second grocery store website.

    Returns:
        The text of the model's reply (recipe, per-store cost and ingredient
        table, as requested by the prompt).
    """
    url1 = str(url1)
    url2 = str(url2)
    chat_completion = client.chat.completions.create(
        messages=[
            {"role": "system", "content": "You are an AI assistant specialized in building affordable meal plans from a given list of food."},
            {"role": "user", "content": f"Using the inventory on these websites: {url1}, and {url2}, build a healthy meal based on five items that are discounted or available at a low price that could be used to build a healthy meal. Only provide the recipe including instructions and ingredients, and the total cost at each store. Put the ingredient list at the end in a table format that is easy to read while at the store."},
        ],
        model="databricks-meta-llama-3-3-70b-instruct",
        max_tokens=2048,
    )
    # Hand the generated meal plan back to the caller; without this return the
    # function yields None and the plan is lost.
    return chat_completion.choices[0].message.content

## Tool Call

In [0]:
# Tool (function-calling) schemas passed as `tools=` to the chat completion
# call in run_tool_loop. Each entry names a function defined in this notebook
# and describes its parameters as a JSON-schema object.
# NOTE(review): user_lat/user_lon are declared as strings here, and
# run_tool_loop passes its own user_lat/user_lon arguments to
# find_closest_stores rather than the values the model supplies -- confirm
# that is intended.
tools_list = [
    # Tool 1: nearest-store lookup
    {
        "type": "function",
        "function": {
            "name": "find_closest_stores",
            "description": "Retrieves the closest grocery stores to the user and their websites. Use this tool when a user asks about nearby grocery stores.",
            "parameters": {
                "type": "object",
                "properties": {
                    "user_lat": {
                        "type": "string",
                        "description": "The user's latitude."
                    },
                    "user_lon": {
                        "type": "string",
                        "description": "The user's longitude."
                    }
                },
                "required": ["user_lat", "user_lon"]
            }
        }
    },
    # Tool 2: meal builder taking two store website URLs
    {
        "type": "function",
        "function": {
            "name": "build_meal",
            "description": "Gets information from multiple grocery store websites to build a meal. Use this tool when a user wants to create a meal.",
            "parameters": {
                "type": "object",
                "properties": {
                    "url1": {
                        "type": "string",
                        "description": "The URL of the website."
                    }, 
                    "url2": {
                        "type": "string",
                        "description": "The URL of the website."
                    }
                },
                "required": ["url1", "url2"]
            }
        }
    }
]

In [0]:
def run_tool_loop(user_query, user_lat, user_lon, max_turns=10):
    """Drive an LLM tool-calling conversation until a meal is built or the model answers.

    Each turn sends the running message list to the model together with
    ``tools_list``. Requested tools are executed locally and their results are
    appended as ``tool`` messages for the next turn.

    Args:
        user_query: The user's natural-language request.
        user_lat: User latitude; converted with ``float`` before use.
        user_lon: User longitude; converted with ``float`` before use.
        max_turns: Upper bound on model round-trips, so a model that keeps
            requesting tools cannot loop forever (defaults to 10).

    Returns:
        The text returned by ``build_meal`` when that tool ran and produced
        one; otherwise the ``content`` of the last assistant message (which
        may be None if that message only contained tool calls).

    Raises:
        ValueError: If ``max_turns`` is less than 1.
    """
    if max_turns < 1:
        raise ValueError("max_turns must be at least 1")

    # Initialize messages ONCE with system and initial user query
    messages = [
        {"role": "system", "content": "You are an AI assistant."},
        {"role": "user", "content": user_query},
    ]
    meal_plan = None
    with mlflow.start_span(span_type="CHAIN"):
        for _ in range(max_turns):
            response = client.chat.completions.create(
                messages=messages,
                model="databricks-meta-llama-3-3-70b-instruct",  # Your specified model
                max_tokens=256,
                tools=tools_list,
                tool_choice="auto",
            )
            assistant_message = response.choices[0].message
            messages.append(assistant_message)  # Add assistant's full response

            if not assistant_message.tool_calls:
                break  # No tool call, LLM gave final text answer

            break_condition = False
            for tool_call in assistant_message.tool_calls:
                try:
                    arguments = json.loads(tool_call.function.arguments)
                    if tool_call.function.name == "find_closest_stores":
                        # NOTE(review): uses the caller-supplied coordinates (not the
                        # model's arguments) and only rows 0 and 2 of `data` -- confirm
                        # this hard-coded subset is intended.
                        tool_result_str = str(find_closest_stores(float(user_lat), float(user_lon), data.iloc[[0, 2], :]))
                    elif tool_call.function.name == "build_meal":
                        meal = build_meal(str(arguments["url1"]), str(arguments["url2"]))
                        if meal is not None:
                            meal_plan = meal
                        tool_result_str = str(meal)
                        break_condition = True
                    else:
                        tool_result_str = json.dumps({"error": "Unknown tool function"})
                except Exception as e:  # Catch-all for any error during arg parsing or tool execution
                    tool_result_str = json.dumps({"error": f"Tool execution failed: {str(e)}"})

                # Append the result while `tool_call` still refers to the call that
                # produced it, so each tool message carries its own id and name.
                messages.append({
                    "role": "tool",
                    "tool_call_id": tool_call.id,  # Essential
                    "name": tool_call.function.name,  # Essential
                    "content": tool_result_str,  # Must be a string
                })

            if break_condition:
                break  # build_meal ran; its output is the final answer

            # Loop continues: sends messages including tool result back to LLM for summarization

    # Prefer the generated meal; the assistant message that requested the tool
    # typically has no text content of its own.
    if meal_plan is not None:
        return meal_plan
    return assistant_message.content

In [0]:
def get_location():
    """Look up an approximate latitude/longitude from the public IP address.

    Fetches the public IP from api.ipify.org, then queries ipapi.co for that
    IP's geolocation. Both requests use a 10-second timeout so a stalled
    service cannot hang the notebook.

    Returns:
        A ``(latitude, longitude)`` tuple taken from the ipapi.co JSON reply;
        either value is None when the reply lacks that key.

    NOTE(review): the IP is that of the machine running this code; on a
    remote cluster that is presumably the cluster's egress address rather
    than the end user's -- confirm this is acceptable.
    """
    # Get the public IP address
    ip_address = requests.get('https://api.ipify.org', timeout=10).text
    # Get the location based on the IP address
    response = requests.get(f'https://ipapi.co/{ip_address}/json/', timeout=10).json()
    latitude = response.get('latitude')
    longitude = response.get('longitude')
    return latitude, longitude

# Resolve the coordinates once; they are passed to run_tool_loop in the next cell
latitude, longitude = get_location()
print(f"Latitude: {latitude}, Longitude: {longitude}")

In [0]:
# Example call to run_tool_loop
user_query = "Find the closest grocery stores and build a meal for someone with diabetes."
# Alternative prompt to try instead:
# user_query = "Find the closest grocery stores and build a meal for someone with a nut allergy."

# latitude/longitude are the values produced by get_location() above
response = run_tool_loop(user_query, latitude, longitude)
print(response)