In [1]:
import os
from dotenv import load_dotenv
load_dotenv(".env")

openai_api_base = os.getenv("OPENAI_API_BASE")
openai_api_key = os.getenv("OPENAI_API_KEY")
openai_model_id = os.getenv("OPENAI_MODEL_ID")

In [2]:
from langchain.llms import OpenAI

llm = OpenAI(
    openai_api_key = openai_api_key,
    api_base = openai_api_base,
    model_name = openai_model_id,
    temperature = 0.1
)

  llm = OpenAI(
                api_base was transferred to model_kwargs.
                Please confirm that api_base is what you intended.
  exec(code_obj, self.user_global_ns, self.user_ns)


In [None]:
from langchain_core.prompts import ChatPromptTemplate

with open("./data/prompts/extract_features.txt", "r") as f:
    # Create a LangChain PromptTemplate
    extract_features_prompt = ChatPromptTemplate([
        ("system", "You are a helpful assistant"),
        ("user",  f.read())
])


In [4]:
from langchain_core.output_parsers import JsonOutputParser
parser = JsonOutputParser()

query = "I want juicy fried chicken near world trade centers around 25$"
chain = extract_features_prompt | llm | parser

params = chain.invoke({"query": query})
params 

{'location': 'world trade centers',
 'price_range': 25.0,
 'taste': 'juicy fried chicken'}

In [9]:
import logging
logger = logging.getLogger(__name__)


import time
import re
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import urllib.parse

def get_driver():
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--disable-gpu")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    driver = webdriver.Chrome(options=chrome_options)     
    return driver 

def get_coordinates(location, max_retries = 3):
    retries = 0
    while retries < max_retries:
        try:
            driver = get_driver()
            pattern = r'@(-?\d+\.\d+),(-?\d+\.\d+)'
            driver.get(f'https://www.google.com/maps/place/{location.replace(" ","+")}')
            time.sleep(4)
            current_url = driver.current_url
            matches = re.search(pattern, current_url)
            if matches:
                latitude = matches.group(1)
                longitude = matches.group(2)
                result = [latitude, longitude]
                retries = max_retries
            else:
                raise ValueError
        except:
            retries += 1
            if retries == max_retries:
                logger.error(f"Scrape coordinates for {location} failed after retrying {max_retries} times")
        finally:
            driver.quit()                
            time.sleep(1)
    return result


In [12]:
location = get_coordinates(params['location'])
location

['-6.1833216', '106.8138496']

In [1]:
from application.embeddings import StellaEmbeddingModel
emb_model = StellaEmbeddingModel()
emb_model.download_model()
emb_model.load_model()
emb_model

  from .autonotebook import tqdm as notebook_tqdm
Some weights of the model checkpoint at .\data\models\dunzhang--stella_en_400M_v5 were not used when initializing NewModel: ['new.pooler.dense.bias', 'new.pooler.dense.weight']
- This IS expected if you are initializing NewModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing NewModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


FileNotFoundError: [Errno 2] No such file or directory: '.\\data\\models\\dunzhang--stella_en_400M_v5\\2_Dense_512\\pytorch_model.bin'

In [None]:
from langchain.vectorstores import Neo4j
from langchain.embeddings import OpenAIEmbeddings

neo4j_username = os.getenv("NEO4J_USERNAME")
neo4j_password = os.getenv("NEO4J_PASSWORD")
neo4j_host = os.getenv("NEO4J_HOST")

# Custom retriever to include filtering logic based on location, price, and taste
class CustomRestaurantRetriever:
    def __init__(self):
        pass
    
    def query_menus(self, input_lat, input_long, input_price, input_distance, input_embeddings):
        graph_db = Neo4j(url = neo4j_host, auth = (neo4j_username, neo4j_password))
        
        """Query the menus based on price, distance, and embedding similarity."""
        result = graph_db.run(
            "MATCH (m:Menu)-[:OFFERS]->(r:Restaurant) "
            "WHERE m.price < $input_price AND "
            "distance(point(r), point({latitude: $input_lat, longitude: $input_long})) < $input_distance "
            "RETURN m",
            input_price=input_price,
            input_lat=input_lat,
            input_long=input_long,
            input_distance=input_distance
        )

        # Filter by cosine similarity
        filtered_menus = []
        for record in result:
            menu = record["m"]
            menu_embedding = menu["embedding"]
            similarity = self.get_cosine_similarity(menu_embedding, input_embeddings)
            if similarity > 0.7:
                filtered_menus.append(menu)

        return filtered_menus