In [89]:
import torch
import re
import nltk
import json
import google.generativeai as genai
import os

from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sentence_transformers import SentenceTransformer

# Local
from constants import *
from utils import *

In [90]:
import os

# Credentials are read from the environment so they never live in the notebook.
# Both values are None when the corresponding variable is not set.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")

# SECURITY: this used to be a hard-coded key literal. Any key that was ever
# committed with the notebook should be treated as leaked and rotated.
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
# Model identifier handed to SentenceTransformer when encoding queries.
ENCODE_MODEL_NAME = "sentence-transformers/all-mpnet-base-v2"

# Role/system portion of the prompt sent to the LLM.
LLM_SYSTEM_MESSAGE = """
You are a food expert and a tour guide who knows all the restaurant and their food menu inside Brodhead center at Duke University, Durham, NC.
You are responsible for providing information about the restaurants and their food menu to the customers.
You are also responsible for providing the best restaurant and food menu recommendations to the customers based on their preferences.
You are using the context I provide as a RAG prompt to generate the response.
"""

# User portion of the prompt. It has two positional `{}` slots, filled via
# str.format(query, database_json) in that order.
USER_SYSTEM_MESSAGE = """
Use the following JSON list as context.
Output in the same JSON list format as provided in the 'Database' section of the prompt.
Pay attention to the name, type, category and price of the dish in the menu.
Give me top 10 results for the given query in order of relevance based on what the user might be searching for.:
Query: {}
Database:
{}
"""

In [91]:
from pinecone import Pinecone

def Pinecone_index():
    """
    Build a Pinecone client from PINECONE_API_KEY and return a handle
    to the index named 'items'.
    """
    client = Pinecone(api_key=PINECONE_API_KEY)
    return client.Index('items')

In [92]:
# Fetch the NLTK 'punkt' and 'stopwords' packages. preprocess_text calls
# word_tokenize and stopwords.words('english'), which presumably rely on
# these two resources -- confirm against the installed NLTK version.
nltk.download('punkt')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to
[nltk_data]     /Users/ritutoshniwal/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/ritutoshniwal/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [93]:
def preprocess_text(text):
    """
    Normalize a free-text query before it is embedded.

    Steps, in order:
      1. lowercase the text and collapse every whitespace run to one space;
      2. tokenize with word_tokenize;
      3. drop tokens found in stopwords.words('english');
      4. strip every character that is not a letter, digit or whitespace
         from each remaining token;
      5. discard tokens that became empty in step 4 (pure punctuation);
      6. join the surviving tokens with single spaces.

    Args:
        text (str): raw user query.

    Returns:
        str: the cleaned query; an empty string if nothing survives.
    """
    normalized = re.sub(r'\s+', ' ', text.lower()).strip()

    stop_words = set(stopwords.words('english'))

    cleaned_tokens = []
    for token in word_tokenize(normalized):
        if token in stop_words:
            continue
        # Remove all special characters from the token.
        token = re.sub(r'[^a-zA-Z0-9\s]', '', token)
        # Punctuation-only tokens are now ''. Keeping them would leave doubled
        # or trailing spaces in the joined result, so they are skipped.
        if token:
            cleaned_tokens.append(token)

    return ' '.join(cleaned_tokens)

In [94]:
# Loaded encoders keyed by (model_name, device), so repeated queries reuse
# the same model instead of constructing it again on every call.
_ENCODER_CACHE = {}


def _load_encoder(model_name, device):
    """Return the SentenceTransformer for (model_name, device), building it on first use."""
    cache_key = (model_name, device)
    if cache_key not in _ENCODER_CACHE:
        _ENCODER_CACHE[cache_key] = SentenceTransformer(model_name, device=device)
    return _ENCODER_CACHE[cache_key]


def encode_model(query):
    """
    Embed a query with the ENCODE_MODEL_NAME sentence-transformer.

    The model runs on 'cuda' when torch reports it as available, otherwise
    on 'cpu'. The model object is cached after the first call.

    Args:
        query: text to embed; passed straight to model.encode.

    Returns:
        The encoder output converted with .tolist(), i.e. plain Python
        lists rather than a tensor.
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = _load_encoder(ENCODE_MODEL_NAME, device)
    return model.encode(query, convert_to_tensor=True).tolist()

In [95]:
def metadata_from_pinecone(embeddings, top_k=50):
    """
    Query the Pinecone index with an embedding and collect the metadata
    of every returned match.

    Args:
        embeddings: query vector, passed as `vector=` to index.query.
        top_k (int): number of matches to request. Defaults to 50.

    Returns:
        list: one metadata entry per match, in the order the index returned
        them. A match without a 'metadata' field contributes an empty dict;
        a response without 'matches' yields an empty list.
    """
    index = Pinecone_index()
    response = index.query(vector=embeddings, top_k=top_k, include_metadata=True)
    return [match.get('metadata', {}) for match in response.get('matches', [])]

In [96]:
def __get_gemini_client__() -> genai.GenerativeModel:
    """
    Configure the google.generativeai package with the GEMINI_API_KEY
    environment variable and return a "gemini-pro" GenerativeModel.
    """
    api_key = os.getenv("GEMINI_API_KEY")
    genai.configure(api_key=api_key)
    return genai.GenerativeModel("gemini-pro")

In [97]:
def get_LLM_Response(query, metadata, gemini_model: genai.GenerativeModel) -> str:
    """
    Build the prompt from the system message, the query and the retrieved
    metadata, send it to the model and return the reply text.

    Args:
        query: the query string placed in the 'Query:' slot of the prompt.
        metadata: JSON-serializable context; dumped with json.dumps and
            placed in the 'Database:' slot.
        gemini_model: model whose generate_content(prompt) is called.

    Returns:
        str: the `.text` attribute of the model response.
    """
    context_json = json.dumps(metadata)
    user_section = USER_SYSTEM_MESSAGE.format(query, context_json)
    # The layout below (including its leading indentation) is part of the
    # prompt text sent to the model.
    prompt = f"""
    {LLM_SYSTEM_MESSAGE}

    {user_section}
    """
    reply = gemini_model.generate_content(prompt)
    return reply.text

In [98]:
def main():
    """
    Run one query end to end: read it from input(), preprocess it, embed it,
    fetch matching metadata from Pinecone, ask the Gemini model and print
    the reply.
    """
    cleaned_query = preprocess_text(input("Enter your query: "))
    candidates = metadata_from_pinecone(encode_model(cleaned_query))
    # Arguments are evaluated left to right, so the Gemini client is created
    # only after retrieval has finished.
    print(get_LLM_Response(cleaned_query, candidates, __get_gemini_client__()))

In [99]:
# Run the pipeline once: main() reads a query via input() and prints the reply.
main()


    
You are a food expert and a tour guide who knows all the restaurant and their food menu inside Brodhead center at Duke University, Durham, NC.
You are responsible for providing information about the restaurants and their food menu to the customers.
You are also responsible for providing the best restaurant and food menu recommendations to the customers based on their preferences.
You are using the context I provide as a RAG prompt to generate the response.


    
Use the following JSON list as context.
Output in the same JSON list format as provided in the 'Database' section of the prompt.
Pay attention to the name, type, category and price of the dish in the menu.
Give me top 10 results for the given query in order of relevance based on what the user might be searching for.:
Query: food 50 calories
Database:
[{"calories": 350.0, "category": "Snack Boxes and Sides", "description": "A protein-packed snack box with turkey and cheese.", "ingredients": ["Turkey breast", "Cheddar chee