# How To Build An AI Agent With OpenAI, LlamaIndex and MongoDB

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mongodb-developer/GenAI-Showcase/blob/main/notebooks/agents/airbnb_agent_openai_llamaindex_mongodb.ipynb)

## Install Libraries

In [1]:
# Install LlamaIndex, its MongoDB and OpenAI integrations, and supporting packages.
!pip install -qU llama-index  # main llamaindex library
!pip install -qU llama-index-vector-stores-mongodb # mongodb vector database
!pip install -qU llama-index-llms-openai # openai llm provider
!pip install -qU llama-index-embeddings-openai # openai embedding provider
!pip install -qU pymongo pandas datasets # others

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.6 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m1.6/1.6 MB[0m [31m51.8 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m28.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m32.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m36.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m176.8/176.8 kB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m295.8/295.8 kB[0m [31m12.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m38.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[

## Setup Prerequisites

In [19]:
import getpass
import os

from pymongo import MongoClient

In [4]:
# Prompt for the OpenAI API key (input is not echoed) and expose it via the
# environment; the embedding model configuration below reads it from there.
os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter OpenAI API Key:")

Enter OpenAI API Key:··········


In [20]:
# Prompt for the MongoDB connection string (input is not echoed) and open a
# client with it. `appname` tags the connection with this notebook's identifier.
MONGODB_URI = getpass.getpass("Enter your MongoDB URI: ")
mongodb_client = MongoClient(
    MONGODB_URI, appname="devrel.content.airbnb_agent_mongodb_llamaindex"
)

Enter your MongoDB URI: ··········


## Configure LLMs and Embedding Models

In [53]:
from llama_index.core import Settings
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI

# Register the embedding model on LlamaIndex's global Settings object.
# NOTE: dimensions=256 must agree with the "numDimensions" value of the vector
# search index definition created later in this notebook.
Settings.embed_model = OpenAIEmbedding(
    model="text-embedding-3-small",
    dimensions=256,
    embed_batch_size=10,
    openai_api_key=os.environ["OPENAI_API_KEY"],
)
# Chat model handed to the agent worker further down.
# temperature=0 is presumably chosen for more repeatable answers.
llm = OpenAI(model="gpt-4o", temperature=0)

## Download the Dataset

In [29]:
import pandas as pd
from datasets import load_dataset

# Source: https://huggingface.co/datasets/MongoDB/airbnb_embeddings
# Stream the training split and keep only a 200-row sample of it.
data = load_dataset(
    "MongoDB/airbnb_embeddings", split="train", streaming=True
).take(200)

# Materialise the streamed sample as a pandas DataFrame
data_df = pd.DataFrame(data)

In [30]:
# Preview the first five rows of the sampled listings.
data_df.head(5)

Unnamed: 0,_id,listing_url,name,summary,space,description,neighborhood_overview,notes,transit,access,...,images,host,address,availability,review_scores,reviews,weekly_price,monthly_price,text_embeddings,image_embeddings
0,10006546,https://www.airbnb.com/rooms/10006546,Ribeira Charming Duplex,Fantastic duplex apartment with three bedrooms...,Privileged views of the Douro River and Ribeir...,Fantastic duplex apartment with three bedrooms...,"In the neighborhood of the river, you can find...",Lose yourself in the narrow streets and stairc...,Transport: • Metro station and S. Bento railwa...,We are always available to help guests. The ho...,...,"{'thumbnail_url': '', 'medium_url': '', 'pictu...","{'host_id': '51399391', 'host_url': 'https://w...","{'street': 'Porto, Porto, Portugal', 'suburb':...","{'availability_30': 28, 'availability_60': 47,...","{'review_scores_accuracy': 9, 'review_scores_c...","[{'_id': '58663741', 'date': 2016-01-03 05:00:...",,,"[0.0123710884, -0.0180913936, -0.016843712, -0...","[-0.1302358955, 0.1534578055, 0.0199299306, -0..."
1,10021707,https://www.airbnb.com/rooms/10021707,Private Room in Bushwick,Here exists a very cozy room for rent in a sha...,,Here exists a very cozy room for rent in a sha...,,,,,...,"{'thumbnail_url': '', 'medium_url': '', 'pictu...","{'host_id': '11275734', 'host_url': 'https://w...","{'street': 'Brooklyn, NY, United States', 'sub...","{'availability_30': 0, 'availability_60': 0, '...","{'review_scores_accuracy': 10, 'review_scores_...","[{'_id': '61050713', 'date': 2016-01-31 05:00:...",,,"[0.0153845912, -0.0348115042, -0.0093448907, 0...","[0.0340401195, 0.1742489338, -0.1572628617, 0...."
2,1001265,https://www.airbnb.com/rooms/1001265,Ocean View Waikiki Marina w/prkg,A short distance from Honolulu's billion dolla...,Great studio located on Ala Moana across the s...,A short distance from Honolulu's billion dolla...,You can breath ocean as well as aloha.,,Honolulu does have a very good air conditioned...,"Pool, hot tub and tennis",...,"{'thumbnail_url': '', 'medium_url': '', 'pictu...","{'host_id': '5448114', 'host_url': 'https://ww...","{'street': 'Honolulu, HI, United States', 'sub...","{'availability_30': 16, 'availability_60': 46,...","{'review_scores_accuracy': 9, 'review_scores_c...","[{'_id': '4765259', 'date': 2013-05-24 04:00:0...",650.0,2150.0,"[-0.0400562622, -0.0405789167, 0.000644172, 0....","[-0.1640156209, 0.1256971657, 0.6594450474, -0..."
3,10009999,https://www.airbnb.com/rooms/10009999,Horto flat with small garden,One bedroom + sofa-bed in quiet and bucolic ne...,Lovely one bedroom + sofa-bed in the living ro...,One bedroom + sofa-bed in quiet and bucolic ne...,This charming ground floor flat is located in ...,"There´s a table in the living room now, that d...","Easy access to transport (bus, taxi, car) and ...",,...,"{'thumbnail_url': '', 'medium_url': '', 'pictu...","{'host_id': '1282196', 'host_url': 'https://ww...","{'street': 'Rio de Janeiro, Rio de Janeiro, Br...","{'availability_30': 0, 'availability_60': 0, '...","{'review_scores_accuracy': None, 'review_score...",[],1492.0,4849.0,"[-0.063234821, 0.0017937823, -0.0243996996, -0...","[-0.1292964518, 0.037789464, 0.2443587631, 0.0..."
4,10047964,https://www.airbnb.com/rooms/10047964,Charming Flat in Downtown Moda,Fully furnished 3+1 flat decorated with vintag...,The apartment is composed of 1 big bedroom wit...,Fully furnished 3+1 flat decorated with vintag...,With its diversity Moda- Kadikoy is one of the...,,,,...,"{'thumbnail_url': '', 'medium_url': '', 'pictu...","{'host_id': '1241644', 'host_url': 'https://ww...","{'street': 'Kadıköy, İstanbul, Turkey', 'subur...","{'availability_30': 27, 'availability_60': 57,...","{'review_scores_accuracy': 10, 'review_scores_...","[{'_id': '68162172', 'date': 2016-04-02 04:00:...",,,"[0.023723349, 0.0064210771, -0.0339970738, -0....","[-0.1006749049, 0.4022984803, -0.1821258366, 0..."


## Data Processing

In [31]:
from llama_index.core import Document

In [32]:
# Convert the DataFrame to a list of dictionaries, one per row (listing)
docs = data_df.to_dict(orient="records")

In [167]:
# Container for the LlamaIndex Document objects built from the listings.
llama_documents = []
# Listing fields copied into each Document's metadata.
fields_to_include = [
    "amenities",
    "address",
    "availability",
    "review_scores",
    "listing_url",
]

In [168]:
# Build one Document per listing: the description is the text and the selected
# fields travel along as metadata.
# The list is rebuilt from scratch (instead of appended to) so that re-running
# this cell cannot accumulate duplicate documents, which would otherwise be
# embedded and inserted into the collection more than once.
llama_documents = [
    Document(
        text=doc["description"],
        metadata={key: doc[key] for key in fields_to_include},
    )
    for doc in docs
]

In [169]:
# Inspect the first Document to check its text and metadata.
llama_documents[0]

Document(id_='54f8e3ba-9624-4ac4-986a-e19d67a89e7c', embedding=None, metadata={'amenities': ['TV', 'Cable TV', 'Wifi', 'Kitchen', 'Paid parking off premises', 'Smoking allowed', 'Pets allowed', 'Buzzer/wireless intercom', 'Heating', 'Family/kid friendly', 'Washer', 'First aid kit', 'Fire extinguisher', 'Essentials', 'Hangers', 'Hair dryer', 'Iron', 'Pack ’n Play/travel crib', 'Room-darkening shades', 'Hot water', 'Bed linens', 'Extra pillows and blankets', 'Microwave', 'Coffee maker', 'Refrigerator', 'Dishwasher', 'Dishes and silverware', 'Cooking basics', 'Oven', 'Stove', 'Cleaning before checkout', 'Waterfront'], 'address': {'street': 'Porto, Porto, Portugal', 'suburb': '', 'government_area': 'Cedofeita, Ildefonso, Sé, Miragaia, Nicolau, Vitória', 'market': 'Porto', 'country': 'Portugal', 'country_code': 'PT', 'location': {'type': 'Point', 'coordinates': [-8.61308, 41.1413], 'is_location_exact': False}}, 'availability': {'availability_30': 28, 'availability_60': 47, 'availability_90'

## Create MongoDB Atlas Vector Store

In [186]:
from llama_index.core import StorageContext, VectorStoreIndex
from llama_index.vector_stores.mongodb import MongoDBAtlasVectorSearch
from pymongo.errors import OperationFailure

In [187]:
# Target database/collection and the names of the two search indexes.
DB_NAME = "airbnb"
COLLECTION_NAME = "listings_reviews"
VS_INDEX_NAME = "vector_index"  # vector search index
FTS_INDEX_NAME = "fts_index"  # full-text search index
# Collection handle, used later to create the search indexes.
collection = mongodb_client[DB_NAME][COLLECTION_NAME]

In [189]:
# Vector store bound to the collection named above. `embedding_key` and
# `text_key` name the document fields it uses; the search index definitions
# later in this notebook point at the same "embedding" and "text" paths.
vector_store = MongoDBAtlasVectorSearch(
    mongodb_client,
    db_name=DB_NAME,
    collection_name=COLLECTION_NAME,
    vector_index_name=VS_INDEX_NAME,
    fulltext_index_name=FTS_INDEX_NAME,
    embedding_key="embedding",
    text_key="text",
)
vector_store_context = StorageContext.from_defaults(vector_store=vector_store)
# Build the index from the documents through the vector store. This parses the
# documents into nodes and generates their embeddings (progress bars shown).
# NOTE(review): re-running this cell presumably ingests the documents again.
vector_store_index = VectorStoreIndex.from_documents(
    llama_documents, storage_context=vector_store_context, show_progress=True
)

Parsing nodes:   0%|          | 0/200 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/200 [00:00<?, ?it/s]

## Create Vector and Full-text Search Indexes

In [190]:
from pymongo.operations import SearchIndexModel

In [191]:
# Vector search index definition: the 256-dimension "embedding" field compared
# with cosine similarity, plus the two metadata paths that the retriever tool
# filters on (amenities and review score rating).
vs_model = SearchIndexModel(
    definition={
        "fields": [
            {
                "type": "vector",
                "path": "embedding",
                "numDimensions": 256,
                "similarity": "cosine",
            },
            {"type": "filter", "path": "metadata.amenities"},
            {"type": "filter", "path": "metadata.review_scores.review_scores_rating"},
        ]
    },
    name=VS_INDEX_NAME,
    type="vectorSearch",
)

In [192]:
# Full-text search index definition: dynamic mapping is switched off and only
# the "text" field is mapped (as a string).
fts_model = SearchIndexModel(
    definition={"mappings": {"dynamic": False, "fields": {"text": {"type": "string"}}}},
    name=FTS_INDEX_NAME,
    type="search",
)

In [193]:
# Create both search indexes. Each model is paired with its index name so the
# messages identify the index instead of printing the model's object repr.
for index_name, model in ((VS_INDEX_NAME, vs_model), (FTS_INDEX_NAME, fts_model)):
    try:
        collection.create_search_index(model=model)
        print(f"Successfully created index {index_name!r}.")
    except OperationFailure as exc:
        # Best effort: an already-existing index is the expected cause, but the
        # server's message is included so other failures are not mislabelled.
        print(
            f"Could not create index {index_name!r} (it may already exist). "
            f"Skipping index creation. Details: {exc}"
        )

Successfully created index for model <pymongo.operations.SearchIndexModel object at 0x7ea847d36380>.
Successfully created index for model <pymongo.operations.SearchIndexModel object at 0x7ea847f32170>.


## Creating Retriever Tool for the Agent

In [194]:
from typing import List

from llama_index.core.tools import FunctionTool
from llama_index.core.vector_stores import (
    FilterCondition,
    FilterOperator,
    MetadataFilter,
    MetadataFilters,
)

In [195]:
def get_airbnb_listings(query: str, amenities: List[str]) -> List[str]:
    """
    Provides information about Airbnb listings.

    Only listings with a review score rating of at least 80 that offer every
    requested amenity are considered.

    query (str): User query
    amenities (List[str]): List of amenities the listing must have

    Returns the URLs of the matching listings.
    """
    # Always restrict results to listings whose rating is 80 or higher.
    rating_filter = MetadataFilter(
        key="metadata.review_scores.review_scores_rating",
        value=80,
        operator=FilterOperator.GTE,
    )
    # One equality filter per requested amenity; a missing/empty list simply
    # adds no amenity constraints.
    amenity_filters = [
        MetadataFilter(
            key="metadata.amenities", value=amenity, operator=FilterOperator.EQ
        )
        for amenity in (amenities or [])
    ]

    # All filters must hold at once (logical AND).
    metadata_filters = MetadataFilters(
        filters=[rating_filter, *amenity_filters],
        condition=FilterCondition.AND,
    )

    query_engine = vector_store_index.as_query_engine(
        similarity_top_k=5,
        vector_store_query_mode="hybrid",
        alpha=0.7,
        filters=metadata_filters,
    )
    response = query_engine.query(query)
    # Only the listing URLs of the retrieved source nodes are returned.
    return [node.metadata["listing_url"] for node in response.source_nodes]

In [196]:
# Wrap the retrieval function as a LlamaIndex tool that the agent can call.
query_tool = FunctionTool.from_defaults(
    name="get_airbnb_listings", fn=get_airbnb_listings
)

## Create the AI Agent

In [197]:
from llama_index.core.agent import AgentRunner, FunctionCallingAgentWorker

In [198]:
# Worker that pairs the LLM with the retriever tool; with verbose=True the
# function calls and their outputs are printed (see the run below).
agent_worker = FunctionCallingAgentWorker.from_tools(
    [query_tool], llm=llm, verbose=True
)
# Runner wrapping the worker; this is the object queried below.
agent = AgentRunner(agent_worker)

In [199]:
# Ask the agent a sample question.
response = agent.query("Give me listings in Porto with a Waterfront.")

Added user message to memory: Give me listings in Porto with a Waterfront.
=== Calling Function ===
Calling function: get_airbnb_listings with args: {"query": "Porto", "amenities": ["Waterfront"]}
=== Function Output ===
['https://www.airbnb.com/rooms/10006546', 'https://www.airbnb.com/rooms/11207193']
=== LLM Response ===
Here are some Airbnb listings in Porto with a waterfront:

1. [Listing 1](https://www.airbnb.com/rooms/10006546)
2. [Listing 2](https://www.airbnb.com/rooms/11207193)
