In [None]:

!pip install dotenv
!pip install langchain
!pip install -U langchain-openai
!pip install faiss-cpu
!pip install langchain_community

In [2]:
import json 

In [3]:

# Load the exported member messages. The encoding is given explicitly so the
# result does not depend on the platform default; member names contain
# non-ASCII characters (e.g. "Müller").
with open('user_messages.json', 'r', encoding='utf-8') as f:
    user_messages = json.load(f)

print(user_messages)



In [4]:
from dotenv import load_dotenv
# Load variables from a .env file into the process environment.
# NOTE(review): presumably this supplies the OpenAI API key used by the
# langchain_openai clients created in later cells — confirm.
load_dotenv()

True

In [5]:
from langchain_openai import OpenAIEmbeddings

# Embedding client shared by every vector store built in this notebook.
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

# Embed a throwaway string once and measure the vector, so the FAISS indexes
# are sized from the model's actual output rather than a hard-coded number.
_probe_vector = embeddings.embed_query("dimension test")
embedding_dim = len(_probe_vector)
embedding_dim

1536

In [None]:
# Member names, in the order they appear under the 'user_messages' key.
users = list(user_messages['user_messages'])

# Preview the first member's messages, one list entry per line of text.
first_user = users[0]
user_messages['user_messages'][first_user].split('\n')

In [7]:
# Display the list of member names.
users

['Sophia Al-Farsi',
 'Fatima El-Tahir',
 'Armand Dupont',
 'Hans Müller',
 'Layla Kawaguchi',
 'Amina Van Den Berg',
 'Vikram Desai',
 "Lily O'Sullivan",
 'Lorenzo Cavalli',
 'Thiago Monteiro']

In [8]:
import faiss
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document



# Position of each member name under the 'user_messages' key, keyed by name.
username_indexes = {
    name: position
    for position, name in enumerate(user_messages['user_messages'])
}

def create_vector_store(dict_messages):
    """
    Create an empty in-memory FAISS vector store sized for `embeddings`.

    Args:
        dict_messages: Currently unused; kept so the existing call signature
            stays valid. TODO(review): confirm whether this helper was meant
            to populate the store from these messages.

    Returns:
        A FAISS vector store backed by a flat L2 index of dimension
        `embedding_dim`, with an empty in-memory docstore.
    """
    index = faiss.IndexFlatL2(embedding_dim)

    vector_store = FAISS(
        embedding_function=embeddings,
        index=index,
        docstore=InMemoryDocstore(),
        index_to_docstore_id={},
    )

    # The store used to be built and then dropped (the function implicitly
    # returned None); return it so calling this helper has a usable result.
    return vector_store



def create_in_memory_faiss_vector_store(msgs: list[str]) -> FAISS:
    """
    Create an in-memory FAISS vector store for a given list of texts.

    Each non-blank message becomes one document whose ``metadata["index"]``
    is its position in ``msgs``, so search hits can later be sorted back into
    the original message order.

    Args:
        msgs: Message texts in their original order. Blank or
            whitespace-only entries are skipped.

    Returns:
        A FAISS vector store containing one document per non-blank message.
        If there are none, the store is returned empty.
    """
    index = faiss.IndexFlatL2(embedding_dim)
    vector_store = FAISS(
        embedding_function=embeddings,
        index=index,
        docstore=InMemoryDocstore(),
        index_to_docstore_id={},
    )

    # Splitting text on "\n" can yield empty strings (e.g. a trailing
    # newline). They carry no content to search, so they are not embedded;
    # the remaining documents keep their original positions as "index".
    docs = [
        Document(page_content=msg, metadata={"index": i})
        for i, msg in enumerate(msgs)
        if msg.strip()
    ]

    # Only call the embedding backend when there is something to add.
    if docs:
        vector_store.add_documents(docs)
    return vector_store


# One FAISS store per member, built from that member's newline-separated
# message text.
vector_stores = {
    member: create_in_memory_faiss_vector_store(raw_text.split('\n'))
    for member, raw_text in user_messages['user_messages'].items()
}


In [79]:
# Display the mapping of member name -> FAISS store.
vector_stores

{'Sophia Al-Farsi': <langchain_community.vectorstores.faiss.FAISS at 0x1b7d405d6c0>,
 'Fatima El-Tahir': <langchain_community.vectorstores.faiss.FAISS at 0x1b7d405d7e0>,
 'Armand Dupont': <langchain_community.vectorstores.faiss.FAISS at 0x1b7d405e860>,
 'Hans Müller': <langchain_community.vectorstores.faiss.FAISS at 0x1b7d405e890>,
 'Layla Kawaguchi': <langchain_community.vectorstores.faiss.FAISS at 0x1b7d405ec20>,
 'Amina Van Den Berg': <langchain_community.vectorstores.faiss.FAISS at 0x1b7d405eb90>,
 'Vikram Desai': <langchain_community.vectorstores.faiss.FAISS at 0x1b7d405e950>,
 "Lily O'Sullivan": <langchain_community.vectorstores.faiss.FAISS at 0x1b7d405e3b0>,
 'Lorenzo Cavalli': <langchain_community.vectorstores.faiss.FAISS at 0x1b7d4026770>,
 'Thiago Monteiro': <langchain_community.vectorstores.faiss.FAISS at 0x1b78b7a4040>}

In [None]:

# Persist each member's store under vector_stores/, in a folder named after
# the first whitespace-separated token of the member's name.
for username in users:
    first_name = username.split()[0]
    vector_store_t = vector_stores[username]
    vector_store_t.save_local(f"vector_stores/username_{first_name}_vector_store")


In [93]:
# Reload the per-member stores from disk, keyed by full member name.
# NOTE: allow_dangerous_deserialization=True permits unpickling of the saved
# files; only load directories that this notebook wrote itself.
vector_stores1 = {
    username: FAISS.load_local(
        f"vector_stores/username_{username.split()[0]}_vector_store",
        embeddings,
        allow_dangerous_deserialization=True,
    )
    for username in users
}

In [107]:
def get_relevant_member_messages_test(username: str, query: str, k: int = 5) -> str:
    """
    Return the messages most relevant to `query` for one member.

    Looks the member up in `vector_stores1`, runs a similarity search, and
    joins the hits in their original message order.

    Args:
        username: Key of the member's store in `vector_stores1`.
        query: Text to search for.
        k: Maximum number of messages to retrieve.

    Returns:
        The matching messages joined with "\\n ", ordered by each document's
        ``metadata["index"]``. An empty string if there is no store for
        `username`, so the return type is always `str`.
    """
    vector_store = vector_stores1.get(username)
    # A missing key is the only "no store" case, so test identity rather
    # than truthiness of the store object.
    if vector_store is None:
        return ""
    docs = vector_store.similarity_search(query, k=k)
    # Sort a copy so the list returned by the store is left untouched.
    ordered = sorted(docs, key=lambda doc: doc.metadata["index"])
    return "\n ".join(doc.page_content for doc in ordered)

In [None]:

# Draft system prompt; no other cell visible in this notebook references it.
# The prompt requires usernames to match the list exactly, so every name here
# must equal the data key character for character. "Lily O'Sullivan" therefore
# uses a plain ASCII apostrophe, as in the loaded data.
system_prompt1 = """ You are an assistant designed to retrieve member-specific information using semantic search. You do **not** store or generate personal details yourself; instead, you rely entirely on the provided function interface.

You can answer questions **only** about the following users:

**Sophia Al-Farsi,
Fatima El-Tahir,
Armand Dupont,
Hans Müller,
Layla Kawaguchi,
Amina Van Den Berg,
Vikram Desai,
Lily O'Sullivan,
Lorenzo Cavalli,
Thiago Monteiro.**

To answer any question about one of these users, you **must** call the function:

**get_relevant_member_messages(username, query)**

* **username** must exactly match one name from the list above.
* **query** should be a detailed, descriptive explanation of what the user is asking, capturing intent, context, and topic for improved semantic-similarity retrieval and do not mention user name in the query .

If the user asks about **anyone not in the list**, you must reply: “I don’t have data about this user.”

If the user asks about **any of the listed users**, you must respond **only** by calling get_relevant_member_messages.

You may not answer directly; the function is the only source of truth for member-related information. """

In [None]:
from langchain_openai import ChatOpenAI
from langchain.tools import tool
from langchain.agents import create_agent


@tool
def get_relevant_member_messages(username: str, query: str) -> str:
    """
    Function to get relevant information about the query asked about the member or user
    with the help of username and query, and return the information most closely
    related to that query.

    Args:
        username: Exact user name of the member whose messages should be searched.
        query: A detailed description of what information to look for from the query.
    """
    vector_store = vector_stores.get(username)
    if vector_store is None:
        # The lookup is an exact string match. Retry once with surrounding
        # whitespace removed and typographic apostrophes folded to ASCII, so
        # "Lily O’Sullivan" still resolves to the "Lily O'Sullivan" key.
        normalized = username.strip().replace("\u2019", "'").replace("\u2018", "'")
        vector_store = vector_stores.get(normalized)
    if vector_store is None:
        return "no messages exists for this user"
    docs = vector_store.similarity_search(query, k=15)
    # Present the hits in their original message order, not by similarity.
    docs.sort(key=lambda doc: doc.metadata["index"])
    return "\n ".join([doc.page_content for doc in docs])

# Chat model that drives the agent; temperature is set to 0.
model = ChatOpenAI(model="gpt-4o-mini", temperature=0)



# System prompt passed to the agent. It requires usernames to match the list
# exactly, so every name here must equal the data key character for
# character. "Lily O'Sullivan" therefore uses a plain ASCII apostrophe; a
# typographic one would make that member impossible to look up.
system_prompt = """
You are an assistant designed to answer questions about member-specific information using semantic search. 
You do not store or generate personal details yourself; all information must come from the provided function interface.

You can answer questions only about the following users:

Sophia Al-Farsi,
Fatima El-Tahir,
Armand Dupont,
Hans Müller,
Layla Kawaguchi,
Amina Van Den Berg,
Vikram Desai,
Lily O'Sullivan,
Lorenzo Cavalli,
Thiago Monteiro.

To answer any question about one of these users, you must call the function:

get_relevant_member_messages(username, query)

• username must exactly match one name in the list above.
• query must NOT contain the user’s name or terms like “the user,” “this member,” etc.  
  It should contain only the information being requested — the topic, context, or details needed to understand what the user wants to know.

If the user asks about anyone not in the list, respond by saying I don’t have any data about them, mentioning the name from the query.

You may not answer directly; the function is the only source of truth for member-related information.
If the user asks about any of the listed users, respond only by calling get_relevant_member_messages.

Do not explain how the information was found.  
Do not say phrases like “I found,” “my search shows,” or “according to the data.”  
The final answer must read as a direct response, as if the information is simply being provided naturally.
"""

# Combine the chat model, the retrieval tool and the system prompt into an
# agent.
agent_tools = [get_relevant_member_messages]
agent = create_agent(
    model,
    tools=agent_tools,
    system_prompt=system_prompt,
)

In [68]:

# Ask the agent a sample question about one member and inspect the exchange.
question = f" what did {users[6]} do at the restaurant ?"
response = agent.invoke({"messages": [{"role": "user", "content": question}]})
conversation = response['messages']

print(conversation)
print("\n --------------------------------------------")
# Index 1 is the message right after the user's question; in the recorded
# run it is the model reply that carries the tool call.
print(conversation[1].tool_calls)

print("\n --------------------------------------------")
print(conversation[-1].content)


[HumanMessage(content=' what did Vikram Desai do at the restaurant ?', additional_kwargs={}, response_metadata={}, id='ff4b9c9c-4011-4531-abb5-97fc72921015'), AIMessage(content='', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 28, 'prompt_tokens': 416, 'total_tokens': 444, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_provider': 'openai', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_560af6e559', 'id': 'chatcmpl-CbNJ0foGV2cl4wpF2IotvDFKIf6R9', 'service_tier': 'default', 'finish_reason': 'tool_calls', 'logprobs': None}, id='lc_run--df50ac7f-fa22-4d29-8c38-4725c08c2d1a-0', tool_calls=[{'name': 'get_relevant_member_messages', 'args': {'username': 'Vikram Desai', 'query': 'activity at the restaurant'}, 'id': 'call_0L5dk0BQKPSY5paV7AW742Hl', 'type': 'tool_call'

In [17]:
# `get_relevant_member_messages` is wrapped by @tool, so it is run through
# the tool interface with a dict of arguments instead of being called like a
# plain function.
print(get_relevant_member_messages.invoke({"username": users[2], "query": "likes"}))

I'd like my room stocked with a selection of fine teas.
 I’d love some feedback on the recent trip to Tokyo with curated shopping tours.
 Add my preference for down-filled pillows to every booking note.
 We enjoyed the exclusive wine tasting immensely, would love a repeat.
 Wonderful service at the restaurant last night; it was truly memorable.


In [24]:
# `get_relevant_member_messages` is wrapped by @tool, so it is run through
# the tool interface with a dict of arguments instead of being called like a
# plain function.
print(get_relevant_member_messages.invoke({"username": users[2], "query": "enjoy"}))

The yacht charter went splendidly, cheers for that touch.
 We enjoyed the exclusive wine tasting immensely, would love a repeat.
 Wonderful service at the restaurant last night; it was truly memorable.
 Our VIP theater experience was superb, props to the team.
 Arrange for a camel ride during my stay in Morocco; kids are excited!


In [None]:


# Alternative wording of the system prompt. Reassigning `system_prompt` here
# does not change an agent that was already created with the earlier value;
# call create_agent again to apply it.
# Usernames must match the list exactly, so "Lily O'Sullivan" uses a plain
# ASCII apostrophe to equal the data key character for character.
system_prompt = """ You are an assistant designed to retrieve member-specific information using semantic search. You do **not** store or generate personal details yourself; instead, you rely entirely on the provided function interface.

You can answer questions **only** about the following users:

**Sophia Al-Farsi,
Fatima El-Tahir,
Armand Dupont,
Hans Müller,
Layla Kawaguchi,
Amina Van Den Berg,
Vikram Desai,
Lily O'Sullivan,
Lorenzo Cavalli,
Thiago Monteiro.**

To answer any question about one of these users, you **must** call the function:

**get_relevant_member_messages(username, query)**

* **username** must exactly match one name from the list above.
* **query** should be a detailed, descriptive explanation of what the user is asking, capturing intent, context, and topic for improved semantic-similarity retrieval.

  * Example: instead of "hobbies?", use:
    "Retrieve messages where this member discussed their hobbies, interests, or recurring leisure activities."

If the user asks about **anyone not in the list**, you must reply: “I don’t have data about this user.”

If the user asks about **any of the listed users**, you must respond **only** by calling get_relevant_member_messages.

You may not answer directly; the function is the only source of truth for member-related information. """


