In [3]:
import os
from dotenv import load_dotenv
from pymongo import MongoClient
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import MongoDBAtlasVectorSearch
from langchain.schema import Document


In [4]:
# Load variables from a local .env file (if present) into the process environment.
load_dotenv()


def _require_env(name):
    """Return the value of environment variable ``name``.

    Raises:
        RuntimeError: if the variable is unset or empty, so a missing secret
            fails here with a readable message instead of surfacing later as
            a ``TypeError`` or a connection to the wrong host.
    """
    value = os.getenv(name)
    if not value:
        raise RuntimeError(
            f"Required environment variable {name!r} is not set; "
            "add it to your .env file or export it before starting the kernel."
        )
    return value


# --- MongoDB settings ---
mongo_uri = _require_env("MONGO_CONNECTION_STRING")
db_name = "WH"
vector_store_collection_name = "vector_whbr"
# NOTE(review): "index_name" looks like a placeholder. It must match the name of
# the search index defined on the collection; it can be overridden through the
# environment without editing the notebook.
ATLAS_VECTOR_SEARCH_INDEX_NAME = os.getenv("ATLAS_VECTOR_SEARCH_INDEX_NAME", "index_name")

# --- OpenAI settings ---
# Validate the key before building the embeddings client that depends on it.
os.environ["OPENAI_API_KEY"] = _require_env("OPENAI_API_KEY")
embeddings = OpenAIEmbeddings()

# --- LangSmith tracing settings ---
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_API_KEY"] = _require_env("LANGSMITH_API_KEY")
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
os.environ["LANGCHAIN_PROJECT"] = "PolicyBot"


In [6]:
# Connect to MongoDB and resolve the vector store collection through two
# equivalent routes, then report the document count seen through each handle.
mongo_client = MongoClient(mongo_uri)
vector_store_db = mongo_client.get_database(db_name)
vector_store_collection = vector_store_db.get_collection(vector_store_collection_name)
atlas_collection = mongo_client.get_database(db_name).get_collection(vector_store_collection_name)

for collection_handle in (vector_store_collection, atlas_collection):
    print("Number of documents in vector store:", collection_handle.count_documents({}))



Number of documents in vector store: 15478
Number of documents in vector store: 15478


In [7]:
# Fetch a few documents from the vector store to inspect.
# The embedding vector is left out of the printed document (only its length is
# shown) so the output stays readable, and a document without an 'embedding'
# field is reported instead of raising KeyError.
sample_docs = vector_store_collection.find().limit(5)
for doc in sample_docs:
    vector = doc.get('embedding')
    doc_without_vector = {field: value for field, value in doc.items() if field != 'embedding'}
    print(doc_without_vector)
    if vector is None:
        print("no 'embedding' field on this document")
    else:
        print(len(vector))

{'_id': ObjectId('66a94630fff3d0e48a255e28'), 'text': '8:01 P.M. EDT THE PRESIDENT: My fellow Americans, I’m speaking to you tonight from behind the Resolute Desk in the Oval Office. In this sacred space, I’m surrounded by portraits of extraordinary American presidents. Thomas Jefferson, who wrote the immortal words that guide this nation. George Washington, who showed us presidents are not kings. Abraham Lincoln, who implored us to reject malice. Franklin Roosevelt, who inspired us to reject fear. I revere this office, but I love my country more. It’s been the honor of my life to serve as your president. But in the defense of democracy, which is at stake, I think it’s more important than any title. I draw strength and I find joy in working for the American people. But this sacred task of perfecting our Union — it’s not about me. It’s about you, your families, your futures. It’s about “We the People.” We can never forget that, and I never have. I’ve made it clear that I believe America

In [8]:
# Smoke-test the embeddings client: embed one short string and report the
# length of the vector that comes back.
sample_text = "Sample text for embedding"
embedding = embeddings.embed_query(sample_text)
embedding_length = len(embedding)
print("Sample embedding:", embedding_length)

Sample embedding: 1536


In [33]:
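# Create the MongoDB Atlas vector store instance.
# FIXME: the collection is reachable (the document count above succeeds), but
# similarity_search below returns 0 results. Cause not confirmed; first check that
# a vector search index named ATLAS_VECTOR_SEARCH_INDEX_NAME exists on this collection.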
# Wrap the Atlas collection in a LangChain vector store. The field names are
# spelled out to match the stored documents ('text' and 'embedding').
vector_store = MongoDBAtlasVectorSearch(
    collection=atlas_collection,
    embedding=embeddings,
    index_name=ATLAS_VECTOR_SEARCH_INDEX_NAME,
    text_key="text",
    embedding_key="embedding",
)

# Retrieval parameters: number of results to return and the question to search for.
k = 3
query = "How have Biden's tax policies impacted a 23 year old making 100k a year?"

In [34]:
# Run the similarity search and report how many documents came back.
retrieved_documents = vector_store.similarity_search(query=query, k=k)
print(len(retrieved_documents))

# An empty result is otherwise easy to miss: the search does not raise here, and
# the result-printing loop simply prints nothing. Surface it explicitly.
# NOTE(review): the suggested cause is an assumption, not a confirmed diagnosis.
if not retrieved_documents:
    print(
        "WARNING: similarity_search returned no documents. "
        f"Check that a vector search index named {ATLAS_VECTOR_SEARCH_INDEX_NAME!r} "
        "exists on the collection and covers the 'embedding' field "
        "with the same number of dimensions as the embedding model."
    )

0


In [28]:
# Print each retrieved document: its rank, selected metadata fields (falling
# back to 'N/A' when a field is absent), and the first 500 characters of text.
separator = "\n" + "-"*80 + "\n"
for position, result in enumerate(retrieved_documents, start=1):
    metadata = result.metadata
    excerpt = result.page_content[:500]  # first 500 characters of content
    print(f"Result {position}:\n")
    print(f"Title: {metadata.get('title', 'N/A')}")
    print(f"Author: {metadata.get('author', 'N/A')}")
    print(f"Date: {metadata.get('date', 'N/A')}")
    print(f"Category: {metadata.get('category', 'N/A')}")
    print(f"URL: {metadata.get('url', 'N/A')}\n")
    print(f"Content: {excerpt}...")
    print(separator)

In [6]:
from langchain_openai import ChatOpenAI

# Smoke-test the chat model: send one greeting and display the reply as the
# cell's output.
llm = ChatOpenAI()
greeting = "Hello, world!"
llm.invoke(greeting)

AIMessage(content='Hello! How can I assist you today?', response_metadata={'token_usage': {'completion_tokens': 9, 'prompt_tokens': 11, 'total_tokens': 20}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-a9117bf9-7ddd-4eba-8f62-263ad206ec43-0', usage_metadata={'input_tokens': 11, 'output_tokens': 9, 'total_tokens': 20})