In [12]:
from langchain.document_loaders import UnstructuredURLLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma

In [25]:
# List of URLs to scrape
urls = [
    "https://deusex.fandom.com/wiki/Deus_Ex_1st_Mission:_Liberty_Island",
    "https://deusex.fandom.com/wiki/Deus_Ex_2nd_Mission:_Battery_Park,_Hell%27s_Kitchen,_and_Warehouse_District",
    "https://deusex.fandom.com/wiki/Deus_Ex_3rd_Mission:_Brooklyn_Bridge_Station,_Mole_Tunnels,_and_LaGuardia_Airport",
    "https://deusex.fandom.com/wiki/Deus_Ex_4th_Mission:_Hell%27s_Kitchen_(Second_Visit)_and_NSF_Headquarters",
    "https://deusex.fandom.com/wiki/Deus_Ex_5th_Mission:_Secret_MJ12_Facility_and_UNATCO_Headquarters",
    "https://deusex.fandom.com/wiki/Deus_Ex_6th_Mission:_Hong_Kong"
]

In [26]:
# Load the content
loader = UnstructuredURLLoader(urls=urls, remove_selectors=["nav", "header", "footer"])
data = loader.load()

In [75]:
# Process the content
text_splitter = RecursiveCharacterTextSplitter(chunk_size=5000, chunk_overlap=500)
texts = text_splitter.split_documents(data)

In [76]:
# Create embeddings and store in vector database
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
vectorstore = Chroma.from_documents(texts, embeddings)

In [79]:
# Function to query the database
def query_database(query: str, k: int = 2):
    matching_docs = vectorstore.similarity_search(query, k=k)
    for doc in matching_docs:
        print(f"Content: {doc.page_content}...")
        print(f"Source URL: {doc.metadata['source']}")
        print(f"Other metadata: {doc.metadata}")
        print("---")

In [80]:
# Example usage
query_database("What is in the MJ12 place?")

Content: In Chow's penthouse, there's a lantern switch that opens a direct passage to the MJ12 quarters.

In the lobby of Queen's Tower, there's a second elevator (keypad code 3444) that leads to an empty floor where a junkie is encountered. On this floor, there is a locked fence that leads to the MJ12 quarters.

On the right exterior side of the Queen's Tower (where a robot is patrolling), there is a locked fence. Open the fence to access the elevator shaft of the penthouse elevator. Activate the keypad (code 1709) to the roof area, gaining an exploration bonus once you reach the top portion of the shaft. You can then enter the MJ12 quarters from the roof via a door locked by a keypad....
Source URL: https://deusex.fandom.com/wiki/Deus_Ex_6th_Mission:_Hong_Kong
Other metadata: {'source': 'https://deusex.fandom.com/wiki/Deus_Ex_6th_Mission:_Hong_Kong'}
---
Content: In Chow's penthouse, there's a lantern switch that opens a direct passage to the MJ12 quarters.

In the lobby of Queen's T