In [1]:
from langchain.document_loaders import UnstructuredURLLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma

In [2]:
# Deus Ex wiki walkthrough pages (missions 1 through 6) that will be scraped
urls = [
    f"https://deusex.fandom.com/wiki/{page}"
    for page in (
        "Deus_Ex_1st_Mission:_Liberty_Island",
        "Deus_Ex_2nd_Mission:_Battery_Park,_Hell%27s_Kitchen,_and_Warehouse_District",
        "Deus_Ex_3rd_Mission:_Brooklyn_Bridge_Station,_Mole_Tunnels,_and_LaGuardia_Airport",
        "Deus_Ex_4th_Mission:_Hell%27s_Kitchen_(Second_Visit)_and_NSF_Headquarters",
        "Deus_Ex_5th_Mission:_Secret_MJ12_Facility_and_UNATCO_Headquarters",
        "Deus_Ex_6th_Mission:_Hong_Kong",
    )
]

In [3]:
# Fetch every page listed in `urls` and load it into documents.
# NOTE(review): `remove_selectors` is passed through as an extra keyword;
# confirm that UnstructuredURLLoader actually honors it (TODO verify).
loader = UnstructuredURLLoader(
    urls=urls,
    remove_selectors=["nav", "header", "footer"],
)
data = loader.load()

In [11]:
# Break the loaded documents into overlapping chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
texts = text_splitter.split_documents(documents=data)

In [12]:
# Embed each chunk with the OpenAI embedding model and index it in Chroma
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
vectorstore = Chroma.from_documents(
    documents=texts,
    embedding=embeddings,
)

In [13]:
# Function to query the database
def query_database(query: str, k: int = 2) -> None:
    """Print the ``k`` chunks in ``vectorstore`` most similar to ``query``.

    For every hit the chunk text, its source URL and the complete metadata
    dict are printed, followed by a ``---`` separator line. When the search
    returns nothing, a single explanatory line is printed instead.

    Args:
        query: Text passed to ``vectorstore.similarity_search``.
        k: Number of results to request (default 2).

    Returns:
        None; the results are only printed.
    """
    matching_docs = vectorstore.similarity_search(query, k=k)
    if not matching_docs:
        # Say so explicitly instead of silently printing nothing.
        print("No matching documents found.")
        return
    for doc in matching_docs:
        # The trailing "..." is a fixed suffix; the chunk text is printed in full.
        print(f"Content: {doc.page_content}...")
        # Tolerate documents without a "source" entry instead of raising KeyError.
        print(f"Source URL: {doc.metadata.get('source', 'unknown')}")
        print(f"Other metadata: {doc.metadata}")
        print("---")

In [19]:
# Demo: ask a sample question and print the closest chunks
query_database(query="Where do I find Lebedev?")

Content: Speak to Lebedev, who surrenders and tells JC Denton more about the conspiracy. Lebedev tells JC that he was created by a secret organization crazed for power, and that the same corporation that manufactures Ambrosia also created the Plague. At this point, Anna Navarre will require that you kill Lebedev, as ordered, or she will kill Lebedev herself. The options are: kill Lebedev, allow Anna to kill Lebedev, or kill Anna.

If you kill Lebedev, you will earn no bonus skill points. However, Manderley will be pleased with your actions.

If you allow Anna to kill Lebedev, you will earn 175 skill points.

If you kill Anna and Lebedev does not die, you will earn 200 skill points when you speak to Lebedev again. Lebedev will then tell you more about the conspiracy, and will name the organization behind it all, Majestic 12, a group powerful enough to control the United Nations....
Source URL: https://deusex.fandom.com/wiki/Deus_Ex_3rd_Mission:_Brooklyn_Bridge_Station,_Mole_Tunnels,_and