In [8]:
from langchain.document_loaders import UnstructuredURLLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter, HTMLHeaderTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma

In [2]:
# List of URLs to scrape
urls = [
    "https://deusex.fandom.com/wiki/Deus_Ex_1st_Mission:_Liberty_Island",
    "https://deusex.fandom.com/wiki/Deus_Ex_2nd_Mission:_Battery_Park,_Hell%27s_Kitchen,_and_Warehouse_District",
    "https://deusex.fandom.com/wiki/Deus_Ex_3rd_Mission:_Brooklyn_Bridge_Station,_Mole_Tunnels,_and_LaGuardia_Airport",
    "https://deusex.fandom.com/wiki/Deus_Ex_4th_Mission:_Hell%27s_Kitchen_(Second_Visit)_and_NSF_Headquarters",
    "https://deusex.fandom.com/wiki/Deus_Ex_5th_Mission:_Secret_MJ12_Facility_and_UNATCO_Headquarters",
    "https://deusex.fandom.com/wiki/Deus_Ex_6th_Mission:_Hong_Kong"
]

In [4]:
headers_to_split_on = [
    ("h1", "Header 1"),
    ("h2", "Header 2"),
    ("h3", "Header 3"),
]

In [5]:
html_splitter = HTMLHeaderTextSplitter(
    headers_to_split_on,
    return_each_element=False,
)

In [29]:
docs = []

for u in urls:
    splits = html_splitter.split_text_from_url(u)
    for split in splits:
        split.metadata['url'] = u
    docs.extend(splits)

In [32]:
for doc in docs:
    if doc.metadata:
        print(doc.metadata, "\n")

{'url': 'https://deusex.fandom.com/wiki/Deus_Ex_1st_Mission:_Liberty_Island'} 

{'Header 2': '1st Mission', 'url': 'https://deusex.fandom.com/wiki/Deus_Ex_1st_Mission:_Liberty_Island'} 

{'Header 2': '1st Mission', 'Header 3': 'Location', 'url': 'https://deusex.fandom.com/wiki/Deus_Ex_1st_Mission:_Liberty_Island'} 

{'Header 2': '1st Mission', 'url': 'https://deusex.fandom.com/wiki/Deus_Ex_1st_Mission:_Liberty_Island'} 

{'Header 2': '1st Mission', 'Header 3': 'Next', 'url': 'https://deusex.fandom.com/wiki/Deus_Ex_1st_Mission:_Liberty_Island'} 

{'Header 2': '1st Mission', 'url': 'https://deusex.fandom.com/wiki/Deus_Ex_1st_Mission:_Liberty_Island'} 

{'Header 2': '1st Mission', 'Header 3': 'Previous', 'url': 'https://deusex.fandom.com/wiki/Deus_Ex_1st_Mission:_Liberty_Island'} 

{'Header 2': 'Additional Information', 'url': 'https://deusex.fandom.com/wiki/Deus_Ex_1st_Mission:_Liberty_Island'} 

{'Header 2': 'Additional Information', 'Header 3': 'Mission Name in Official Guide', 'url': 

In [33]:
# Create embeddings and store in vector database
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")
vectorstore = Chroma.from_documents(docs, embeddings)

In [54]:
# Function to query the database
def query_database(query: str, k: int = 1):
    matching_docs = vectorstore.similarity_search(query, k=k)
    return matching_docs if matching_docs else []

In [82]:
# Example usage
query_database("When do I talk to my brother on liberty island?", 3)

[Document(metadata={'Header 2': '2nd Mission', 'Header 3': 'Previous'}, page_content='1st Mission: Liberty Island'),
 Document(metadata={'Header 2': '2nd Mission', 'Header 3': 'Previous', 'url': 'https://deusex.fandom.com/wiki/Deus_Ex_2nd_Mission:_Battery_Park,_Hell%27s_Kitchen,_and_Warehouse_District'}, page_content='1st Mission: Liberty Island'),
 Document(metadata={'Header 2': 'Detailed walkthrough[]', 'Header 3': 'South Dock[]'}, page_content='You begin on the south pier of Liberty Island, equipped only with a Pistol, a Riot Prod, and a Medkit. Alex Jacobson messages you through your infolink and tells you that your brother, Paul Denton, is on his way to meet you on the pier. Move forward and you\'ll see a figure in a trench coat heading towards your direction. This is Paul, and a conversation will trigger as soon as he\'s near you.  \nPaul tells you that the NSF terrorist group has taken over the statue and has taken Gunther Hermann, one of UNATCO\'s top agents, hostage. Paul offe

In [35]:
from openai import OpenAI

client = OpenAI()

In [101]:
def create_prompt(retrieved_content, user_question):
    prompt = f"Context:\n{retrieved_content}\n\nQuestion: {user_question}\n\nAnswer:"
    print("Retrieved content: ", retrieved_content, "-------------- \n\n")
    return prompt

In [96]:
def chat_with_gpt(prompt):
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "You are an assistant helping with questions about a specific mission. Use the provided context to answer questions. Provide detailed instructions to guide the player."},
            {"role": "user", "content": prompt}
        ]
    )
    return response.choices[0].message.content

In [111]:
def mission_qa(user_question):
    # Perform similarity search
    relevant_docs = query_database(user_question, k=2)  # Adjust k as needed
    
    # Construct context from relevant documents
    context = "\n".join([doc.page_content for doc in relevant_docs])
    
    # Create prompt and get response
    prompt = create_prompt(context, user_question)
    response = chat_with_gpt(prompt)
    
    return response

In [112]:
# Example usage
question = "After I escape the jail cell in UNATCO HQ, how do I get to the robot maintenance area?"
answer = mission_qa(question)
print("Response: ", answer)

Retrieved content:  The compound can be entered through either the front entrance or the rear entrance.  
Going in the front way, though straightforward, has its hazards. A wheeled Security Bot patrols the area immediately in front of the statue, and there are also three or four terrorists nearby, one of which is especially dangerous as he is armed with a sniper rifle.  
Exit the northern docks and head East, following the same path you took before. If you took the GEP Gun at the beginning, use it to destroy the bot from a distance (hold the crosshair over the target until the weapon beeps continuously, signaling that it is locked on, and then fire). Preferably, engage the bot when it is patrolling on the south side of its route to minimize the chance that the nearby terrorists will hear the explosion. Once the bot is taken care of, either neutralize the remaining terrorists or simply avoid them. There is a room in the center of this area with a locked chest containing ammo and a credi