In [18]:
from langchain.document_loaders import UnstructuredURLLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter, HTMLHeaderTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma

In [4]:
# Walkthrough pages to scrape. Only the first mission is enabled; uncomment a
# page name below to add that mission to the scrape list.
_WIKI_ROOT = "https://deusex.fandom.com/wiki/"
_mission_pages = (
    "Deus_Ex_1st_Mission:_Liberty_Island",
    # "Deus_Ex_2nd_Mission:_Battery_Park,_Hell%27s_Kitchen,_and_Warehouse_District",
    # "Deus_Ex_3rd_Mission:_Brooklyn_Bridge_Station,_Mole_Tunnels,_and_LaGuardia_Airport",
    # "Deus_Ex_4th_Mission:_Hell%27s_Kitchen_(Second_Visit)_and_NSF_Headquarters",
    # "Deus_Ex_5th_Mission:_Secret_MJ12_Facility_and_UNATCO_Headquarters",
    # "Deus_Ex_6th_Mission:_Hong_Kong",
)
urls = [_WIKI_ROOT + page for page in _mission_pages]

In [5]:
# Pair each heading tag (h1-h3) with the metadata key that will hold the
# heading text on every section produced by the splitter.
headers_to_split_on = [(f"h{level}", f"Header {level}") for level in range(1, 4)]

In [6]:
# Splitter that cuts a page at the heading tags configured above.
# return_each_element is left off explicitly; presumably this groups the text
# found under the same headers into one section — confirm against the docs.
html_splitter = HTMLHeaderTextSplitter(
    headers_to_split_on=headers_to_split_on,
    return_each_element=False,
)

In [7]:
# Download each page, split it at its headings, and stamp every resulting
# section with the page it came from so search hits can be traced to a URL.
docs = []

for page_url in urls:
    page_sections = html_splitter.split_text_from_url(page_url)
    for section in page_sections:
        section.metadata.update(url=page_url)
    docs += page_sections

In [8]:
# Inspect the header/url metadata attached to each section; sections with
# empty metadata are skipped.
for doc in docs:
    if not doc.metadata:
        continue
    print(doc.metadata, "\n")

{'url': 'https://deusex.fandom.com/wiki/Deus_Ex_1st_Mission:_Liberty_Island'} 

{'Header 2': '1st Mission', 'url': 'https://deusex.fandom.com/wiki/Deus_Ex_1st_Mission:_Liberty_Island'} 

{'Header 2': '1st Mission', 'Header 3': 'Location', 'url': 'https://deusex.fandom.com/wiki/Deus_Ex_1st_Mission:_Liberty_Island'} 

{'Header 2': '1st Mission', 'url': 'https://deusex.fandom.com/wiki/Deus_Ex_1st_Mission:_Liberty_Island'} 

{'Header 2': '1st Mission', 'Header 3': 'Next', 'url': 'https://deusex.fandom.com/wiki/Deus_Ex_1st_Mission:_Liberty_Island'} 

{'Header 2': '1st Mission', 'url': 'https://deusex.fandom.com/wiki/Deus_Ex_1st_Mission:_Liberty_Island'} 

{'Header 2': '1st Mission', 'Header 3': 'Previous', 'url': 'https://deusex.fandom.com/wiki/Deus_Ex_1st_Mission:_Liberty_Island'} 

{'Header 2': 'Additional Information', 'url': 'https://deusex.fandom.com/wiki/Deus_Ex_1st_Mission:_Liberty_Island'} 

{'Header 2': 'Additional Information', 'Header 3': 'Mission Name in Official Guide', 'url': 

In [26]:
# Create embeddings and store them in the persisted vector database.
#
# Every section gets a deterministic id ("<url>#<n>", n counting sections per
# URL). Without explicit ids each re-run of this cell appends another copy of
# every section to ./vectorstore under fresh random ids, and the duplicates
# then crowd the top-k search results. With stable ids a re-run overwrites the
# existing entries instead.
# NOTE: entries written by earlier runs with random ids are not removed by
# this; delete ./vectorstore once if the store already holds duplicates.
def _stable_doc_ids(documents):
    """Return one deterministic id per document, in order: '<url>#<n>'."""
    per_url_count = {}
    ids = []
    for document in documents:
        source = document.metadata.get("url", "unknown")
        ordinal = per_url_count.get(source, 0)
        per_url_count[source] = ordinal + 1
        ids.append(f"{source}#{ordinal}")
    return ids


embeddings = OpenAIEmbeddings(model="text-embedding-3-large")
vectorstore = Chroma.from_documents(
    docs,
    embeddings,
    ids=_stable_doc_ids(docs),
    persist_directory="./vectorstore",
)

In [23]:
# Re-open the persisted collection from disk, using the same embedding model
# for queries that was used to build the index.
vectorstore2 = Chroma(
    embedding_function=embeddings,
    persist_directory="./vectorstore",
)

In [24]:
def query_database(query: str, k: int = 1):
    """Return the k stored sections most similar to `query`.

    Runs a similarity search against the module-level `vectorstore2`.
    A falsy search result (empty list or None) is normalised to an empty list.
    """
    hits = vectorstore2.similarity_search(query, k=k)
    return hits or []

In [25]:
# Retrieval smoke test: display the three sections closest to a sample question.
query_database("When do I talk to my brother on liberty island?", k=3)

[Document(metadata={'Header 2': 'Detailed walkthrough[]', 'Header 3': 'South Dock[]', 'url': 'https://deusex.fandom.com/wiki/Deus_Ex_1st_Mission:_Liberty_Island'}, page_content='You begin on the south pier of Liberty Island, equipped only with a Pistol, a Riot Prod, and a Medkit. Alex Jacobson messages you through your infolink and tells you that your brother, Paul Denton, is on his way to meet you on the pier. Move forward and you\'ll see a figure in a trench coat heading towards your direction. This is Paul, and a conversation will trigger as soon as he\'s near you.  \nPaul tells you that the NSF terrorist group has taken over the statue and has taken Gunther Hermann, one of UNATCO\'s top agents, hostage. Paul offers you to choose one additional weapon for the mission: a sniper rifle, a GEP gun, or a mini-crossbow. Each weapon suits very different tactics. However, the GEP gun is the only weapon that is not otherwise obtainable on this mission. A sniper rifle can be found on an NSF m

In [12]:
# NOTE(review): this import sits mid-notebook; consider moving it into the
# import cell at the top so all dependencies are visible in one place.
from openai import OpenAI

# Shared API client used by chat_with_gpt. No credentials are passed here, so
# they are presumably resolved from the environment (e.g. OPENAI_API_KEY) —
# confirm before running on a fresh machine.
client = OpenAI()

In [13]:
def create_prompt(retrieved_content, user_question):
    """Assemble the user message sent to the chat model.

    The message has three parts: the retrieved context, the question, and a
    trailing "Answer:" cue. As a side effect the retrieved context is printed
    so the notebook output shows what the model was given.

    Returns the assembled prompt string.
    """
    template = "Context:\n{}\n\nQuestion: {}\n\nAnswer:"
    assembled = template.format(retrieved_content, user_question)
    print("Retrieved content: ", retrieved_content, "-------------- \n\n")
    return assembled

In [14]:
def chat_with_gpt(prompt):
    """Send `prompt` to the chat model and return the text of its reply.

    The request goes through the module-level `client`, targets the
    "gpt-4o-mini" model, and carries a fixed system message followed by
    `prompt` as the user message. Only the first returned choice is used.
    """
    system_message = {
        "role": "system",
        "content": "You are an assistant helping with questions about a specific mission. Use the provided context to answer questions. Provide detailed instructions to guide the player.",
    }
    user_message = {"role": "user", "content": prompt}

    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[system_message, user_message],
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [15]:
def mission_qa(user_question, k=2):
    """Answer a mission question using retrieved walkthrough sections.

    Steps: retrieve the `k` most similar sections with `query_database`, join
    their text with newlines into one context string, build the prompt with
    `create_prompt`, and return the reply from `chat_with_gpt`.

    Args:
        user_question: The player's question, used both for retrieval and in
            the prompt.
        k: Number of sections to retrieve as context. Defaults to 2, the
            value that was previously hard-coded.

    Returns:
        The value returned by `chat_with_gpt` for the built prompt.
    """
    # Retrieval depth is a parameter so callers can tune it per question
    # without editing this function.
    relevant_docs = query_database(user_question, k=k)

    context = "\n".join(doc.page_content for doc in relevant_docs)

    prompt = create_prompt(context, user_question)
    return chat_with_gpt(prompt)

In [16]:
# End-to-end demo: retrieve context for a sample question, ask the model,
# and print its reply.
question = (
    "How do I get into the statue? "
    "Should I go through the front or the back?"
)
answer = mission_qa(user_question=question)
print("Response: ", answer)

Retrieved content:  At the entrance you'll find Tech Sergeant Kaplan, who triggers a conversation with you. You'll have to make a choice halfway through. Your choice will have a small effect: if you choose the first one, he'll give you the code (0451) to the shed next to the helipad, but he won't give you the code if you select the second option. Afterward, he'll offer you some hardware: a Scope for 700 credits, some 10mm Ammo for 200 credits, or some Tranquilizer Darts for 60 credits. Buy the darts if you're going for a pacifist playthrough, or 10mm if you are going for regular playthrough. At the beginning, you won't have enough credits for the scope, but you can still buy it from him anytime before leaving Liberty Island for the next mission.  
When you're done talking to Kaplan, approach the green shed next to the helipad to find a Crowbar and a wooden crate which contains a multitool. There's also a wooden crate that has a Lockpick inside that's behind the shed. A key for the UNAT