In [1]:
from langchain.document_loaders import UnstructuredURLLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter, HTMLHeaderTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma

In [2]:
# Mission walkthrough pages on the Deus Ex fandom wiki, listed in mission order.
# Slugs are kept exactly as they appear in the page URLs, including the
# percent-encoded apostrophes (%27).
_WIKI_ROOT = "https://deusex.fandom.com/wiki/"
_MISSION_SLUGS = (
    "Deus_Ex_1st_Mission:_Liberty_Island",
    "Deus_Ex_2nd_Mission:_Battery_Park,_Hell%27s_Kitchen,_and_Warehouse_District",
    "Deus_Ex_3rd_Mission:_Brooklyn_Bridge_Station,_Mole_Tunnels,_and_LaGuardia_Airport",
    "Deus_Ex_4th_Mission:_Hell%27s_Kitchen_(Second_Visit)_and_NSF_Headquarters",
    "Deus_Ex_5th_Mission:_Secret_MJ12_Facility_and_UNATCO_Headquarters",
    "Deus_Ex_6th_Mission:_Hong_Kong",
    "Deus_Ex_7th_Mission:_Hell%27s_Kitchen_(Third_Visit)",
    "Deus_Ex_8th_Mission:_Brooklyn_Naval_Shipyards_and_Lower_East_Side_Cemetery",
    "Deus_Ex_9th_Mission:_Paris_Streets_and_Chateau_DuClare",
    "Deus_Ex_10th_Mission:_Knights_Templar_Cathedral_and_Morgan_Everett%27s_Home",
    "Deus_Ex_11th_Mission:_Vandenberg_Air_Force_Base",
    "Deus_Ex_12th_Mission:_Sub_Base,_Ocean_Lab,_and_Missile_Silo",
    "Deus_Ex_13th_Mission:_Area_51",
)

# Full URLs to scrape, one per mission page.
urls = [_WIKI_ROOT + slug for slug in _MISSION_SLUGS]

In [3]:
# (HTML tag, metadata key) pairs for heading levels 1-3; passed to the
# HTML header splitter to decide where sections start.
headers_to_split_on = [(f"h{level}", f"Header {level}") for level in (1, 2, 3)]

In [4]:
# Splitter that cuts HTML into sections at the configured header tags.
# return_each_element=False is passed explicitly, as in the original cell.
html_splitter = HTMLHeaderTextSplitter(
    headers_to_split_on=headers_to_split_on,
    return_each_element=False,
)

In [75]:
# Split every mission page into header-delimited sections and tag each section
# with its character count, newline count, and the page URL it came from.
docs = []

for page_url in urls:
    page_sections = html_splitter.split_text_from_url(page_url)
    for section in page_sections:
        text = section.page_content
        section.metadata.update(
            {
                'length': len(text),
                'lines': text.count("\n"),
                'url': page_url,
            }
        )
    docs += page_sections

In [81]:
# Keep only sections longer than 500 characters that also contain more than
# two newlines, printing a short preview of each section that is kept.
docs_multi = []

for candidate in docs:
    meta = candidate.metadata
    if not meta:
        continue
    if meta['length'] <= 500 or meta['lines'] <= 2:
        continue
    body = candidate.page_content
    print(meta, " -- length: ", len(body), " -- lines: ", body.count("\n"), "\n", body[:200])
    docs_multi.append(candidate)

{'length': 5383, 'lines': 53, 'url': 'https://deusex.fandom.com/wiki/Deus_Ex_1st_Mission:_Liberty_Island'}  -- length:  5383  -- lines:  53 
 Deus Ex Wiki  
Explore  
Main Page Discuss All Pages Community Interactive Maps Recent Blog Posts  
Deus Ex series  
Deus Ex Invisible War Human Revolution The Fall Mankind Divided Deus Ex GO  
Charac
{'Header 2': 'Brief walkthrough[]', 'length': 2343, 'lines': 5, 'url': 'https://deusex.fandom.com/wiki/Deus_Ex_1st_Mission:_Liberty_Island'}  -- length:  2343  -- lines:  5 
 The mission begins at the south dock. Speak with Paul Denton, who offers you a choice among three weapons. The GEP gun is likely the best choice because it is useful for destroying turrets, robots, an
{'Header 2': 'Detailed walkthrough[]', 'Header 3': 'South Dock[]', 'length': 3274, 'lines': 5, 'url': 'https://deusex.fandom.com/wiki/Deus_Ex_1st_Mission:_Liberty_Island'}  -- length:  3274  -- lines:  5 
 You begin on the south pier of Liberty Island, equipped only with a Pistol,

In [82]:
# Create embeddings and store in vector database
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

# Chroma.from_documents adds to whatever collection is already persisted in the
# directory, so re-running this cell would stack duplicate or stale chunks on
# top of earlier runs (and retrieval would then return them). Drop the existing
# collection first so the store holds exactly the chunks in docs_multi.
Chroma(persist_directory="./vectorstore", embedding_function=embeddings).delete_collection()

vectorstore = Chroma.from_documents(docs_multi, embeddings, persist_directory="./vectorstore")

In [83]:
# Open the store persisted under ./vectorstore, using the same embedding
# function; query_database searches through this handle.
vectorstore2 = Chroma(
    embedding_function=embeddings,
    persist_directory="./vectorstore",
)

In [84]:
# Similarity lookup against the persisted vector store
def query_database(query: str, k: int = 1):
    """Return the stored chunks most similar to ``query``.

    Args:
        query: Free-text question to search for.
        k: Number of results requested from the vector store (default 1).

    Returns:
        The result of ``vectorstore2.similarity_search``, or an empty list
        when that result is empty or otherwise falsy.
    """
    hits = vectorstore2.similarity_search(query, k=k)
    if not hits:
        return []
    return hits

In [85]:
# Example usage: fetch the single closest chunk for an Area 51 question
query_database("What's at area 51?", k=1)

[Document(metadata={'Header 2': 'Walkthrough[]', 'Header 3': 'Missile Silo[]', 'length': 1864, 'lines': 5, 'url': 'https://deusex.fandom.com/wiki/Deus_Ex_12th_Mission:_Sub_Base,_Ocean_Lab,_and_Missile_Silo'}, page_content='From the starting position, you will need to infiltrate the walled base. You may open the locked front gate or infiltrate the guardhouse at the SW corner of the map and use the elevated catwalk.  \nOnce inside the walled area, there are two routes into the underground facility:  \nIn the fenced area to the NE, there is a locked door (80% lock strength) leading underground. The alternate route is through the vent inside the building with the repair bot. The door to this building is blocked. However, you may enter through the building immediately to the south.  \nInside the underground facility, use the code 8456 to open the first underground door. Use it again to open another door. Then, turn left and use it on the door labeled "launch command" (note that the other do

In [15]:
from openai import OpenAI

# Shared OpenAI client, constructed with no explicit arguments; chat_with_gpt
# sends its chat-completion requests through it.
# NOTE(review): presumably credentials are picked up from the environment
# (e.g. OPENAI_API_KEY) — confirm before running on a fresh kernel.
client = OpenAI()

In [14]:
def create_prompt(retrieved_content, user_question):
    """Assemble the user prompt from retrieved context and the question.

    The prompt has three parts separated by blank lines: a ``Context:`` section
    holding ``retrieved_content``, a ``Question:`` line holding
    ``user_question``, and a trailing ``Answer:`` cue.

    Side effect: prints the retrieved content so the notebook shows what
    context was used.

    Returns:
        The assembled prompt string.
    """
    sections = (
        f"Context:\n{retrieved_content}",
        f"Question: {user_question}",
        "Answer:",
    )
    prompt = "\n\n".join(sections)
    print("Retrieved content: ", retrieved_content, "-------------- \n\n")
    return prompt

In [17]:
def chat_with_gpt(prompt, model="gpt-4o-mini"):
    """Send ``prompt`` to the chat-completions endpoint and return the reply text.

    Args:
        prompt: User message content; expected to already contain the
            retrieved context and the question.
        model: Name of the chat model to call. Defaults to ``"gpt-4o-mini"``,
            the value that was previously hard-coded, so existing callers are
            unaffected.

    Returns:
        The message content of the first choice in the response.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are an assistant helping with questions about a specific mission. Use the provided context to answer questions. Provide detailed instructions to guide the player."},
            {"role": "user", "content": prompt}
        ]
    )
    # Only the first choice is used; no `n` is passed, so one is requested.
    return response.choices[0].message.content

In [21]:
def mission_qa(user_question, k=4):
    """Answer a mission question using retrieved walkthrough chunks as context.

    Args:
        user_question: The player's question. It is used both as the
            similarity-search query and as the question in the prompt.
        k: Number of chunks to retrieve as context. Defaults to 4, the value
            that was previously hard-coded, so existing callers are unaffected.

    Returns:
        Whatever ``chat_with_gpt`` returns for the assembled prompt.
    """
    # Perform similarity search
    relevant_docs = query_database(user_question, k=k)

    # Construct context from relevant documents, one chunk per line group
    context = "\n".join(doc.page_content for doc in relevant_docs)

    # Create prompt and get response
    prompt = create_prompt(context, user_question)
    return chat_with_gpt(prompt)

In [22]:
# Example usage: run the full retrieve-then-answer pipeline for one question
question = "What do I do at area 51?"
answer = mission_qa(user_question=question)
print("\n\nResponse: ", answer)

Retrieved content:  Area 51
13th Mission: Area 51
"Area 51"
From the starting position, you will need to infiltrate the walled base. You may open the locked front gate or infiltrate the guardhouse at the SW corner of the map and use the elevated catwalk.  
Once inside the walled area, there are two routes into the underground facility:  
In the fenced area to the NE, there is a locked door (80% lock strength) leading underground. The alternate route is through the vent inside the building with the repair bot. The door to this building is blocked. However, you may enter through the building immediately to the south.  
Inside the underground facility, use the code 8456 to open the first underground door. Use it again to open another door. Then, turn left and use it on the door labeled "launch command" (note that the other door, labeled "missile silo," is not accessible at this time). The next area is well guarded by Men in Black and MJ12 commandos. Head upstairs and press the abort butto