# Valorant Parallel RAG Notebook
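This notebook demonstrates retrieval-augmented generation (RAG) on Databricks: documents are embedded with OpenAI embeddings and indexed with **FAISS**, and **GPT-5** answers each question from the retrieved context, with the questions processed in parallel through a Spark pandas UDF.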

In [None]:
%pip install openai faiss-cpu pandas

In [None]:
import os
import pandas as pd
import numpy as np
import faiss
from openai import OpenAI
from pyspark.sql.functions import pandas_udf
from pyspark.sql.types import StringType

# Model identifiers used by the embedding and chat-completion calls below.
EMBEDDING_MODEL = "text-embedding-3-small"
LLM_MODEL = "gpt-5"

# Read the OpenAI key from the Databricks secret scope and publish it as the
# OPENAI_API_KEY environment variable before the client is constructed.
os.environ['OPENAI_API_KEY'] = dbutils.secrets.get("solution-accelerator-cicd", "openai_api")

# Built with no explicit arguments, so the client relies on the environment
# variable set just above for its credentials.
client = OpenAI()

In [None]:
# Create Local Documents
# Writes one small text file per agent into doc_dir; these files are the
# corpus that the indexing cell reads back.
doc_dir = "/documents"
# exist_ok=True keeps the cell re-runnable without a separate (racy)
# "does it exist?" check before creating the directory.
os.makedirs(doc_dir, exist_ok=True)

# Maps file name -> file content for every agent document.
agents = {
    "jett.txt": "Jett is a duelist. Abilities: Tailwind, Cloudburst.",
    "sage.txt": "Sage is a sentinel. Abilities: Healing Orb, Resurrection.",
    "sova.txt": "Sova is an initiator. Abilities: Recon Bolt, Owl Drone.",
    "phoenix.txt": "Phoenix is a duelist. Abilities: Curveball, Blaze.",
    "viper.txt": "Viper is a controller. Abilities: Toxic Screen, Poison Cloud."
}

for filename, content in agents.items():
    # Explicit encoding so the bytes on disk do not depend on the platform default.
    with open(os.path.join(doc_dir, filename), "w", encoding="utf-8") as f:
        f.write(content)

In [None]:
# Indexing with FAISS
def _read_text(path):
    """Return the full text of the file at `path`, closing the handle afterwards."""
    with open(path, encoding="utf-8") as fh:
        return fh.read()

# Load every entry found in doc_dir. The listing is sorted so that the position
# of each document (and therefore its row in the index) is the same on every run;
# os.listdir alone returns entries in arbitrary order.
documents = [_read_text(os.path.join(doc_dir, name)) for name in sorted(os.listdir(doc_dir))]

def get_emb(texts):
    """Embed `texts` with the configured OpenAI embedding model.

    Args:
        texts: Value forwarded unchanged as the `input` argument of
            `client.embeddings.create`.

    Returns:
        A float32 NumPy array built from the `embedding` attribute of every
        item in the response's `data` list (one row per item).
    """
    response = client.embeddings.create(model=EMBEDDING_MODEL, input=texts)
    vectors = [item.embedding for item in response.data]
    # FAISS is fed these vectors later, hence the cast to float32.
    return np.array(vectors).astype('float32')

# Embed the whole corpus in one call, then build a flat L2 index whose
# dimensionality is taken from the embedding matrix's second axis.
embeddings = get_emb(documents)
embedding_dim = embeddings.shape[1]
index = faiss.IndexFlatL2(embedding_dim)
# Row i of the index corresponds to documents[i].
index.add(embeddings)

In [None]:
# Parallel RAG via Spark
# Sample questions, one per agent document.
queries = [
    "Jett abilities?",
    "Sage role?",
    "Sova tools?",
    "Phoenix flash?",
    "Viper gas?",
]
# Single-column ("question") pandas frame, converted to a Spark DataFrame.
df_q = spark.createDataFrame(pd.DataFrame({"question": queries}))

def ask_gpt(q):
    """Answer one question using the single closest document as context.

    The question is embedded with `get_emb`, the nearest document is looked up
    in `index`, and that document plus the question are sent to the chat model.

    Args:
        q: The question text.

    Returns:
        The content of the first chat-completion choice, or None when `q` is
        not a non-blank string or when the index returns no neighbour.
    """
    # A NULL or blank question cannot be embedded; returning None nulls out
    # just this row instead of raising and failing the whole batch.
    if not isinstance(q, str) or not q.strip():
        return None

    vec = get_emb([q])
    _, idx = index.search(vec, 1)
    doc_id = int(idx[0][0])
    # FAISS reports -1 when it has no neighbour to return; documents[-1] would
    # silently use the last document as (wrong) context.
    if doc_id < 0:
        return None
    ctx = documents[doc_id]

    ans = client.chat.completions.create(
        model=LLM_MODEL,
        messages=[
            {"role": "system", "content": "Use context to answer concisely."},
            {"role": "user", "content": f"Context: {ctx}\nQuestion: {q}"}
        ]
    )
    return ans.choices[0].message.content

@pandas_udf(StringType())
def parallel_rag(s: pd.Series) -> pd.Series:
    """Pandas UDF that answers every question in a batch with `ask_gpt`.

    Args:
        s: Series of question strings.

    Returns:
        Series of answers, one per input element; elements within a batch are
        processed one after another by `Series.apply`.

    NOTE(review): `ask_gpt` closes over the driver-side `client`, `index` and
    `documents` objects; confirm they serialize correctly to the executors.
    """
    answers = s.apply(ask_gpt)
    return answers

# Attach the generated answer to each question and render the result.
rag_results = df_q.withColumn("gpt_5_answer", parallel_rag(df_q.question))
display(rag_results)