In [128]:
#Uncomment to install libraries
#pip install openai langchain-openai faiss-cpu pypdf tiktoken langchain-community


# Import required libraries


In [129]:
import os
import openai
import faiss
import pickle
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI

# Set API Key
Set the OpenAI API key using an environment variable or directly in the code.

In [130]:
# Configure the OpenAI client from the OPENAI_API_KEY environment variable
# (the value is None when the variable is not set).
openai.api_key = os.environ.get("OPENAI_API_KEY")

# Test Connection
# Uncomment the following code to test the connection


#response = openai.chat.completions.create(
#    model="gpt-4o-mini",
#    messages=[{"role": "user", "content": "Say this is a test"}],
#    stream=True,
#)

#print(response, "\n")

# Create the vector store
The workflow consists of loading planetary data from PDFs, storing and retrieving information using a FAISS vector database, and generating AI-powered responses.

In [131]:

# Read every page of every PDF into one flat list
def load_pdfs(pdf_paths):
    """Load the pages of each PDF in ``pdf_paths`` into a single list.

    Args:
        pdf_paths: Iterable of paths, each handed to ``PyPDFLoader``.

    Returns:
        A flat list with the items returned by ``PyPDFLoader(path).load()``
        for every path, in input order.
    """
    return [
        page
        for pdf_path in pdf_paths
        for page in PyPDFLoader(pdf_path).load()
    ]

# Break documents into overlapping chunks for better retrieval
def split_text(documents, chunk_size=500, chunk_overlap=50):
    """Split ``documents`` into chunks with ``RecursiveCharacterTextSplitter``.

    Args:
        documents: Documents to split (passed to ``split_documents``).
        chunk_size: ``chunk_size`` setting forwarded to the splitter.
        chunk_overlap: ``chunk_overlap`` setting forwarded to the splitter.

    Returns:
        Whatever ``split_documents`` returns for ``documents``.
    """
    splitter_options = {"chunk_size": chunk_size, "chunk_overlap": chunk_overlap}
    return RecursiveCharacterTextSplitter(**splitter_options).split_documents(documents)

# Convert text into vector embeddings and store in FAISS
def create_faiss_index(texts, index_path="vectorstore", embedding_model="text-embedding-3-small"):
    """Embed ``texts`` and save the resulting FAISS index to disk.

    Args:
        texts: Non-empty sequence of documents to embed and index.
        index_path: Folder the index is saved to (default ``"vectorstore"``).
        embedding_model: OpenAI embedding model name.

    Raises:
        ValueError: If ``texts`` is empty. Without this guard the failure
            surfaces inside the vector-store construction with a much less
            helpful error.
    """
    if not texts:
        raise ValueError("create_faiss_index needs at least one document to index.")

    embeddings = OpenAIEmbeddings(model=embedding_model)
    vectorstore = FAISS.from_documents(texts, embeddings)
    vectorstore.save_local(index_path)

# Load FAISS index from file (if exists)
def load_faiss_index(index_path="vectorstore", embedding_model="text-embedding-3-small"):
    """Load the saved FAISS index, or return ``None`` when none has been saved.

    Callers test the result for truthiness to detect a missing index, so a
    missing index must produce ``None`` rather than an exception from the
    loader.

    Args:
        index_path: Folder the index was saved to (default ``"vectorstore"``).
        embedding_model: OpenAI embedding model name; should match the model
            the index was built with.

    Returns:
        The loaded vector store, or ``None`` if ``index_path`` holds no index.
    """
    # LangChain's FAISS.save_local writes "index.faiss" (default index name)
    # into the folder; its absence means there is nothing to load.
    if not os.path.isfile(os.path.join(index_path, "index.faiss")):
        return None

    # SECURITY: allow_dangerous_deserialization=True lets load_local unpickle
    # data stored next to the index. Only point this at a folder produced by
    # this notebook, never at an untrusted one.
    return FAISS.load_local(
        index_path,
        OpenAIEmbeddings(model=embedding_model),
        allow_dangerous_deserialization=True,
    )

# Look up the stored passages that best match a query
def retrieve_context(query, vector_store, top_k=3):
    """Return the text of the documents matching ``query``, newline-joined.

    Args:
        query: Search text passed to ``vector_store.similarity_search``.
        vector_store: Object exposing ``similarity_search(query, k)``.
        top_k: Passed as the second positional argument of the search.

    Returns:
        The ``page_content`` of each returned document joined with ``"\\n"``;
        an empty string when the search returns nothing.
    """
    matches = vector_store.similarity_search(query, top_k)
    page_texts = (match.page_content for match in matches)
    return "\n".join(page_texts)


In [132]:
# 🚀 Retrieve planetary context
# NOTE: a function with this same name and behavior is also defined in an
# earlier cell; this later definition is the one in effect once both have run.
def retrieve_context(query, vector_store, top_k=3):
    """Collect the text of the documents returned for ``query``.

    Args:
        query: Search text passed to ``vector_store.similarity_search``.
        vector_store: Object exposing ``similarity_search(query, k)``.
        top_k: Passed as the second positional argument of the search.

    Returns:
        Each document's ``page_content``, joined with newlines.
    """
    snippets = []
    for document in vector_store.similarity_search(query, top_k):
        snippets.append(document.page_content)
    return "\n".join(snippets)

# 🚀 Answer a question from retrieved planetary records via the OpenAI chat API
def generate_starship_response(query):
    """Answer ``query`` using context retrieved from the saved FAISS index.

    Args:
        query: The question to answer.

    Returns:
        The chat model's reply text, or a fixed fallback message when the
        index is unavailable or no context is retrieved.
    """
    archive = load_faiss_index()
    if not archive:
        return "Starfleet Archives unavailable. Please process the planetary database first."

    records = retrieve_context(query, archive)
    if not records:
        return "No planetary records found for your query."

    # The retrieved records are embedded in the user message so the model
    # answers from them.
    prompt = f"""
    You are a Starfleet AI assistant aboard the USS Enterprise.
    Answer the following question using the provided planetary records:
    Context:
    {records}
    Question:
    {query}
    Answer:
    """
    chat_messages = [
        {"role": "system", "content": "You are a Starfleet AI assistant."},
        {"role": "user", "content": prompt},
    ]
    completion = openai.chat.completions.create(
        model="gpt-4o-mini",
        messages=chat_messages,
    )
    return completion.choices[0].message.content


# Scenario execution
The API is called both without and with RAG so the two answers can be compared.

In [133]:
# Starship drops from hyperspace and AI responds
print("The USS Enterprise drops out of hyperspace next to an Zorath planet...")

# Baseline: ask the model directly, without any retrieved planetary records
query = "Tell me something about Zorath planet."
response_no_rag = openai.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{"role": "system", "content": "You are a Starfleet AI assistant."},
              {"role": "user", "content": query}]
)

print("AI (without RAG):", response_no_rag.choices[0].message.content)

# Implement RAG: load the PDF, split it into chunks, and build the FAISS index
pdf_files = ["rag_pdf_data/zorath_planet_info.pdf"]
try:
    print("Processing Starfleet planetary archives...")
    raw_docs = load_pdfs(pdf_files)
    split_docs = split_text(raw_docs)
    create_faiss_index(split_docs)
except Exception as e:
    # Report the actual cause (missing file, API error, ...) instead of
    # hiding it behind a generic message.
    print(f"Cannot load planetary database... ({type(e).__name__}: {e})")

# AI response with RAG (the index is read back from disk by the helper)
response_with_rag = generate_starship_response(query)
print("\nAI (with RAG):", response_with_rag)

# Captain asks about habitat and population
query_habitat = "What is the habitat and population of Zorath planet?"
response_habitat = generate_starship_response(query_habitat)
# Reuse the query text so the printed question cannot drift from the one sent
print(f"\nCaptain: {query_habitat}")
print("AI (with RAG):", response_habitat)


The USS Enterprise drops out of hyperspace next to an Zorath planet...
AI (without RAG): Zorath is a fictional planet in the Star Trek universe, specifically associated with the Star Trek: Starfleet Command video game series. As a lesser-known location, details about Zorath may vary across different sources or may not be extensively covered in canon media. 

In the context of games or fan-made content, Zorath might be characterized by its unique environments, potential resources, and the presence of various alien species or factions. These elements often contribute to the gameplay and storytelling within that specific media.

For precise or detailed information, you may find references to Zorath within fan wikis or specific game manuals. If you're looking for information on a different "Zorath," please clarify, and I'll do my best to assist you!
Processing Starfleet planetary archives...

AI (with RAG): Zorath is a fascinating planet primarily known as the homeworld of the Klingon spec