In [7]:
# Uncomment to install the required libraries (%pip targets the running kernel's environment)
# %pip install openai langchain-openai faiss-cpu pypdf tiktoken langchain-community


# Import required libraries


In [1]:
import os
import openai
import faiss
import pickle
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI

# Set API Key
Read the OpenAI API key from the `OPENAI_API_KEY` environment variable. Avoid hardcoding the key in the notebook, because it would be saved and shared together with the file.

In [2]:
# Read the OpenAI API key from the OPENAI_API_KEY environment variable.
# The value is None when the variable is not set.
openai.api_key = os.environ.get("OPENAI_API_KEY")

# Test Connection
# Uncomment the following code to test the connection


#response = openai.chat.completions.create(
#    model="gpt-4o-mini",
#    messages=[{"role": "user", "content": "Say this is a test"}],
#    stream=True,
#)

#print(response, "\n")

# Create the vector store
The workflow consists of loading document data from PDFs, storing and retrieving information using a FAISS vector database, and generating AI-powered responses.

In [3]:

# Load every page of each PDF into one flat list
def load_pdfs(pdf_paths):
    """Read the given PDF files and return all of their pages as one list.

    Each path is opened with ``PyPDFLoader`` and whatever its ``load()`` call
    returns is appended to the result, preserving the input order.

    Args:
        pdf_paths: Iterable of PDF file paths.

    Returns:
        A flat list containing the loaded pages of every PDF, in order.
    """
    return [
        page
        for path in pdf_paths
        for page in PyPDFLoader(path).load()
    ]

# Split documents into smaller chunks for better retrieval
def split_text(documents, chunk_size=500, chunk_overlap=50):
    """Split documents into chunks with ``RecursiveCharacterTextSplitter``.

    Args:
        documents: Documents to split (passed to ``split_documents``).
        chunk_size: ``chunk_size`` value handed to the splitter.
        chunk_overlap: ``chunk_overlap`` value handed to the splitter.

    Returns:
        Whatever the splitter's ``split_documents`` call returns.
    """
    splitter_options = {"chunk_size": chunk_size, "chunk_overlap": chunk_overlap}
    chunker = RecursiveCharacterTextSplitter(**splitter_options)
    chunks = chunker.split_documents(documents)
    return chunks

# Convert text into vector embeddings and store them in a FAISS index on disk
def create_faiss_index(texts, index_path="vectorstore", model="text-embedding-3-small"):
    """Embed documents, build a FAISS index from them and save it to disk.

    Args:
        texts: Documents (chunks) to embed; passed to ``FAISS.from_documents``.
        index_path: Folder the index is saved to via ``save_local``.
            Defaults to ``"vectorstore"``.
        model: Name of the OpenAI embedding model passed to
            ``OpenAIEmbeddings``. Defaults to ``"text-embedding-3-small"``.

    Returns:
        The vector store that was built and saved, so callers can reuse it
        without reloading it from disk.

    Raises:
        ValueError: If ``texts`` contains no documents.
    """
    documents = list(texts)
    if not documents:
        # Fail early with a readable message instead of an obscure error from
        # deep inside the index construction.
        raise ValueError("Cannot build a FAISS index from an empty document list.")

    embeddings = OpenAIEmbeddings(model=model)
    vectorstore = FAISS.from_documents(documents, embeddings)
    vectorstore.save_local(index_path)
    return vectorstore

# Load the FAISS index from disk, or return None when it has not been built yet
def load_faiss_index(index_path="vectorstore", model="text-embedding-3-small"):
    """Load a previously saved FAISS index.

    Args:
        index_path: Folder the index was saved to. Defaults to
            ``"vectorstore"``.
        model: Name of the OpenAI embedding model passed to
            ``OpenAIEmbeddings``. Defaults to ``"text-embedding-3-small"``.

    Returns:
        The loaded vector store, or ``None`` when ``index_path`` does not
        contain an ``index.faiss`` file.
    """
    # NOTE(review): assumes the index was saved under the default index name,
    # which produces "index.faiss" inside the folder - confirm against the
    # installed LangChain version.
    if not os.path.isfile(os.path.join(index_path, "index.faiss")):
        return None

    embeddings = OpenAIEmbeddings(model=model)
    # SECURITY: allow_dangerous_deserialization=True lets the loader unpickle
    # data stored next to the index. Only load index folders that this
    # notebook created itself; never point this at untrusted files.
    return FAISS.load_local(
        index_path,
        embeddings,
        allow_dangerous_deserialization=True,
    )

# Retrieve context for a query from the vector store
def retrieve_context(query, vector_store, top_k=3):
    """Return the text of the documents most similar to ``query``.

    NOTE(review): ``retrieve_context`` is defined again in a later cell; when
    the notebook runs top to bottom, that later definition replaces this one.

    Args:
        query: Text to search for.
        vector_store: Object exposing ``similarity_search(query, k)``.
        top_k: Number of results requested from the search.

    Returns:
        The ``page_content`` of every match, joined with newlines.
    """
    matches = vector_store.similarity_search(query, top_k)
    page_texts = (match.page_content for match in matches)
    return "\n".join(page_texts)


In [4]:
# Retrieve the context and print the source of each match
def retrieve_context(query, vector_store, top_k=1):
    """Return the text of the best matches for ``query`` and print their sources.

    Args:
        query: Text to search for.
        vector_store: Object exposing ``similarity_search(query, k)``.
        top_k: Number of results requested from the search.

    Returns:
        The ``page_content`` of every match, joined with newlines. As a side
        effect, one ``Source: <metadata>`` line per match is printed.
    """
    hits = vector_store.similarity_search(query, top_k)

    # Show where each retrieved passage came from.
    source_lines = [f"Source: {hit.metadata}\n" for hit in hits]
    print("\n\n".join(source_lines))

    passages = [hit.page_content for hit in hits]
    return "\n".join(passages)

# Generate a response with an OpenAI chat model, grounded in retrieved context
def generate_response(query, vector_store=None, model="gpt-4o-mini"):
    """Answer ``query`` using context retrieved from the vector store.

    Args:
        query: The user's question.
        vector_store: Optional, already loaded vector store. When omitted, the
            store is loaded with ``load_faiss_index()``; passing one avoids
            reloading the index from disk on every call.
        model: Chat model name sent to the OpenAI API. Defaults to
            ``"gpt-4o-mini"``.

    Returns:
        The model's answer as text, or a fixed message when the index is
        unavailable or no context was retrieved.
    """
    if vector_store is None:
        try:
            vector_store = load_faiss_index()
        except (FileNotFoundError, RuntimeError):
            # NOTE(review): presumably these are the errors raised when the
            # index files are missing or unreadable - confirm against the
            # installed faiss/LangChain versions. Treat them as "no index".
            vector_store = None
    if vector_store is None:
        return "Archives unavailable. Please process database first."

    context = retrieve_context(query, vector_store)
    if not context or not context.strip():
        return "No records found for your query."

    # A single, consistent persona: the system message carries the role and
    # the user message carries only the retrieved context and the question.
    system_message = (
        "You are a company assistant, and you have to answer the following "
        "questions based on the information you have in the company's archives."
    )
    user_message = f"Context:\n{context}\n\nQuestion:\n{query}\n\nAnswer:"

    response = openai.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_message},
            {"role": "user", "content": user_message},
        ],
    )
    return response.choices[0].message.content


# Scenario execution
The API is called with and without RAG to illustrate the difference.

In [5]:


# Implement RAG: load the source PDFs, split them into chunks and build the index
pdf_files = ["pdf_files/Short_guide_EU.pdf"]
try:
    print("Processing documents...")
    raw_docs = load_pdfs(pdf_files)
    split_docs = split_text(raw_docs)
    create_faiss_index(split_docs)
except Exception as e:
    # Keep the notebook running, but report what actually went wrong so the
    # failure (missing file, missing API key, ...) can be diagnosed.
    print(f"Cannot process documents... ({type(e).__name__}: {e})")


Processing documents...


In [6]:
# Question to answer from the indexed document
query = "Give me the population of Hungary, Romania and Greece?"

# AI response with RAG: retrieve context from the index, then ask the model
response_with_rag = generate_response(query)
print(f"\nAI response: {response_with_rag}")


Source: {'source': 'pdf_files/Short_guide_EU.pdf', 'page': 8, 'page_label': '9'}


AI response: The populations are as follows:
- Hungary: 9.6 million
- Romania: 19.1 million
- Greece: 10.4 million
