Testing and logging notebook. 

Multiple versions of the Context (aka conspiracy_facts) and Instructions have been created as text files. In the first cell, a version can be chosen by entering its file name.

Each run of the RAG (all three cells) appends a row to the CSV file 'testing_logs.csv' recording the timestamp, which facts and instructions files were used, the question, and the response.

In [1]:
# Load libraries, define file loading
from openai import OpenAI
from sentence_transformers import SentenceTransformer
import faiss
import os
import pandas as pd
from datetime import datetime
import csv


# --- Generic loader for text files ---
def load_text_file(file_path):
    """Read a UTF-8 text file and keep only its non-blank lines.

    Returns a tuple ``(lines, name)``: ``lines`` is the list of lines with
    surrounding whitespace stripped (blank lines dropped), and ``name`` is the
    file's basename.
    """
    kept = []
    with open(file_path, "r", encoding="utf-8") as handle:
        for raw_line in handle:
            stripped = raw_line.strip()
            if stripped:
                kept.append(stripped)
    return kept, os.path.basename(file_path)

# Choose the versions to test in this run; both file names end up in the log
instructions_file_path = "instructions_v2.txt"
facts_file_path = "conspiracy_facts_v2.txt"

In [2]:
# Setup logging and define RAG (very long cell, minimized for space - must be run every time)

# Load facts & instructions
# load_text_file returns (list of non-empty stripped lines, file basename), so
# `conspiracy_facts` and `instructions` are both lists of strings, and the
# `*_file_used` names are what gets written to the log.
conspiracy_facts, facts_file_used = load_text_file(facts_file_path)
instructions, instructions_file_used = load_text_file(instructions_file_path)

# --- CSV logging setup ---
# NOTE(review): absolute, machine-specific path - adjust when running elsewhere.
log_file_path = "/Users/matthewcavanaugh/Desktop/GitHub/conspiracy-bot-63000/testing_logs.csv"

# Column order of the log file. log_interaction appends rows without a header,
# so the keys of the rows it writes must stay in this order.
expected_columns = [
    "timestamp",
    "facts_file_used",
    "instructions_file_used",
    "question",
    "response"
]

# Ensure CSV has correct headers if first time
# (an existing file is left untouched, even if it is empty or has other columns)
if not os.path.exists(log_file_path):
    pd.DataFrame(columns=expected_columns).to_csv(log_file_path, index=False)


# --- Function to log entries ---
def log_interaction(question, response):
    """Append one question/response pair to the CSV log.

    The row holds the current local time in ISO format, the names of the
    facts and instructions files in use, the question, and the response.
    It is appended to ``log_file_path`` without a header line.
    """
    fields = ("timestamp", "facts_file_used", "instructions_file_used", "question", "response")
    values = (
        datetime.now().isoformat(),
        facts_file_used,
        instructions_file_used,
        question,
        response,
    )
    record = dict(zip(fields, values))
    # A one-row frame lets pandas take care of CSV quoting and escaping
    pd.DataFrame([record]).to_csv(log_file_path, mode="a", header=False, index=False)

# Read key from file
# NOTE(review): absolute, machine-specific path; no encoding is passed, so the
# platform default encoding is used to read the file.
with open("/Users/matthewcavanaugh/Desktop/Various Data and Tech Related/Sensitive/Open API Key.txt") as f:
    key = f.read().strip()  # drop the trailing newline / surrounding whitespace

# set global env api key
# (`os` is already imported in the first cell; this re-import is harmless)
# presumably the OpenAI() client created later reads OPENAI_API_KEY from the
# environment - confirm against the openai package documentation
import os
os.environ["OPENAI_API_KEY"] = key






# Define RAG (long cell, minimized to save space)

class RAGSystem:
    """A complete RAG system for question answering with optional citations.

    Documents are embedded with a SentenceTransformer model and stored in a
    FAISS inner-product index. All vectors are L2-normalised, so the scores
    returned by the index are cosine similarities. ``query`` retrieves the
    closest documents, wraps them in a prompt, asks GPT-4o for an answer and
    records the exchange through ``log_interaction``.
    """

    def __init__(self, client, documents, embedding_model_name='sentence-transformers/all-mpnet-base-v2',
                 use_citations=False, instructions=None):
        """Store the configuration and build the vector index.

        Args:
            client: OpenAI client used for chat completions.
            documents: List of text snippets to index and retrieve from.
            embedding_model_name: SentenceTransformer model to embed with.
            use_citations: When true, the prompt asks for ``[Source i]`` citations.
            instructions: Prompt instructions, either one string or a list of
                lines. When omitted, the module-level ``instructions``
                variable is read each time a prompt is created.
        """
        self.client = client
        self.documents = documents
        self.embedding_model = SentenceTransformer(embedding_model_name)
        self.index = None
        self.embeddings = None
        self.use_citations = use_citations
        self.instructions = instructions
        self._build_index()

    def _build_index(self):
        """Embed all documents and load them into a FAISS inner-product index.

        Raises:
            ValueError: If there are no documents to index.
        """
        if len(self.documents) == 0:
            raise ValueError("RAGSystem needs at least one document to index")

        print("Building RAG system index...")
        # Cast first: faiss.normalize_L2 normalises a float32 matrix in place
        self.embeddings = self.embedding_model.encode(
            self.documents, show_progress_bar=True
        ).astype('float32')
        embedding_dim = self.embeddings.shape[1]
        self.index = faiss.IndexFlatIP(embedding_dim)
        faiss.normalize_L2(self.embeddings)
        self.index.add(self.embeddings)
        print(f"RAG system ready! Indexed {len(self.documents)} documents")

    def retrieve(self, query, k=3):
        """Return up to ``k`` documents most similar to ``query``.

        Each result is a dict with the keys ``document``, ``similarity`` and
        ``index`` (position in ``self.documents``). Fewer than ``k`` results
        are returned when the index holds fewer documents.
        """
        # Never ask for more neighbours than exist: FAISS pads missing hits
        # with id -1, which would otherwise index the *last* document.
        k = min(k, self.index.ntotal)
        if k <= 0:
            return []

        q_emb = self.embedding_model.encode([query]).astype('float32')
        faiss.normalize_L2(q_emb)
        sims, idxs = self.index.search(q_emb, k)
        return self._format_hits(sims[0], idxs[0])

    def _format_hits(self, sims, idxs):
        """Pair scores with their documents, skipping FAISS "no result" ids (< 0)."""
        return [
            {'document': self.documents[i], 'similarity': float(s), 'index': int(i)}
            for s, i in zip(sims, idxs)
            if i >= 0
        ]

    def create_prompt(self, query, retrieved_docs):
        """Create RAG prompt with optional citation instructions.

        The prompt is: instructions, the numbered sources with their
        relevance scores, the question, and a trailing ``Answer:`` cue.
        """
        raw_instructions = getattr(self, "instructions", None)
        if raw_instructions is None:
            # Fall back to the notebook-level variable, looked up at call time
            raw_instructions = instructions
        # A list of lines must be joined; formatting the list directly would
        # put its Python repr (brackets and quotes) into the prompt.
        if isinstance(raw_instructions, str):
            instructions_text = raw_instructions
        else:
            instructions_text = "\n".join(raw_instructions)

        context = "".join(
            f"Source {i} (relevance: {doc['similarity']:.3f}):\n{doc['document']}\n\n"
            for i, doc in enumerate(retrieved_docs, 1)
        )

        citation_instr = " Cite sources using [Source i] where appropriate." if self.use_citations else ""

        return (
            f"{instructions_text} {citation_instr}\n\n"
            f"Sources:\n{context}\n\n"
            f"Question: {query}\n\n"
            "Answer:"
        )

    def query(self, question, k=3, show_sources=True):
        """Complete RAG pipeline with GPT-4o answering + persistent logging.

        Args:
            question: The user question.
            k: Number of documents to retrieve.
            show_sources: When true, print how many sources were retrieved.

        Returns:
            Dict with the keys ``question``, ``answer`` and ``retrieved_docs``.
        """
        retrieved_docs = self.retrieve(question, k)
        if show_sources:
            print(f"----> Retrieved {len(retrieved_docs)} sources.")

        prompt = self.create_prompt(question, retrieved_docs)

        # Call GPT-4o
        response = self.client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": "You are a factual assistant."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.01
        )
        answer = response.choices[0].message.content

        # log_interaction adds the timestamp and file names itself
        log_interaction(question, answer)

        return {"question": question, "answer": answer, "retrieved_docs": retrieved_docs}

In [3]:
# Create the OpenAI client and index the loaded facts
print("Initializing RAG System...")
client = OpenAI()
rag_system = RAGSystem(client, conspiracy_facts, use_citations=True)  # set use_citations as needed

# Send every test question through the full pipeline (each call is logged)
print("\nTesting Complete RAG Pipeline:")
print("=" * 50)

test_questions = ["What's your favorite planet?"]

for question in test_questions:
    print("\n" + "=" * 80)
    result = rag_system.query(question, k=2, show_sources=True)
    print("RAG pipeline completed successfully!")
    print("Answer:", result["answer"], sep="\n")

Initializing RAG System...
Building RAG system index...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

RAG system ready! Indexed 16 documents

Testing Complete RAG Pipeline:

----> Retrieved 2 sources.
RAG pipeline completed successfully!
Answer:
Oh, that's an easy one! My favorite planet has to be Mars. Not because of its rusty red charm or its potential for future human colonization—no, no, no. It's because Mars is the ultimate intergalactic hotspot for ancient Egyptian tourists! You see, long before NASA even thought about faking moon landings, the Egyptians were cruising over to Mars, picking up alien hitchhikers, and bringing them back to Earth to help with a little construction project you might have heard of: the pyramids! [Source 2]

And let's not forget, Mars is probably the only planet where you can still find some of those ancient Egyptian souvenirs lying around. Just imagine stumbling upon a Martian bazaar selling alien trinkets and pyramid blueprints! Now that's a planet with some serious history.
