In [3]:
# testing_notebook.ipynb

# --- Cell 1: Imports and Setup ---
import os
from dotenv import load_dotenv
import warnings

# LangChain Imports
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
from langchain_core.prompts import ChatPromptTemplate


print("Libraries imported successfully.")


Libraries imported successfully.


In [5]:

load_dotenv() 

# --- Configuration ---
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
OPENROUTER_API_BASE = "https://openrouter.ai/api/v1"
MODEL_NAME = "deepseek/deepseek-chat"
EMBEDDING_MODEL = "text-embedding-ada-002" # Standard, reliable embedding model

# --- Verification ---
if not OPENROUTER_API_KEY:
    print("❌ ERROR: OPENROUTER_API_KEY not found. Make sure your .env file is correct.")
else:
    print("✅ API Key loaded successfully.")
    # For security, let's just show the first few and last few characters
    print(f"   API Key Preview: {OPENROUTER_API_KEY[:5]}...{OPENROUTER_API_KEY[-4:]}")

✅ API Key loaded successfully.
   API Key Preview: sk-or...c4ea


In [6]:
# --- Cell 3: Document Loading ---

DOCUMENT_PATH = "./documents/project_quasar_brief.txt"

try:
    loader = TextLoader(DOCUMENT_PATH)
    docs = loader.load()
    print("✅ Document loaded successfully.")
    print(f"   - Number of documents loaded: {len(docs)}")
    print(f"   - Type of a document: {type(docs[0])}")
    print("\n--- Document Content Preview ---")
    print(docs[0].page_content[:300] + "...") # Print the first 300 characters
except Exception as e:
    print(f"❌ ERROR: Failed to load document. Check the path: {DOCUMENT_PATH}")
    print(f"   Details: {e}")

✅ Document loaded successfully.
   - Number of documents loaded: 1
   - Type of a document: <class 'langchain_core.documents.base.Document'>

--- Document Content Preview ---
**Project Quasar: Internal Project Brief**
**Document ID:** P-QSR-2025-01
**Date:** September 22, 2025
**Author:** Dr. Evelyn Reed

---

**1. Project Overview**
Project Quasar is a strategic initiative by InnovateForward Inc. to develop a next-generation, decentralized data analytics platform. The p...


In [7]:
# --- Cell 4: Document Splitting (Chunking) ---

text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
split_docs = text_splitter.split_documents(docs)

print("✅ Document split into chunks successfully.")
print(f"   - Original number of documents: {len(docs)}")
print(f"   - Number of chunks created: {len(split_docs)}")
print("\n--- Preview of the first chunk ---")
print(split_docs[0].page_content)

✅ Document split into chunks successfully.
   - Original number of documents: 1
   - Number of chunks created: 5

--- Preview of the first chunk ---
**Project Quasar: Internal Project Brief**
**Document ID:** P-QSR-2025-01
**Date:** September 22, 2025
**Author:** Dr. Evelyn Reed

---


In [23]:
# --- Cell 5 (New): Create a Custom, Reliable Embeddings Class ---
import requests
import json
from langchain_core.embeddings import Embeddings
from typing import List

# This class is our custom-built bridge to OpenRouter's embedding API.
class OpenRouterEmbeddings(Embeddings):
    def __init__(self, model: str = "text-embedding-ada-002", api_key: str = None):
        if not api_key:
            raise ValueError("OpenRouter API key must be provided.")
        self.model = model
        self.api_key = api_key
        self.api_url = "https://openrouter.ai/api/v1/embeddings"
        self.headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }

    def _get_embeddings(self, texts: List[str]) -> List[List[float]]:
        """Helper function to call the API."""
        response = requests.post(
            self.api_url,
            headers=self.headers,
            data=json.dumps({"model": self.model, "input": texts})
        )
        # Raise an error if the API call was unsuccessful
        response.raise_for_status() 
        
        response_data = response.json()
        
        # Extract the embedding vectors from the response
        return [item['embedding'] for item in response_data['data']]

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """The method LangChain's FAISS will call for a batch of documents."""
        print(f"Embedding {len(texts)} documents using direct API call...")
        return self._get_embeddings(texts)

    def embed_query(self, text: str) -> List[float]:
        """The method LangChain's retriever will call for a single query."""
        print("Embedding a single query using direct API call...")
        return self._get_embeddings([text])[0]

print("✅ Custom `OpenRouterEmbeddings` class defined successfully.")

✅ Custom `OpenRouterEmbeddings` class defined successfully.


In [24]:
# --- Cell 6 (New): Test the Custom Embeddings Class ---

# Initialize our new class
try:
    custom_embeddings = OpenRouterEmbeddings(api_key=OPENROUTER_API_KEY)
    print("✅ Custom Embeddings class initialized.")

    # Test 1: Embed a single query
    test_query_text = "This is a test query."
    query_embedding = custom_embeddings.embed_query(test_query_text)
    print(f"   - Successfully embedded a single query. Vector length: {len(query_embedding)}")

    # Test 2: Embed multiple documents
    test_docs_text = ["First test document.", "Second test document."]
    doc_embeddings = custom_embeddings.embed_documents(test_docs_text)
    print(f"   - Successfully embedded {len(doc_embeddings)} documents. First vector length: {len(doc_embeddings[0])}")
    
except Exception as e:
    print(f"❌ ERROR: Testing the custom embeddings class failed.")
    print(f"   Details: {e}")

✅ Custom Embeddings class initialized.
Embedding a single query using direct API call...
❌ ERROR: Testing the custom embeddings class failed.
   Details: Expecting value: line 1 column 1 (char 0)


In [None]:
# --- Cell 7 (New): Integrate with FAISS and Assemble the Chain ---

# 1. Initialize our custom embeddings class
embeddings = OpenRouterEmbeddings(api_key=OPENROUTER_API_KEY)
print("✅ Using our custom embeddings class.")

# 2. Create the vector store (this will now use our requests-based class)
print("\nCreating vector store with custom embedder...")
try:
    vector_store = FAISS.from_documents(split_docs, embedding=embeddings)
    print("✅ Vector store created successfully!")
except Exception as e:
    print(f"❌ ERROR: Failed to create vector store even with custom class.")
    print(f"   Details: {e}")

# 3. Initialize the LLM (DeepSeek via OpenRouter)
llm = ChatOpenAI(
    model_name=MODEL_NAME,
    openai_api_base=OPENROUTER_API_BASE,
    openai_api_key=OPENROUTER_API_KEY,
    temperature=0.3
)
print("\n✅ LLM Initialized (DeepSeek via OpenRouter)")

# 4. Create the prompt and the final chain
prompt = ChatPromptTemplate.from_template("""
Answer the user's question based only on the following context.
If the answer is not in the context, say you don't know.
Context:
{context}

Question: {input}
""")
document_chain = create_stuff_documents_chain(llm, prompt)
retriever = vector_store.as_ retriever()
retrieval_chain = create_retrieval_chain(retriever, document_chain)
print("✅ Retrieval Chain Assembled")

In [25]:
import requests
import json

response = requests.post(
  url="https://openrouter.ai/api/v1/chat/completions",
  headers={
    "Authorization": "Bearer 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",
    "Content-Type": "application/json",
  },
  data=json.dumps({
    "model": "deepseek/deepseek-chat-v3.1:free",
    "messages": [
      {
        "role": "user",
        "content": "What is the meaning of life?"
      }
    ],
    
  })
)

response_data = response.json()
response_data


{'id': 'gen-1758585875-SZnrpjUuUMSLE2JprrF2',
 'provider': 'DeepInfra',
 'model': 'deepseek/deepseek-chat-v3.1:free',
 'object': 'chat.completion',
 'created': 1758585875,
 'choices': [{'logprobs': None,
   'finish_reason': 'stop',
   'native_finish_reason': 'stop',
   'index': 0,
   'message': {'role': 'assistant',
    'content': 'That is one of the oldest and most profound questions humanity has ever asked. There isn\'t a single, definitive answer that satisfies everyone, because the meaning of life is deeply personal and often tied to one\'s beliefs, values, and experiences.\n\nHere’s a breakdown of how different perspectives approach the question:\n\n### 1. Philosophical Perspectives\n*   **Existentialism (e.g., Sartre, Camus):** This philosophy argues that life has no *inherent* meaning. We are "condemned to be free," and it is our individual responsibility to create our own meaning and purpose through our choices, actions, and passions. For Camus, the meaning is found in rebellin