# 📊 ContractSentinel: Exploratory Analysis

This notebook demonstrates how to interact with the **Endee Vector Database** directly for analysis and visualization of contract embeddings.

In [None]:
import sys
import os
sys.path.append(os.path.abspath('..'))

from backend.vector_db.client import EndeeClient
from backend.embeddings.model import EmbeddingModel
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

### 1. Initialize Clients

In [None]:
# Connection settings for the Endee instance this notebook talks to.
endee_host = "localhost"
endee_port = 8080

db_client = EndeeClient(host=endee_host, port=endee_port)
embedder = EmbeddingModel()

# Print whatever the client's health check reports before running later cells.
health_status = db_client.health_check()
print("Endee Health Check:", health_status)

### 2. Visualize Embeddings
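We will search for a generic term and plot the distribution of the similarity scores of the retrieved chunks. (A PCA projection of the embeddings themselves would require the raw vectors, which the search response may not return.)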

In [None]:
query = "confidentiality agreement terms"
query_vector = embedder.embed_text(query)

# Ask for the 50 nearest chunks so there are enough points to plot.
# (If the Endee API offers a 'scan' or 'list' call, fetching every vector
# would be an alternative in a real analysis.)
results = db_client.search("contracts", query_vector, limit=50)

if not results:
    print("No results found. Please ingest some documents first via the API or Ingestion Script.")
else:
    print(f"Found {len(results)} chunks.")

    # The search response may not include raw vectors, so the similarity
    # scores are used as a proxy for each chunk's distance from the query.
    scores = [hit.score for hit in results]
    ids = [hit.chunk_id for hit in results]

    # Histogram of the scores, drawn through the explicit Axes interface.
    fig, ax = plt.subplots(figsize=(10, 4))
    ax.hist(scores, bins=10, color="skyblue", edgecolor="black")
    ax.set_title("Similarity Score Distribution")
    ax.set_xlabel("Cosine Similarity")  # NOTE(review): assumes the index uses cosine similarity - confirm
    ax.set_ylabel("Frequency")
    plt.show()

### 3. Direct RAG Experiment
Test the retrieval quality without the full Agent wrapper.

In [None]:
# Retrieval-only check: embed one question and print the top matches,
# without going through the full Agent wrapper.
test_query = "What is the termination period?"
print(f"Query: {test_query}\n")

q_vec = embedder.embed_text(test_query)
matches = db_client.search("contracts", q_vec, limit=3)

if not matches:
    # An empty result would otherwise print nothing, and a None result
    # would fail inside the loop; say explicitly that nothing came back.
    print("No results found. Please ingest some documents first via the API or Ingestion Script.")
else:
    preview_len = 200  # maximum number of characters shown per match
    for i, match in enumerate(matches, 1):
        text = match.content
        # Only add the ellipsis when the content was actually cut off.
        ellipsis = "..." if len(text) > preview_len else ""
        print(f"{i}. [Score: {match.score:.4f}] {text[:preview_len]}{ellipsis}")