In [None]:
# Prerequisites (run once): pip install sentence_transformers qdrant-client

In [None]:
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams, PointStruct
from sentence_transformers import SentenceTransformer
import uuid
import numpy as np
import json

class RAGSystem:
    """Small retrieval helper: sentence-transformer embeddings stored in a Qdrant collection.

    The instance talks to a Qdrant server on localhost:6333 and embeds text with
    the 'all-MiniLM-L6-v2' sentence-transformers model.  Each stored point carries
    the document text under the payload key 'text' plus any metadata keys.
    """

    # Number of points requested per scroll page when exporting a collection.
    SCROLL_PAGE_SIZE = 1000
    # Number of points sent per upsert request.
    UPSERT_BATCH_SIZE = 100

    def __init__(self, collection_name="documents"):
        """
        Connect to Qdrant, load the encoder and make sure the collection exists.

        Args:
            collection_name: Name of the Qdrant collection this instance works on
        """
        # Initialize Qdrant client (local instance)
        self.client = QdrantClient("localhost", port=6333)
        self.collection_name = collection_name

        # Initialize sentence transformer
        self.encoder = SentenceTransformer('all-MiniLM-L6-v2')
        self.vector_size = self.encoder.get_sentence_embedding_dimension()

        # Create collection only if it doesn't exist.  Constructing a RAGSystem
        # must not wipe data that is already stored; use create_collection()
        # explicitly when a fresh, empty collection is wanted.
        self._ensure_collection()

    def _vectors_config(self):
        """Vector parameters shared by every collection this class creates."""
        return VectorParams(size=self.vector_size, distance=Distance.COSINE)

    def _ensure_collection(self):
        """Create the collection when the server does not list it yet; otherwise do nothing."""
        existing_names = {c.name for c in self.client.get_collections().collections}
        if self.collection_name not in existing_names:
            # NOTE(review): an existing collection is reused as-is; its vector
            # size is not compared against the encoder's dimension here.
            self.client.create_collection(
                collection_name=self.collection_name,
                vectors_config=self._vectors_config()
            )

    def _upsert_in_batches(self, points):
        """Upload points in slices of UPSERT_BATCH_SIZE; an empty list sends nothing."""
        for start in range(0, len(points), self.UPSERT_BATCH_SIZE):
            self.client.upsert(
                collection_name=self.collection_name,
                points=points[start:start + self.UPSERT_BATCH_SIZE]
            )

    @staticmethod
    def _coerce_point_id(raw_id):
        """Turn a purely numeric string id back into an int.

        save_collection() writes every id through str(); Qdrant point ids are
        either unsigned integers or UUIDs, so a value such as "42" has to be
        sent back as the integer 42.  Any other value is returned unchanged.
        """
        if isinstance(raw_id, str) and raw_id.isdecimal():
            return int(raw_id)
        return raw_id

    def create_collection(self):
        """Create a new, empty collection in Qdrant.

        This uses recreate_collection, so an existing collection with the same
        name is dropped together with all of its points.
        """
        self.client.recreate_collection(
            collection_name=self.collection_name,
            vectors_config=self._vectors_config()
        )

    def add_documents(self, documents):
        """
        Add documents to the vector store.

        Each document gets a fresh random UUID as its point id.

        Args:
            documents: List of dictionaries with 'text' and 'metadata' keys
                ('metadata' is optional and may be None)
        """
        points = []
        for doc in documents:
            text = doc['text']
            metadata = doc.get('metadata') or {}

            # Generate embeddings
            embedding = self.encoder.encode(text)

            # The stored 'text' must always be the text that was embedded, so it
            # is written last and wins over a metadata key of the same name.
            payload = {'text': text, **metadata}
            payload['text'] = text

            points.append(PointStruct(
                id=str(uuid.uuid4()),
                vector=embedding.tolist(),
                payload=payload
            ))

        # Upload points in batches (no request is sent for an empty list)
        self._upsert_in_batches(points)

    def query(self, query_text, top_k=3):
        """
        Query the vector store.

        Args:
            query_text: String query
            top_k: Number of results to return

        Returns:
            List of dictionaries with 'text', 'metadata' (every payload key
            except 'text') and 'similarity' (the score reported by Qdrant)
        """
        # Generate query embedding as a plain list, like the stored vectors
        query_vector = self.encoder.encode(query_text).tolist()

        # Search in Qdrant
        results = self.client.search(
            collection_name=self.collection_name,
            query_vector=query_vector,
            limit=top_k
        )

        # Format results
        return [
            {
                'text': res.payload['text'],
                'metadata': {k: v for k, v in res.payload.items() if k != 'text'},
                'similarity': res.score
            }
            for res in results
        ]

    def save_collection(self, output_file):
        """Save every point's id and payload (no vectors) to a JSON file.

        The collection is read page by page: scroll returns the offset of the
        next page, and None once the last page has been delivered.

        Args:
            output_file: Path of the JSON file to write
        """
        data = []
        offset = None
        while True:
            points, offset = self.client.scroll(
                collection_name=self.collection_name,
                limit=self.SCROLL_PAGE_SIZE,
                offset=offset,
                with_payload=True,
                with_vectors=False
            )
            data.extend(
                {'id': str(point.id), 'payload': point.payload}
                for point in points
            )
            if offset is None:
                break

        with open(output_file, 'w') as f:
            json.dump(data, f, indent=2)

    def restore_collection(self, input_file):
        """Restore collection from a file written by save_collection, re-encoding the text.

        The collection is dropped and recreated, then filled with the points
        from the file.  Vectors are recomputed from each payload's 'text'.

        Args:
            input_file: Path of the JSON file to read
        """
        with open(input_file, 'r') as f:
            data = json.load(f)

        # Prepare points with new encodings BEFORE touching the collection, so
        # a malformed file or an encoder failure does not leave it empty.
        points = []
        for record in data:
            payload = record['payload']
            embedding = self.encoder.encode(payload['text'])
            points.append(PointStruct(
                id=self._coerce_point_id(record['id']),
                vector=embedding.tolist(),
                payload=payload
            ))

        # Recreate collection
        self.create_collection()

        # Upload in batches
        self._upsert_in_batches(points)

In [None]:
# Set up the RAG system
rag = RAGSystem()

# Two toy documents, each with 'source' and 'date' metadata
documents = [
    {'text': 'The quick brown fox jumps over the lazy dog.',
     'metadata': {'source': 'sample1', 'date': '2024-01-25'}},
    {'text': 'Python is a versatile programming language.',
     'metadata': {'source': 'sample2', 'date': '2024-01-25'}},
]

# Index the documents
rag.add_documents(documents)

# Run a query and print every hit followed by a separator line
results = rag.query("Tell me about Python")
for result in results:
    print(
        f"Text: {result['text']}",
        f"Similarity: {result['similarity']}",
        f"Metadata: {result['metadata']}",
        "---",
        sep="\n",
    )

In [None]:
from qdrant_client import QdrantClient

# Connect to the local Qdrant server and fetch the list of collections
client = QdrantClient("localhost", port=6333)
collections = client.get_collections()

# One summary per collection: its name and how many points it holds
for collection in collections.collections:
    print(
        f"Collection name: {collection.name}",
        f"Points count: {client.get_collection(collection.name).points_count}",
        "---",
        sep="\n",
    )

In [None]:
from qdrant_client import QdrantClient

def inspect_collection(collection_name, limit=5):
    """Print the id and payload of up to `limit` points from a collection.

    Args:
        collection_name: Name of the Qdrant collection to read from
        limit: Maximum number of points requested from the server
    """
    qdrant = QdrantClient("localhost", port=6333)

    # scroll returns a pair; the first element holds the fetched points
    page = qdrant.scroll(collection_name=collection_name, limit=limit)
    records = page[0]

    # Header
    print(f"\nCollection: {collection_name}")
    print("-" * 50)

    # One entry per point, each closed by a shorter rule
    for record in records:
        print(f"ID: {record.id}")
        print(f"Payload: {record.payload}")
        print("-" * 30)

# Print a preview of every collection named in this list
collections = ["data"]
for collection in collections:
    inspect_collection(collection_name=collection)

In [None]:
from qdrant_client import QdrantClient

# Look up the "data" collection on the local server and report its point count
client = QdrantClient("localhost", port=6333)
collection_info = client.get_collection(
    "data"
)
print(
    f"Total points in collection 'data': {collection_info.points_count}"
)

In [None]:
## Save collection and restore

In [None]:
from qdrant_client import QdrantClient
from qdrant_client.http.models import VectorParams, Distance
from sentence_transformers import SentenceTransformer
import json

def save_collection_light(collection_name, output_file, page_size=100):
    """Write the id and payload of every point in a collection to a JSON file.

    Vectors are not exported ("light"); they can be recomputed from the
    payload text on restore.  The collection is read page by page so that
    collections larger than one page are exported completely.

    Args:
        collection_name: Name of the Qdrant collection to export
        output_file: Path of the JSON file to write
        page_size: Number of points requested per scroll call
    """
    client = QdrantClient("localhost", port=6333)

    data = []
    offset = None
    while True:
        # scroll returns (points, next_offset); next_offset is None once the
        # last page has been delivered.
        points, offset = client.scroll(
            collection_name=collection_name,
            limit=page_size,
            offset=offset,
            with_payload=True,
            with_vectors=False
        )
        data.extend(
            {'id': str(point.id), 'payload': point.payload}
            for point in points
        )
        if offset is None:
            break

    with open(output_file, 'w') as f:
        json.dump(data, f, indent=2)

def restore_collection_with_encoding(input_file, collection_name, model_name='all-MiniLM-L6-v2', batch_size=100):
    """Rebuild a collection from an id/payload JSON export, recomputing the vectors.

    The file is expected to hold a list of {'id': ..., 'payload': {...}}
    records whose payload has a 'text' key.  The target collection is dropped
    and recreated (cosine distance, vector size taken from the encoder), then
    filled in batches.

    Args:
        input_file: Path of the JSON file to read
        collection_name: Name of the collection to (re)create and fill
        model_name: sentence-transformers model used to encode the text
        batch_size: Number of points sent per upsert request

    Raises:
        ValueError: If batch_size is smaller than 1
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    client = QdrantClient("localhost", port=6333)
    encoder = SentenceTransformer(model_name)

    with open(input_file, 'r') as f:
        data = json.load(f)

    # Encode everything BEFORE touching the target collection, so a malformed
    # file or an encoder failure does not leave it dropped and empty.
    points = []
    for record in data:
        point_id = record['id']
        # The export writes ids through str(); Qdrant ids are unsigned integers
        # or UUIDs, so a purely numeric string goes back as an int.
        if isinstance(point_id, str) and point_id.isdecimal():
            point_id = int(point_id)
        points.append({
            'id': point_id,
            'vector': encoder.encode(record['payload']['text']).tolist(),
            'payload': record['payload']
        })

    # Create collection (drops an existing one with the same name)
    client.recreate_collection(
        collection_name=collection_name,
        vectors_config=VectorParams(size=encoder.get_sentence_embedding_dimension(), distance=Distance.COSINE)
    )

    # Upload in batches; nothing is sent when the file held no records
    for start in range(0, len(points), batch_size):
        client.upsert(
            collection_name=collection_name,
            points=points[start:start + batch_size]
        )

# Usage: export "documents" to JSON, then rebuild it under a new name
save_collection_light(
    collection_name="documents",
    output_file="documents_light.json",
)
restore_collection_with_encoding(
    input_file="documents_light.json",
    collection_name="documents_new",
)