---
title: "vDB관련 샘플 코드들"
date: "2025-07-25"
category: "Data Science"
tags: ["VectorDB", "Qdrant", "ChromaDB", "FAISS", "Milvus"]
excerpt: "vDB관련 샘플 코드들"
---

In [None]:
# Qdrant 파이썬 클라이언트 설치 필요
# pip install qdrant-client

from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Distance, PointStruct
import numpy as np

# 1. Connect to the Qdrant server (local server example)
client = QdrantClient(host="localhost", port=6333)

# 2. (Re)create the collection: 1536-dimensional vectors, cosine distance
client.recreate_collection(
    collection_name="my_collection",
    vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
)

# 3. Insert vector data: three random 1536-dimensional vectors,
#    each paired with the payload at the same position
vectors = np.random.rand(3, 1536).tolist()
payloads = [
    {"doc_id": "A1", "text": "문서1"},
    {"doc_id": "A2", "text": "문서2"},
    {"doc_id": "A3", "text": "문서3"},
]
points = [
    PointStruct(id=point_id, vector=vector, payload=payload)
    for point_id, (vector, payload) in enumerate(zip(vectors, payloads))
]
client.upsert(collection_name="my_collection", points=points)

# 4. Similarity search with a random query vector; print the top 2 hits
query_vector = np.random.rand(1536).tolist()
results = client.search(
    collection_name="my_collection",
    query_vector=query_vector,
    limit=2,
)
for hit in results:
    print(f"ID: {hit.id}, Score: {hit.score}, Payload: {hit.payload}")

In [None]:
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Distance, PointStruct
import numpy as np

client = QdrantClient(host="localhost", port=6333)

# Give each company its own collection (company A first, then company B)
client.recreate_collection(
    collection_name="companyA_docs",
    vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
)
client.recreate_collection(
    collection_name="companyB_docs",
    vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
)

# Insert company A data: two random vectors with their payloads
vectors_a = np.random.rand(2, 1536).tolist()
payloads_a = [{"doc_id": "A1"}, {"doc_id": "A2"}]
points_a = [
    PointStruct(id=point_id, vector=vector, payload=payload)
    for point_id, (vector, payload) in enumerate(zip(vectors_a, payloads_a))
]
client.upsert(collection_name="companyA_docs", points=points_a)

# Insert company B data: two random vectors with their payloads
vectors_b = np.random.rand(2, 1536).tolist()
payloads_b = [{"doc_id": "B1"}, {"doc_id": "B2"}]
points_b = [
    PointStruct(id=point_id, vector=vector, payload=payload)
    for point_id, (vector, payload) in enumerate(zip(vectors_b, payloads_b))
]
client.upsert(collection_name="companyB_docs", points=points_b)

# The same query vector is searched against each company's collection separately
query_vector = np.random.rand(1536).tolist()

# Search company A data only
results = client.search(
    collection_name="companyA_docs",
    query_vector=query_vector,
    limit=1,
)
for hit in results:
    print(f"회사A 결과: {hit.payload}")

# Search company B data only
results = client.search(
    collection_name="companyB_docs",
    query_vector=query_vector,
    limit=1,
)
for hit in results:
    print(f"회사B 결과: {hit.payload}")

# Multi-tenancy를 잘 구현하는 법

In [None]:
# 1. 컬렉션 네이밍 컨벤션

from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Distance, PointStruct
import numpy as np

# Connect to the Qdrant server at localhost:6333; the usage-example cell
# further down passes this same `client` to QdrantMultiTenantManager.
client = QdrantClient(host="localhost", port=6333)

# Naming pattern for per-tenant collections
def get_collection_name(tenant_id: str, data_type: str = "docs") -> str:
    """Return the collection name for a tenant, formatted as ``<tenant_id>_<data_type>``."""
    return "{}_{}".format(tenant_id, data_type)

# Example: (re)create one collection for every tenant / data-type combination
tenants = ["company_a", "company_b", "company_c"]
data_types = ["docs", "images", "products"]

for tenant, data_type in ((t, d) for t in tenants for d in data_types):
    collection_name = get_collection_name(tenant, data_type)
    client.recreate_collection(
        collection_name=collection_name,
        vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
    )
    print(f"Created collection: {collection_name}")

In [None]:
# 2. 멀티 테넌시 매니저 클래스

class QdrantMultiTenantManager:
    """Manage tenant-isolated Qdrant collections (one collection per tenant and type).

    Collections are named ``<tenant_id>_<collection_type>``. The vector size and
    distance metric used when creating a collection come from
    ``self.tenant_configs``.
    """

    def __init__(self, client: "QdrantClient"):
        """Store ``client`` and set up the hard-coded per-tenant configuration."""
        self.client = client
        # Only "vector_size" and "distance" are read by this class when a
        # collection is created; "collections" lists the intended types.
        self.tenant_configs = {
            "company_a": {
                "collections": ["docs", "images"],
                "vector_size": 1536,
                "distance": Distance.COSINE,
            },
            "company_b": {
                "collections": ["docs", "products"],
                "vector_size": 1536,
                "distance": Distance.COSINE,
            },
        }

    def get_collection_name(self, tenant_id: str, collection_type: str) -> str:
        """Return the collection name ``<tenant_id>_<collection_type>``."""
        return f"{tenant_id}_{collection_type}"

    def ensure_collection_exists(self, tenant_id: str, collection_type: str):
        """Create the tenant's collection if the server does not list it yet.

        Raises:
            ValueError: if ``tenant_id`` has no entry in ``self.tenant_configs``.
        """
        if tenant_id not in self.tenant_configs:
            raise ValueError(f"Unknown tenant: {tenant_id}")

        collection_name = self.get_collection_name(tenant_id, collection_type)
        config = self.tenant_configs[tenant_id]

        # Check existence against the server's current collection list.
        existing_names = {c.name for c in self.client.get_collections().collections}
        if collection_name in existing_names:
            return

        self.client.create_collection(
            collection_name=collection_name,
            vectors_config=VectorParams(
                size=config["vector_size"],
                distance=config["distance"],
            ),
        )
        print(f"Created collection: {collection_name}")

    def upsert_for_tenant(self, tenant_id: str, collection_type: str,
                         vectors, payloads, ids):
        """Upsert points into the tenant's collection, creating it if needed.

        ``ids``, ``vectors`` and ``payloads`` are parallel sequences: element
        ``i`` of each describes one point.

        Returns:
            Whatever ``client.upsert`` returns.

        Raises:
            ValueError: if the three sequences differ in length, or if the
                tenant is not configured.
        """
        ids, vectors, payloads = list(ids), list(vectors), list(payloads)
        # zip() would silently drop the surplus items of the longer inputs,
        # so reject mismatched input before anything is created or written.
        if not (len(ids) == len(vectors) == len(payloads)):
            raise ValueError(
                "ids, vectors and payloads must have the same length "
                f"(got {len(ids)}, {len(vectors)}, {len(payloads)})"
            )

        self.ensure_collection_exists(tenant_id, collection_type)
        collection_name = self.get_collection_name(tenant_id, collection_type)

        points = [
            PointStruct(id=id_, vector=vector, payload=payload)
            for id_, vector, payload in zip(ids, vectors, payloads)
        ]

        return self.client.upsert(
            collection_name=collection_name,
            points=points,
        )

    def search_for_tenant(self, tenant_id: str, collection_type: str,
                         query_vector, limit=10, **kwargs):
        """Search only within the tenant's collection.

        Extra keyword arguments are forwarded unchanged to ``client.search``.
        """
        collection_name = self.get_collection_name(tenant_id, collection_type)

        return self.client.search(
            collection_name=collection_name,
            query_vector=query_vector,
            limit=limit,
            **kwargs
        )

    def get_tenant_collections(self, tenant_id: str):
        """Return the names of all server collections that belong to the tenant.

        A collection belongs to the tenant when its name starts with
        ``<tenant_id>_``. Collections claimed by another configured tenant
        whose id extends this one (e.g. ``company_a_eu`` vs ``company_a``)
        are excluded, so one tenant never sees or deletes another's data.
        """
        tenant_prefix = f"{tenant_id}_"
        # Prefixes of other configured tenants that begin with our own prefix.
        shadowing_prefixes = tuple(
            f"{other_id}_"
            for other_id in self.tenant_configs
            if other_id != tenant_id and other_id.startswith(tenant_prefix)
        )

        collections = self.client.get_collections()
        return [
            c.name for c in collections.collections
            if c.name.startswith(tenant_prefix)
            # str.startswith(()) is False, so an empty tuple excludes nothing.
            and not c.name.startswith(shadowing_prefixes)
        ]

    def delete_tenant_data(self, tenant_id: str):
        """Delete every collection returned by ``get_tenant_collections``."""
        collections = self.get_tenant_collections(tenant_id)

        for collection_name in collections:
            self.client.delete_collection(collection_name)
            print(f"Deleted collection: {collection_name}")

In [None]:
# 3. Usage example

# Initialize the manager with the shared client
manager = QdrantMultiTenantManager(client)

# Insert document data for company A
vectors_a = np.random.rand(3, 1536).tolist()
payloads_a = [
    {"doc_id": "A1", "title": "회사A 문서1", "tenant": "company_a"},
    {"doc_id": "A2", "title": "회사A 문서2", "tenant": "company_a"},
    {"doc_id": "A3", "title": "회사A 문서3", "tenant": "company_a"}
]
# Qdrant point IDs must be unsigned integers or UUIDs; arbitrary strings such
# as "A1" are rejected by the server. The business key stays in the payload
# ("doc_id"), and the point ID is a plain integer.
ids_a = [1, 2, 3]

manager.upsert_for_tenant("company_a", "docs", vectors_a, payloads_a, ids_a)

# Insert product data for company B
vectors_b = np.random.rand(2, 1536).tolist()
payloads_b = [
    {"product_id": "B1", "name": "회사B 제품1", "tenant": "company_b"},
    {"product_id": "B2", "name": "회사B 제품2", "tenant": "company_b"}
]
# Integer point IDs again; they only need to be unique within this collection.
ids_b = [1, 2]

manager.upsert_for_tenant("company_b", "products", vectors_b, payloads_b, ids_b)

# Per-tenant search with one shared random query vector
query_vector = np.random.rand(1536).tolist()

# Search company A documents
results_a = manager.search_for_tenant("company_a", "docs", query_vector)
print("회사A 검색 결과:", [hit.payload for hit in results_a])

# Search company B products
results_b = manager.search_for_tenant("company_b", "products", query_vector)
print("회사B 검색 결과:", [hit.payload for hit in results_b])

In [None]:
# 백업 복구
import json
import os

class TenantBackup:
    """Back up and restore all of a tenant's collections as JSON files.

    Each collection is written to ``<backup_path>/<collection_name>_backup.json``
    as a list of ``{"id", "vector", "payload"}`` objects.
    """

    def __init__(self, manager: "QdrantMultiTenantManager"):
        """Keep the manager used to list collections and reach the client."""
        self.manager = manager

    def backup_tenant(self, tenant_id: str, backup_path: str,
                      batch_size: int = 10000):
        """Export every point of each tenant collection to a JSON file.

        Args:
            tenant_id: tenant whose collections are exported.
            backup_path: directory for the backup files; created if missing.
            batch_size: number of points requested per ``scroll`` page.
        """
        if backup_path:
            os.makedirs(backup_path, exist_ok=True)

        collections = self.manager.get_tenant_collections(tenant_id)

        for collection_name in collections:
            exported = []
            offset = None
            # scroll() is paginated: keep requesting pages until the returned
            # next-page offset is None, otherwise only the first page is saved.
            while True:
                records, offset = self.manager.client.scroll(
                    collection_name=collection_name,
                    limit=batch_size,
                    offset=offset,
                    with_payload=True,
                    # Vectors are not returned by default; without them the
                    # backup could not be restored.
                    with_vectors=True,
                )
                # The returned records are not JSON-serializable as-is, so
                # copy out the fields needed to rebuild each point.
                exported.extend(
                    {
                        "id": record.id,
                        "vector": record.vector,
                        "payload": record.payload,
                    }
                    for record in records
                )
                if offset is None:
                    break

            # Write the backup file (UTF-8, non-ASCII payload text kept readable)
            backup_file = f"{backup_path}/{collection_name}_backup.json"
            with open(backup_file, 'w', encoding="utf-8") as f:
                json.dump(exported, f, indent=2, ensure_ascii=False)

            print(f"Backed up {collection_name} to {backup_file}")

    def restore_tenant(self, tenant_id: str, backup_path: str,
                       batch_size: int = 1000):
        """Upsert points from the backup files into the tenant's collections.

        Only collections that currently exist on the server are restored;
        a collection without a backup file is skipped.

        Args:
            tenant_id: tenant whose collections are restored.
            backup_path: directory holding the backup files.
            batch_size: number of points sent per ``upsert`` request.
        """
        # Local import keeps this block self-contained; the file already
        # depends on qdrant_client.
        from qdrant_client.models import PointStruct

        collections = self.manager.get_tenant_collections(tenant_id)

        for collection_name in collections:
            backup_file = f"{backup_path}/{collection_name}_backup.json"

            if not os.path.exists(backup_file):
                continue

            with open(backup_file, 'r', encoding="utf-8") as f:
                points_data = json.load(f)

            # Rebuild point objects from the plain dicts stored in the file.
            points = [
                PointStruct(
                    id=item["id"],
                    vector=item["vector"],
                    payload=item.get("payload"),
                )
                for item in points_data
            ]

            # Send in bounded chunks; an empty backup sends nothing.
            for start in range(0, len(points), batch_size):
                self.manager.client.upsert(
                    collection_name=collection_name,
                    points=points[start:start + batch_size],
                )

            print(f"Restored {collection_name} from {backup_file}")