# Langchain_milvus를 사용한 케이스

In [3]:
pip install -qU langchain_milvus

Note: you may need to restart the kernel to use updated packages.


In [5]:
from langchain_milvus import Milvus
from langchain_openai import OpenAIEmbeddings


collection_name = "cg_code_assist"
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

vector_store = Milvus(
    embedding_function=embeddings,
    connection_args={"uri": "http://localhost:19530"},
    collection_name=collection_name,
    index_params={"index_type": "FLAT", "metric_type": "L2"},
)


In [20]:
from pymilvus import connections, Collection

# ✅ Milvus 연결 설정
connections.connect("default", uri="http://localhost:19530")

# ✅ 컬렉션 정보 확인
collection = Collection("cg_code_assist")
print(f"Collection Name: {collection.name}")
print("Schema:", collection.schema)


Collection Name: cg_code_assist
Schema: {'auto_id': False, 'description': 'Vector collection with string ID', 'fields': [{'name': 'id', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 50}, 'is_primary': True, 'auto_id': False}, {'name': 'vector', 'description': '', 'type': <DataType.FLOAT_VECTOR: 101>, 'params': {'dim': 3072}}], 'enable_dynamic_field': True}


In [18]:
# collection 삭제

from pymilvus import MilvusClient

collection_name = "cg_code_assist"
client = MilvusClient(uri="http://localhost:19530")

# 컬렉션 존재 여부 확인 후 삭제 (필요하면 활성화)
if client.has_collection(collection_name):
    client.drop_collection(collection_name)



In [None]:
# 컬렉션에서 데이터 샘플 조회
results = collection.query(
    expr="id != ''",  # 모든 데이터를 조회하는 조건
    output_fields=["id", "vector"],  # 조회할 필드 지정
    limit=100  # 상위 5개 데이터만 가져옴
)

for result in results:
    print(result)


{'id': '0082321f-8919-4369-9317-ddf354770f65', 'vector': [np.float32(0.016090011), np.float32(-0.02504487), np.float32(-0.02118199), np.float32(0.040033482), np.float32(0.028971598), np.float32(0.026736874), np.float32(-0.017702205), np.float32(0.019218626), np.float32(0.002669697), np.float32(-0.0024621869), np.float32(0.034287047), np.float32(0.036330223), np.float32(0.0048325905), np.float32(-0.035340562), np.float32(-0.02429464), np.float32(0.020383874), np.float32(-0.051015552), np.float32(-0.003497742), np.float32(-0.02878005), np.float32(0.010583013), np.float32(-0.00029929337), np.float32(-0.012690038), np.float32(-0.00014964669), np.float32(0.035564035), np.float32(0.028732164), np.float32(0.021277763), np.float32(0.009026688), np.float32(0.0055469037), np.float32(0.01864398), np.float32(0.019761343), np.float32(-0.046833426), np.float32(0.0025000973), np.float32(0.02925892), np.float32(-0.0032922272), np.float32(-0.017191412), np.float32(0.018404547), np.float32(0.042140506),

# pymilvus를 사용한 케이스

In [None]:
pip install "pymilvus[model]"


In [2]:
from pymilvus import MilvusClient

# client = MilvusClient("milvus_demo.db") # sqlite
client = MilvusClient("http://localhost:19530")


In [None]:
if client.has_collection(collection_name="demo_collection"):
    client.drop_collection(collection_name="demo_collection")
    
client.create_collection(
    collection_name="demo_collection",
    dimension=768,  # The vectors we will use in this demo has 768 dimensions
)


In [4]:
from pymilvus import model


embedding_fn = model.DefaultEmbeddingFunction()

docs = [
    "이성욱은 AI 담당자이다.",
    "Artificial intelligence was founded as an academic discipline in 1956.",
    "Alan Turing was the first person to conduct substantial research in AI.",
    "Born in Maida Vale, London, Turing was raised in southern England.",
]

vectors = embedding_fn.encode_documents(docs)
print("Dim:", embedding_fn.dim, vectors[0].shape)  # Dim: 768 (768,)

data = [
    {"id": i, "vector": vectors[i], "text": docs[i], "subject": "history"}
    for i in range(len(vectors))
]

print("Data has", len(data), "entities, each with fields: ", data[0].keys())
print("Vector dim:", len(data[0]["vector"]))


  from .autonotebook import tqdm as notebook_tqdm


Dim: 768 (768,)
Data has 4 entities, each with fields:  dict_keys(['id', 'vector', 'text', 'subject'])
Vector dim: 768


In [5]:
res = client.insert(collection_name="demo_collection", data=data)

print(res)


{'insert_count': 4, 'ids': [0, 1, 2, 3]}


In [8]:
query_vectors = embedding_fn.encode_queries(["Who is Alan Turing?"])

res = client.search(
    collection_name="demo_collection",  # target collection
    data=query_vectors,  # query vectors
    limit=2,  # number of returned entities
    output_fields=["text", "subject"],  # specifies fields to be returned
)

print(res)


data: ["[{'id': 3, 'distance': 0.5859944820404053, 'entity': {'text': 'Born in Maida Vale, London, Turing was raised in southern England.', 'subject': 'history'}}, {'id': 2, 'distance': 0.5118255615234375, 'entity': {'text': 'Alan Turing was the first person to conduct substantial research in AI.', 'subject': 'history'}}]"]


In [11]:
from pymilvus import connections, Collection

# Milvus 서버 연결
connections.connect(
    alias="default",
    host="localhost",  # Milvus 서버 주소
    port="19530"       # Milvus 기본 포트 (클러스터 환경에서는 변경 가능)
)

# Collection 객체 가져오기
collection = Collection("demo_collection")

# Flush 실행 (Growing Segment → Sealed Segment로 변환)
collection.flush()

print("Flush completed successfully!")


Flush completed successfully!
