In [3]:
# connection
from pymilvus import connections


# Open a connection registered under the "default" alias.
# NOTE(review): 27017 is not the port Milvus usually listens on (19530) —
# presumably a custom port mapping; confirm against the deployment.
connections.connect(alias="default", host="localhost", port="27017")

# Close that connection again.
connections.disconnect(alias="default")
# Alternative teardown, kept for reference:
# connections.remove_connection("default")

# Connect once more, this time without passing an explicit address.
connections.connect(alias="default")


In [6]:
# database
from pymilvus import  db


# Name of the database the rest of the notebook works in.
DB_NAME = "star"

# Create the database only when it is missing, so that re-running this cell
# (or the whole notebook) does not fail on an already-existing database.
# `db.create_database` is called purely for its side effect.
if DB_NAME not in db.list_database():
    db.create_database(DB_NAME)

# Switch the current connection over to the target database.
db.using_database(DB_NAME)

# Last expression of the cell: display every database known to the server.
db.list_database()
# To remove a database, kept for reference:
# db.drop_database("book")


['default', 'math', 'book', 'star']

In [18]:
# collection
from pymilvus import Collection, CollectionSchema, FieldSchema, DataType, utility

## Column names, types, etc. must be declared up front, and the schema must
## include a primary key.
book_id = FieldSchema("book_id", DataType.INT64, is_primary=True)

# For the two fields below, `default_value` is used whenever the field is left
# empty during data inserts or upserts. Its data type must be the same as the
# one given in `dtype`.
book_name = FieldSchema(
    "book_name",
    DataType.VARCHAR,
    max_length=200,
    default_value="Unknown",
)
word_count = FieldSchema("word_count", DataType.INT64, default_value=9999)

# `dim=2` is the dimensionality of the vector.
book_intro = FieldSchema("book_intro", DataType.FLOAT_VECTOR, dim=2)

schema = CollectionSchema(
    [book_id, book_name, word_count, book_intro],
    description="Test book search",
    enable_dynamic_field=True,
)

# Create the collection on the "default" connection with two shards.
collection_name = "book"
collection = Collection(collection_name, schema, using="default", shards_num=2)

# Rename the freshly created collection; from here on it is "lights4".
utility.rename_collection(collection_name, "lights4")

# Existence check and listing. Neither call is the last expression of the
# cell, so a notebook will not display their results.
utility.has_collection("lights1")
utility.list_collections()
# utility.drop_collection("lights")

# Switch to an already existing collection and load it with two replicas.
# NOTE(review): assumes "lights3" exists and the deployment can serve two
# replicas — confirm.
collection = Collection("lights3")
collection.load(replica_number=2)
# Release the collection again to reduce memory usage.
collection.release()

['light', 'lights4', 'lights', 'lights2', 'lights3', 'lights5']

In [None]:
# Create a Partition

# Get an existing collection. The collection that was created as "book" is
# renamed to "lights4" right after its creation, so "book" no longer exists on
# the server and the handle must be obtained under the new name.
collection = Collection("lights4")

# Create the partition only when it is missing so the cell can be re-run
# without failing on an already-existing partition.
if not collection.has_partition("novel"):
    collection.create_partition("novel")

In [None]:
# Insert data and delete
import pandas as pd
import numpy as np

# Read the rows to insert from a CSV file ("<Your_File>" is a placeholder
# that has to be replaced with a real path).
# NOTE(review): presumably the CSV columns must line up with the collection
# schema — verify before inserting.
insert_data = pd.read_csv("<Your_File>")
mr = collection.insert(data=insert_data)

# Delete the entities matched by the filter expression.
expr = "book_id in [0,1]"
collection.delete(expr=expr)

# Trigger a compaction on the collection.
collection.compact()

In [None]:
# Index
## `metric_type` selects the similarity metric. Available choices:
## For floating point vectors:
##   L2 (Euclidean distance)
##   IP (Inner product)
##   COSINE (Cosine similarity)
## For binary vectors:
##   JACCARD (Jaccard distance)
##   HAMMING (Hamming distance)
index_params = dict(
    metric_type="L2",
    index_type="IVF_FLAT",
    params=dict(nlist=1024),
)

# Build the index on the vector field.
collection.create_index("book_intro", index_params)

# Query the index build progress ("<Your_Collection>" is a placeholder for
# the collection name).
utility.index_building_progress("<Your_Collection>")

In [None]:
# search
# A collection has to be loaded into memory before it can be searched; the
# handle used here was never loaded after it was obtained, so load it first.
collection.load()

search_params = {
    "metric_type": "L2",
    "offset": 5,
    "ignore_growing": False,
    "params": {"nprobe": 10},
}

results = collection.search(
    data=[[0.1, 0.2]],
    anns_field="book_intro",
    # the sum of `offset` in `param` and `limit`
    # should be less than 16384.
    param=search_params,
    limit=10,
    expr=None,
    # List every column that should be returned with the hits. The name must
    # be a field of the collection schema; the string field declared in this
    # notebook's schema is "book_name" (there is no "title" field).
    output_fields=["book_name"],
    consistency_level="Strong",
)

# One query vector was sent, so `results[0]` holds the hits for that vector.
first_query_hits = results[0]

# IDs of all returned hits. Printed explicitly because only the last
# expression of a cell is displayed automatically.
print(first_query_hits.ids)

# Distances from the query vector to all returned hits.
print(first_query_hits.distances)

# Value of an output field requested in the search call, taken from the top hit.
hit = first_query_hits[0]
hit.entity.get("book_name")