# Цель занятия - разобраться в векторной базе milvus.
# В качестве векторов будем использовать векторные представления пользователей и айтемов, полученные с помощью обучения модели машинного обучения (например разложение матрицы user-item)

In [1]:
import pandas as pd

from lightfm import LightFM
from lightfm.datasets import fetch_movielens
from lightfm.evaluation import precision_at_k
from lightfm.data import Dataset
import numpy as np

import random

from pymilvus import (
    connections,
    FieldSchema, CollectionSchema, DataType,
    Collection,
    db,
)
from pymilvus.orm import utility
import time
from scipy import stats



## Данные из movie lens

In [2]:
# выкачиваем данные
movies = pd.read_csv("ml-latest-small/movies.csv")
tags = pd.read_csv("ml-latest-small/tags.csv")
ratings = pd.read_csv("ml-latest-small/ratings.csv")

In [3]:
ratings["label"] = ratings["rating"] == 5.0

In [4]:
dataset = Dataset()
dataset.fit(users=ratings["userId"].to_list(),       
            items=ratings["movieId"].to_list())


In [5]:
(interactions, weights)  = dataset.build_interactions(ratings[["userId", "movieId", "label"]].iloc[:, 0:3].values)

In [6]:
mapping = dataset.mapping()

In [7]:
# обучаем модель
model = LightFM(loss='warp', no_components=20, learning_rate=0.05)
model.fit(interactions=interactions, epochs=30)


<lightfm.lightfm.LightFM at 0xffff7b3cde80>

## Получаем векторы, с которыми будем далее работать

In [8]:
# получаем эмбеддинги (векторные представления) айтемов
item_embs = model.get_item_representations()[1]
item_embs = item_embs/np.linalg.norm(item_embs, ord=2, axis = 1)[:, np.newaxis]

In [9]:
# получаем эмбеддинги (векторные представления) юзеров
user_embs =  model.get_user_representations()[1]
user_embs = user_embs/np.linalg.norm(user_embs, ord=2, axis = 1)[:, np.newaxis]

## Немного проверки того, что мы получили адекватные результаты

In [10]:
reversed_mapping = {}
for i in mapping[2].keys():
    reversed_mapping[mapping[2][i]] = i

In [11]:
def getMovieName(ind):
    return movies[movies["movieId"] == reversed_mapping[ind]]["title"].to_list()[0]

In [12]:
def getGenres(ind):
    return movies[movies["movieId"] == reversed_mapping[ind]]["genres"].to_list()[0]

In [13]:
dists = item_embs.dot(item_embs[70])

In [14]:
arr = dists.argsort()[-10:][::-1]


In [15]:
getMovieName(70)

'Raiders of the Lost Ark (Indiana Jones and the Raiders of the Lost Ark) (1981)'

In [16]:
[getMovieName(i) for i in arr]

['Raiders of the Lost Ark (Indiana Jones and the Raiders of the Lost Ark) (1981)',
 'Star Wars: Episode V - The Empire Strikes Back (1980)',
 'Indiana Jones and the Last Crusade (1989)',
 'Indiana Jones and the Temple of Doom (1984)',
 'Star Wars: Episode IV - A New Hope (1977)',
 'Alien (1979)',
 'Sixth Sense, The (1999)',
 'Godfather, The (1972)',
 'Aliens (1986)',
 'Die Hard (1988)']

# Начинаем работу с векторной базой

## Шаг 1: Подключаемся к базе

In [17]:
HOST = 'milvus-standalone'
PORT = '19530'
USER = "username"
USER_PASSWORD = "password"
db_name="default"

connections.connect(host=HOST,
                    port=PORT,
                    user=USER,
                    password=USER_PASSWORD,
                    db_name=db_name,
                    )


<pymilvus.orm.connections.Connections at 0xffff558ae430>

In [86]:
connections.list_connections()

[('default', None)]

In [85]:
connections.disconnect("default")

## Шаг 2: Создаем коллекцию

In [24]:
default_fields = [
    FieldSchema(name="id", dtype=DataType.INT64, is_primary=True),
    FieldSchema(name="name", dtype=DataType.VARCHAR, max_length=200),
    FieldSchema(name="genre", dtype=DataType.VARCHAR, max_length=200),
    FieldSchema(name="emb", dtype=DataType.FLOAT_VECTOR, dim=20)
]

In [25]:
default_schema = CollectionSchema(fields=default_fields)

In [30]:
current_collection = Collection(name="movies", schema=default_schema, using="default")

In [31]:
current_collection

<Collection>:
-------------
<name>: movies
<description>: 
<schema>: {'auto_id': False, 'description': '', 'fields': [{'name': 'id', 'description': '', 'type': <DataType.INT64: 5>, 'is_primary': True, 'auto_id': False}, {'name': 'name', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 200}}, {'name': 'genre', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 200}}, {'name': 'emb', 'description': '', 'type': <DataType.FLOAT_VECTOR: 101>, 'params': {'dim': 20}}]}

In [32]:
embs_list = [i.tolist() for i in item_embs]
names_list = [getMovieName(i) for i in range(len(embs_list))]
genres_list = [getGenres(i) for i in range(len(embs_list))]

In [36]:
len(embs_list)

9724

## Шаг 3: Заливаем векторы в коллекцию

In [33]:
# шаг 3.1: insert
num = len(embs_list)
data = [
    [i for i in range(num)],
    [names_list[i] for i in range(num)],
    [genres_list[i] for i in range(num)],
    [embs_list[i] for i in range(num)],
]
current_collection.insert(data)

(insert count: 9724, delete count: 0, upsert count: 0, timestamp: 453860413920772100, success count: 9724, err count: 0)

In [56]:
# шаг 3.2: flush
current_collection.flush()

## Шаг 4: Индексируем базу для быстрого доступа к векторами

## Index Types

|Название	| Вид индекса| Примечания|
| --- | --- | --- |
|**FLAT**	| нет| Небольшой датасет, 100% полнота (рекол)|
|**IVF_FLAT**	| Квантизационный индекс	| Быстрые запросы, Рекол настолько большой насколько возможно|
|**IVF_SQ8**	| Квантизационный индекс	| Быстрые запросы, Требуется ограничнное количество памяти, Небольшие компромисы по реколу|
**IVF_PQ**	| Квантизационный индекс | Очень быстрые запросы, Требуется ограничнное количество памяти, Большие компромисы по реколу|
**HNSW**	| Графовый индекс	| Очень быстрые запросы, Рекол настолько большой насколько возможно, Большие затраты памяти|
**SCANN**	| Квантизационный индекс	 | Очень быстрые запросы, Рекол настолько большой насколько возможно, Большие затраты памяти|

Помимо указанных индексов есть еще индексы для GPU

In [34]:
INDEX_TYPE = 'IVF_SQ8'

### Шаг 4.1: Помимо выбора типа индекса, надо определиться с метрикой подсчета расстояния 
#### - она будет очень сильно влиять на перфоманс и точность поиска, поэтому выбирайте метрику с умом

#### Виды метрик 
- Euclidean distance (L2)
- Inner product (IP)
- Cosine similarity (COSINE)


In [35]:
METRIC_TYPE = "IP"

In [38]:
NLIST = 1024 # а вот это - количество кластеров, на которое мы разбиваем наше пространство

#### Есть несколько подходов к определению оптимального количества кластеров
#### Правило большого пальца: В целом, значение nlist должно быть около √N. Это начальная оценка, и её можно уточнять экспериментально.
#### Говорят еще о √N * 4

In [39]:
# Шаг 4.2: создаем индекс для нашего поля "emb"
# Типы индексов https://milvus.io/docs/index.md
index_param = {
    "index_type": INDEX_TYPE,
    "params": {"nlist": NLIST},
    "metric_type": METRIC_TYPE}
current_collection.create_index("emb", index_param)

Status(code=0, message=)

In [40]:
# Шаг 4.3: Загружаем индекс в память
current_collection.load()

### Шаг 5: Мы загрузили векторы в память базы, можно ей наконец пользоваться и делать запросы на поиск ближайших к нашему вектору-зарпосу

#### Шаг 5.1: самый базовый запрос

In [41]:
NPROBE = 16

In [42]:
getMovieName(70)

'Raiders of the Lost Ark (Indiana Jones and the Raiders of the Lost Ark) (1981)'

In [43]:
%%time
results = current_collection.search(
            data = [embs_list[70]],
            anns_field = "emb",
            param = {"metric_type": METRIC_TYPE, "params": {"nprobe": NPROBE}},
            output_fields=['name'],
            limit = 10
            )

CPU times: user 4.18 ms, sys: 1.12 ms, total: 5.3 ms
Wall time: 18.3 ms


In [141]:
getMovieName(70)

'Raiders of the Lost Ark (Indiana Jones and the Raiders of the Lost Ark) (1981)'

In [44]:
for elements in results:
    for element in elements:
        print(element.entity.get('name'), element.distance)

Raiders of the Lost Ark (Indiana Jones and the Raiders of the Lost Ark) (1981) 0.9999999403953552
Star Wars: Episode V - The Empire Strikes Back (1980) 0.9606975317001343
Indiana Jones and the Last Crusade (1989) 0.9568753242492676
Indiana Jones and the Temple of Doom (1984) 0.926886260509491
Star Wars: Episode IV - A New Hope (1977) 0.9210113286972046
Alien (1979) 0.918973982334137
Sixth Sense, The (1999) 0.9135994911193848
Godfather, The (1972) 0.9086208343505859
Aliens (1986) 0.9065264463424683
Die Hard (1988) 0.903779923915863


In [45]:
[getMovieName(i) for i in arr]

['Raiders of the Lost Ark (Indiana Jones and the Raiders of the Lost Ark) (1981)',
 'Star Wars: Episode V - The Empire Strikes Back (1980)',
 'Indiana Jones and the Last Crusade (1989)',
 'Indiana Jones and the Temple of Doom (1984)',
 'Star Wars: Episode IV - A New Hope (1977)',
 'Alien (1979)',
 'Sixth Sense, The (1999)',
 'Godfather, The (1972)',
 'Aliens (1986)',
 'Die Hard (1988)']

In [46]:
%%time
#добавляем жанр в выходные поля, чтобы мы могли видеть помимо тайтла еще и жанр
results = current_collection.search(
            data = [embs_list[70]],
            anns_field = "emb",
            param = {"metric_type": METRIC_TYPE, "params": {"nprobe": NPROBE}},
            output_fields=['name', 'genre'],
            limit = 10
            )

CPU times: user 5.01 ms, sys: 124 µs, total: 5.13 ms
Wall time: 11.1 ms


In [77]:
getMovieName(70)

'Raiders of the Lost Ark (Indiana Jones and the Raiders of the Lost Ark) (1981)'

In [47]:
for elements in results:
    for element in elements:
        print(element.entity.get('name'), element.entity.get('genre'), element.distance)

Raiders of the Lost Ark (Indiana Jones and the Raiders of the Lost Ark) (1981) Action|Adventure 0.9999999403953552
Star Wars: Episode V - The Empire Strikes Back (1980) Action|Adventure|Sci-Fi 0.9606975317001343
Indiana Jones and the Last Crusade (1989) Action|Adventure 0.9568753242492676
Indiana Jones and the Temple of Doom (1984) Action|Adventure|Fantasy 0.926886260509491
Star Wars: Episode IV - A New Hope (1977) Action|Adventure|Sci-Fi 0.9210113286972046
Alien (1979) Horror|Sci-Fi 0.918973982334137
Sixth Sense, The (1999) Drama|Horror|Mystery 0.9135994911193848
Godfather, The (1972) Crime|Drama 0.9086208343505859
Aliens (1986) Action|Adventure|Horror|Sci-Fi 0.9065264463424683
Die Hard (1988) Action|Crime|Thriller 0.903779923915863


#### Шаг 5.2: Простой запрос без векторов
* Можно выполнять запросы как в обычном sql
* Запросы должны быть булевыми выражениями.
* Подробнее: https://milvus.io/docs/boolean.md

In [48]:
%%time
results = current_collection.query(
            expr="genre == 'Comedy'",
            output_fields=['name', 'genre'],
            )

CPU times: user 9.97 ms, sys: 1.03 ms, total: 11 ms
Wall time: 17.9 ms


In [49]:
for hits in results[0:5]:
    print(hits)

{'id': 11, 'name': 'Billy Madison (1995)', 'genre': 'Comedy'}
{'id': 12, 'name': 'Clerks (1994)', 'genre': 'Comedy'}
{'id': 18, 'name': 'Tommy Boy (1995)', 'genre': 'Comedy'}
{'id': 24, 'name': 'Dazed and Confused (1993)', 'genre': 'Comedy'}
{'id': 61, 'name': "Monty Python's Life of Brian (1979)", 'genre': 'Comedy'}


In [50]:
%%time
results = current_collection.query(
            expr="id > 9000",
            output_fields=['name', 'genre'],
            )

CPU times: user 8.67 ms, sys: 3.16 ms, total: 11.8 ms
Wall time: 20 ms


In [51]:
for hits in results[0:5]:
    print(hits)

{'id': 9001, 'name': 'Time Code (2000)', 'genre': 'Comedy|Drama'}
{'id': 9002, 'name': 'Stay Tuned (1992)', 'genre': 'Comedy'}
{'id': 9003, 'name': 'Brother from Another Planet, The (1984)', 'genre': 'Drama|Sci-Fi'}
{'id': 9004, 'name': 'Sleepwalkers (1992)', 'genre': 'Horror'}
{'id': 9005, 'name': 'Ghoulies II (1987)', 'genre': 'Comedy|Horror'}


In [52]:
%%time
# Проверка по префиксу
results = current_collection.query(
            expr="name like 'Toy%'",
            output_fields=['name', 'genre'],
            )

CPU times: user 2.35 ms, sys: 1.01 ms, total: 3.37 ms
Wall time: 6.99 ms


In [53]:
for hits in results[0:5]:
    print(hits)

{'name': 'Toy Story (1995)', 'genre': 'Adventure|Animation|Children|Comedy|Fantasy', 'id': 0}
{'name': 'Toys (1992)', 'genre': 'Comedy|Fantasy', 'id': 143}
{'name': 'Toy Story 2 (1999)', 'genre': 'Adventure|Animation|Children|Comedy|Fantasy', 'id': 735}
{'name': 'Toy Soldiers (1991)', 'genre': 'Action|Drama', 'id': 850}
{'name': 'Toy Story 3 (2010)', 'genre': 'Adventure|Animation|Children|Comedy|Fantasy|IMAX', 'id': 1133}


#### Шаг 5.3: Гибридные запросы
##### Можно накладывать ограничения по запросам на поиск вектора.

##### Шаг 5.3.1: Ограничение на значение полей

In [54]:
%%time
results = current_collection.search(
            data = [embs_list[70]],
            anns_field = "emb",
            param = {"metric_type": METRIC_TYPE, "params": {"nprobe": NPROBE}},
            output_fields=['name', 'genre'],
            limit = 10,
            expr="genre == 'Comedy'",
            )

CPU times: user 4.55 ms, sys: 2.37 ms, total: 6.93 ms
Wall time: 12.1 ms


In [55]:
for hits in results:
    for hit in hits:
        print("distance:", hit.distance, "name:", hit.entity.get('name'), "genre:", hit.entity.get('genre'))

distance: 0.8614852428436279 name: Airplane! (1980) genre: Comedy
distance: 0.8261951208114624 name: Ferris Bueller's Day Off (1986) genre: Comedy
distance: 0.8081492185592651 name: Monty Python's Life of Brian (1979) genre: Comedy
distance: 0.7901660203933716 name: Planes, Trains & Automobiles (1987) genre: Comedy
distance: 0.7847710847854614 name: Animal House (1978) genre: Comedy
distance: 0.7775080800056458 name: Monty Python's And Now for Something Completely Different (1971) genre: Comedy
distance: 0.7616171836853027 name: Dirty Rotten Scoundrels (1988) genre: Comedy
distance: 0.7320654988288879 name: Liar Liar (1997) genre: Comedy
distance: 0.7031007409095764 name: This Is Spinal Tap (1984) genre: Comedy
distance: 0.6963610649108887 name: I Heart Huckabees (2004) genre: Comedy


##### Сравним без ограничений

In [58]:
%%time
results = current_collection.search(
            data = [embs_list[70]],
            anns_field = "emb",
            param = {"metric_type": METRIC_TYPE, "params": {"nprobe": NPROBE}},
            output_fields=['name', "genre"],
            limit = 10
            )

CPU times: user 6.09 ms, sys: 95 µs, total: 6.18 ms
Wall time: 12.6 ms


In [59]:
for hits in results:
    for hit in hits:
        print("distance:", hit.distance, "name:", hit.entity.get('name'), "genre:", hit.entity.get('genre'))

distance: 0.9999999403953552 name: Raiders of the Lost Ark (Indiana Jones and the Raiders of the Lost Ark) (1981) genre: Action|Adventure
distance: 0.9606975317001343 name: Star Wars: Episode V - The Empire Strikes Back (1980) genre: Action|Adventure|Sci-Fi
distance: 0.9568753242492676 name: Indiana Jones and the Last Crusade (1989) genre: Action|Adventure
distance: 0.926886260509491 name: Indiana Jones and the Temple of Doom (1984) genre: Action|Adventure|Fantasy
distance: 0.9210113286972046 name: Star Wars: Episode IV - A New Hope (1977) genre: Action|Adventure|Sci-Fi
distance: 0.918973982334137 name: Alien (1979) genre: Horror|Sci-Fi
distance: 0.9135994911193848 name: Sixth Sense, The (1999) genre: Drama|Horror|Mystery
distance: 0.9086208343505859 name: Godfather, The (1972) genre: Crime|Drama
distance: 0.9065264463424683 name: Aliens (1986) genre: Action|Adventure|Horror|Sci-Fi
distance: 0.903779923915863 name: Die Hard (1988) genre: Action|Crime|Thriller


##### Шаг 5.3.2: Ограничение на расстояние

In [62]:
%%time
results = current_collection.search(
            data = [embs_list[70]],
            anns_field = "emb",
            param = {"metric_type": METRIC_TYPE, "params": {"nprobe": NPROBE,
                # искать вектора с расстоянием более 0.8
                "radius": 0.9,
                # отфильтровать самые близкие вектора с расстоянием более 0.95
                "range_filter" : 0.95}},
            output_fields=['name', 'genre'],
            limit = 10,
            )


CPU times: user 2.76 ms, sys: 4.04 ms, total: 6.79 ms
Wall time: 14 ms


In [63]:
for hits in results:
    for hit in hits:
        print("distance:", hit.distance, "name:", hit.entity.get('name'), "genre:", hit.entity.get('genre'))

distance: 0.926886260509491 name: Indiana Jones and the Temple of Doom (1984) genre: Action|Adventure|Fantasy
distance: 0.9210113286972046 name: Star Wars: Episode IV - A New Hope (1977) genre: Action|Adventure|Sci-Fi
distance: 0.918973982334137 name: Alien (1979) genre: Horror|Sci-Fi
distance: 0.9135994911193848 name: Sixth Sense, The (1999) genre: Drama|Horror|Mystery
distance: 0.9086208343505859 name: Godfather, The (1972) genre: Crime|Drama
distance: 0.9065264463424683 name: Aliens (1986) genre: Action|Adventure|Horror|Sci-Fi
distance: 0.903779923915863 name: Die Hard (1988) genre: Action|Crime|Thriller
distance: 0.9025976061820984 name: Back to the Future (1985) genre: Adventure|Comedy|Sci-Fi


#### Шаг 6: Давайте сравним перфоманс разных поисков по времени

In [64]:
num_reqs = 1000

In [65]:
# Поиск по вектору
vec_times = []
for i in range(num_reqs):
    t1 = time.time()

    results = current_collection.search(
                data = [embs_list[70]],
                anns_field = "emb",
                param = {"metric_type": METRIC_TYPE, "params": {"nprobe": NPROBE}},
                output_fields=['name', 'genre'],
                limit = 10,
                )
    t2 = time.time()
    vec_times.append(t2-t1)


In [66]:
# Гибридный поиск
hyb_times = []
for i in range(num_reqs):
    t1 = time.time()

    results = current_collection.search(
                data = [embs_list[i]],
                anns_field = "emb",
                param = {"metric_type": METRIC_TYPE, "params": {"nprobe": NPROBE}},
                output_fields=['name', 'genre'],
                limit = 10,
                expr="genre == 'Comedy'",
                )
    t2 = time.time()
    hyb_times.append(t2-t1)


In [67]:
# Интервальный запрос
range_times = []
for i in range(num_reqs):
    t1 = time.time()

    results = current_collection.search(
                data = [embs_list[70]],
                anns_field = "emb",
                param = {"metric_type": METRIC_TYPE, "params": {"nprobe": NPROBE,
                # искать вектора с расстоянием более 0.8
                "radius": 0.8,
                # отфильтровать самые близкие вектора с расстоянием более 0.95
                "range_filter" : 0.95}},
                output_fields=['name', 'genre'],
                limit = 10,
                )
    t2 = time.time()
    range_times.append(t2-t1)


In [68]:
def confidence_interval(data, confidence=0.95):
    # Размер выборки
    n = len(data)
    
    # Среднее значение выборки
    mean = np.mean(data)
    
    # Стандартное отклонение выборки
    std_err = np.std(data, ddof=1) / np.sqrt(n)  # ddof=1 для корректировки на n-1
    
    # Критическое значение (t-критерий для неизвестного стандартного отклонения)
    h = std_err * stats.t.ppf((1 + confidence) / 2, n - 1)
    
    return mean - h, mean, mean + h

In [69]:
confidence_interval(vec_times)

(0.0010285450538535868, 0.0010812833309173583, 0.0011340216079811299)

In [70]:
confidence_interval(hyb_times)

(0.0010964709221823923, 0.0011289124488830567, 0.0011613539755837212)

In [71]:
confidence_interval(range_times)

(0.0021660644095438893, 0.002207417011260986, 0.002248769612978083)

#### Шаг 7: Удаляем элементы

In [72]:
# удаляем только по айди (удаляем toy story и 

expr = "id in [0, 20]"

current_collection.delete(expr)

(insert count: 0, delete count: 2, upsert count: 0, timestamp: 453860902816186370, success count: 0, err count: 0)

In [75]:
getMovieName(0)

'Toy Story (1995)'

In [76]:
results = current_collection.search(
            data = [embs_list[0]],
            anns_field = "emb",
            param = {"metric_type": METRIC_TYPE, "params": {"nprobe": NPROBE}},
            output_fields=['name', 'genre'],
            limit = 10,
            )
for hits in results:
    for hit in hits:
        print("distance:", hit.distance, "name:", hit.entity.get('name'), "genre:", hit.entity.get('genre'), "id", hit.id)

distance: 0.9352405667304993 name: Mask, The (1994) genre: Action|Comedy|Crime|Fantasy id 22
distance: 0.9291642904281616 name: Jurassic Park (1993) genre: Action|Adventure|Sci-Fi|Thriller id 26
distance: 0.9222590327262878 name: Mission: Impossible (1996) genre: Action|Adventure|Mystery|Thriller id 37
distance: 0.9092432856559753 name: Jerry Maguire (1996) genre: Drama|Romance id 2419
distance: 0.9043625593185425 name: Back to the Future (1985) genre: Adventure|Comedy|Sci-Fi id 85
distance: 0.9040169715881348 name: Willy Wonka & the Chocolate Factory (1971) genre: Children|Comedy|Fantasy|Musical id 60
distance: 0.9036213159561157 name: Star Wars: Episode IV - A New Hope (1977) genre: Action|Adventure|Sci-Fi id 15
distance: 0.9001542925834656 name: Princess Bride, The (1987) genre: Action|Adventure|Comedy|Fantasy|Romance id 69
distance: 0.8963816165924072 name: Star Wars: Episode VI - Return of the Jedi (1983) genre: Action|Adventure|Sci-Fi id 73
distance: 0.8896968364715576 name: Grou

In [195]:
getMovieName(0)

'Toy Story (1995)'

#### Хотим залить вектор обратно

In [78]:
data = [
    [0],
    [names_list[0]],
    [genres_list[0]],
    [embs_list[0]]
]

In [79]:
current_collection.insert(data)

(insert count: 1, delete count: 0, upsert count: 0, timestamp: 453860949032960001, success count: 1, err count: 0)

In [80]:
current_collection.flush()

#### Проверим, что залили

In [81]:
results = current_collection.search(
            data = [embs_list[0]],
            anns_field = "emb",
            param = {"metric_type": METRIC_TYPE, "params": {"nprobe": NPROBE}},
            output_fields=['name', 'genre'],
            limit = 10,
            )

In [82]:
for hits in results:
    for hit in hits:
        print("distance:", hit.distance, "name:", hit.entity.get('name'), "genre:", hit.entity.get('genre'), "id", hit.id)

distance: 1.0015816688537598 name: Toy Story (1995) genre: Adventure|Animation|Children|Comedy|Fantasy id 0
distance: 0.9364252090454102 name: Mask, The (1994) genre: Action|Comedy|Crime|Fantasy id 22
distance: 0.9277326464653015 name: Jurassic Park (1993) genre: Action|Adventure|Sci-Fi|Thriller id 26
distance: 0.9235677719116211 name: Mission: Impossible (1996) genre: Action|Adventure|Mystery|Thriller id 37
distance: 0.9121944904327393 name: Jerry Maguire (1996) genre: Drama|Romance id 2419
distance: 0.9042680263519287 name: Back to the Future (1985) genre: Adventure|Comedy|Sci-Fi id 85
distance: 0.903944194316864 name: Willy Wonka & the Chocolate Factory (1971) genre: Children|Comedy|Fantasy|Musical id 60
distance: 0.9018750786781311 name: Star Wars: Episode IV - A New Hope (1977) genre: Action|Adventure|Sci-Fi id 15
distance: 0.9010976552963257 name: Princess Bride, The (1987) genre: Action|Adventure|Comedy|Fantasy|Romance id 69
distance: 0.8961119055747986 name: Star Wars: Episode 

#### Шаг 8: Удаляем индекс 

In [83]:
current_collection.release() # выгружаем индекс из памяти
current_collection.drop_index() # удаляем индекс, но еще не удаляем данные

#### Шаг 9: Удаляем коллекцию 

In [84]:
from pymilvus import utility
utility.drop_collection("movies")

