In [1]:
# Display the value of every top-level expression in a cell (not only the last
# one), so intermediate results show up without explicit print() calls.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [2]:
import re
import pandas as pd
import numpy as np
import torch
import pyarrow as pa
import pyarrow.compute as pc
from nn_rag import Knowledge
from ds_core.handlers.abstract_handlers import ConnectorContract
from tqdm.autonotebook import tqdm, trange
from sentence_transformers import SentenceTransformer, util


In [3]:
# Build the nn_rag Knowledge instance through its from_memory() constructor
# (presumably an in-memory instance with no persisted state — TODO confirm).
kn = Knowledge.from_memory()

In [4]:
# Sample complaint statements, one per entry. They are joined into a single
# '. '-separated string; the final statement carries no trailing period.
complaint_statements = (
    'You took too long',
    'You are not easy to deal with',
    'Payment Failure/Incorrect Payment',
    'You provided me with incorrect information',
    'Unhappy with delay',
    'Unsuitable advice',
    'You never answered my question',
    'You did not understand my needs',
    'I have been mis-sold',
    'My details are not accurate',
    'You have asked for too much information',
    'You were not helpful',
    'Payment not generated/received by customer',
    'You did not keep me updated',
    'Incorrect information given',
    'The performance of my product was poor',
    'No reply to customer contact',
    'Requested documentation not issued',
    'You did not explain the terms & conditions',
    'Policy amendments not carried out',
    'You did not explain the next steps/process to me',
    'I cannot understand your letter/comms',
    'Standard letter inappropriate',
    'Customer payment processed incorrectly',
    'All points not addressed',
    'Could not understand the agent',
    'Issue with terms and conditions',
    'Misleading information',
    'I can not use the customer portal',
    'your customer portal is unhelpful',
)
text = '. '.join(complaint_statements)

## Milvus

In [5]:
from pymilvus import connections, db
from pymilvus import CollectionSchema, FieldSchema, DataType
from pymilvus import Collection, utility


In [6]:
# Name of the Milvus database that is created below when it does not exist yet.
database = 'rai'

In [7]:
# Connect to the Milvus server on localhost, then report which databases exist.
# NOTE(review): connections.connect() may not return a handle (pymilvus
# documents no return value), in which case `conn` is None — confirm before
# relying on it elsewhere.
conn = connections.connect(host="127.0.0.1", port=19530)
known_databases = db.list_database()
print(f"database list {known_databases}")

database list ['default']


In [8]:
# if database in db.list_database():
#     db.drop_database(database)

In [9]:
# Create the target database on first run, then make it the active database
# for this connection. Without the using_database() call the database is
# created but never selected, so the collections created below would land in
# Milvus' 'default' database instead.
if database not in db.list_database():
    db.create_database(database)
db.using_database(database)

### Collections

In [10]:
# Report the collections currently visible through this connection.
existing_collections = utility.list_collections()
print(f"collection list {existing_collections}")

collection list []


In [11]:
# Name of the Milvus collection that is looked up (or created) below.
collection_name = 'demo'

In [15]:
# Reuse the collection when it already exists; otherwise create it together
# with its schema and a vector index, so the cell is safe to re-run.
if utility.has_collection(collection_name):
    collection = Collection(collection_name)
else:
    # Schema: a caller-supplied string primary key, two text fields, and a
    # 768-dimensional float vector holding the embedding.
    fields = [
        FieldSchema(name="id", dtype=DataType.VARCHAR, auto_id=False, is_primary=True, max_length=64),
        FieldSchema(name="metadata", dtype=DataType.VARCHAR, max_length=256),
        FieldSchema(name="source", dtype=DataType.VARCHAR, max_length=1024),
        FieldSchema(name="embeddings", dtype=DataType.FLOAT_VECTOR, dim=768)
    ]
    schema = CollectionSchema(fields=fields)
    collection = Collection(
        name=collection_name,
        schema=schema,
        num_shards=2,
        consistency_level='Strong')
    # Index on the vector field. The metric type uses the documented
    # upper-case spelling ("L2"), and IVF_FLAT's `nlist` build parameter is
    # given explicitly (128 is the documented default) instead of relying on
    # an empty params dict, which not every Milvus version accepts.
    index_params = {
        "metric_type": "L2",
        "index_type": "IVF_FLAT",
        "params": {"nlist": 128}
    }
    collection.create_index("embeddings", index_params)


In [17]:
# Inspect the collection's schema and settings (fields, shards, consistency level).
collection.describe()

{'collection_name': 'demo',
 'auto_id': False,
 'num_shards': 2,
 'description': '',
 'fields': [{'field_id': 100,
   'name': 'id',
   'description': '',
   'type': <DataType.VARCHAR: 21>,
   'params': {'max_length': 64},
   'is_primary': True},
  {'field_id': 101,
   'name': 'metadata',
   'description': '',
   'type': <DataType.VARCHAR: 21>,
   'params': {'max_length': 256}},
  {'field_id': 102,
   'name': 'source',
   'description': '',
   'type': <DataType.VARCHAR: 21>,
   'params': {'max_length': 1024}},
  {'field_id': 103,
   'name': 'embeddings',
   'description': '',
   'type': <DataType.FLOAT_VECTOR: 101>,
   'params': {'dim': 768}}],
 'aliases': [],
 'collection_id': 451058623362635325,
 'consistency_level': 0,
 'properties': {},
 'num_partitions': 1,
 'enable_dynamic_field': False}