## Connecting to Milvus

In [21]:
from pymilvus import connections
import os

# Connection settings are read from the environment so that real credentials
# never have to be committed with the notebook. The fallbacks are the
# placeholder values this cell originally hard-coded, so behavior is unchanged
# when none of the MILVUS_* variables are set.
connections.connect(
  alias="default",
  user=os.environ.get("MILVUS_USER", 'username'),
  password=os.environ.get("MILVUS_PASSWORD", 'password'),
  host=os.environ.get("MILVUS_HOST", '0.0.0.0'),
  port=os.environ.get("MILVUS_PORT", '19530'),
)

## Creating Schema of VectorDB Collection

In [10]:
from pymilvus import CollectionSchema, FieldSchema, DataType
# Name under which the collection is registered.
collection_name = "book"

# Note on `default_value` (used by two fields below): the default is applied when
# the field is left empty during data inserts or upserts, and its data type must
# be the same as the one given in `dtype`.

# Primary-key field.
book_id = FieldSchema(name="book_id", dtype=DataType.INT64, is_primary=True)

# Title, falling back to "Unknown" when not supplied.
book_name = FieldSchema(name="book_name", dtype=DataType.VARCHAR, max_length=200, default_value="Unknown")

# Word count, falling back to 9999 when not supplied.
word_count = FieldSchema(name="word_count", dtype=DataType.INT64, default_value=9999)

# Two-dimensional float vector field.
book_intro = FieldSchema(name="book_intro", dtype=DataType.FLOAT_VECTOR, dim=2)

# Assemble the four fields into the collection schema, with dynamic fields enabled.
schema = CollectionSchema(
  fields=[book_id, book_name, word_count, book_intro],
  description="Test book search",
  enable_dynamic_field=True,
)


## Creating a Collection

In [11]:
from pymilvus import Collection
# Build the collection from `schema` under `collection_name`, using the
# "default" connection alias and two shards.
collection = Collection(name=collection_name, schema=schema, using='default', shards_num=2)


## Milvus Collection Functions

In [12]:
from pymilvus import Collection
# Get a handle on the existing "book" collection.
collection = Collection(name="book")

# Tour of the attributes exposed by a Collection. Only the final expression of
# the cell (`collection.indexes`) is shown as the cell output.

# The schema.CollectionSchema of the collection.
collection.schema
# The description of the collection.
collection.description
# The name of the collection.
collection.name
# Boolean indicating whether the collection is empty.
collection.is_empty
# The number of entities in the collection.
collection.num_entities
# The schema.FieldSchema of the primary key field.
collection.primary_field
# The list[Partition] object.
collection.partitions
# The list[Index] object.
collection.indexes
# Left disabled: collection.properties -- the expiration time of data in the collection.


[]

In [13]:
from pymilvus import utility
# Show the names of the collections that currently exist (here: ['book']).
utility.list_collections()

['book']

In [16]:
from pymilvus import Collection
# Re-open the existing "book" collection and display its schema.CollectionSchema.
collection = Collection(name="book")
collection.schema


{'auto_id': False, 'description': 'Test book search', 'fields': [{'name': 'book_id', 'description': '', 'type': <DataType.INT64: 5>, 'is_primary': True, 'auto_id': False}, {'name': 'book_name', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 200}}, {'name': 'word_count', 'description': '', 'type': <DataType.INT64: 5>}, {'name': 'book_intro', 'description': '', 'type': <DataType.FLOAT_VECTOR: 101>, 'params': {'dim': 2}}], 'enable_dynamic_field': True}

In [1]:
from pymilvus import connections
from pymilvus import CollectionSchema, FieldSchema, DataType
from pymilvus import utility
from pymilvus import Collection
from sentence_transformers import SentenceTransformer
from langchain.vectorstores import Milvus

import os

# Connection settings are read from the environment so that real credentials
# never have to be committed with the notebook. The fallbacks are the
# placeholder values this cell originally hard-coded, so behavior is unchanged
# when none of the MILVUS_* variables are set.
connections.connect(
  alias="default",
  user=os.environ.get("MILVUS_USER", 'username'),
  password=os.environ.get("MILVUS_PASSWORD", 'password'),
  host=os.environ.get("MILVUS_HOST", '0.0.0.0'),
  port=os.environ.get("MILVUS_PORT", '19530'),
)

def create_milvus_collections(dim):
    """Build the transcription and summary collections on the "default" connection.

    Two collections are defined. Each has a VARCHAR primary key ``text_id``
    (max_length 4096), one VARCHAR text field, and one FLOAT_VECTOR field of
    dimension ``dim``; both enable dynamic fields and use two shards:

    * ``transcribed_text``: ``transcribed_text`` (max_length 8192) and
      ``transcribed_text_embeddings``.
    * ``summarized_text``: ``summary_text`` (max_length 4096) and
      ``summary_text_embeddings``.

    Args:
        dim: Dimension of both embedding vector fields.

    Returns:
        tuple: ``(transcribe_collection, summary_collection)``, the two
        ``Collection`` objects. Earlier versions of this function assigned
        them to locals and discarded them (implicitly returning ``None``);
        callers that ignore the return value are unaffected.
    """

    def _text_id_field():
        # Both schemas use the same primary-key layout, but each one gets its
        # own FieldSchema instance (as in the original code) rather than
        # sharing a single object between the two schemas.
        return FieldSchema(name="text_id", dtype=DataType.VARCHAR, is_primary=True, max_length=4096)

    transcribe_schema = CollectionSchema(
        fields=[
            _text_id_field(),
            FieldSchema(name="transcribed_text", dtype=DataType.VARCHAR, max_length=8192),
            FieldSchema(name="transcribed_text_embeddings", dtype=DataType.FLOAT_VECTOR, dim=dim),
        ],
        description="Transcribed Text Collection",
        enable_dynamic_field=True,
    )
    summary_schema = CollectionSchema(
        fields=[
            _text_id_field(),
            FieldSchema(name="summary_text", dtype=DataType.VARCHAR, max_length=4096),
            FieldSchema(name="summary_text_embeddings", dtype=DataType.FLOAT_VECTOR, dim=dim),
        ],
        description="Summarized Text Collection",
        enable_dynamic_field=True,
    )

    t_collection = Collection(
        name="transcribed_text",
        schema=transcribe_schema,
        using='default',
        shards_num=2,
    )
    s_collection = Collection(
        name="summarized_text",
        schema=summary_schema,
        using='default',
        shards_num=2,
    )

    # Hand the handles back so callers can insert/search without re-opening them.
    return t_collection, s_collection

# SentenceTransformer instances that have already been loaded, keyed by model
# name, so repeated calls to create_embeddings do not reload the model.
_EMBEDDING_MODELS = {}


def create_embeddings(input_text, model_name="Muennighoff/SGPT-125M-weightedmean-nli-bitfit"):
    """Encode ``input_text`` with a SentenceTransformer model.

    The model is constructed on the first call for a given ``model_name`` and
    reused afterwards; previously a new ``SentenceTransformer`` was built on
    every call.

    Args:
        input_text: Value passed straight to the model's ``encode`` method.
        model_name: Model identifier handed to ``SentenceTransformer``.
            Defaults to the model this notebook originally hard-coded.

    Returns:
        Whatever ``SentenceTransformer.encode`` returns for ``input_text``.
    """
    model = _EMBEDDING_MODELS.get(model_name)
    if model is None:
        model = SentenceTransformer(model_name)
        _EMBEDDING_MODELS[model_name] = model

    return model.encode(input_text)



In [2]:
from pymilvus import utility
# Show the names of the collections that currently exist.
utility.list_collections()

['transcribed_text', 'summarized_text', 'book']

In [3]:
from pymilvus import Collection
# Open the existing "transcribed_text" collection and display its schema.CollectionSchema.
collection = Collection(name="transcribed_text")
collection.schema


{'auto_id': False, 'description': 'Transcribed Text Collection', 'fields': [{'name': 'text_id', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 4096}, 'is_primary': True, 'auto_id': False}, {'name': 'transcribed_text', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 4096}}, {'name': 'transcribed_text_embeddings', 'description': '', 'type': <DataType.FLOAT_VECTOR: 101>, 'params': {'dim': 768}}], 'enable_dynamic_field': True}

In [6]:
# Sample sentence passed to create_embeddings in the next cell.
text = "Hello, this should be embedded"

In [8]:
# Encode the sample sentence with the create_embeddings helper defined earlier.
embedded_text = create_embeddings(text)

[ 2.02105969e-01  1.84946692e+00 -1.47331035e+00 -5.13172865e-01
  6.94060773e-02  4.53420013e-01 -9.68684971e-01 -2.66722381e-01
 -6.21458888e-01 -9.83798876e-02 -6.63754880e-01  2.87147254e-01
  1.12723064e+00  8.14244211e-01  6.42336667e-01  1.17349219e+00
  1.73548404e-02 -4.09924090e-01  1.21874750e+00  2.47580141e-01
 -1.27779102e+00  4.93384957e-01  9.03274864e-02  1.55788016e+00
 -2.15529203e+00 -2.50851369e+00  1.11809932e-01  6.85017407e-01
  5.46949267e-01  2.30876222e-01 -2.06775331e+00 -6.91600859e-01
  2.45692298e-01 -1.77427292e-01 -1.33899879e+00  7.08162606e-01
 -7.36086965e-01 -2.92763710e-01  4.99359891e-02 -5.37186116e-02
 -1.19170821e+00 -1.10420537e+00 -3.59364003e-01  3.78641039e-01
  1.38330817e-01 -2.34214917e-01 -1.82530671e-01  4.07104157e-02
 -6.44343615e-01  1.55596614e+00  2.04011410e-01  8.03849638e-01
  1.65576589e+00  8.49263251e-01  8.98046017e-01 -1.18751526e-01
 -5.94457507e-01 -3.59719336e-01 -6.50382936e-02 -1.57453883e+00
  1.49545595e-01  5.21101