In [11]:
import os

import openai
from pymilvus import DataType, Collection, connections, FieldSchema, CollectionSchema

In [27]:
# Read the OpenAI key from the environment instead of hardcoding it in the notebook.
# A missing variable raises KeyError here, so the problem surfaces before any API call.
openai.api_key = os.environ["OPENAI_API_KEY"]

# Register the "default" connection alias against the local Milvus server
connections.connect("default", host="localhost", port="19530")

In [22]:
# Collection layout: a primary key, the embedding vector, and the text it was computed from
fields = [
    # INT64 primary key declared with auto_id=True
    FieldSchema(
        name="pk",
        dtype=DataType.INT64,
        is_primary=True,
        auto_id=True,
    ),
    # Float vector with dim=1536
    FieldSchema(
        name="embedding",
        dtype=DataType.FLOAT_VECTOR,
        dim=1536,
    ),
    # Source text stored as VARCHAR with max_length=256
    FieldSchema(
        name="original_string",
        dtype=DataType.VARCHAR,
        max_length=256,
    ),
]
schema = CollectionSchema(
    fields=fields,
    description="Collection for OpenAI Embeddings",
)

In [23]:
# Bind a Collection handle named "OpenAI_Embeddings" to the schema defined above
collection = Collection(name="OpenAI_Embeddings", schema=schema)

In [24]:
# Input texts to embed; replace these placeholders with your own strings
string_array = [
    "string first",
    "string second",
    "string third",
]

In [25]:
# Parallel accumulators: embeddings[i] is the vector for original_strings[i]
embeddings, original_strings = [], []

In [28]:
# Embed all strings with OpenAI and rebuild both lists from scratch.
# Assigning (rather than appending) keeps this cell idempotent: re-running it
# never duplicates entries that a later insert would write to the collection twice.
if string_array:
    # One batched request for the whole array instead of one request per string
    response = openai.Embedding.create(
        input=list(string_array),
        model="text-embedding-ada-002"
    )
    # Each returned item records the position of its input; sort on it so that
    # embeddings[i] is guaranteed to belong to string_array[i]
    ordered_items = sorted(response["data"], key=lambda item: item["index"])
    embeddings = [item["embedding"] for item in ordered_items]
else:
    # Nothing to embed: skip the API call entirely
    embeddings = []
original_strings = list(string_array)

In [29]:
# Column-oriented payload: one list per field, in the order
# [embedding, original_string]
entities = [embeddings, original_strings]
insert_result = collection.insert(data=entities)

In [30]:
# Flush once the last entity has been inserted so that no growing segments
# are left in memory.
collection.flush()

In [31]:
# Index settings for the vector field: IVF_FLAT with the L2 metric and nlist=128
index = {"index_type": "IVF_FLAT", "metric_type": "L2", "params": {"nlist": 128}}
collection.create_index(field_name="embedding", index_params=index)

Status(code=0, message=)

In [32]:
# Load the collection into memory; this has to happen before a search
# is performed against it.
collection.load()