# Vector Database for Snapshot Organizations

In [None]:
from pymilvus import MilvusClient, model

In [None]:
from folioclient import FolioClient

In [None]:
# Create the FolioClient used for every FOLIO request below.
# NOTE(review): the positional arguments look like gateway URL, tenant,
# username and password -- confirm against the FolioClient signature.
# The credentials are hard-coded; presumably the public snapshot demo login.
snapshot_client = FolioClient("https://folio-snapshot-okapi.dev.folio.org/",
                              "diku",
                              "diku_admin",
                              "admin")

In [None]:
# Organization records exposed by the client; the cells below iterate over
# them and read each record's 'id', 'name' and 'addresses' keys.
# NOTE(review): presumably this triggers an HTTP fetch of all organizations
# -- confirm.
orgs = snapshot_client.organizations

In [None]:
# Show how many organization records were returned.
len(orgs)

In [None]:
# Build one text blob per organization -- its name followed by its first
# address -- to be used as the input document for embedding.
# Every address part is read with .get() (a missing key or a None value is
# treated as empty) so a sparsely filled address no longer raises KeyError.
# The text produced for a fully populated address is unchanged.
names_addrs = []
for row in orgs:
    field = f"{row['name']}\n"
    addresses = row.get('addresses') or []
    if addresses:
        # Only the first listed address is included in the text.
        address = addresses[0]
        for line_key in ('addressLine1', 'addressLine2'):
            line = address.get(line_key) or ''
            if line:
                field += f"{line}\n"
        field += f"{address.get('city') or ''},"
        state_region = address.get('stateRegion') or ''
        if state_region:
            field += f" {state_region}, "
        field += f" {address.get('zipCode') or ''} {address.get('country') or ''}"
    names_addrs.append(field)

In [None]:
# Spot-check one generated name/address text (the fifth organization).
names_addrs[4]

## Milvus Client setup

In [None]:
from pymilvus import MilvusClient

In [None]:
# Instantiate pymilvus' default embedding function; it is used below to
# encode both the organization texts and the search query.
embedding_fn = model.DefaultEmbeddingFunction()

In [None]:
# Open the Milvus client against "orgs.db".
# NOTE(review): presumably a local, file-backed Milvus Lite database that
# persists between notebook runs -- confirm.
client = MilvusClient("orgs.db")

### Create an Orgs Collection

In [None]:
# Create the collection that will hold the organization vectors.
# The dimension is taken from the embedding function instead of being
# hard-coded, so the collection always matches the vectors inserted below
# (768 for the default model used in this demo).
client.create_collection(
    collection_name="folio_orgs",
    dimension=embedding_fn.dim,
)

### Generate a 768-Dimension Vector for Each Org's Name and Address

In [None]:
# Encode every name/address text into an embedding vector. The result is
# zipped with `orgs` below, so it is expected to be in the same order.
org_vectors = embedding_fn.encode_documents(names_addrs)

In [None]:
# Inspect the vector generated for the first organization.
org_vectors[0]

### Add Metadata for each Document

In [None]:
# Pair every organization with its embedding vector and wrap each pair in
# the entity dict that gets inserted into the collection. "id" is the
# position of the organization in `orgs`; "uuid" and "name" are carried
# along so that search hits can be traced back to the FOLIO record.
data = [
    {
        "id": idx,
        "uuid": org['id'],
        "name": org['name'],
        "vector": vector,
    }
    for idx, (org, vector) in enumerate(zip(orgs, org_vectors))
]

In [None]:
# Sanity-check the prepared entities: how many there are, which keys each
# one carries, and the length of the embedding vector.
first_entity = data[0]
print(f"Data size is {len(data)} entities, each with the following keys: {first_entity.keys()}")
print("Vector dim:", len(first_entity["vector"]))

### Add Organizations to the Collection

In [None]:
# Insert all prepared entities into the "folio_orgs" collection and print
# whatever the client reports back.
# NOTE(review): re-running this cell against an existing "orgs.db" may add
# the same ids again -- confirm whether insert rejects duplicates.
result = client.insert("folio_orgs", data=data)
print(result)

### Vector Search

In [None]:
# Encode the search text with the same embedding function that produced
# the organization vectors.
query_vectors = embedding_fn.encode_queries(["Amazon Capital Services"])

In [None]:
# Search the "folio_orgs" collection with the encoded query, asking for at
# most two hits and returning the stored "uuid" and "name" fields with each.
query_result = client.search(
    collection_name="folio_orgs",
    data=query_vectors,
    limit=2,
    output_fields=["uuid", "name"]
)

In [None]:
# Display the raw search response.
query_result