In [1]:
# Install the notebook's dependencies into the *kernel's* environment.
# `%pip` targets the interpreter running this notebook, whereas `!pip` uses
# whichever `pip` happens to be first on the shell PATH.
%pip install -q -r 002-requirements.txt

In [2]:
# Start the services described in the Milvus compose file in the background (-d).
!docker-compose -f docker-images/docker-compose-milvus.yml up -d

Creating network "milvus" with the default driver
Creating milvus-etcd ... 
Creating milvus-minio ... 
[1BCreating milvus-standalone ... [0m[1A[2K
[1Bting milvus-standalone ... [32mdone[0m

In [3]:
# Start a throwaway PostgreSQL container named `postgres0`:
#   --rm           remove the container as soon as it is stopped
#   -d             run detached
#   -p 5438:5432   publish the container's port 5432 on host port 5438
#                  (the port the psycopg2 connection in this notebook uses)
#   POSTGRES_HOST_AUTH_METHOD=trust
#                  turns off password checks - acceptable for a local demo only
!docker run --rm --name postgres0 -d  -p 5438:5432 -e POSTGRES_HOST_AUTH_METHOD=trust postgres
# Optional Redis container, currently disabled:
#!docker run  --name redis -d -p 6379:6379 redis

1250c1414f6a62d2f59b27f681d9f94cb42896bc2157e09f2c87bdd1ab34b240


In [4]:
%%bash

# Fixed 30-second pause so the containers started above (Milvus in particular)
# have time to boot; nothing here polls for readiness.
sleep 30

# Print the last 6 PostgreSQL log lines as a quick check that it is up.
docker logs postgres0 --tail 6

2023-06-04 20:23:23.211 UTC [1] LOG:  starting PostgreSQL 15.3 (Debian 15.3-1.pgdg110+1) on x86_64-pc-linux-gnu, compiled by gcc (Debian 10.2.1-6) 10.2.1 20210110, 64-bit
2023-06-04 20:23:23.212 UTC [1] LOG:  listening on IPv4 address "0.0.0.0", port 5432
2023-06-04 20:23:23.212 UTC [1] LOG:  listening on IPv6 address "::", port 5432
2023-06-04 20:23:23.217 UTC [1] LOG:  listening on Unix socket "/var/run/postgresql/.s.PGSQL.5432"
2023-06-04 20:23:23.224 UTC [64] LOG:  database system was shut down at 2023-06-04 20:23:23 UTC
2023-06-04 20:23:23.229 UTC [1] LOG:  database system is ready to accept connections


In [5]:
# Connect to Milvus and PostgreSQL
from pymilvus import connections, utility
import psycopg2
import numpy as np

# Endpoints of the two local services started by the Docker cells.
milvus_endpoint = dict(host='localhost', port='19530')
postgres_endpoint = dict(host='localhost', port='5438', user='postgres', password='postgres')

connections.connect(**milvus_endpoint)
conn = psycopg2.connect(**postgres_endpoint)

# Single cursor shared by the SQL cells further down.
cursor = conn.cursor()

In [6]:
def get_vectors(path="data/glove/glove.6B.50d.txt"):
    """Load word vectors from a GloVe-style text file.

    Every non-blank line is split on whitespace: the first token is taken as
    the word and the remaining tokens are parsed as float32 components.

    Args:
        path: Location of the UTF-8 encoded vector file. Defaults to the
            50-dimensional GloVe 6B file used by this notebook.

    Returns:
        A ``(words, embeddings)`` tuple of two parallel lists: ``words[i]`` is
        a string and ``embeddings[i]`` is its vector as a list of floats.
    """
    words = []
    embeddings = []

    # Stream the file in a single pass instead of materialising every line
    # and then walking the list twice.
    with open(path, 'r', encoding="utf-8") as f:
        for line in f:
            parts = line.split()
            if not parts:
                # A blank (or whitespace-only) line carries no token; skip it
                # rather than failing on parts[0].
                continue
            words.append(parts[0])
            embeddings.append(np.asarray(parts[1:], 'float32').tolist())

    return words, embeddings

# Load the vocabulary and vectors once; `words` and `embeddings` are reused by later cells.
words, embeddings = get_vectors()

In [7]:
from pymilvus import Collection, CollectionSchema, FieldSchema, DataType

# Names shared by the Milvus and PostgreSQL cells below.
TABLE_NAME = 'wikipedia_embed'
FIELD_NAME = 'embed_vector'
PARTITION_NAME = 'wikipedia'
COLLECTION_NAME = 'wikipedia_collection'

# The vector dimension is taken from the loaded embeddings rather than hard-coded.
dimension_size = len(embeddings[0])

# Schema: an auto-generated INT64 primary key plus one float-vector field.
pk = FieldSchema(name='id', dtype=DataType.INT64, is_primary=True, auto_id=True)
field = FieldSchema(name=FIELD_NAME, dtype=DataType.FLOAT_VECTOR, dim=dimension_size)
schema = CollectionSchema(fields=[pk, field], description=COLLECTION_NAME)

# Drop a collection left over from a previous run so the notebook can be re-run.
# `utility.has_collection` is the public existence check; the previous
# `utility.get_connection()` call reached into a helper that is not part of
# the documented utility API.
if utility.has_collection(COLLECTION_NAME):
    collection = Collection(COLLECTION_NAME)
    collection.drop()

collection = Collection(name=COLLECTION_NAME, schema=schema)
# NOTE(review): the insert cell does not pass a partition name, so this
# partition appears to stay empty - confirm whether that is intended.
partition = collection.create_partition(PARTITION_NAME)
print("Collection & partition are successfully created.")

Collection & partition are successfully created.


In [8]:
# Index settings applied to the vector field of the collection.
index_param = dict(
    metric_type="L2",
    index_type="IVF_SQ8",
    params=dict(nlist=1024),
)

collection.create_index(field_name=FIELD_NAME, index_params=index_param)

Status(code=0, message='')

In [9]:
# Insert the vectors in four batches and collect the primary keys Milvus
# generates, in insertion order, so they can be paired with `words` later.
ids = []
for batch in np.array_split(np.array(embeddings), 4):
    insert_result = collection.insert([batch.tolist()])
    ids += insert_result.primary_keys

# Empty placeholder; not referenced by any other cell visible in this notebook.
dicts = {}

In [10]:
# Drop any previously stored table so every run starts from a clean state.
drop_table = "DROP TABLE IF EXISTS " + TABLE_NAME
cursor.execute(drop_table)
conn.commit()

# Create the (ids, word) lookup table that maps Milvus primary keys to words.
try:
    sql = "CREATE TABLE if not exists " + TABLE_NAME + " (ids bigint, word text);"
    cursor.execute(sql)
    conn.commit()
    print("create postgres table successfully!")
except Exception as e:
    # A failed statement leaves the psycopg2 transaction in an aborted state;
    # roll back so later cells can keep using the same connection.
    conn.rollback()
    print("can't create a postgres table: ", e)


create postgres table successfully!


In [11]:

import os

# Commit anything still pending on the connection before the bulk load.
# The table-creation cell already commits, so on a linear run this is a no-op.
conn.commit()
def copy_data_to_pg(table_name, fname, conn, cur):
    """Bulk-load a '|'-delimited file into a PostgreSQL table via COPY.

    Failures are reported with ``print`` rather than raised, so the notebook
    keeps running; the transaction is rolled back in that case.

    Args:
        table_name: Name of the target table (interpolated into the COPY statement).
        fname: Path of the file to load; a relative path is resolved against
            the current working directory.
        conn: Open database connection; used to commit or roll back.
        cur: Cursor on ``conn`` that executes the COPY.
    """
    fname = os.path.join(os.getcwd(), fname)
    try:
        sql = "COPY " + table_name + " FROM STDIN (QUOTE E'\u0007', FORMAT 'csv', DELIMITER '|')"
        # Use the cursor that was passed in (not a module-level global) and
        # make sure the file handle is closed once the COPY has finished.
        with open(fname, "r") as data_file:
            cur.copy_expert(sql, data_file)
        conn.commit()
        print("Inserted into Postgress Sucessfully!")
    except Exception as e:
        print("Copy Data into Postgress failed: ", e)
        # Leave the connection usable: a failed COPY aborts the open transaction.
        conn.rollback()

# Scratch file used to hand the (id, word) pairs to PostgreSQL's COPY.
filename = 'pgloadfile'

try:
    with open(filename, 'w') as f:
        # `ids` are in insertion order, which matches the order of `words`.
        for row_id, word in zip(ids, words):
            # '|' is the COPY delimiter and the quote character is set to BEL,
            # so a word containing '|' would add a column and abort the whole
            # load. Skip every such word, not only the tokens "|" and ":|".
            if "|" in word:
                continue
            f.write(str(row_id) + "|" + word + "\n")

    copy_data_to_pg(TABLE_NAME, filename, conn, cursor)
finally:
    # Remove the scratch file even if writing or loading raised.
    if os.path.exists(filename):
        os.remove(filename)

Inserted into Postgress Sucessfully!


In [12]:
%%bash
# Teardown switch: set to 1 to stop the containers started by this notebook.
# With the default of 0 the services are left running.
L_END_DOCKER=0

if [[ ${L_END_DOCKER} -eq 1 ]]; then
    # Bring down the Milvus compose stack, including orphaned services.
    docker-compose -f docker-images/docker-compose-milvus.yml down --remove-orphans
    # postgres0 was started with --rm, so stopping it also removes the container.
    docker stop postgres0
fi

In [13]:
#!docker-compose -f docker-images/docker-compose.yml down --remove-orphans