In [None]:
! python3 -m pip install milvus pymilvus

In [None]:
# Restart the runtime here before running the cells below (see the issue linked on the next line).
# https://github.com/deepset-ai/haystack/issues/1462

In [None]:
from google.colab import drive
import pandas as pd
import random

In [None]:
# Mount Google Drive under /content/drive.
drive.mount('/content/drive')

In [None]:
# Folder (relative to the working directory) that holds the exported CSV files.
DATA_DIR = 'drive/My Drive/dev/20230424_recommend_erogame'
# Fixed seed so the same users are sampled on every Restart & Run All.
SAMPLE_SEED = 42
# Number of users to sample as insert/search targets.
N_TARGET_USERS = 10

scores_tmp_df = pd.read_csv(DATA_DIR + '/userbase_matrix.csv', encoding='utf-8')
user_df = pd.read_csv(DATA_DIR + '/userbase_user_map.csv', encoding='utf-8')
game_df = pd.read_csv(DATA_DIR + '/userbase_game_map.csv', encoding='utf-8')
scores_df = pd.read_csv(DATA_DIR + '/score_df_pickupuser_202310.csv', encoding='utf-8')

# Sample with a private, seeded generator; cap the sample size so a table with
# fewer than N_TARGET_USERS users does not make random.sample raise ValueError.
unique_uids = scores_df["uid"].unique().tolist()
target_ids = random.Random(SAMPLE_SEED).sample(unique_uids, min(N_TARGET_USERS, len(unique_uids)))

In [None]:
# Sanity check: look up the score stored for uid 0 / game_id 1.
# The query is evaluated once and reused for both the emptiness test and the print.
matched_scores = scores_tmp_df.query("uid == 0 & game_id == 1")["score"]
if matched_scores.empty:
  print('empty')
else:
  print('not empty')
  # int() on a whole Series is deprecated in pandas and fails when several
  # rows match, so the first matching value is picked explicitly.
  print(int(matched_scores.iloc[0]))

In [None]:
# Number of distinct uid values in the score table (a missing value counts as one entry).
scores_df["uid"].nunique(dropna=False)

In [None]:
# Largest game_id present in the score table.
scores_df["game_id"].max()

In [None]:
from milvus import default_server
from pymilvus import connections, utility

# (OPTIONAL) Uncomment to store all Milvus data under a specific directory.
# Default location:
#   %APPDATA%/milvus-io/milvus-server on Windows
#   ~/.milvus-io/milvus-server on Linux
# default_server.set_base_dir('milvus_data')

# (OPTIONAL) Uncomment to clean up data left over from a previous run.
# default_server.cleanup()

# Start the local Milvus server.
default_server.start()

# Connect to the server on localhost, using the port it reports through
# default_server.listen_port.
connections.connect(host='127.0.0.1', port=default_server.listen_port)

# Print the server version as a check that the server is ready.
print(utility.get_server_version())

In [None]:
# Create the collection.
# The default database is used.
# https://github.com/milvus-io/pymilvus/blob/master/examples/example.py
from pymilvus import (
    FieldSchema, CollectionSchema, DataType,
    Collection
)
def create_collection(name, id_field, vector_field, dim):
    """Build a two-field Milvus collection and return it.

    Args:
        name: Name of the collection.
        id_field: Name of the INT64 primary-key field.
        vector_field: Name of the FLOAT_VECTOR field.
        dim: Dimension of the vector field.

    Returns:
        The ``Collection`` object constructed from the schema.
    """
    schema = CollectionSchema(
        fields=[
            FieldSchema(name=id_field, dtype=DataType.INT64, description="int64", is_primary=True),
            FieldSchema(name=vector_field, dtype=DataType.FLOAT_VECTOR, description="float vector",
                        dim=dim, is_primary=False),
        ],
        description="collection description",
    )
    # The collection is created with a 15 second TTL property.
    new_collection = Collection(
        name=name,
        data=None,
        schema=schema,
        properties={"collection.ttl.seconds": 15},
    )
    print("\ncollection created:", name)
    return new_collection

def set_properties(collection):
    """Set the collection-level TTL property of ``collection`` to 1800 seconds."""
    ttl_properties = {"collection.ttl.seconds": 1800}
    collection.set_properties(properties=ttl_properties)

def create_index(collection, filed_name):
    """Create an IVF_FLAT / L2 index (nlist=1024) on one field and print its parameters.

    Args:
        collection: Collection object to index.
        filed_name: Name of the field the index is created on.
    """
    collection.create_index(
        filed_name,
        dict(metric_type="L2", index_type="IVF_FLAT", params=dict(nlist=1024)),
    )
    # Read the parameters back from the collection's index and show them.
    built_params = collection.index().params
    print("\nCreated index:\n{}".format(built_params))

def insert(collection, ids=None, score_frame=None, dim=None):
    """Build one dense score vector per user and insert them into ``collection``.

    Position ``g`` of a user's vector holds that user's score for ``game_id == g``
    (converted with ``int``), or 0 when the user has no score for that game.

    Args:
        collection: Object exposing ``insert([ids, vectors])``.
        ids: User ids to insert. Defaults to the notebook-level ``target_ids``.
        score_frame: DataFrame with ``uid``, ``game_id`` and ``score`` columns.
            Defaults to the notebook-level ``scores_df``.
        dim: Length of each vector. Defaults to ``score_frame["game_id"].max()``,
            so game ids ``0 .. max - 1`` are covered and the largest game id is
            not included.

    Returns:
        The list of score vectors, in the same order as ``ids``.
    """
    if ids is None:
        ids = target_ids
    if score_frame is None:
        score_frame = scores_df
    if dim is None:
        dim = int(score_frame["game_id"].max())
    ids = list(ids)

    # Filter the table once instead of running one full-frame query per
    # (user, game) pair.
    wanted = score_frame[score_frame["uid"].isin(ids)]
    wanted = wanted[(wanted["game_id"] >= 0) & (wanted["game_id"] < dim)]
    # If a (uid, game_id) pair occurs more than once, the first row is used.
    wanted = wanted.drop_duplicates(subset=["uid", "game_id"], keep="first")

    vector_by_uid = {uid: [0] * dim for uid in ids}
    for uid, game_id, score in zip(wanted["uid"], wanted["game_id"], wanted["score"]):
        vector_by_uid[uid][int(game_id)] = int(score)

    scores = []
    for uid in ids:
        print("progress..... " + str(uid))
        # Copy so that a uid repeated in ``ids`` does not share one list object.
        scores.append(list(vector_by_uid[uid]))

    collection.insert([
        ids,
        scores
    ])
    return scores


def get_entity_num(collection):
    """Print a header line followed by ``collection.num_entities``."""
    header = "\nThe number of entity:"
    print(header)
    entity_total = collection.num_entities
    print(entity_total)

def search(collection, vector_field, id_field, search_vectors):
    """Run an L2 search (nprobe=16, top 3) for each query vector and print the hits.

    Args:
        collection: Collection object to search.
        vector_field: Name of the vector field to search against.
        id_field: Name of the id field; used in the filter ``<id_field> >= 0``.
        search_vectors: Query vectors passed as the ``data`` argument.
    """
    hits_per_query = collection.search(
        data=search_vectors,
        anns_field=vector_field,
        param={"metric_type": "L2", "params": {"nprobe": 16}},
        limit=3,
        expr=id_field + " >= 0",
    )
    for query_no, hits in enumerate(hits_per_query):
        print("\nSearch result for {}th vector: ".format(query_no))
        for rank, hit in enumerate(hits):
            print("Top {}: {}".format(rank, hit))

In [None]:
# Create the collection: "uid" is the primary key and "game_id" is the vector field.
# int() turns the numpy integer returned by max() into a plain Python int
# before it is used as the vector dimension.
collection = create_collection("eroge_score", "uid", "game_id", int(scores_df["game_id"].max()))

# Alter the collection-level TTL property.
set_properties(collection)

vectors = insert(collection)
collection.flush()

get_entity_num(collection)

create_index(collection, "game_id")

collection.load()

# Search with the first three inserted vectors.
# target_ids[:3] is a list, so it is converted to str before concatenation
# (str + list raises TypeError).
print("target:" + str(target_ids[:3]))
search(collection, "game_id", "uid", vectors[:3])

In [None]:
# Stop the Milvus server that was started earlier in this notebook.
default_server.stop()