In [None]:
! python3 -m pip install milvus pymilvus

In [None]:
# Restart the runtime here before running the cells below.
# https://github.com/deepset-ai/haystack/issues/1462

In [None]:
from google.colab import drive
import pandas as pd
import random
from milvus import default_server
from pymilvus import connections, utility

In [None]:
# Mount Google Drive at /content/drive. The CSVs are read later through the
# relative path 'drive/My Drive/...' (presumably the working directory is /content).
drive.mount('/content/drive')

In [None]:
# Directory (relative to the working directory) that holds every input CSV.
DATA_DIR = 'drive/My Drive/dev/20230424_recommend_erogame'

# Input tables. `user_df` and `game_df` are used later as positional lookups
# (`.iloc`); `scores_all_df` is queried by `uid` / `game_id` in the comparison cell.
scores_all_df = pd.read_csv(f'{DATA_DIR}/userbase_matrix.csv', encoding='utf-8')
user_df = pd.read_csv(f'{DATA_DIR}/userbase_user_map.csv', encoding='utf-8')
game_df = pd.read_csv(f'{DATA_DIR}/userbase_game_map.csv', encoding='utf-8')
# Long-format (uid, game_id, score) table that is pivoted into the vector matrix.
scores_df = pd.read_csv(f'{DATA_DIR}/score_df_pickupuser_202310.csv', encoding='utf-8')

# Distinct ids in order of first appearance in scores_df (`Series.unique` does
# not sort), so these lists are NOT guaranteed to match a sorted pivot index.
user_df_index = scores_df["uid"].unique().tolist()
game_df_index = scores_df["game_id"].unique().tolist()

In [None]:
# Build the user x game score matrix: one row per uid, one column per game_id.
# (uid, game_id) pairs without a score are filled with 0.
matrix_df = (
    scores_df
    .pivot(index='uid', columns='game_id', values='score')
    .astype('Int16')
    .fillna(0)
)

In [None]:
# matrix_df.iloc[0].values

In [None]:
# Disabled scratch check, kept as a bare string literal so it does not execute:
# it looks up whether scores_all_df has a score row for uid 0 / game_id 1.
'''
if len(scores_all_df.query("uid == 0 & game_id == 1")["score"]) == 0:
  print('empty')
else:
  print('not empty')
  print(int(scores_all_df.query("uid == 0 & game_id == 1")["score"]))
'''

In [None]:

# Sanity check: largest id and number of distinct ids for users and games.
print("uid max : "+str(scores_df["uid"].max()))
# This line reports a count, not a maximum, so it is labelled accordingly.
print("uid count : "+str(len(scores_df["uid"].unique().tolist())))
print("game_id max : "+str(scores_df["game_id"].max()))
print("game_id count : "+str(len(scores_df["game_id"].unique().tolist())))

In [None]:
# Start the local Milvus server exposed by `milvus.default_server` and open the
# default pymilvus connection to it on 127.0.0.1.

# (OPTIONAL) Set if you want store all related data to specific location
# Default location:
#   %APPDATA%/milvus-io/milvus-server on windows
#   ~/.milvus-io/milvus-server on linux
# default_server.set_base_dir('milvus_data')

# (OPTIONAL) if you want cleanup previous data
# default_server.cleanup()

# Start your milvus server
default_server.start()

# Now you could connect with localhost and the given port
# Port is defined by default_server.listen_port
connections.connect(host='127.0.0.1', port=default_server.listen_port)

# Check if the server is ready.
print(utility.get_server_version())

In [None]:
# コレクションの作成
# データベースは default 利用
# https://github.com/milvus-io/pymilvus/blob/master/examples/example.py
from pymilvus import (
    FieldSchema, CollectionSchema, DataType,
    Collection
)
def create_collection(name, id_field, vector_field, dim):
    """Create a two-field collection: an INT64 primary key and a float vector.

    Args:
        name: Name of the collection to create.
        id_field: Name of the INT64 primary-key field.
        vector_field: Name of the FLOAT_VECTOR field.
        dim: Dimension of the vector field.

    Returns:
        The `Collection` object built from the schema.
    """
    primary_key = FieldSchema(
        name=id_field, dtype=DataType.INT64, description="int64", is_primary=True,
    )
    embedding = FieldSchema(
        name=vector_field, dtype=DataType.FLOAT_VECTOR, description="float vector",
        dim=dim, is_primary=False,
    )
    schema = CollectionSchema(
        fields=[primary_key, embedding], description="collection description",
    )
    # The collection is created with a TTL property of 15 seconds.
    created = Collection(
        name=name, data=None, schema=schema,
        properties={"collection.ttl.seconds": 15},
    )
    print("\ncollection created:", name)
    return created

def set_properties(collection):
    """Set the collection-level TTL property to 1800 seconds.

    Args:
        collection: Object exposing `set_properties(properties=...)`.
    """
    ttl_properties = {"collection.ttl.seconds": 1800}
    collection.set_properties(properties=ttl_properties)

def create_index(collection, filed_name):
    """Create an IVF_FLAT / inner-product index on a field and print its params.

    Args:
        collection: Object exposing `create_index(field, params)` and `index()`.
        filed_name: Name of the field to index.
    """
    ivf_flat_ip = dict(
        metric_type="IP",
        index_type="IVF_FLAT",
        params=dict(nlist=1024),
    )
    collection.create_index(filed_name, ivf_flat_ip)
    built_index = collection.index()
    print("\nCreated index:\n{}".format(built_index.params))

def insert(collection, matrix=None):
    """Insert every row of the score matrix into `collection` as (id, vector).

    The primary keys are taken from the matrix's own index so that each id is
    paired with the vector of the same row. The previous version took the ids
    from `user_df_index`, which is in first-appearance order of `scores_df`,
    while the pivoted `matrix_df` rows are ordered by sorted uid; when
    `scores_df` is not already sorted by uid the ids ended up attached to the
    wrong vectors.

    Args:
        collection: Object exposing `insert([ids, vectors])`.
        matrix: DataFrame whose index holds the ids and whose rows are the
            vectors. Defaults to the notebook-level `matrix_df`.

    Returns:
        The list of row vectors, in matrix row order.
    """
    if matrix is None:
        matrix = matrix_df
    ids = matrix.index.tolist()
    scores = [row.values for _, row in matrix.iterrows()]
    collection.insert([ids, scores])
    return scores

def search(collection, vector_field, id_field, search_vectors):
    """Run an inner-product search (top 10, nprobe 16) and print every hit.

    For each hit the function also prints a profile URL built from the `uid`
    value found in the notebook-level `user_df` at position `hit.id`.

    Args:
        collection: Object exposing `search(**kwargs)`.
        vector_field: Name of the vector field to search on.
        id_field: Name of the id field; used in the filter `<id_field> >= 0`.
        search_vectors: Query vectors.
    """
    profile_url = "https://erogamescape.dyndns.org/~ap2/ero/toukei_kaiseki/user_infomation.php?user="
    hits_per_query = collection.search(
        data=search_vectors,
        anns_field=vector_field,
        param={"metric_type": "IP", "params": {"nprobe": 16}},
        limit=10,
        expr=id_field+" >= 0",
    )
    for query_no, hits in enumerate(hits_per_query):
        print("\nSearch result for {}th vector: ".format(query_no))
        for rank, hit in enumerate(hits):
            print("Top {}: {}".format(rank, hit))
            print(profile_url + user_df.iloc[hit.id].uid)

In [None]:
# create collection
# Primary key field is "uid"; the vector field is named "game_id" and has one
# dimension per distinct game id in scores_df.
collection = create_collection("eroge_score", "uid", "game_id", len(game_df_index))

# alter ttl properties of collection level
set_properties(collection)

# Insert all user vectors; `vectors` keeps them for use as search queries later.
vectors = insert(collection)
collection.flush()

# Index the vector field before loading the collection.
create_index(collection, "game_id")

collection.load()

In [None]:
# search
# `vectors` follows matrix_df row order, so the label is taken from matrix_df's
# index; user_df_index is in first-appearance order and may not line up with it.
print("target:" + str(matrix_df.index[1900:1903].tolist()))
search(collection, "game_id", "uid", vectors[1900:1903])

In [None]:
# (OPTIONAL) if you want cleanup previous data
# NOTE(review): despite the "OPTIONAL" label this call is not commented out, and
# it runs before the final stop() cell; confirm that is intended.
default_server.cleanup()

In [None]:
# Compare two users: list the games both have scored, side by side.
target1 = 4869
target2 = 288
# NOTE(review): index1/index2 are positions within user_df_index, yet they are
# used below as `uid` values in scores_all_df; confirm that scores_all_df is
# keyed by this position rather than by the raw uid.
index1 = user_df_index.index(target1)
index2 = user_df_index.index(target2)
df_t1 = scores_all_df.query("uid == @index1")
df_t2 = scores_all_df.query("uid == @index2")
check_df = df_t1.merge(df_t2, on="game_id", how='inner', suffixes=('_t1', '_t2'))
print(check_df)
# Iterate the game_id column directly: iterrows() upcasts a row to float when
# any column is float, and a float position makes `.iloc` raise.
for game_id in check_df["game_id"]:
    print(game_df.iloc[int(game_id)])

In [None]:
# Stop your milvus server
default_server.stop()
# Drop the pymilvus connection registered under the alias "default".
connections.remove_connection("default")