In [62]:
import json
import random
import pandas as pd
import sqlite3

"""
feature_table(ft):
    index, feature, image, label をカラムとしてDataFrame形式で格納したデータ.

feature_table_indexes(ftis): 
    feature_tableのindexをjson形式で格納したデータ. フィーチャの状態(train, trained, query, selected_query)を記録.
    train -> 次の学習で訓練するフィーチャのindex
    trained ->  1度以上学習済みのフィーチャのindex
    query -> 次の学習データを選択するために利用する近傍探索のクエリフィーチャのindex
    selected_query -> 1度以上選択されたクエリフィーチャのindex
"""

# Load feature_table (ft) from SQLite into a DataFrame.
def load_feature_table(dbpath, tablename="feature_table"):
    """Read every row of ``tablename`` from the SQLite database at ``dbpath``.

    Args:
        dbpath: Path of the SQLite database file, passed to ``sqlite3.connect``.
        tablename: Name of the table to read. It is concatenated into the SQL
            text (identifiers cannot be bound as query parameters), so it
            must come from a trusted source.

    Returns:
        pandas.DataFrame with the result of ``SELECT * FROM <tablename>``.
    """
    conn = sqlite3.connect(dbpath)
    try:
        return pd.read_sql('SELECT * FROM ' + tablename, conn)
    finally:
        # Release the database handle even when the query fails; otherwise
        # every call leaves an open connection behind.
        conn.close()

# Initialise feature_table_indexes (ftis).
def init_feature_table_indexes(feature_table):
    """Build a fresh ftis dict with one random query per label.

    Args:
        feature_table: DataFrame with at least "index" and "label" columns.

    Returns:
        dict with the keys "train", "trained", "query" and "selected_query".
        "query" holds one randomly chosen value of the "index" column for
        each label, in ascending label order; the other lists are empty.
    """
    labels = sorted(feature_table["label"].unique())
    rows_by_label = feature_table.groupby("label")
    query_indexes = [
        # Same draw as picking a single element at random from the label's ids.
        random.sample(rows_by_label.get_group(label)["index"].values.tolist(), 1)[0]
        for label in labels
    ]
    return {
        "train": [],
        "trained": [],
        "query": query_indexes,
        "selected_query": [],
    }

# Save feature_table_indexes (ftis) as JSON.
def save_feature_table_indexes(ftis, savepath):
    """Write ``ftis`` to ``savepath`` as JSON indented by 4 spaces.

    Args:
        ftis: JSON-serialisable feature_table_indexes dict.
        savepath: Destination file path; an existing file is overwritten.

    Raises:
        TypeError: if ``ftis`` contains a value ``json`` cannot serialise.
            In that case the file at ``savepath`` is left untouched.
    """
    # Serialise before opening the file: opening with "w" truncates it, so a
    # serialisation error raised afterwards would destroy the previous state.
    serialized = json.dumps(ftis, indent=4)
    with open(savepath, "w") as f:
        f.write(serialized)
        
# Load feature_table_indexes (ftis) as a dict.
def load_feature_table_indexes(ftis_path):
    """Parse the JSON file at ``ftis_path`` and return the decoded object.

    Args:
        ftis_path: Path of a JSON file.

    Returns:
        The decoded JSON content.
    """
    with open(ftis_path, "r") as ftis_file:
        raw_text = ftis_file.read()
    return json.loads(raw_text)

In [64]:
import json
import faiss
import numpy as np

# Selects the data whose features will be trained next.
class TrainFeatureSelector:
    """Holds the not-yet-trained part of a feature table and per-label faiss indexes.

    Attributes:
        ft: feature table without the rows listed in
            ``feature_table_indexes["trained"]``, row labels reset to 0..n-1.
        ftis: the ``feature_table_indexes`` dict passed in (same object).
        labels: sorted unique values of the "label" column of the full
            (unfiltered) feature table.
        faiss_indexes: dict mapping label -> faiss index; empty until
            ``make_faiss_indexes`` is called.
    """

    def __init__(self, feature_table, feature_table_indexes):
        self.ft = self.__drop_trained_data(feature_table, feature_table_indexes)
        self.ftis = feature_table_indexes
        self.labels = sorted(feature_table["label"].unique())
        self.faiss_indexes = {}  # per-label faiss index over the remaining features

    def __drop_trained_data(self, feature_table, feature_table_indexes):
        """Return ``feature_table`` without the rows that are already trained.

        The ids kept in ``feature_table_indexes`` are values of the "index"
        column, so rows are matched on that column. ``DataFrame.drop(index=...)``
        would match the frame's own row labels instead, which only works
        when both happen to coincide.
        """
        trained = feature_table_indexes["trained"]
        untrained_mask = ~feature_table["index"].isin(trained)
        return feature_table[untrained_mask].reset_index(drop=True)

    # Build the per-label faiss indexes used to search the features.
    def make_faiss_indexes(self):
        """Create one ``faiss.IndexFlatL2`` per label from ``self.ft``.

        Each value of the "feature" column is decoded with ``json.loads`` and
        the decoded vectors of a label are stacked into a float32 array.
        Labels with no remaining rows in ``self.ft`` get no index.
        """
        rows_by_label = self.ft.groupby("label")
        for label in self.labels:
            # self.labels comes from the unfiltered table, so a label whose
            # rows were all dropped as trained has no group here.
            if label not in rows_by_label.groups:
                continue
            group = rows_by_label.get_group(label)
            vectors = np.array(
                [json.loads(feature) for feature in group["feature"]]
            ).astype("float32")
            index = faiss.IndexFlatL2(vectors.shape[1])
            index.add(vectors)
            self.faiss_indexes[label] = index

    # For each label, select the dataN nearest neighbours (NN) of the query
    # and add the selected feature ids to ftis["train"].
    def select_NN_train(self, dataN):
        """Not implemented yet: currently does nothing and returns None."""
        pass

In [50]:
# Path of the SQLite database file, relative to the current working directory.
dbpath = "./assets/features_v1.db"
# Read the table (default table name) into a DataFrame.
feature_table = load_feature_table(dbpath)

In [52]:
# Build the initial ftis dict: one randomly chosen query per label, other lists empty.
feature_table_indexes = init_feature_table_indexes(feature_table)
# Bare expression so the notebook displays the resulting dict.
feature_table_indexes

{'query': [11601, 118, 14588],
 'selected_query': [],
 'train': [],
 'trained': []}

In [None]:
def select_NN_train(self, data_num, savepath):
    """Pick, for every label, the ``data_num`` nearest neighbours of its query.

    Written as a method; ``self`` must provide:
        feature_indexes: sequence of search indexes, one per label, each
            exposing ``ntotal`` and ``search(query, k)``.
        selected: list of three lists - [0] train ids, [1] query ids (one per
            entry of ``feature_indexes``), [2] already selected query ids.
        table: DataFrame with "index" and "feature" columns; "feature" holds
            JSON-encoded vectors.
        tables_labelby: per-label DataFrames; position ``j`` of entry ``i``
            is looked up for neighbour ``j`` returned by ``feature_indexes[i]``.

    Args:
        data_num: Number of neighbours to select per label (the query itself
            is never selected).
        savepath: File the newly selected train ids are written to with
            ``np.save``.

    Side effects:
        Appends the selected ids to ``self.selected[0]``, merges the used
        query ids into ``self.selected[2]`` (deduplicated) and writes
        ``savepath``.

    Raises:
        RuntimeError: if ``self.feature_indexes`` is empty.
    """
    if len(self.feature_indexes) == 0:
        # Raise instead of sys.exit(): `sys` is not imported in this notebook
        # and terminating the kernel is not an appropriate error path.
        raise RuntimeError("Please execute add_feature_indexes method in advance.")

    train_indexes = []
    selected_query_indexes = []
    query_indexes = self.selected[1]
    for i, index in enumerate(self.feature_indexes):
        query_index = query_indexes[i]
        query = self.table.loc[self.table["index"] == query_index, "feature"].iat[0]
        query = np.array([json.loads(query)]).astype("float32")

        # One extra neighbour is enough: the query can occupy at most one slot.
        k = min(data_num + 1, index.ntotal)
        _, neighbours = index.search(query, k)
        selected_query_indexes.append(query_index)

        label_table = self.tables_labelby[i]
        picked = 0
        for j in neighbours[0]:
            # Stop at exactly data_num ids, even when the query is not among
            # the returned neighbours.
            if picked >= data_num:
                break
            train_index = label_table.iloc[j]["index"]
            if train_index == query_index:
                continue
            train_indexes.append(train_index)
            picked += 1

    self.selected[0] = self.selected[0] + train_indexes
    self.selected[2] = list(set(self.selected[2] + selected_query_indexes))
    np.save(savepath, np.array(train_indexes))  # persist the train ids for the next round

In [58]:
"""
あああ
"""

'\nあああ\n'