# basic setting

In [64]:
import numpy as np
import torch
from utils import (read_meta, read_probs, l2norm, knns2ordered_nbrs,
                   intdict2ndarray, Timer)
from mmcv import Config 
# from lgcn.models import build_model
# from lgcn import build_handler

In [65]:
# Path of the L-GCN training config, parsed into an mmcv Config object.
config = 'lgcn/configs/cfg_train_lgcn_ms1m.py'
cfg = Config.fromfile(config)

In [66]:
# Show the training-data section of the loaded config (input paths and graph settings).
print(cfg.train_data)

{'feat_path': './data/features/part0_train.bin', 'label_path': './data/labels/part0_train.meta', 'knn_graph_path': './data/knns/part0_train/faiss_k_80.npz', 'k_at_hop': [200, 10], 'active_connection': 10, 'is_norm_feat': True, 'is_sort_knns': True}


# Data class setting

In [67]:
class datacutting():
    """Load a dataset split and keep only the rows ``cut_start:cut_end``.

    The features, the labels (when a label file is configured) and the kNN
    graph are all sliced with the same row range, so the resulting
    attributes can be written out as a small stand-alone dataset.

    Args:
        cfg: mapping with the keys ``feat_path``, ``knn_graph_path``,
            ``k_at_hop`` and ``active_connection``; ``label_path``,
            ``feature_dim``, ``is_norm_feat``, ``is_sort_knns`` and
            ``is_test`` are optional.
        cut_start: first row to keep (default 0).
        cut_end: one past the last row to keep (default 500).

    Attributes:
        features: sliced feature array returned by ``read_probs``.
        labels: sliced label array, or ``None`` when no label file is given.
        knn_graph: sliced ``data`` array of the kNN ``.npz`` file, stored
            exactly as loaded (no reordering) so it can be saved back under
            the same key.
        inst_num: number of rows kept (``features.shape[0]``).
    """

    def __init__(self, cfg, cut_start=0, cut_end=500):
        feat_path = cfg['feat_path']
        label_path = cfg.get('label_path', None)
        knn_graph_path = cfg['knn_graph_path']

        self.k_at_hop = cfg['k_at_hop']
        self.depth = len(self.k_at_hop)
        self.active_connection = cfg['active_connection']
        # 256 stays the default; a config may override it.
        self.feature_dim = cfg.get('feature_dim', 256)
        self.is_norm_feat = cfg.get('is_norm_feat', True)
        self.is_sort_knns = cfg.get('is_sort_knns', True)
        self.is_test = cfg.get('is_test', False)

        if label_path is not None:
            _, idx2lb = read_meta(label_path)
            # Full instance count is needed to read the feature file below.
            self.inst_num = len(idx2lb)
            self.labels = intdict2ndarray(idx2lb)[cut_start:cut_end]
            self.ignore_label = False
        else:
            # No label file: nothing to slice, so labels simply stay None.
            self.labels = None
            self.inst_num = -1
            self.ignore_label = True

        # feature
        self.features = read_probs(feat_path, self.inst_num,
                                   self.feature_dim)[cut_start:cut_end, :]
        self.inst_num = self.features.shape[0]

        # knn
        # The archive is closed as soon as the slice has been materialised.
        with np.load(knn_graph_path) as knn_file:
            self.knn_graph = knn_file['data'][cut_start:cut_end, :, :]
        # NOTE(review): the entries presumably still index into the full
        # dataset; slicing rows does not remap them -- confirm before
        # training on the cut files.

        # result
        print('inst_num: {},knn_graph shape: {}, feature shape: {}, norm_feat: {}, sort_knns: {} '
              'k_at_hop: {}, active_connection: {}'.format(
                  self.inst_num, self.knn_graph.shape, self.features.shape,
                  self.is_norm_feat, self.is_sort_knns,
                  self.k_at_hop, self.active_connection))

In [68]:
# Build the cut dataset from the training-data section of the config.
data = datacutting(cfg.train_data)

[./data/labels/part0_train.meta] #cls: 8573, #inst: 576494
inst_num: 500,knn_graph shape: (500, 2, 80), feature shape: (500, 256), norm_feat: True, sort_knns: True k_at_hop: [200, 10], active_connection: 10


# Read the data in this notebook (.ipynb)

In [69]:
# Pull the cut arrays out of the loader object into notebook-level names:
# features, instance count, kNN graph and labels.
features = data.features
inst_num = data.inst_num
knn_graph = data.knn_graph
labels = data.labels

# Save cut datasets as new files

In [70]:
#write feature data
import os

# Output locations for the cut dataset (features, labels, kNN graph).
feat_path = 'data_t/features/part0_train_t.bin'
label_path = 'data_t/labels/part0_train_t.meta'
knn_graph_path = 'data_t/knns/part0_train/faiss_k_80_t.npz'

# Create the target directories up front so the writes cannot fail on a
# missing folder.
for _out_path in (feat_path, label_path, knn_graph_path):
    os.makedirs(os.path.dirname(_out_path), exist_ok=True)

# ndarray.tofile always writes the elements in C order, so the 2-D array can
# be dumped directly. `features` is deliberately not rebound to a flattened
# copy, which keeps this cell safe to re-run.
features.tofile(feat_path)

In [71]:
#write label data
def write_meta(label_path, labels):
    """Write labels to a text file, one integer label per line.

    Args:
        label_path: path of the file to create or overwrite.
        labels: 1-D sequence (ndarray, list, ...) of values convertible
            with ``int()``; line ``i`` of the file holds ``int(labels[i])``.

    The number of labels is printed before the file is written.
    """
    inst_num = len(labels)

    print(inst_num)

    with open(label_path, 'w') as of:
        # One buffered pass instead of one write call per label.
        of.writelines(str(int(label)) + '\n' for label in labels)

In [72]:
# Write the cut labels to label_path; write_meta also prints the label count.
write_meta(label_path, labels)

500


In [73]:
# write knn: store the cut graph under the 'data' key of a compressed .npz
np.savez_compressed(knn_graph_path, data=knn_graph)

# Read the output files back


In [74]:
#check feature data output file
class dataread():
    """Load a features / labels / kNN-graph triple from explicit paths.

    Nothing is sliced or reordered here: the arrays are kept as the loaders
    return them, and a one-line summary of their shapes and of the graph
    settings is printed.

    Args:
        cfg: mapping providing ``k_at_hop`` and ``active_connection``;
            ``is_norm_feat``, ``is_sort_knns`` and ``is_test`` are optional.
        feat_path: path of the feature file passed to ``read_probs``.
        label_path: path of the label file passed to ``read_meta``, or
            ``None`` to skip labels.
        knn_graph_path: path of the ``.npz`` file whose ``data`` array is
            the kNN graph.
    """

    def __init__(self, cfg, feat_path, label_path, knn_graph_path):
        # graph settings taken from the config
        hops = cfg['k_at_hop']
        self.k_at_hop = hops
        self.depth = len(hops)
        self.active_connection = cfg['active_connection']
        self.feature_dim = 256
        self.is_norm_feat = cfg.get('is_norm_feat', True)
        self.is_sort_knns = cfg.get('is_sort_knns', True)
        self.is_test = cfg.get('is_test', False)

        # labels
        if label_path is None:
            self.labels = None
            self.inst_num = -1
            self.ignore_label = True
        else:
            _, index_to_label = read_meta(label_path)
            self.inst_num = len(index_to_label)
            self.labels = intdict2ndarray(index_to_label)
            self.ignore_label = False

        # features; the instance count is then taken from what was read
        loaded = read_probs(feat_path, self.inst_num, self.feature_dim)
        self.features = loaded
        self.inst_num = loaded.shape[0]

        # knn graph, kept exactly as stored in the file
        archive = np.load(knn_graph_path)
        self.knn_graph = archive['data']

        # result
        summary = ('inst_num: {},knn_graph shape: {}, feature shape: {}, norm_feat: {}, sort_knns: {} '
                   'k_at_hop: {}, active_connection: {}')
        print(summary.format(
            self.inst_num,
            self.knn_graph.shape,
            self.features.shape,
            self.is_norm_feat,
            self.is_sort_knns,
            self.k_at_hop,
            self.active_connection,
        ))

In [75]:
# Re-load the files written above to check that they round-trip.
data_result = dataread(cfg.train_data, feat_path, label_path, knn_graph_path)

[data_t/labels/part0_train_t.meta] #cls: 7, #inst: 500
inst_num: 500,knn_graph shape: (500, 2, 80), feature shape: (500, 256), norm_feat: True, sort_knns: True k_at_hop: [200, 10], active_connection: 10
