# cutting

In [1]:
import torch
from utils import (create_logger, set_random_seed, rm_suffix,
                   mkdir_if_no_exists)
from utils import (read_meta, read_probs, l2norm, knns2ordered_nbrs,
                   intdict2ndarray, Timer)
from utils import (write_meta, write_feat)
from utils.knn import *

from mmcv import Config 
import numpy as np

# NOTE(review): each `!` line is handed to a separate shell, so these `module`
# commands presumably do not alter the environment of the already-running
# kernel (torch is imported above) — confirm whether CUDA has to be loaded
# before Jupyter is started instead.
!module purge
!module load cuda

In [2]:
# Load the experiment configuration file into `cfg`.
# NOTE(review): this is an absolute path into one user's home directory, so the
# notebook cannot be run from another account/machine without editing it.
config='/dcs/pg20/u2085214/fc/learn-to-cluster/train_cfg_trial.py'
cfg = Config.fromfile(config)


In [3]:
# Runtime settings layered on top of whatever the config file provided.
# They are applied in the order listed.
_cfg_overrides = {
    'phase': 'train',
    'cuda': torch.cuda.is_available(),
    'load_from': None,
    'resume_from': None,
    'gpus': 1,
    'distributed': False,
    'save_output': False,
    'no_cuda': False,
    'force': False,
    'work_dir': './data/',
    'cut_name': '_cut',  # suffix appended to the names of the cut files
}
for _opt_name, _opt_value in _cfg_overrides.items():
    setattr(cfg, _opt_name, _opt_value)


In [4]:
from gae.models import build_model
from gae import build_handler

logger = create_logger()

# The model section of the config carries the model type and its constructor
# keyword arguments.
_model_cfg = cfg.model
model = build_model(_model_cfg['type'], **_model_cfg['kwargs'])

# The handler is selected by the phase set on the config.
handler = build_handler(cfg.phase)

print('model:\n', model)
print('\nhandler:\n', handler)

model:
 gae(
  (base_gcn): GraphConvSparse()
  (gcn_mean): GraphConvSparse()
  (gcn_logstddev): GraphConvSparse()
)

handler:
 <function train_gae at 0x7ff85b3504c0>


In [5]:
# Show the three input locations configured for the training data.
for _path_key in ('label_path', 'feat_path', 'knn_graph_path'):
    print(cfg.train_data[_path_key])

./data/labels/part0_train.meta
./data/features/part0_train.bin
./data/knns/part0_train/faiss_k_80.npz


In [6]:
# Copy every model constructor kwarg onto the training-data config.
for k, v in cfg.model['kwargs'].items():
    setattr(cfg.train_data, k, v)

# Row ranges of the two cuts. They are later used as slice bounds, i.e. the
# end position is exclusive.
# NOTE(review): with exclusive ends, train is rows 0..299 and test is rows
# 301..600, so row 300 lands in neither cut — confirm this gap is intended.
cfg.train_data.tr_st = 0
cfg.train_data.tr_end = 300
cfg.train_data.tt_st = 301
cfg.train_data.tt_end = 601

# Mirror the top-level settings the cutting code reads from the data config.
for _shared_key in ('knn_method', 'knn', 'cut_name',
                    'prefix', 'train_name', 'test_name'):
    setattr(cfg.train_data, _shared_key, getattr(cfg, _shared_key))

# --- output locations of the train cut ---
_train_stem = cfg.train_name + cfg.cut_name
label_path_cut = '{}/labels/{}.meta'.format(cfg.prefix, _train_stem)
feat_path_cut = '{}/features/{}.bin'.format(cfg.prefix, _train_stem)
knn_graph_path_cut = '{}/knns/{}'.format(cfg.prefix, _train_stem)
cfg.train_data.label_path_cut = label_path_cut
cfg.train_data.feat_path_cut = feat_path_cut
cfg.train_data.knn_graph_path_cut = knn_graph_path_cut

print('label_path_cut:', label_path_cut)
print('feat_path_cut:', feat_path_cut)
print('knn_graph_path_cut:', knn_graph_path_cut)
print('\n')

# --- output locations of the test cut ---
_test_stem = cfg.test_name + cfg.cut_name
label_path_cut_tt = '{}/labels/{}.meta'.format(cfg.prefix, _test_stem)
feat_path_cut_tt = '{}/features/{}.bin'.format(cfg.prefix, _test_stem)
knn_graph_path_cut_tt = '{}/knns/{}'.format(cfg.prefix, _test_stem)
cfg.train_data.label_path_cut_tt = label_path_cut_tt
cfg.train_data.feat_path_cut_tt = feat_path_cut_tt
cfg.train_data.knn_graph_path_cut_tt = knn_graph_path_cut_tt

print('label_path_cut_tt:', label_path_cut_tt)
print('feat_path_cut_tt:', feat_path_cut_tt)
print('knn_graph_path_cut_tt:', knn_graph_path_cut_tt)



label_path_cut: ./data/labels/part0_train_cut.meta
feat_path_cut: ./data/features/part0_train_cut.bin
knn_graph_path_cut: ./data/knns/part0_train_cut


label_path_cut_tt: ./data/labels/part1_test_cut.meta
feat_path_cut_tt: ./data/features/part1_test_cut.bin
knn_graph_path_cut_tt: ./data/knns/part1_test_cut


In [7]:
def read_meta(fn_meta, end_pos=None, start_pos=0, verbose=True):
    """Read integer labels from a range of lines of a meta file.

    The file is expected to hold one integer label per line. Only the lines
    ``[start_pos:end_pos]`` (ordinary Python slice semantics, end exclusive)
    are used.

    Args:
        fn_meta: path of the label file.
        end_pos: exclusive end line of the range; ``None`` (the default)
            reads through the end of the file.
        start_pos: first line of the range.
        verbose: when true, print the number of classes and instances read.

    Returns:
        A ``(lb2idxs, idx2lb)`` pair: ``lb2idxs`` maps each label to the list
        of indices carrying it and ``idx2lb`` maps each index to its label.
        Indices are relative to the selected range, i.e. the first selected
        line always has index 0 regardless of ``start_pos``.

    Raises:
        ValueError: if a selected line is not an integer.
    """
    lb2idxs = {}
    idx2lb = {}
    with open(fn_meta) as f:
        # A slice whose stop is None already runs to the end of the list, so
        # one code path serves both the bounded and the unbounded case.
        selected = f.readlines()[start_pos:end_pos]

    for idx, line in enumerate(selected):
        lb = int(line.strip())
        lb2idxs.setdefault(lb, []).append(idx)
        idx2lb[idx] = lb

    inst_num = len(idx2lb)
    cls_num = len(lb2idxs)
    if verbose:
        print('[{}] #cls: {}, #inst: {}'.format(fn_meta, cls_num, inst_num))
    return lb2idxs, idx2lb

In [8]:
class Data_Cutting(object):
    """Cut a train subset and a test subset out of one labelled feature set.

    Both subsets are taken from the same label file (``cfg['label_path']``)
    and the same feature file (``cfg['feat_path']``): rows
    ``[tr_st:tr_end]`` form the train cut and rows ``[tt_st:tt_end]`` the
    test cut (slice semantics, end exclusive). For each cut the constructor
    writes the labels and the features to the ``*_cut`` / ``*_cut_tt`` paths
    from ``cfg``, calls ``build_knns`` on the cut features and loads the
    resulting kNN file back.

    Attributes set by the constructor:
        inst_num / labels: instance count and label array of the FULL,
            uncut label file.
        labels_tr / labels_tt: label arrays of the train / test cut.
        features: full feature matrix as returned by ``read_probs``.
        features_tr / features_tt: row slices of ``features`` for each cut.
        knn_graph_tr / knn_graph_tt: second return value of
            ``knns2ordered_nbrs`` for each cut.
        k_at_hop, depth, active_connection, feature_dim, is_norm_feat,
        is_sort_knns, is_test: copied from ``cfg``; apart from
        ``feature_dim`` and ``is_sort_knns`` they are only stored here.

    Raises:
        ValueError: if ``cfg`` has no ``label_path``.
    """

    def __init__(self, cfg):
        feat_path = cfg['feat_path']
        label_path = cfg.get('label_path', None)
        feat_path_cut = cfg['feat_path_cut']
        label_path_cut = cfg.get('label_path_cut', None)
        knn_graph_path_cut = cfg['knn_graph_path_cut']
        feat_path_cut_tt = cfg['feat_path_cut_tt']
        label_path_cut_tt = cfg.get('label_path_cut_tt', None)
        knn_graph_path_cut_tt = cfg['knn_graph_path_cut_tt']

        knn_method = cfg['knn_method']
        knn = cfg['knn']
        cut_name = cfg['cut_name']
        prefix = cfg['prefix']
        train_name = cfg['train_name']
        test_name = cfg['test_name']

        tr_st = cfg['tr_st']
        tr_end = cfg['tr_end']
        tt_st = cfg['tt_st']
        tt_end = cfg['tt_end']

        self.k_at_hop = cfg['k_at_hop']
        self.depth = len(self.k_at_hop)  # number of entries in k_at_hop
        self.active_connection = cfg['active_connection']
        self.feature_dim = cfg['feature_dim']
        self.is_norm_feat = cfg.get('is_norm_feat', True)
        self.is_sort_knns = cfg.get('is_sort_knns', True)
        self.is_test = cfg.get('is_test', False)

        if label_path is None:
            # The label file is the only source of the instance count handed
            # to read_probs below, so no step can run without it. Fail with a
            # clear message instead of an unbound-variable error later on.
            raise ValueError("cfg['label_path'] is required to cut the data")

        print('\n')
        with Timer('read and write meta'):
            # Train cut.
            _, idx2lb_tr = read_meta(label_path, tr_end, start_pos=tr_st)
            self.labels_tr = intdict2ndarray(idx2lb_tr)
            write_meta(label_path_cut, idx2lb_tr)
            print('shape label train', self.labels_tr.shape)

            # Test cut, taken from the same label file.
            _, idx2lb_tt = read_meta(label_path, tt_end, start_pos=tt_st)
            self.labels_tt = intdict2ndarray(idx2lb_tt)
            write_meta(label_path_cut_tt, idx2lb_tt)
            print('shape label test', self.labels_tt.shape)

            # Whole file: its length is the row count of the feature file.
            _, idx2lb = read_meta(label_path, end_pos=None)
            self.inst_num = len(idx2lb)
            self.labels = intdict2ndarray(idx2lb)
            print('inst_num:', self.inst_num)

        print('\n')
        with Timer('read and write features'):
            self.features = read_probs(feat_path, self.inst_num,
                                       self.feature_dim)
            print('shape features org:', self.features.shape)

            self.features_tr = self.features[tr_st:tr_end, :]
            # Write only the train rows; writing self.features here would put
            # the whole uncut matrix into the "cut" file.
            write_feat(feat_path_cut, self.features_tr)
            print('shape features tr:', self.features_tr.shape)

            self.features_tt = self.features[tt_st:tt_end, :]
            write_feat(feat_path_cut_tt, self.features_tt)
            print('shape features tt:', self.features_tt.shape)

        print('\n')
        with Timer('read and write knn graph'):
            # NOTE(review): build_knns may reuse an existing file under these
            # directories rather than rebuilding — verify a stale graph is not
            # picked up after the cut ranges change.
            build_knns(knn_graph_path_cut, self.features_tr, knn_method, knn)
            build_knns(knn_graph_path_cut_tt, self.features_tt, knn_method,
                       knn)

            knn_graph_path_train = prefix + '/knns/' + train_name+cut_name + '/'+knn_method+'_k_'+str(knn)+'.npz'
            knn_graph_path_test = prefix + '/knns/' + test_name+cut_name + '/'+knn_method+'_k_'+str(knn)+'.npz'
            print('train path:', knn_graph_path_train)
            print('test path:', knn_graph_path_test)

            knns, self.knn_graph_tr = self._load_knn_graph(knn_graph_path_train)
            print('shape knns tr', knns.shape)
            print('shape knn_graph tr', self.knn_graph_tr.shape)

            knns, self.knn_graph_tt = self._load_knn_graph(knn_graph_path_test)
            print('shape knns tt', knns.shape)
            print('shape knn_graph', self.knn_graph_tt.shape)

    def _load_knn_graph(self, npz_path):
        """Load the 'data' array of a kNN .npz file and its ordered neighbours."""
        knns = np.load(npz_path)['data']
        _, nbrs = knns2ordered_nbrs(knns, sort=self.is_sort_knns)
        return knns, nbrs

        



# Run the cut on the training-data config; all reading, writing and kNN
# building happens inside the constructor.
data=Data_Cutting(cfg.train_data)
# Keep a handle on the feature matrix that was read from cfg's feat_path.
features=data.features



[./data/labels/part0_train.meta] #cls: 4, #inst: 300
#discard: 0, #lbs: 4
#inst: 300, #class: 4
save label to ./data/labels/part0_train_cut.meta
shape label train (300,)
[./data/labels/part0_train.meta] #cls: 5, #inst: 300
#discard: 0, #lbs: 5
#inst: 300, #class: 5
save label to ./data/labels/part1_test_cut.meta
shape label test (300,)
[./data/labels/part0_train.meta] #cls: 8573, #inst: 576494
inst_num: 576494
[Time] read and write meta consumes 0.8174 s


shape features org: (576494, 256)
save features to ./data/features/part0_train_cut.bin
shape features tr: (300, 256)
save features to ./data/features/part1_test_cut.bin
shape features tt: (300, 256)
[Time] read and write features consumes 7.6988 s


read knn from ./data/knns/part0_train_cut/faiss_k_80.npz
read knn from ./data/knns/part1_test_cut/faiss_k_80.npz
train path: ./data/knns/part0_train_cut/faiss_k_80.npz
test path: ./data/knns/part1_test_cut/faiss_k_80.npz
shape knns tr (300, 2, 80)
shape knn_graph tr (300, 80)
shape knns

In [9]:
# Both cuts use the same kNN file name, derived from the method and k.
_knn_file_name = '{}_k_{}.npz'.format(cfg.knn_method, cfg.knn)
knn_graph_path_train = '{}/knns/{}{}/{}'.format(
    cfg.prefix, cfg.train_name, cfg.cut_name, _knn_file_name)
knn_graph_path_test = '{}/knns/{}{}/{}'.format(
    cfg.prefix, cfg.test_name, cfg.cut_name, _knn_file_name)
print(knn_graph_path_train)
print(knn_graph_path_test)

# Train cut: load the stored kNN array and derive the ordered neighbour graph.
knns = np.load(knn_graph_path_train)['data']
_, knn_graph_tr = knns2ordered_nbrs(knns, sort=True)
print('shape knn_graph', knn_graph_tr.shape)
print(knn_graph_tr)

# Test cut: same steps; `knns` is left holding the test array afterwards.
knns = np.load(knn_graph_path_test)['data']
_, knn_graph_tt = knns2ordered_nbrs(knns, sort=True)
print('shape knn_graph', knn_graph_tt.shape)
print(knn_graph_tt)

./data/knns/part0_train_cut/faiss_k_80.npz
./data/knns/part1_test_cut/faiss_k_80.npz
shape knn_graph (300, 80)
[[280  64   0 ... 289 209 228]
 [  1  97  65 ...  71  74 103]
 [218 234  98 ... 158  86 109]
 ...
 [113   1 297 ... 151 271 119]
 [186 226 298 ... 141 145 101]
 [299 195 203 ...  69  79 285]]
shape knn_graph (300, 80)
[[  0   7  17 ...  85 190 144]
 [  1   2   7 ...  31  88  68]
 [  2  12   1 ... 294  24 265]
 ...
 [297 292 287 ... 150 154 179]
 [298 295 291 ...  59  80  12]
 [299 295 286 ...  31  24 174]]
