In [2]:
import pandas as pd
from tqdm import trange, tqdm
import numpy as np
import torch
import sys
from collections import defaultdict
sys.path.append('/mnt/nfs/zhangtl/utils/')
from util import myout
import pickle as pkl
import json

import dgl

Using backend: pytorch


## load

In [3]:
# Read the edge list and order it by time first, then by endpoints; the
# cells below consume the rows in exactly this order.
papers = (
    pd.read_csv('../../raw_data/sbm_50t_1000n_adj.csv')
    .sort_values(by=['time', 'source', 'target'])
)
papers

Unnamed: 0,source,target,weight,time
0,0,2,1,0
1,0,3,1,0
2,0,8,1,0
3,0,15,1,0
4,0,16,1,0
...,...,...,...,...
4870858,999,945,1,49
4870859,999,951,1,49
4870860,999,959,1,49
4870861,999,970,1,49


## build graph

In [4]:
def update_idx(idx, dic, cnt, feats, feat_dim, no_emb):
    """Register ``idx`` in ``dic`` the first time it is seen.

    A new ``idx`` is mapped to the current value of ``cnt`` and a fresh
    random float32 vector of length ``feat_dim`` is appended to ``feats``.
    An ``idx`` that is already present changes nothing.

    Args:
        idx: key to look up / register.
        dic: mapping from key to consecutive integer id (mutated in place).
        cnt: next free integer id.
        feats: list of per-id feature tensors (mutated in place).
        feat_dim: length of the feature vector created for a new key.
        no_emb: passed through untouched.

    Returns:
        The tuple ``(dic, cnt, feats, no_emb)`` with ``cnt`` advanced by one
        when a new key was registered.
    """
    if idx in dic:
        return dic, cnt, feats, no_emb

    dic[idx] = cnt
    feats.append(torch.rand(feat_dim).to(torch.float32))
    return dic, cnt + 1, feats, no_emb

In [7]:
start_year, end_year = 0, 50
feat_dim = 128

id2nid, cnt, no_emb = {}, 0, 0
lst, feats = [], []

# Materialise the four columns once. Looking each scalar up with
# `papers[col].iloc[ii]` inside the loop costs four pandas indexing calls per
# row, which dominates the runtime on a multi-million-row frame.
edge_rows = zip(
    papers['time'].tolist(),
    papers['source'].tolist(),
    papers['target'].tolist(),
    papers['weight'].tolist(),
)

# Rows are visited in the frame's current order, so node ids are handed out
# in order of first appearance (source before target within a row).
# NOTE: update_idx draws features with torch.rand and no seed is set in this
# cell, so `feat` differs from run to run.
for year, source, target, weight in tqdm(edge_rows, total=len(papers)):
    year, source, target, weight = int(year), int(source), int(target), int(weight)

    id2nid, cnt, feats, no_emb = update_idx(source, id2nid, cnt, feats, feat_dim, no_emb)
    id2nid, cnt, feats, no_emb = update_idx(target, id2nid, cnt, feats, feat_dim, no_emb)

    # (src node id, dst node id, weight, timestamp)
    lst.append((id2nid[source], id2nid[target], weight, year))

feat = torch.stack(feats)
src = torch.tensor([edge[0] for edge in lst])
tgt = torch.tensor([edge[1] for edge in lst])
rel = torch.tensor([edge[2] for edge in lst])
tsp = torch.tensor([edge[3] for edge in lst])

myout(feat, src, tgt, rel, tsp, id2nid)

  0%|          | 0/4870863 [00:00<?, ?it/s]  0%|          | 3022/4870863 [00:00<02:41, 30202.53it/s]  0%|          | 6043/4870863 [00:00<03:00, 26891.51it/s]  0%|          | 9705/4870863 [00:00<02:36, 31053.61it/s]  0%|          | 13258/4870863 [00:00<02:28, 32748.21it/s]  0%|          | 16942/4870863 [00:00<02:21, 34185.16it/s]  0%|          | 20385/4870863 [00:00<02:23, 33916.29it/s]  0%|          | 23793/4870863 [00:00<02:24, 33537.80it/s]  1%|          | 27330/4870863 [00:00<02:22, 34105.92it/s]  1%|          | 31006/4870863 [00:00<02:18, 34922.35it/s]  1%|          | 34740/4870863 [00:01<02:15, 35658.41it/s]  1%|          | 38312/4870863 [00:01<02:20, 34516.54it/s]  1%|          | 41906/4870863 [00:01<02:18, 34934.93it/s]  1%|          | 45409/4870863 [00:01<02:19, 34647.72it/s]  1%|          | 49049/4870863 [00:01<02:17, 35164.10it/s]  1%|          | 52652/4870863 [00:01<02:16, 35420.20it/s]  1%|          | 56348/4870863 [00:01<02:14, 35878.30it/s]  1%|          

feat : shape=torch.Size([1000, 128])
tensor([[9.5677e-01, 9.9973e-01, 1.5496e-01,  ..., 8.3490e-01, 4.7333e-01,
         9.6531e-02],
        [2.9627e-02, 2.4462e-01, 3.8081e-01,  ..., 8.2928e-01, 6.2850e-01,
         4.7088e-01],
        [6.8378e-01, 2.2981e-01, 5.6938e-01,  ..., 5.7500e-01, 4.0184e-01,
         7.3300e-01],
        ...,
        [8.3367e-01, 2.4207e-01, 7.4085e-01,  ..., 7.0266e-01, 8.4406e-01,
         8.5244e-01],
        [1.2750e-01, 1.4908e-01, 9.8412e-01,  ..., 5.7244e-04, 8.7485e-01,
         5.5075e-01],
        [2.1846e-01, 9.7331e-01, 7.8855e-01,  ..., 5.4291e-01, 8.1132e-01,
         1.3333e-01]])
src : shape=torch.Size([4870863]), tensor([  0,   0,   0,  ..., 873, 873, 873])
tgt : shape=torch.Size([4870863]), tensor([  1,   2,   3,  ..., 850, 955, 640])
rel : shape=torch.Size([4870863]), tensor([1, 1, 1,  ..., 1, 1, 1])
tsp : shape=torch.Size([4870863]), tensor([ 0,  0,  0,  ..., 49, 49, 49])
id2nid : len=1000, dict([0: 0, 2: 1, 3: 2, 8: 3, 15: 4, 16: 5, ..

In [8]:
# Reverse lookup: node id used in the graph -> id found in the CSV.
nid2id = {nid: raw_id for raw_id, nid in id2nid.items()}

graph = dgl.graph((src, tgt), num_nodes=len(feat))

# Per-edge attributes: the CSV weight column and the edge timestamp.
graph.edata['rel'] = rel
graph.edata['ts'] = tsp

# Per-node attributes: the feature vector and the node's own index in this
# full graph.
graph.ndata['feat'] = feat
graph.ndata['raw_nid'] = torch.arange(len(feat))
graph

Graph(num_nodes=1000, num_edges=4870863,
      ndata_schemes={'feat': Scheme(shape=(128,), dtype=torch.float32), 'raw_nid': Scheme(shape=(), dtype=torch.int64)}
      edata_schemes={'rel': Scheme(shape=(), dtype=torch.int64), 'ts': Scheme(shape=(), dtype=torch.int64)})

In [9]:
dataset = 'sbm'
# Write the single full graph to the dataset's output folder.
graph_file = f'../data/{dataset}/graph.bin'
dgl.save_graphs(graph_file, [graph])

In [10]:
# Persist the CSV-id -> node-id mapping. The context manager makes sure the
# file is flushed and closed. JSON object keys are always strings, so the
# integer keys of `id2nid` are written as strings.
with open(f'../data/{dataset}/id2nid.json', 'w') as fout:
    json.dump(id2nid, fout)

## Generate per-year citation counts

In [12]:
start_year, end_year = 0, 50

# cites[year][target] = number of rows in `papers` with that (time, target).
# Keys are the ids as they appear in the CSV, not graph node ids.
cites = {year: defaultdict(int) for year in range(start_year, end_year)}

# Walk the two needed columns directly instead of doing a `.iloc` scalar
# lookup per row and per column.
year_target_pairs = zip(papers['time'].tolist(), papers['target'].tolist())
for year, target in tqdm(year_target_pairs, total=len(papers)):
    year, target = int(year), int(target)
    cites[year][target] += 1
myout(cites[5])

  0%|          | 0/4870863 [00:00<?, ?it/s]  0%|          | 6937/4870863 [00:00<01:10, 69360.31it/s]  0%|          | 14006/4870863 [00:00<01:09, 70137.66it/s]  0%|          | 21020/4870863 [00:00<01:13, 65542.57it/s]  1%|          | 27608/4870863 [00:00<01:18, 61552.12it/s]  1%|          | 34766/4870863 [00:00<01:14, 64923.80it/s]  1%|          | 41650/4870863 [00:00<01:12, 66191.91it/s]  1%|          | 48573/4870863 [00:00<01:11, 67151.53it/s]  1%|          | 55318/4870863 [00:00<01:16, 62693.22it/s]  1%|▏         | 61658/4870863 [00:00<01:17, 61820.87it/s]  1%|▏         | 68745/4870863 [00:01<01:14, 64458.15it/s]  2%|▏         | 75322/4870863 [00:01<01:13, 64842.04it/s]  2%|▏         | 81874/4870863 [00:01<01:13, 65039.42it/s]  2%|▏         | 89131/4870863 [00:01<01:11, 67274.60it/s]  2%|▏         | 95880/4870863 [00:01<01:15, 63633.89it/s]  2%|▏         | 102719/4870863 [00:01<01:13, 64995.30it/s]  2%|▏         | 109293/4870863 [00:01<01:13, 65209.47it/s]  2%|▏     

 : len=1000, dict([2: 122, 3: 118, 8: 137, 15: 126, 16: 114, 24: 99, ...])





In [13]:
tsp = graph.edata['ts']
ts_array = tsp.numpy()

# For every distinct timestamp, np.unique reports the position of its first
# occurrence; appending the total edge count closes the last interval.
ts_vals, first_positions = np.unique(ts_array, return_index=True)
ts_cuts = list(first_positions) + [len(ts_array)]

# One row per timestamp: [timestamp, first edge position, next cut point].
num_ts = len(ts_vals)
ts_infos = np.column_stack([ts_vals, ts_cuts[:num_ts], ts_cuts[1:num_ts + 1]])
myout(ts_cuts, ts_vals, ts_infos)

ts_cuts : len=51, list([0, 105358, 210156, ..., 4682713, 4776746, 4870863])
ts_vals : shape=(50,), [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
 48 49]
ts_infos : shape=(50, 3)
[[      0       0  105358]
 [      1  105358  210156]
 [      2  210156  314402]
 [      3  314402  418238]
 [      4  418238  521610]
 [      5  521610  624544]
 [      6  624544  727026]
 [      7  727026  829064]
 [      8  829064  930678]
 [      9  930678 1031932]
 [     10 1031932 1132744]
 [     11 1132744 1233148]
 [     12 1233148 1333142]
 [     13 1333142 1432762]
 [     14 1432762 1531990]
 [     15 1531990 1630906]
 [     16 1630906 1729528]
 [     17 1729528 1827788]
 [     18 1827788 1925662]
 [     19 1925662 2023292]
 [     20 2023292 2120646]
 [     21 2120646 2217706]
 [     22 2217706 2314482]
 [     23 2314482 2411034]
 [     24 2411034 2507302]
 [     25 2507302 2603316]
 [     26 2603316 26

In [14]:
labels = {}
nid2id = {v: k for k, v in id2nid.items()}

# The source endpoint of every edge; fetched once instead of once per year.
edge_sources = graph.edges()[0]

# The citation table of a given future year does not depend on the year being
# labelled, so build each one a single time rather than inside the nested loop.
cite_frames = {}
for yy in range(start_year + 1, end_year):
    cdf = pd.DataFrame({'id': list(cites[yy].keys()), str(yy): list(cites[yy].values())})
    cdf[str(yy)] = cdf[str(yy)].astype('float32')
    cite_frames[yy] = cdf

for year in range(start_year, end_year):
    # Edge slice [left, right) that carries this timestamp.
    left, right = ts_infos[np.where(ts_infos[:, 0] == year)[0][0], 1:]
    # Distinct source nodes among this year's edges, and their CSV ids.
    nids = edge_sources[left:right].unique().tolist()
    ids = [nid2id[nid] for nid in nids]

    pdf = pd.DataFrame({'id': ids, 'nid': nids})
    tbar = trange(year + 1, end_year, desc=str(year))
    for yy in tbar:
        # Adds one column named str(yy); ids with no entry that year get NaN.
        pdf = pd.merge(pdf, cite_frames[yy], how='left', on='id')
        tbar.set_postfix(year=year, pdf=len(pdf))
    # Missing counts mean zero citations in that year.
    pdf = pdf.fillna(0)
    labels[year] = pdf

0:   0%|          | 0/49 [00:00<?, ?it/s]0:   0%|          | 0/49 [00:00<?, ?it/s, pdf=1000, year=0]0:   0%|          | 0/49 [00:00<?, ?it/s, pdf=1000, year=0]0:   0%|          | 0/49 [00:00<?, ?it/s, pdf=1000, year=0]0:   0%|          | 0/49 [00:00<?, ?it/s, pdf=1000, year=0]0:   0%|          | 0/49 [00:00<?, ?it/s, pdf=1000, year=0]0:   0%|          | 0/49 [00:00<?, ?it/s, pdf=1000, year=0]0:   0%|          | 0/49 [00:00<?, ?it/s, pdf=1000, year=0]0:   0%|          | 0/49 [00:00<?, ?it/s, pdf=1000, year=0]0:   0%|          | 0/49 [00:00<?, ?it/s, pdf=1000, year=0]0:   0%|          | 0/49 [00:00<?, ?it/s, pdf=1000, year=0]0:   0%|          | 0/49 [00:00<?, ?it/s, pdf=1000, year=0]0:   0%|          | 0/49 [00:00<?, ?it/s, pdf=1000, year=0]0:   0%|          | 0/49 [00:00<?, ?it/s, pdf=1000, year=0]0:   0%|          | 0/49 [00:00<?, ?it/s, pdf=1000, year=0]0:   0%|          | 0/49 [00:00<?, ?it/s, pdf=1000, year=0]0:   0%|          | 0/49 [00:00<?, ?it/s, pdf=1000, year=0

In [15]:
# Spot-check the label table stored under key 5.
labels[5]

Unnamed: 0,id,nid,6,7,8,9,10,11,12,13,...,40,41,42,43,44,45,46,47,48,49
0,0,0,115.0,115.0,116.0,116.0,117.0,117.0,117.0,117.0,...,135.0,137.0,137.0,137.0,138.0,138.0,138.0,139.0,141.0,142.0
1,2,1,124.0,124.0,124.0,127.0,127.0,127.0,130.0,130.0,...,143.0,144.0,146.0,146.0,147.0,147.0,147.0,147.0,147.0,147.0
2,3,2,119.0,120.0,120.0,120.0,121.0,121.0,121.0,122.0,...,132.0,132.0,133.0,133.0,133.0,133.0,133.0,133.0,133.0,133.0
3,8,3,138.0,139.0,139.0,139.0,139.0,140.0,141.0,141.0,...,155.0,156.0,156.0,157.0,157.0,158.0,159.0,159.0,160.0,160.0
4,15,4,127.0,127.0,128.0,128.0,128.0,128.0,128.0,128.0,...,140.0,140.0,140.0,140.0,140.0,141.0,142.0,142.0,143.0,143.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,766,995,83.0,83.0,81.0,80.0,77.0,76.0,76.0,74.0,...,51.0,50.0,49.0,48.0,48.0,47.0,46.0,46.0,45.0,45.0
996,567,996,97.0,96.0,94.0,94.0,91.0,89.0,87.0,85.0,...,59.0,58.0,57.0,57.0,57.0,56.0,55.0,53.0,52.0,51.0
997,775,997,97.0,96.0,95.0,95.0,94.0,94.0,93.0,92.0,...,57.0,57.0,56.0,56.0,54.0,52.0,52.0,51.0,50.0,49.0
998,964,998,81.0,81.0,81.0,80.0,80.0,80.0,79.0,79.0,...,58.0,58.0,58.0,58.0,57.0,56.0,55.0,55.0,54.0,53.0


In [16]:
# Use a context manager so the pickle file is flushed and closed.
with open(f'../data/{dataset}/labels.pkl', 'wb') as fout:
    pkl.dump(labels, fout)

## Cumulative log labels

In [17]:
def cumulative_log(df):
    """Turn per-column counts into log(1 + running total), in place.

    The first two columns are left alone. From the third column on, each
    column is replaced by the sum of itself and every column to its left
    (starting at the third), and the result is mapped through ``log(x + 1)``.

    Args:
        df: DataFrame whose columns from index 2 onward are numeric counts.
            It is modified in place.

    Returns:
        The same ``df`` object that was passed in.
    """
    names = list(df.columns)

    # Left-to-right running sum: each column absorbs its already-accumulated
    # left neighbour.
    for left_name, right_name in zip(names[2:], names[3:]):
        df[right_name] = df[right_name] + df[left_name]

    running_totals = df.iloc[:, 2:]
    df.iloc[:, 2:] = np.log(running_totals + 1)
    return df

labels_cum_log = {}
for year in range(start_year, end_year - 1):
    # cumulative_log edits the frame it receives, so hand it a copy. That
    # keeps the raw counts in `labels` intact and makes this cell safe to
    # re-run (otherwise every re-run would transform the labels again).
    labels_cum_log[year] = cumulative_log(labels[year].copy())
# The loop above already covers year `end_year - 2`, so no separate
# assignment for that key is needed.
print(len(labels_cum_log))
labels_cum_log[5]

49


Unnamed: 0,id,nid,6,7,8,9,10,11,12,13,...,40,41,42,43,44,45,46,47,48,49
0,0,0,4.753590,5.442418,5.849325,6.137727,6.363028,6.546785,6.701961,6.836259,...,8.384119,8.414939,8.444838,8.473868,8.502283,8.529912,8.556799,8.583168,8.609225,8.634798
1,2,1,4.828314,5.517453,5.921578,6.214608,6.440947,6.625392,6.784457,6.921658,...,8.467373,8.497194,8.526549,8.555067,8.582981,8.610137,8.636575,8.662332,8.687442,8.711937
2,3,2,4.787492,5.480639,5.886104,6.173786,6.398595,6.582025,6.736967,6.872128,...,8.394121,8.423542,8.452334,8.480322,8.507546,8.534050,8.559870,8.585039,8.609591,8.633553
3,8,3,4.934474,5.627621,6.033086,6.320768,6.543912,6.727432,6.883462,7.018402,...,8.534837,8.565030,8.594339,8.622993,8.650849,8.678121,8.704834,8.730852,8.756368,8.781248
4,15,4,4.852030,5.541264,5.948035,6.236370,6.459905,6.642487,6.796824,6.930495,...,8.454041,8.483430,8.511980,8.539737,8.566745,8.593228,8.619208,8.644530,8.669399,8.693665
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,766,995,4.430817,5.117994,5.513429,5.793014,6.003887,6.175867,6.322565,6.447306,...,7.720462,7.742402,7.763446,7.783640,7.803435,7.822445,7.840706,7.858641,7.875879,7.892826
996,567,996,4.584968,5.267858,5.662961,5.945421,6.159095,6.331502,6.475433,6.598509,...,7.877776,7.899525,7.920446,7.940940,7.961021,7.980366,7.999007,8.016648,8.033658,8.050065
997,775,997,4.584968,5.267858,5.666427,5.950643,6.169611,6.349139,6.499787,6.629363,...,7.910957,7.931644,7.951560,7.971086,7.989561,8.007034,8.024207,8.040769,8.056744,8.072155
998,964,998,4.406719,5.093750,5.497168,5.780744,6.001415,6.182085,6.333280,6.464588,...,7.822044,7.845025,7.867488,7.889459,7.910591,7.930925,7.950502,7.969704,7.988204,8.006034


In [None]:
# Use a context manager so the pickle file is flushed and closed.
with open(f'../data/{dataset}/labels_cum_log.pkl', 'wb') as fout:
    pkl.dump(labels_cum_log, fout)

## tmp: per-timestamp citation counts from the graph

In [32]:
# Half-open timestamp range [start_ts, end_ts) covered by `tsp`.
start_ts = tsp.min().item()
end_ts = tsp.max().item() + 1

# Rows of `triplet`: source node, destination node, edge timestamp.
triplet = torch.vstack([*graph.edges(), graph.edata['ts']])
myout(start_ts, end_ts, triplet)

no_emb = 0
end_ts = 50
triplet : shape=torch.Size([3, 4870863])
tensor([[  0,   0,   0,  ..., 873, 873, 873],
        [  1,   2,   3,  ..., 850, 955, 640],
        [  0,   0,   0,  ...,  49,  49,  49]])


In [33]:
# ts2ncites[timestamp][destination node] = number of edges with that pair.
ts2ncites = {tt: defaultdict(int) for tt in range(start_ts, end_ts)}

# Convert the two rows to Python lists once instead of calling `.item()` on
# a tensor element twice per edge. The loop variables are deliberately not
# called `tgt`: that name holds the destination tensor built in the
# graph-construction cell and must not be rebound to a scalar here.
cited_nodes = triplet[1].tolist()
edge_times = triplet[2].tolist()
for cited, when in tqdm(zip(cited_nodes, edge_times), total=triplet.shape[1]):
    ts2ncites[when][cited] += 1

  0%|          | 0/4870863 [00:00<?, ?it/s]  0%|          | 14244/4870863 [00:00<00:34, 142420.78it/s]  1%|          | 29207/4870863 [00:00<00:33, 146657.44it/s]  1%|          | 44454/4870863 [00:00<00:32, 149306.48it/s]  1%|          | 59385/4870863 [00:00<00:32, 149255.39it/s]  2%|▏         | 74540/4870863 [00:00<00:31, 150077.78it/s]  2%|▏         | 89548/4870863 [00:00<00:31, 149865.90it/s]  2%|▏         | 104535/4870863 [00:00<00:31, 149727.63it/s]  2%|▏         | 119773/4870863 [00:00<00:31, 150567.25it/s]  3%|▎         | 134830/4870863 [00:00<00:31, 150313.52it/s]  3%|▎         | 149985/4870863 [00:01<00:31, 150692.71it/s]  3%|▎         | 165266/4870863 [00:01<00:31, 151335.51it/s]  4%|▎         | 180400/4870863 [00:01<00:31, 150627.67it/s]  4%|▍         | 195667/4870863 [00:01<00:30, 151239.42it/s]  4%|▍         | 211019/4870863 [00:01<00:30, 151922.05it/s]  5%|▍         | 226212/4870863 [00:01<00:30, 150861.87it/s]  5%|▍         | 241336/4870863 [00:01<00:30, 1

## Assign ncites to ndata

In [None]:
# NOTE(review): this cell looks like an unfinished draft. The two expression
# statements marked below compute a value and throw it away, so `ncites` is
# still all zeros when the cell finishes.
# This also rebinds `num_ts` to one less than the number of timestamps in
# [start_ts, end_ts).
num_ts = end_ts - start_ts - 1
# One row per node, one column per timestamp except the last.
ncites = torch.zeros((len(feat), num_ts), dtype=torch.int32)

for cur_tt in range(start_ts, end_ts-1): # intended: future citation counts from cur_tt up to end_ts-1
    # Edges strictly older than cur_tt (none at all when cur_tt == start_ts).
    ts_eids = graph.filter_edges(lambda x: x.data['ts'] < cur_tt)
    ts_graph = dgl.edge_subgraph(graph, ts_eids)
    # '_ID' on the subgraph; presumably the node ids in `graph` — TODO confirm.
    cur_nids = ts_graph.ndata['_ID']
    
    for fut_tt in range(cur_tt+1, end_ts):
        # TODO: placeholder — indexes `ncites` but assigns nothing.
        ncites[cur_nids, ]
    # TODO: result discarded; looks like a leftover experiment.
    graph.in_degrees(torch.tensor([0,1]))



In [34]:
# Select the edges whose timestamp is below 1 and build the subgraph they
# induce; '_ID' is then read from the subgraph's node data.
ts_eids = graph.filter_edges(lambda edges: edges.data['ts'] < 1)
ts_graph = dgl.edge_subgraph(graph, ts_eids)
cur_nids = ts_graph.ndata['_ID']
myout(ts_graph, cur_nids)

ts_graph = Graph(num_nodes=1000, num_edges=105358,
      ndata_schemes={'feat': Scheme(shape=(128,), dtype=torch.float32), 'raw_id': Scheme(shape=(), dtype=torch.int64), 'ncites': Scheme(shape=(49,), dtype=torch.int32), '_ID': Scheme(shape=(), dtype=torch.int64)}
      edata_schemes={'rel': Scheme(shape=(), dtype=torch.int64), 'ts': Scheme(shape=(), dtype=torch.int64), '_ID': Scheme(shape=(), dtype=torch.int64)})
cur_nids : shape=torch.Size([1000]), tensor([  0, 114,   1,   2, 329, 377, 190, 191,   3, 192, 193, 115, 266, 473,
        412,   4,   5, 524, 116, 267, 194, 413, 378, 414,   6, 117, 330,   7,
        379, 268, 269, 118, 444, 331, 195, 270, 196,   8, 332, 445, 197,   9,
        119, 333, 198, 497, 120, 271, 199, 334, 121, 122, 123, 124, 200, 534,
         10, 535, 380, 201,  11, 381, 382, 202,  12, 272, 474, 446, 203, 204,
        498,  13, 383, 335, 273,  14, 205, 336,  15, 274, 384, 337, 576, 338,
        275, 206, 385,  16,  17, 276, 447, 277, 475, 491, 125, 549,  18, 126,


In [29]:
# The graph-construction cell stores the per-node index under 'raw_nid';
# there is no 'raw_id' field on a freshly built graph, so reading it raises
# KeyError after Restart & Run All.
myout(ts_graph.ndata['_ID'], ts_graph.ndata['raw_nid'])

__ : shape=torch.Size([1000]), tensor([  0, 114,   1,   2, 329, 377, 190, 191,   3, 192, 193, 115, 266, 473,
        412,   4,   5, 524, 116, 267, 194, 413, 378, 414,   6, 117, 330,   7,
        379, 268, 269, 118, 444, 331, 195, 270, 196,   8, 332, 445, 197,   9,
        119, 333, 198, 497, 120, 271, 199, 334, 121, 122, 123, 124, 200, 534,
         10, 535, 380, 201,  11, 381, 382, 202,  12, 272, 474, 446, 203, 204,
        498,  13, 383, 335, 273,  14, 205, 336,  15, 274, 384, 337, 576, 338,
        275, 206, 385,  16,  17, 276, 447, 277, 475, 491, 125, 549,  18, 126,
        386, 127, 128, 339,  19, 129, 340,  20, 387, 278, 279, 130, 280, 341,
         21,  22, 388, 131, 513, 342, 343,  23, 132, 476, 133, 134, 135, 344,
         24, 207, 389, 208,  25, 136, 209, 281,  26, 210, 282,  27, 415,  28,
        137,  29,  30, 138, 499, 345, 500, 416, 390, 346, 347,  31, 283,  32,
        348, 501,  33, 139, 502, 448,  34, 284, 477,  35,  36, 211, 140, 349,
        557, 285,  37,  38, 514, 

## choose year

In [14]:
train_start, train_end = 0, 10
valid_start, valid_end = 10, 15

# Keep every edge whose timestamp lies in [train_start, valid_end), i.e. the
# training and validation ranges together.
ts_eids = graph.filter_edges(
    lambda edges: (edges.data['ts'] >= train_start) & (edges.data['ts'] < valid_end)
)
ts_graph = dgl.edge_subgraph(graph, ts_eids)
ts_graph

Graph(num_nodes=1000, num_edges=1531990,
      ndata_schemes={'feat': Scheme(shape=(128,), dtype=torch.float32), 'raw_id': Scheme(shape=(), dtype=torch.int64), '_ID': Scheme(shape=(), dtype=torch.int64)}
      edata_schemes={'rel': Scheme(shape=(), dtype=torch.int64), 'ts': Scheme(shape=(), dtype=torch.int64), '_ID': Scheme(shape=(), dtype=torch.int64)})

In [18]:
# Rows of `triplet`: source node, destination node, edge timestamp, all taken
# from the windowed subgraph.
triplet = torch.vstack([*ts_graph.edges(), ts_graph.edata['ts']])
myout(triplet)

triplet : shape=torch.Size([3, 1531990])
tensor([[  0,   0,   0,  ..., 999, 999, 999],
        [  2,   3,   8,  ..., 970, 974, 991],
        [  0,   0,   0,  ...,  14,  14,  14]])


In [19]:
# cites[timestamp][destination node] = number of edges with that pair inside
# the selected window.
# NOTE(review): this rebinds `cites`, which the "gen cites" cell fills from
# the CSV for the full year range; re-running the label-building cell after
# this one would read this narrower table instead.
cites = {tt: defaultdict(int) for tt in range(train_start, valid_end)}

# Convert the two rows to Python lists once instead of calling `.item()` on
# a tensor element twice per edge. The loop variables are deliberately not
# called `tgt`, so the destination tensor built in the graph-construction
# cell is not rebound to a scalar.
cited_nodes = triplet[1].tolist()
edge_times = triplet[2].tolist()
for cited, when in tqdm(zip(cited_nodes, edge_times), total=triplet.shape[1]):
    cites[when][cited] += 1
myout(cites[5])

  0%|          | 0/1531990 [00:00<?, ?it/s]  1%|          | 14754/1531990 [00:00<00:10, 147522.20it/s]  2%|▏         | 29892/1531990 [00:00<00:10, 149783.90it/s]  3%|▎         | 45076/1531990 [00:00<00:09, 150719.82it/s]  4%|▍         | 60148/1531990 [00:00<00:09, 149369.32it/s]  5%|▍         | 75324/1531990 [00:00<00:09, 150221.10it/s]  6%|▌         | 90604/1531990 [00:00<00:09, 151091.31it/s]  7%|▋         | 105891/1531990 [00:00<00:09, 151668.04it/s]  8%|▊         | 121261/1531990 [00:00<00:09, 152310.23it/s]  9%|▉         | 136765/1531990 [00:00<00:09, 153158.70it/s] 10%|▉         | 152273/1531990 [00:01<00:08, 153746.51it/s] 11%|█         | 167649/1531990 [00:01<00:08, 153328.26it/s] 12%|█▏        | 182983/1531990 [00:01<00:08, 153038.19it/s] 13%|█▎        | 198288/1531990 [00:01<00:08, 152740.43it/s] 14%|█▍        | 213563/1531990 [00:01<00:08, 152481.17it/s] 15%|█▍        | 228828/1531990 [00:01<00:08, 152527.11it/s] 16%|█▌        | 244142/1531990 [00:01<00:08, 1

 : len=1000, dict([2: 122, 3: 118, 8: 137, 15: 126, 16: 114, 24: 99, ...])



