In [1]:
import torch
import torch.nn as nn
from zytlib import vector
import math
from zytlib.classfunc import save_args
from torch.nn.utils.rnn import pad_sequence
from zytlib.wrapper import second_argument
from zytlib.table import table
import math
from torchfunction.utils import seed_torch
import random

In [2]:
from dataset import Emulater, emulate, generate_in_train_label, dataset, SimulatedDataset

In [3]:
# Timing configuration. Later cells divide the three mean_* durations by
# delta_t to obtain step counts (units are presumably milliseconds -- TODO confirm).
delta_t = 20
mean_delay = 300
mean_rank = 200
mean_last_delay = 1000
# Number of distinct item ids; item sequences are tuples over range(num_items).
num_items = 2
random_seed = 1024

# Snapshot of the configuration, passed as `info=` when the train/test
# dataset tables are saved further down.
info = table(
    delta_t=delta_t,
    mean_delay=mean_delay,
    mean_rank=mean_rank,
    mean_last_delay=mean_last_delay,
    num_items=num_items,
    random_seed=random_seed,
)

# generate training dataset

In [4]:
# Seed the random generators for reproducibility.
# NOTE(review): presumably seed_torch also seeds the stdlib `random` module that
# map_delay and the jitter lambdas draw from -- confirm.
seed_torch(random_seed)
# `vector ** k` enumerates every length-k tuple of item ids (see the recorded
# outputs of the inspection cells below).
rank3_items_list = vector.range(num_items) ** 3
rank2_items_list = vector.range(num_items) ** 2
rank1_items_list = vector.range(num_items) ** 1

def map_delay(delay):
    """Return a randomly jittered copy of ``delay``.

    Every entry except the last is multiplied by a factor drawn uniformly
    from [0.5, 1.5); the last entry is multiplied by a factor drawn from
    [1.0, 1.5), so it never shrinks. Results are truncated with ``int``.

    Args:
        delay: sequence supporting slicing, ``.map(func)`` and ``+``
            (a zytlib ``vector`` in this notebook).

    Returns:
        The concatenation of the jittered leading entries and the jittered
        final entry, in the original order.
    """
    def jitter_leading(steps):
        return int(steps * (random.random() + 0.5))

    def jitter_final(steps):
        return int(steps * (random.random() * 0.5 + 1))

    # Leading entries are drawn first, then the final one, so the sequence of
    # random draws is fixed for a given seed.
    leading = delay[:-1].map(jitter_leading)
    final = delay[-1:].map(jitter_final)
    return leading + final

In [5]:
# Smoke-test map_delay on three equal delays. Note this call draws three random
# numbers, so it shifts the random stream seen by later cells.
map_delay(vector(100,100,100))

[129, 98, 119]

In [6]:
# Inspect the length-1 item tuples.
rank1_items_list

[(0,), (1,)]

In [7]:
# Inspect the length-2 item tuples.
rank2_items_list

[(0, 0), (0, 1), (1, 0), (1, 1)]

In [8]:
# Inspect the length-3 item tuples.
rank3_items_list

[(0, 0, 0), (0, 0, 1), (0, 1, 0), (0, 1, 1), (1, 0, 0), (1, 0, 1), (1, 1, 0), (1, 1, 1)]

In [9]:
# Nominal (un-jittered) timings converted to step counts, rounded up.
rank_steps = math.ceil(mean_rank / delta_t)
delay_steps = math.ceil(mean_delay / delta_t)
last_delay_steps = math.ceil(mean_last_delay / delta_t)


def _nominal_samples(items_list):
    """Build one un-jittered ``(items, delay, rank)`` sample per item tuple.

    ``rank`` holds one entry per item; ``delay`` holds one entry per item
    followed by a final entry derived from ``mean_last_delay``.
    """
    samples = vector()
    for items in items_list:
        count = len(items)
        rank = vector([rank_steps] * count)
        delay = vector([delay_steps] * count) + [last_delay_steps]
        samples.append((items, delay, rank))
    return samples


rank3_dataset = _nominal_samples(rank3_items_list)
rank2_dataset = _nominal_samples(rank2_items_list)
rank1_dataset = _nominal_samples(rank1_items_list)

In [10]:
# Un-jittered test-only dataset covering sequence lengths 1 and 2.
final_rank2_dataset_train = vector()
final_rank2_dataset_test = {"L1": vector(), "L2": vector()}

# Each test sample is filed under "L<sequence length>" and carries a trailing 1
# (meaning of that fourth field is not visible here -- TODO confirm with the loader).
for base_dataset in (rank1_dataset, rank2_dataset):
    for items, delay, rank in base_dataset:
        bucket = final_rank2_dataset_test["L{}".format(len(items))]
        bucket.append((items, delay, rank, 1))

# The saved training split is intentionally empty.
table(
    info=info,
    train=vector(),
    test=final_rank2_dataset_test,
    hyper=table(num_items=num_items),
).save(f"dataset/dataset_item_{num_items}_test_rank12.db")

In [11]:
# Un-jittered test-only dataset covering sequence lengths 1, 2 and 3.
final_rank3_dataset_train = vector()
final_rank3_dataset_test = {"L1": vector(), "L2": vector(), "L3": vector()}

# Each test sample is filed under "L<sequence length>" with a trailing 1.
for base_dataset in (rank1_dataset, rank2_dataset, rank3_dataset):
    for items, delay, rank in base_dataset:
        bucket = final_rank3_dataset_test["L{}".format(len(items))]
        bucket.append((items, delay, rank, 1))

# The saved training split is intentionally empty.
table(
    info=info,
    train=vector(),
    test=final_rank3_dataset_test,
    hyper=table(num_items=num_items),
).save(f"dataset/dataset_item_{num_items}_test_rank123.db")

In [12]:
# Inspect the un-jittered length-1 / length-2 test split.
final_rank2_dataset_test

{'L1': [((0,), [15, 50], [10], 1), ((1,), [15, 50], [10], 1)],
 'L2': [((0, 0), [15, 15, 50], [10, 10], 1), ((0, 1), [15, 15, 50], [10, 10], 1), ((1, 0), [15, 15, 50], [10, 10], 1), ((1, 1), [15, 15, 50], [10, 10], 1)]}

In [13]:
# Rebuild the nominal (un-jittered) length-2 and length-1 samples.
rank_steps = math.ceil(mean_rank / delta_t)
delay_steps = math.ceil(mean_delay / delta_t)
last_delay_steps = math.ceil(mean_last_delay / delta_t)

rank2_dataset = vector()
rank1_dataset = vector()
for target, source_items in (
    (rank2_dataset, rank2_items_list),
    (rank1_dataset, rank1_items_list),
):
    for items in source_items:
        count = len(items)
        # One delay per item plus the trailing last delay; one rank per item.
        nominal_delay = vector([delay_steps] * count) + [last_delay_steps]
        nominal_rank = vector([rank_steps] * count)
        target.append((items, nominal_delay, nominal_rank))

In [14]:
# Jittered dataset: train on length-2 sequences, test on lengths 1 and 2.
n_train_repeats = 70 * 3
n_test_repeats = 30 * 3

final_rank2_dataset_train = vector()
final_rank2_dataset_test = {"L1": vector(), "L2": vector()}

for items, delay, rank in rank2_dataset:
    test_bucket = final_rank2_dataset_test["L{}".format(len(items))]
    for _ in range(n_train_repeats):
        # NOTE(review): training delays are jittered uniformly on every entry
        # here (not via map_delay, which the test split and later cells use).
        train_delay = delay.map(lambda x: int(x * (random.random() + 0.5)))
        train_rank = rank.map(lambda x: int(x * (random.random() + 0.5)))
        final_rank2_dataset_train.append((items, train_delay, train_rank))
    for _ in range(n_test_repeats):
        test_delay = map_delay(delay)
        test_rank = rank.map(lambda x: int(x * (random.random() + 0.5)))
        test_bucket.append((items, test_delay, test_rank, 1))

# Length-1 sequences only contribute test samples.
for items, delay, rank in rank1_dataset:
    test_bucket = final_rank2_dataset_test["L{}".format(len(items))]
    for _ in range(n_test_repeats):
        test_delay = map_delay(delay)
        test_rank = rank.map(lambda x: int(x * (random.random() + 0.5)))
        test_bucket.append((items, test_delay, test_rank, 1))

table(
    info=info,
    train=final_rank2_dataset_train,
    test=final_rank2_dataset_test,
    hyper=table(num_items=num_items),
).save(f"dataset/dataset_item_{num_items}_train_rank2.db")

In [15]:
# Sanity check: report the training-set size and show 10 sampled entries.
print(final_rank2_dataset_train.length)
final_rank2_dataset_train.sample(10)

840


[((1, 0), [21, 20, 69], [9, 14]), ((1, 0), [10, 10, 34], [9, 7]), ((1, 0), [13, 11, 27], [11, 6]), ((1, 0), [16, 9, 67], [11, 13]), ((0, 0), [18, 16, 40], [8, 6]), ((0, 1), [18, 13, 71], [6, 13]), ((1, 0), [16, 14, 41], [13, 8]), ((0, 1), [19, 13, 30], [14, 11]), ((1, 0), [13, 22, 38], [12, 6]), ((1, 0), [15, 10, 25], [10, 11])], with index mapping

In [16]:
# Rebuild the nominal (un-jittered) samples for sequence lengths 2, 1 and 3.
rank_steps = math.ceil(mean_rank / delta_t)
delay_steps = math.ceil(mean_delay / delta_t)
last_delay_steps = math.ceil(mean_last_delay / delta_t)

rank2_dataset = vector()
rank1_dataset = vector()
rank3_dataset = vector()
for target, source_items in (
    (rank2_dataset, rank2_items_list),
    (rank1_dataset, rank1_items_list),
    (rank3_dataset, rank3_items_list),
):
    for items in source_items:
        count = len(items)
        # One delay per item plus the trailing last delay; one rank per item.
        nominal_delay = vector([delay_steps] * count) + [last_delay_steps]
        nominal_rank = vector([rank_steps] * count)
        target.append((items, nominal_delay, nominal_rank))

In [17]:
# Jittered dataset: train on length-3 sequences, test on lengths 1, 2 and 3.
final_rank3_dataset_train = vector()
final_rank3_dataset_test = {"L1": vector(), "L2": vector(), "L3": vector()}
seed_torch(random_seed)
length = 3

n_train_repeats = 70 * 3
n_test_repeats = 30 * 3

for items, delay, rank in rank3_dataset:
    test_bucket = final_rank3_dataset_test["L{}".format(len(items))]
    for _ in range(n_train_repeats):
        train_delay = map_delay(delay)
        train_rank = rank.map(lambda x: int(x * (random.random() + 0.5)))
        final_rank3_dataset_train.append((items, train_delay, train_rank))
    for _ in range(n_test_repeats):
        test_delay = map_delay(delay)
        test_rank = rank.map(lambda x: int(x * (random.random() + 0.5)))
        test_bucket.append((items, test_delay, test_rank, 1))

# Shorter sequences only contribute test samples (length 2 first, then length 1).
for base_dataset in (rank2_dataset, rank1_dataset):
    for items, delay, rank in base_dataset:
        test_bucket = final_rank3_dataset_test["L{}".format(len(items))]
        for _ in range(n_test_repeats):
            test_delay = map_delay(delay)
            test_rank = rank.map(lambda x: int(x * (random.random() + 0.5)))
            test_bucket.append((items, test_delay, test_rank, 1))

table(
    info=info,
    train=final_rank3_dataset_train,
    test=final_rank3_dataset_test,
    hyper=table(num_items=num_items),
).save(f"dataset/dataset_item_{num_items}_train_length_{length}.db")

In [18]:
# Jittered dataset: train AND test on sequence lengths 3, 2 and 1.
final_rank3_dataset_train = vector()
final_rank3_dataset_test = {"L1": vector(), "L2": vector(), "L3": vector()}
seed_torch(random_seed)
length = 3

n_train_repeats = 70 * 3
n_test_repeats = 30 * 3

for items, delay, rank in rank3_dataset + rank2_dataset + rank1_dataset:
    test_bucket = final_rank3_dataset_test["L{}".format(len(items))]
    for _ in range(n_train_repeats):
        train_delay = map_delay(delay)
        train_rank = rank.map(lambda x: int(x * (random.random() + 0.5)))
        final_rank3_dataset_train.append((items, train_delay, train_rank))
    for _ in range(n_test_repeats):
        test_delay = map_delay(delay)
        test_rank = rank.map(lambda x: int(x * (random.random() + 0.5)))
        test_bucket.append((items, test_delay, test_rank, 1))

table(
    info=info,
    train=final_rank3_dataset_train,
    test=final_rank3_dataset_test,
    hyper=table(num_items=num_items),
).save(f"dataset/dataset_item_{num_items}_train_length_123.db")

In [19]:
# Load the saved length-1/2/3 dataset through the project's SimulatedDataset.
# NOTE(review): the positional 10 is presumably a batch size -- confirm against dataset.py.
sd = SimulatedDataset(f"dataset/dataset_item_{num_items}_train_length_123.db", 10, train_items_crop=2)

In [24]:
# Display the dataset summary (split sizes and one example per split).
sd

Dataset: 
	train:	2940	[((0, 1, 0), [11, 11, 13, 57], [9, 14, 6])]
	test:
	L1	180	[((1,), [16, 56], [9], 1)]
	L2	360	[((1, 0), [21, 20, 66], [6, 5], 1)]
	L3	720	[((1, 0, 1), [21, 17, 18, 51], [14, 12, 12], 1)]

In [25]:
seed_torch(random_seed)

# Nominal (un-jittered) samples for lengths 1 and 2 in a single list,
# length-1 tuples first.
rank_steps = math.ceil(mean_rank / delta_t)
delay_steps = math.ceil(mean_delay / delta_t)
last_delay_steps = math.ceil(mean_last_delay / delta_t)

rank12_dataset = vector()
for source_items in (rank1_items_list, rank2_items_list):
    for items in source_items:
        count = len(items)
        # One delay per item plus the trailing last delay; one rank per item.
        nominal_delay = vector([delay_steps] * count) + [last_delay_steps]
        nominal_rank = vector([rank_steps] * count)
        rank12_dataset.append((items, nominal_delay, nominal_rank))

In [26]:
# Scratch check of the uniform [0.5, 1.5) jitter factor on the values 0..9.
# Gotcha: this draws random numbers after the seed set in the previous cell, so
# running or skipping it changes the dataset generated by the next cell.
vector.range(10).map(lambda x: x * (random.random() + 0.5))

[0.0, 0.9834988702079559, 1.7788556266310196, 3.988229664995395, 5.481733665459814, 3.002175481808213, 6.077275933290929, 10.196949464498463, 9.77455638689112, 10.943720656033125]

In [34]:
# Jittered dataset: train and test on sequence lengths 1 and 2.
final_rank12_dataset_train = vector()
final_rank12_dataset_test = {"L1": vector(), "L2": vector()}

n_train_repeats = 70 * 3
n_test_repeats = 30 * 3

for items, delay, rank in rank12_dataset:
    test_bucket = final_rank12_dataset_test["L{}".format(len(items))]
    for _ in range(n_train_repeats):
        train_delay = map_delay(delay)
        train_rank = rank.map(lambda x: int(x * (random.random() + 0.5)))
        final_rank12_dataset_train.append((items, train_delay, train_rank))
    for _ in range(n_test_repeats):
        test_delay = map_delay(delay)
        test_rank = rank.map(lambda x: int(x * (random.random() + 0.5)))
        test_bucket.append((items, test_delay, test_rank, 1))

table(
    info=info,
    train=final_rank12_dataset_train,
    test=final_rank12_dataset_test,
    hyper=table(num_items=num_items),
).save(f"dataset/dataset_item_{num_items}_rank12.db")

In [35]:
# Sanity check: total training size and the number of samples per item tuple.
print(len(final_rank12_dataset_train))
print(final_rank12_dataset_train.map(lambda x: x[0]).count_all())

1260
Counter({(0,): 210, (1,): 210, (0, 0): 210, (0, 1): 210, (1, 0): 210, (1, 1): 210})


In [36]:
# Load the saved length-1/2 dataset through the project's SimulatedDataset.
# NOTE(review): the positional 10 is presumably a batch size -- confirm against dataset.py.
sd = SimulatedDataset(f"dataset/dataset_item_{num_items}_rank12.db", 10, train_items_crop=2)

In [37]:
# Display the dataset summary (split sizes and one example per split).
sd

Dataset: 
	train:	1260	[((0, 0), [10, 14, 53], [10, 7])]
	test:
	L1	180	[((1,), [17, 61], [13], 1)]
	L2	360	[((1, 0), [20, 22, 63], [12, 11], 1)]

# generate testing dataset

In [None]:
# Test set for length-2 sequences that sweeps the last delay over 25..100 steps.
# The hard-coded 10 and 15 equal mean_rank / delta_t and mean_delay / delta_t
# for the configuration at the top of the notebook.
items_list = vector.range(num_items) ** 2
rank2_dataset = vector()
for items in items_list:
    # NOTE(review): `vector([..]) * 2` presumably repeats like list multiplication -- confirm.
    rank = vector([10]) * 2
    delay = vector([15]) * 2
    for d in range(25, 101):
        # The swept last delay d is appended to the delays and also stored as the fourth field.
        rank2_dataset.append((items, delay + [d], rank, d))

In [None]:
# Print five sampled entries as a sanity check.
rank2_dataset.sample(5).apply(print)

In [None]:
# Number of generated test samples.
rank2_dataset.length

In [None]:
# Package the last-delay sweep as a test-only table (empty training split) and save it.
t = table(train=vector(), test=rank2_dataset)
t.save(f"dataset/dataset_only_rank2_with_lastdelay_25_100.db")

In [None]:
# Test set for length-2 sequences with a single fixed last delay of 50 steps.
# Rebinds `rank2_dataset`; objects built from the previous value are unaffected.
items_list = vector.range(num_items) ** 2
rank2_dataset = vector()
for items in items_list:
    # NOTE(review): `vector([..]) * 2` presumably repeats like list multiplication -- confirm.
    rank = vector([10]) * 2
    delay = vector([15]) * 2
    d = 50
    rank2_dataset.append((items, delay + [d], rank, d))

In [None]:
# Print five sampled entries as a sanity check.
rank2_dataset.sample(5).apply(print)

In [None]:
# Number of generated test samples.
rank2_dataset.length

In [None]:
# Wrap the current `rank2_dataset` (fixed last delay of 50) in its own table
# before saving. The `t` left over from the earlier cell was built from the
# 25-100 sweep, so saving it here would write the wrong samples under this name.
t = table(train=vector(), test=rank2_dataset)
t.save(f"dataset/dataset_only_rank2_with_lastdelay_50.db")

In [None]:
# Test set for single-item sequences that sweeps the last delay over 25..100 steps.
items_list = vector.range(num_items) ** 1
rank1_dataset = vector()
for items in items_list:
    rank = vector([10])
    delay = vector([15])
    for d in range(25, 101):
        # The swept last delay d is appended to the delays and also stored as the fourth field.
        rank1_dataset.append((items, delay + [d], rank, d))

In [None]:
# Size of the sweep and one sampled entry.
len(rank1_dataset), rank1_dataset.sample()

In [None]:
# Package the single-item last-delay sweep as a test-only table and save it.
t1 = table(train=vector(), test=rank1_dataset)
t1.save(f"dataset/dataset_only_rank1_with_lastdelay_25_100.db")

In [None]:
# Test set for single-item sequences that sweeps the rank value over 5..15
# while the delays stay fixed at [15, 50].
items_list = vector.range(num_items) ** 1
rank1_dataset_new = vector()
for items in items_list:
    for r in range(5, 16):
        # The swept rank r is used as the rank entry and also stored as the
        # fourth field. A fresh delay vector is built for every sample.
        rank1_dataset_new.append((items, vector([15, 50]), vector([r]), r))

In [None]:
# Size of the rank sweep and one sampled entry.
len(rank1_dataset_new), rank1_dataset_new.sample()

In [None]:
# Package the single-item rank sweep as a test-only table and save it.
t2 = table(train=vector(), test=rank1_dataset_new)
t2.save(f"dataset/dataset_only_rank1_with_last_delay_50_rank_5_15.db")

In [None]:
# Test set for length-3 sequences with nominal timings and a doubled last delay.
# Note: this cell truncates with int() where the training cells use math.ceil;
# the two agree for the current configuration.
items_list = vector.range(num_items) ** 3
delay = int(mean_delay / delta_t)
rank = int(mean_rank / delta_t)
last_delay = int(mean_last_delay / delta_t) * 2

# Three equal delays followed by the last delay, three equal ranks, and 0 as
# the fourth field.
rank3_dataset = items_list.map(
    lambda items: (
        items,
        vector([delay] * 3 + [last_delay]),
        vector([rank] * 3),
        0,
    )
)

In [None]:
# Size of the test set and one sampled entry.
len(rank3_dataset), rank3_dataset.sample()

In [None]:
# Package the length-3 test set as a test-only table and save it. The file name
# hard-codes the timing values produced by the current configuration.
t3 = table(train=vector(), test=rank3_dataset)
t3.save("dataset/dataset_only_rank3_with_rank_10_delay_15_last_delay_100.db")

In [None]:
# Test set for single-item sequences with a tripled last delay.
items_list = vector.range(num_items) ** 1

delay = int(mean_delay / delta_t)
rank = int(mean_rank / delta_t)
last_delay = int(mean_last_delay / delta_t) * 3

# NOTE(review): despite its name, `rank4_dataset` holds length-1 sequences
# (items_list is `** 1`) and is saved under a "rank1" file name below.
rank4_dataset = items_list.map(lambda items: (items, vector([delay, last_delay]), vector([rank]), 0))

In [None]:
# Size of the test set and one sampled entry.
len(rank4_dataset), rank4_dataset.sample()

In [None]:
# Package the single-item, long-last-delay test set as a test-only table and save it.
t4 = table(train=vector(), test=rank4_dataset)
t4.save("dataset/dataset_only_rank1_with_rank_10_delay_15_last_delay_150.db")

In [None]:
# Test set for length-4 sequences with nominal timings and a doubled last delay.
items_list = vector.range(num_items) ** 4
delay = int(mean_delay / delta_t)
rank = int(mean_rank / delta_t)
last_delay = int(mean_last_delay / delta_t) * 2

# Four equal delays followed by the last delay, four equal ranks, and 0 as the
# fourth field.
length4_dataset = items_list.map(
    lambda items: (
        items,
        vector([delay] * 4 + [last_delay]),
        vector([rank] * 4),
        0,
    )
)

In [None]:
# Size of the test set and one sampled entry.
len(length4_dataset), length4_dataset.sample()

In [None]:
# Package the length-4 test set as a test-only table and save it.
t5 = table(train=vector(), test=length4_dataset)
t5.save("dataset/dataset_only_rank4_with_rank_10_delay_15_last_delay_100.db")

In [None]:
# Test set for length-3 sequences with nominal timings and the nominal last delay.
items_list = vector.range(num_items) ** 3
delay = int(mean_delay / delta_t)
rank = int(mean_rank / delta_t)
last_delay = int(mean_last_delay / delta_t)

# Three equal delays followed by the last delay, three equal ranks, and 0 as
# the fourth field.
rank3_dataset = items_list.map(
    lambda items: (
        items,
        vector([delay] * 3 + [last_delay]),
        vector([rank] * 3),
        0,
    )
)

# Size of the test set and one sampled entry.
print(len(rank3_dataset), rank3_dataset.sample())

# Save as a test-only table (empty training split).
t6 = table(train=vector(), test=rank3_dataset)
t6.save("dataset/dataset_only_rank3_with_rank_10_delay_15_last_delay_50.db")

In [None]:
# Example hyper-parameter table for a training run on the length-3 dataset.
hyper = table(
    datapath='dataset/dataset_item_2_train_length_3.db',
    learning_rate=0.001,
    l2_reg=1e-5,
    encoder_max_rank=2,
    max_epochs=3000,
    embedding='dataset/embedding_inputdim_2_embeddingdim_4096_round_without_normalize.db',
    residual_loss=1e-2,
    train_items_crop=2,
)

In [None]:
# Display the hyper-parameter table.
hyper