In [1]:
import torch
import torch.nn as nn
from zytlib import vector
import math
from zytlib.classfunc import save_args
from torch.nn.utils.rnn import pad_sequence
from zytlib.wrapper import second_argument
from zytlib.table import table
import math
from torchfunction.utils import seed_torch
import random

In [2]:
from dataset import Emulater, emulate, generate_in_train_label, dataset

In [30]:
# Trial-timing configuration. The mean_* values are divided by delta_t in the
# cells below to obtain per-trial step counts (units presumably milliseconds
# -- TODO confirm).
delta_t, num_items, random_seed = 20, 6, 1024
mean_delay, mean_rank, mean_last_delay = 300, 200, 1000

# Bundle the configuration so it can be stored next to the generated datasets.
info = table(
    delta_t=delta_t,
    mean_delay=mean_delay,
    mean_rank=mean_rank,
    mean_last_delay=mean_last_delay,
    num_items=num_items,
    random_seed=random_seed,
)

# generate training dataset

In [31]:
# Seed through seed_torch, then enumerate every ordered item sequence
# (repeats allowed) of length 3, 2 and 1 over range(num_items).
seed_torch(random_seed)
rank3_items_list, rank2_items_list, rank1_items_list = (
    vector.range(num_items) ** length for length in (3, 2, 1)
)


In [32]:
# Inspect the length-1 item sequences (one 1-tuple per item).
rank1_items_list

[(0,), (1,), (2,), (3,), (4,), (5,)]

In [33]:
# Inspect the length-2 item sequences (every ordered pair, repeats included).
rank2_items_list

[(0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (1, 0), (1, 1), (1, 2), (1, 3), (1, 4), (1, 5), (2, 0), (2, 1), (2, 2), (2, 3), (2, 4), (2, 5), (3, 0), (3, 1), (3, 2), (3, 3), (3, 4), (3, 5), (4, 0), (4, 1), (4, 2), (4, 3), (4, 4), (4, 5), (5, 0), (5, 1), (5, 2), (5, 3), (5, 4), (5, 5)]

In [34]:
def _mean_timing_record(items):
    """Return (items, delay, rank) with every timing set to its mean step count.

    rank has one entry per item; delay has one entry per item plus a final
    entry for the last delay. Step counts are the configured means divided by
    delta_t, rounded up.
    """
    n_items = len(items)
    rank = vector([math.ceil(mean_rank / delta_t)] * n_items)
    delay = vector([math.ceil(mean_delay / delta_t)] * n_items) + [math.ceil(mean_last_delay / delta_t)]
    return (items, delay, rank)


# Un-jittered base records for the length-2 sequences.
rank2_dataset = vector()
for items in rank2_items_list:
    rank2_dataset.append(_mean_timing_record(items))

# Un-jittered base records for the length-1 sequences.
rank1_dataset = vector()
for items in rank1_items_list:
    rank1_dataset.append(_mean_timing_record(items))

In [35]:
def _jitter(steps):
    """Scale each entry by an independent factor in [0.5, 1.5) and truncate to int."""
    return steps.map(lambda x: int(x * (random.random() + 0.5)))


final_rank2_dataset_train = vector()
# Test records are bucketed by sequence length under the keys "rank1" / "rank2".
final_rank2_dataset_test = {"rank1": vector(), "rank2": vector()}

n_train_copies = 70 * 3
n_test_copies = 30 * 3

# Length-2 sequences feed both the training set and the "rank2" test bucket.
for items, delay, rank in rank2_dataset:
    for _ in range(n_train_copies):
        final_rank2_dataset_train.append((items, _jitter(delay), _jitter(rank)))
    bucket = final_rank2_dataset_test["rank{}".format(len(items))]
    for _ in range(n_test_copies):
        # Test records carry a trailing constant 1 as a fourth field.
        bucket.append((items, _jitter(delay), _jitter(rank), 1))

# Length-1 sequences are test-only here: they never enter the training set.
for items, delay, rank in rank1_dataset:
    bucket = final_rank2_dataset_test["rank{}".format(len(items))]
    for _ in range(n_test_copies):
        bucket.append((items, _jitter(delay), _jitter(rank), 1))

table(
    info=info,
    train=final_rank2_dataset_train,
    test=final_rank2_dataset_test,
).save(f"dataset/dataset_item_{num_items}_train_rank2.db")

In [36]:
# NOTE: a cell only displays its last expression, so this `.length` lookup
# produces no visible output.
final_rank2_dataset_train.length
# Show 10 randomly sampled training records as (items, delay, rank).
final_rank2_dataset_train.sample(10)

[((4, 4), [19, 12, 62], [6, 11]), ((0, 4), [14, 18, 59], [13, 14]), ((5, 2), [10, 12, 62], [13, 13]), ((2, 0), [19, 17, 49], [5, 13]), ((2, 0), [20, 10, 67], [8, 14]), ((0, 2), [14, 19, 44], [11, 6]), ((3, 5), [12, 8, 30], [10, 8]), ((5, 0), [20, 14, 36], [8, 14]), ((5, 0), [10, 14, 25], [7, 8]), ((2, 0), [16, 17, 25], [11, 11])], with index mapping

In [37]:
seed_torch(random_seed)

# Mean timings expressed as step counts (rounded up).
rank_steps = math.ceil(mean_rank / delta_t)
delay_steps = math.ceil(mean_delay / delta_t)
last_delay_steps = math.ceil(mean_last_delay / delta_t)

# Each length-1 sequence is entered 6 times and each length-2 sequence once;
# with num_items = 6 that gives 36 base records of each length (presumably to
# balance the two lengths -- TODO confirm).
rank12_dataset = vector()
for source_list, repeats in ((rank1_items_list, 6), (rank2_items_list, 1)):
    for items in source_list:
        for _ in range(repeats):
            rank = vector([rank_steps] * len(items))
            delay = vector([delay_steps] * len(items)) + [last_delay_steps]
            rank12_dataset.append((items, delay, rank))

In [38]:
# Scratch check of the jitter factor: each x is scaled by random.random() + 0.5.
# NOTE(review): this draws 10 values from Python's `random` between the
# seed_torch call above and the dataset generation below, so (assuming
# seed_torch seeds `random` -- TODO confirm) the saved rank12 dataset depends
# on whether this cell was run.
vector.range(10).map(lambda x: x * (random.random() + 0.5))

[0.0, 0.9834988702079559, 1.7788556266310196, 3.988229664995395, 5.481733665459814, 3.002175481808213, 6.077275933290929, 10.196949464498463, 9.77455638689112, 10.943720656033125]

In [39]:
final_rank12_dataset_train = vector()
# Test records are bucketed by sequence length under the keys "rank1" / "rank2".
final_rank12_dataset_test = {"rank1": vector(), "rank2": vector()}

for items, delay, rank in rank12_dataset:
    # 210 training copies per base record; every timing entry is scaled by an
    # independent factor in [0.5, 1.5) and truncated to int.
    for _ in range(70 * 3):
        train_delay = delay.map(lambda x: int(x * (random.random() + 0.5)))
        train_rank = rank.map(lambda x: int(x * (random.random() + 0.5)))
        final_rank12_dataset_train.append((items, train_delay, train_rank))

    # 90 test copies per base record, tagged with a trailing constant 1.
    bucket = final_rank12_dataset_test["rank{}".format(len(items))]
    for _ in range(30 * 3):
        test_delay = delay.map(lambda x: int(x * (random.random() + 0.5)))
        test_rank = rank.map(lambda x: int(x * (random.random() + 0.5)))
        bucket.append((items, test_delay, test_rank, 1))

table(
    info=info,
    train=final_rank12_dataset_train,
    test=final_rank12_dataset_test,
).save(f"dataset/dataset_item_{num_items}_rank12.db")

In [40]:
# Sanity check: 72 base records (36 length-1 + 36 length-2) x 210 training copies = 15120.
len(final_rank12_dataset_train)

15120

# generate testing dataset

In [18]:
# Length-2 test set: fixed rank (10) and delay (15) steps, with the last delay
# swept over 25..100 inclusive. The swept value is also stored as a fourth field.
items_list = vector.range(num_items) ** 2
rank2_dataset = vector()
for items in items_list:
    rank, delay = vector([10]) * 2, vector([15]) * 2
    for d in range(25, 101):
        record = (items, delay + [d], rank, d)
        rank2_dataset.append(record)

In [19]:
# Spot-check five random records: (items, delay incl. last delay, rank, last delay).
rank2_dataset.sample(5).apply(print)

((0, 5), [15, 15, 49], [10, 10], 49)
((1, 3), [15, 15, 33], [10, 10], 33)
((2, 1), [15, 15, 45], [10, 10], 45)
((0, 5), [15, 15, 44], [10, 10], 44)
((1, 1), [15, 15, 70], [10, 10], 70)


In [20]:
# Sanity check: 36 length-2 sequences x 76 last-delay values (25..100) = 2736.
rank2_dataset.length

2736

In [21]:
# Test-only table (empty training split) for the 25..100 last-delay sweep.
t = table(
    train=vector(),
    test=rank2_dataset,
)
t.save("dataset/dataset_only_rank2_with_lastdelay_25_100.db")

In [22]:
# Length-2 test set with every timing fixed: rank 10, delay 15, last delay 50.
items_list = vector.range(num_items) ** 2
rank2_dataset = vector()
for items in items_list:
    rank, delay = vector([10]) * 2, vector([15]) * 2
    d = 50
    record = (items, delay + [d], rank, d)
    rank2_dataset.append(record)

In [16]:
# Spot-check five random records; each should end with a last delay of 50.
rank2_dataset.sample(5).apply(print)

((2, 1), [15, 15, 50], [10, 10], 50)
((5, 0), [15, 15, 50], [10, 10], 50)
((2, 0), [15, 15, 50], [10, 10], 50)
((0, 5), [15, 15, 50], [10, 10], 50)
((1, 0), [15, 15, 50], [10, 10], 50)


In [23]:
# Sanity check: one record per length-2 sequence, i.e. 6 ** 2 = 36.
rank2_dataset.length

36

In [24]:
# Rebuild the table before saving: `t` was created from the earlier 25-100
# last-delay sweep, and rebinding `rank2_dataset` afterwards does not update
# it. Saving the stale `t` would write the old sweep under this file name.
t = table(train=vector(), test=rank2_dataset)
t.save("dataset/dataset_only_rank2_with_lastdelay_50.db")

In [5]:
# Length-1 test set: fixed rank (10) and delay (15) steps, with the last delay
# swept over 25..100 inclusive. The swept value is also stored as a fourth field.
items_list = vector.range(num_items) ** 1
rank1_dataset = vector()
for items in items_list:
    rank, delay = vector([10]), vector([15])
    for d in range(25, 101):
        record = (items, delay + [d], rank, d)
        rank1_dataset.append(record)

In [6]:
# Sanity check: 6 items x 76 last-delay values (25..100) = 456, plus one random record.
len(rank1_dataset), rank1_dataset.sample()

(456, ((5,), [15, 88], [10], 88))

In [7]:
# Test-only table (empty training split) for the length-1 last-delay sweep.
t1 = table(
    train=vector(),
    test=rank1_dataset,
)
t1.save("dataset/dataset_only_rank1_with_lastdelay_25_100.db")

In [8]:
# Length-1 test set: delay fixed at [15, 50], with the rank duration swept over
# 5..15 inclusive. The swept value is also stored as a fourth field.
items_list = vector.range(num_items) ** 1
rank1_dataset_new = vector()
for items in items_list:
    # NOTE(review): these two values are not used by the records built below.
    rank, delay = vector([10]), vector([15])
    for r in range(5, 16):
        record = (items, vector([15, 50]), vector([r]), r)
        rank1_dataset_new.append(record)

In [9]:
# Sanity check: 6 items x 11 rank values (5..15) = 66, plus one random record.
len(rank1_dataset_new), rank1_dataset_new.sample()

(66, ((1,), [15, 50], [11], 11))

In [10]:
# Test-only table (empty training split) for the length-1 rank-duration sweep.
t2 = table(
    train=vector(),
    test=rank1_dataset_new,
)
t2.save("dataset/dataset_only_rank1_with_last_delay_50_rank_5_15.db")

In [26]:
# Length-3 test set at the mean timings, with the last delay doubled.
items_list = vector.range(num_items) ** 3

delay, rank = int(mean_delay / delta_t), int(mean_rank / delta_t)
last_delay = 2 * int(mean_last_delay / delta_t)

# One record per sequence; the trailing 0 is a constant fourth field.
rank3_dataset = items_list.map(
    lambda items: (items, vector([delay] * 3 + [last_delay]), vector([rank] * 3), 0)
)

In [27]:
# Sanity check: 6 ** 3 = 216 length-3 sequences, plus one random record.
len(rank3_dataset), rank3_dataset.sample()

(216, ((2, 2, 5), [15, 15, 15, 100], [10, 10, 10], 0))

In [29]:
# Test-only table (empty training split) for the length-3 set with last delay 100.
t3 = table(
    train=vector(),
    test=rank3_dataset,
)
t3.save("dataset/dataset_only_rank3_with_rank_10_delay_15_last_delay_100.db")

In [30]:
# Length-1 test set at the mean timings, with the last delay tripled.
items_list = vector.range(num_items) ** 1

delay, rank = int(mean_delay / delta_t), int(mean_rank / delta_t)
last_delay = 3 * int(mean_last_delay / delta_t)

# NOTE: despite its name, rank4_dataset holds length-1 sequences; the "4"
# does not refer to the sequence length. The trailing 0 is a constant field.
rank4_dataset = items_list.map(
    lambda items: (items, vector([delay, last_delay]), vector([rank]), 0)
)

In [31]:
# Sanity check: one record per item (6), plus one random record.
len(rank4_dataset), rank4_dataset.sample()

(6, ((1,), [15, 150], [10], 0))

In [32]:
# Test-only table (empty training split) for the length-1 set with last delay 150.
t4 = table(
    train=vector(),
    test=rank4_dataset,
)
t4.save("dataset/dataset_only_rank1_with_rank_10_delay_15_last_delay_150.db")

In [33]:
# Length-4 test set at the mean timings, with the last delay doubled.
items_list = vector.range(num_items) ** 4

delay, rank = int(mean_delay / delta_t), int(mean_rank / delta_t)
last_delay = 2 * int(mean_last_delay / delta_t)

# One record per sequence; the trailing 0 is a constant fourth field.
length4_dataset = items_list.map(
    lambda items: (items, vector([delay] * 4 + [last_delay]), vector([rank] * 4), 0)
)

In [34]:
# Sanity check: 6 ** 4 = 1296 length-4 sequences, plus one random record.
len(length4_dataset), length4_dataset.sample()

(1296, ((0, 5, 4, 4), [15, 15, 15, 15, 100], [10, 10, 10, 10], 0))

In [35]:
# Test-only table (empty training split) for the length-4 set with last delay 100.
t5 = table(
    train=vector(),
    test=length4_dataset,
)
t5.save("dataset/dataset_only_rank4_with_rank_10_delay_15_last_delay_100.db")

In [5]:
# Length-3 test set at the mean timings, with the last delay left at its mean.
items_list = vector.range(num_items) ** 3

delay, rank = int(mean_delay / delta_t), int(mean_rank / delta_t)
last_delay = int(mean_last_delay / delta_t)

# One record per sequence; the trailing 0 is a constant fourth field.
rank3_dataset = items_list.map(
    lambda items: (items, vector([delay] * 3 + [last_delay]), vector([rank] * 3), 0)
)

# Report the size and one random record before writing the file.
print(len(rank3_dataset), rank3_dataset.sample())

t6 = table(
    train=vector(),
    test=rank3_dataset,
)
t6.save("dataset/dataset_only_rank3_with_rank_10_delay_15_last_delay_50.db")

216 ((5, 1, 1), [15, 15, 15, 50], [10, 10, 10], 0)
