In [2]:
import os
import sys
import re
import time 
import datetime
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
from generation_utils import *
if '..' not in sys.path: sys.path.insert(0, '..')
from encore.model import Decoder, Sequential
from utils.initialization import *
from utils.distribution_utils import *
from utils.eval import *
# Select the compute device: CUDA when torch reports it as available, CPU otherwise.
# NOTE(review): `torch` is not imported explicitly in this cell; presumably it is
# brought into scope by one of the wildcard imports above — confirm.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

ImportError: cannot import name 'Decoder' from 'encore.model' (/mnt/ssd1/encore/open-source/evaluation/../encore/model.py)

In [3]:
# Resolve every relative path below against the first sys.path entry.
# NOTE(review): when sys.path[0] is a relative entry (the import cell may insert '..'
# at position 0), re-running this cell changes directory again — confirm it is only
# executed once per kernel.
os.chdir(sys.path[0])

# Input locations (relative to the working directory set above).
dataset_dir = './data/raw/'
size_dir = './data/size/'
interval_dir = './data/interval/'
metadata_dir = './data/metadata/'

# CDF tables used to turn sampled bin indices back into values; the number of bins
# is taken as one less than the number of table rows.
size_cdf = pd.read_csv('./data/cdf/size_cdf.csv')
interval_cdf = pd.read_csv('./data/cdf/interval_cdf.csv')
n_size = len(size_cdf) - 1
n_interval = len(interval_cdf) - 1
files = os.listdir(size_dir)
block_size = 30

file = 'app_182.txt'
# Drop the extension with splitext: str.strip('.txt') removes any of the characters
# '.', 't', 'x' from BOTH ends, which mangles names that start or end with them.
app = os.path.splitext(file)[0]
data = get_data(size_dir, interval_dir, file, n_interval)

In [4]:
def get_sorted_files(app_dir):
    """Return the paths of every entry in ``app_dir``, ordered by numeric file name.

    Entries named ``<digits>.csv`` come first, in ascending numeric order (so
    ``2.csv`` precedes ``10.csv``). Entries with any other name are kept and placed
    after the numbered ones, ordered by name, instead of breaking the sort with an
    un-orderable ``None`` key.

    Args:
        app_dir: Directory to list.

    Returns:
        A list of ``os.path.join(app_dir, name)`` paths.
    """
    def sort_key(file_path):
        # Match on the base name so the result does not depend on the path separator.
        name = os.path.basename(file_path)
        match = re.fullmatch(r'(\d+)\.csv', name)
        if match:
            # Empty third field keeps equal numbers in listing order (stable sort).
            return (0, int(match.group(1)), '')
        return (1, 0, name)

    paths = [os.path.join(app_dir, name) for name in os.listdir(app_dir)]
    return sorted(paths, key=sort_key)


def save_trace_data(trace, save_file):
    """Sort ``trace`` by time in place and, if requested, dump it to a text file.

    The frame passed in is modified: rows are ordered by the ``time`` column and the
    index is rebuilt as 0..n-1. Callers that keep using ``trace`` afterwards see the
    sorted frame.

    Args:
        trace: DataFrame with at least the columns ``pair``, ``time`` and ``size``.
        save_file: Destination path, or ``None`` to skip writing.

    File layout: the first line holds the row count; each following line is
    ``<pair> <time with 6 decimals> <size as int>``.
    """
    trace.sort_values(by='time', inplace=True)
    trace.reset_index(drop=True, inplace=True)
    if save_file is None:
        return
    with open(save_file, 'w') as out:
        out.write('{}\n'.format(len(trace)))
        records = zip(trace['pair'], trace['time'], trace['size'])
        for pair_id, timestamp, nbytes in records:
            out.write('{:d} {:.6f} {:d}\n'.format(pair_id, timestamp, int(nbytes)))


def get_trace(trace_id, size_index, metadata, interval_index=None):
    """Build the flow trace of one pair from sampled size (and optionally interval) indices.

    Args:
        trace_id: Value stored in the ``pair`` column of every returned row.
        size_index: Size bin indices, converted to sizes by ``sample_sequence`` using
            the ``size`` column of ``size_cdf``.
        metadata: Mapping/row providing ``flow_num``, ``start_time``, ``end_time`` and,
            when ``interval_index`` is omitted, ``load``.
        interval_index: Optional interval bin indices, converted the same way with the
            ``interval`` column of ``interval_cdf``. When ``None``, inter-arrival gaps
            are drawn from an exponential distribution with mean
            ``mean(size) / metadata['load']``.

    Returns:
        DataFrame with columns ``pair``, ``time``, ``size`` holding at most
        ``flow_num`` flows, restricted to rows with ``time <= metadata['end_time']``.
        The first flow starts at ``metadata['start_time']``.
    """
    size_sequence = sample_sequence(size_index, size_cdf['size'].values)
    size_sequence = np.array(size_sequence, dtype=int)
    if interval_index is not None:
        interval_sequence = sample_sequence(interval_index, interval_cdf['interval'].values)
        interval_sequence = np.array(interval_sequence, dtype=float)
    else:
        mean_size = np.mean(size_sequence)
        mean_interval = mean_size / metadata['load']
        # One gap between each pair of consecutive flows. Derived from the number of
        # sizes instead of a fixed 999 so the function is not tied to 1000-flow input.
        n_gaps = max(len(size_sequence) - 1, 0)
        interval_sequence = np.random.exponential(mean_interval, n_gaps)
    interval_sequence = interval_sequence.astype(float)
    # Arrival times are the running sum of the gaps, with the first flow at offset 0.
    time_sequence = np.cumsum(interval_sequence)
    time_sequence = np.concatenate(([0], time_sequence))
    time_sequence += metadata['start_time']
    flow_num = int(metadata['flow_num'])
    # The 'pair' placeholder has exactly flow_num entries, so pandas raises ValueError
    # here if fewer than flow_num times or sizes are available.
    pair_trace = pd.DataFrame({'pair': np.zeros(flow_num), 'time': time_sequence[0:flow_num], 'size': size_sequence[0:flow_num]})
    pair_trace['pair'] = trace_id
    pair_trace = pair_trace[pair_trace['time'] <= metadata['end_time']]
    return pair_trace

In [None]:
def generate_real_trace(app, save_file=None):
    """Assemble the recorded trace of ``app`` from its per-pair CSV files.

    Every file returned by ``get_sorted_files`` for ``dataset_dir/app`` is read and
    tagged with a ``pair`` column equal to its position in that ordering.

    Args:
        app: Application name (sub-directory of ``dataset_dir``).
        save_file: Optional output path handed to ``save_trace_data``.

    Returns:
        The combined DataFrame, sorted by time and re-indexed by ``save_trace_data``.
    """
    app_dir = os.path.join(dataset_dir, app)
    pair_frames = []
    for pair_id, csv_path in enumerate(get_sorted_files(app_dir)):
        df = pd.read_csv(csv_path, sep=',')
        df['pair'] = pair_id
        pair_frames.append(df)
    # Concatenate once: growing a frame inside the loop copies all earlier rows on
    # every iteration and relies on pandas' deprecated handling of empty frames.
    trace = pd.concat(pair_frames) if pair_frames else pd.DataFrame()
    save_trace_data(trace, save_file)
    return trace

In [None]:
def generate_common_practice(app, save_file=None):
    """Generate a trace by sampling sizes from one distribution shared by all pairs.

    The size distribution is computed over the size indices (``value // n_interval``)
    of all sequences of ``app`` pooled together. For each metadata row, 1000 size
    indices are drawn from it and ``get_trace`` is called without interval indices,
    so arrival gaps come from its exponential fallback.

    Args:
        app: Application name; selects ``<app>.txt`` data and ``<app>.csv`` metadata.
        save_file: Optional output path handed to ``save_trace_data``.

    Returns:
        The combined DataFrame, sorted by time by ``save_trace_data``.
    """
    file = app + '.txt'
    data = get_data(size_dir, interval_dir, file, n_interval)
    all_sizes = np.concatenate(data) // n_interval
    size_dist = compute_probability_distribution(all_sizes, n_size)
    metadata = pd.read_csv(metadata_dir + app + '.csv')
    pair_traces = []
    for i, item in metadata.iterrows():
        # Re-seed per pair so each pair's samples are reproducible on their own.
        np.random.seed(i)
        size_index = np.random.choice(n_size, 1000, p=size_dist)
        pair_traces.append(get_trace(i, size_index, item))
    # Concatenate once instead of growing the frame inside the loop (quadratic copying).
    trace = pd.concat(pair_traces, ignore_index=True) if pair_traces else pd.DataFrame()
    save_trace_data(trace, save_file)
    return trace

In [5]:
def generate_sample(app, save_file=None):
    """Generate a trace from each pair's own recorded indices with shuffled intervals.

    For every metadata row the pair's sequence is taken without its last element and
    extended with its first ``block_size - 1`` elements. Size indices
    (``value // n_interval``) keep their order, while interval indices
    (``value % n_interval``) are randomly permuted before building the trace.

    Args:
        app: Application name; selects ``<app>.txt`` data and ``<app>.csv`` metadata.
        save_file: Optional output path handed to ``save_trace_data``.

    Returns:
        The combined DataFrame, sorted by time by ``save_trace_data``.
    """
    file = app + '.txt'
    data = get_data(size_dir, interval_dir, file, n_interval)
    metadata = pd.read_csv(metadata_dir + app + '.csv')
    pair_traces = []
    for i, item in metadata.iterrows():
        # Re-seed per pair so the permutation below is reproducible.
        np.random.seed(i)
        seq = data[i]
        seq = np.append(seq[:-1], seq[0:block_size - 1])
        size_index = seq // n_interval
        interval_index = np.random.permutation(seq % n_interval)
        pair_traces.append(get_trace(i, size_index, item, interval_index))
    # Concatenate once instead of growing the frame inside the loop (quadratic copying).
    trace = pd.concat(pair_traces, ignore_index=True) if pair_traces else pd.DataFrame()
    save_trace_data(trace, save_file)
    return trace

In [None]:
def generate_size_sample(app, save_file=None):
    """Generate a trace that keeps recorded arrival times but re-samples flow sizes.

    For every metadata row, sizes are produced by ``sample_sequence`` from the pair's
    size indices (``value // n_interval``), while times are read from the ``time``
    column of the pair's raw CSV file. Both are cut to ``flow_num`` entries and rows
    later than ``end_time`` are dropped.

    Args:
        app: Application name; selects data, metadata and the raw CSV directory.
        save_file: Optional output path handed to ``save_trace_data``.

    Returns:
        The combined DataFrame, sorted by time by ``save_trace_data``.
    """
    file = app + '.txt'
    data = get_data(size_dir, interval_dir, file, n_interval)
    metadata = pd.read_csv(metadata_dir + app + '.csv')
    app_dir = os.path.join(dataset_dir, app)
    files = get_sorted_files(app_dir)
    pair_traces = []
    for i, item in metadata.iterrows():
        size_index = data[i] // n_interval
        size_sequence = sample_sequence(size_index, size_cdf['size'].values)
        size_sequence = np.array(size_sequence, dtype=int)
        # Metadata row i is paired with the i-th file in numeric order.
        real_data = pd.read_csv(files[i], sep=',')
        time_sequence = real_data['time'].values
        flow_num = int(item['flow_num'])
        size_sequence = size_sequence[0:flow_num]
        time_sequence = time_sequence[0:flow_num]
        # The 'pair' placeholder has exactly flow_num entries, so pandas raises
        # ValueError here if fewer than flow_num times or sizes are available.
        pair_trace = pd.DataFrame({'pair': np.zeros(flow_num), 'time': time_sequence, 'size': size_sequence})
        pair_trace['pair'] = i
        pair_traces.append(pair_trace[pair_trace['time'] <= item['end_time']])
    # Concatenate once instead of growing the frame inside the loop (quadratic copying).
    trace = pd.concat(pair_traces, ignore_index=True) if pair_traces else pd.DataFrame()
    save_trace_data(trace, save_file)
    return trace

In [6]:
def generate_lomas(app, save_file=None):
    """Generate a trace from the per-pair word probabilities of the 'lomas' checkpoint.

    ``gen_dists_lomas`` is called with the ``checkpoints/lomas`` path; only its second
    return value (per-pair word probabilities) is used. Each pair's probabilities are
    normalised to sum to 1, a 1000-element sequence is produced by
    ``generate_sequence_lomas``, and each element is split into a size index
    (``value // n_interval``) and an interval index (``value % n_interval``).

    Args:
        app: Application name; selects the checkpoint data and ``<app>.csv`` metadata.
        save_file: Optional output path handed to ``save_trace_data``.

    Returns:
        The combined DataFrame, sorted by time by ``save_trace_data``.
    """
    _, lomas_word_prob = gen_dists_lomas('checkpoints/lomas', n_size, n_interval, app)
    metadata = pd.read_csv(metadata_dir + app + '.csv')
    pair_traces = []
    for i, item in metadata.iterrows():
        lomas_word_prob[i] = lomas_word_prob[i] / np.sum(lomas_word_prob[i])
        # NOTE(review): the third argument is presumably a random seed — confirm.
        seq = generate_sequence_lomas(lomas_word_prob[i], 1000, i)
        size_index = seq // n_interval
        interval_index = seq % n_interval
        pair_traces.append(get_trace(i, size_index, item, interval_index))
    # Concatenate once instead of growing the frame inside the loop (quadratic copying).
    trace = pd.concat(pair_traces, ignore_index=True) if pair_traces else pd.DataFrame()
    save_trace_data(trace, save_file)
    return trace

In [None]:
def generate_cvae(app, model_path, save_file=None):
    """Generate a trace by sampling independently from decoder-produced distributions.

    The decoder loaded by ``load_cvae`` is passed, together with the normalised loads
    from the metadata file, to ``gen_dists_encore``, which yields one size and one
    interval distribution per pair. For every metadata row, ``seq_len`` size indices
    and ``seq_len`` interval indices are drawn independently from those distributions.

    Args:
        app: Application name; selects ``<app>.csv`` metadata.
        model_path: Checkpoint location passed to ``load_cvae``.
        save_file: Optional output path handed to ``save_trace_data``.

    Returns:
        The combined DataFrame, sorted by time by ``save_trace_data``.
    """
    decoder = load_cvae(model_path, n_size, n_interval, device)
    metadata = pd.read_csv(metadata_dir + app + '.csv')
    loads = get_loads(metadata_dir + app + '.csv', normalize=True)
    size_recon, interval_recon = gen_dists_encore(decoder, loads, device)
    seq_len = 1000
    pair_traces = []
    for i, item in metadata.iterrows():
        # Re-seed per pair so each pair's samples are reproducible on their own.
        np.random.seed(i)
        size_index = np.random.choice(n_size, seq_len, p=size_recon[i])
        interval_index = np.random.choice(n_interval, seq_len, p=interval_recon[i])
        pair_traces.append(get_trace(i, size_index, item, interval_index))
    # Concatenate once instead of growing the frame inside the loop (quadratic copying).
    trace = pd.concat(pair_traces, ignore_index=True) if pair_traces else pd.DataFrame()
    save_trace_data(trace, save_file)
    return trace

In [None]:
def generate_gru(app, model_path, save_file=None):
    """Generate a trace with the sequential model, using distributions from real data.

    For every metadata row the pair's recorded sequence (without its last element,
    extended with its first ``block_size - 1`` elements) is turned into size and
    interval distributions, from which ``generate_sequence_encore`` produces a
    sequence of ``seq_len`` elements. Each element is split into a size index
    (``value // n_interval``) and an interval index (``value % n_interval``).

    Args:
        app: Application name; selects ``<app>.txt`` data and ``<app>.csv`` metadata.
        model_path: Checkpoint location passed to ``load_gru``.
        save_file: Optional output path handed to ``save_trace_data``.

    Returns:
        The combined DataFrame, sorted by time by ``save_trace_data``.
    """
    model = load_gru(model_path, device)
    file = app + '.txt'
    data = get_data(size_dir, interval_dir, file, n_interval)
    metadata = pd.read_csv(metadata_dir + app + '.csv')
    seq_len = 1000
    initial_seed = 0
    pair_traces = []
    # iterrows() is a generator with no length; pass total so tqdm can show progress.
    for i, item in tqdm(metadata.iterrows(), total=len(metadata)):
        np.random.seed(i)
        seq = data[i]
        seq = np.append(seq[:-1], seq[0:block_size - 1])
        size_dist, interval_dist = get_probability_distributions_from_sequence(seq, n_size, n_interval)
        sequence_gen = np.array(generate_sequence_encore(model, size_dist, interval_dist, block_size, device, seq_len, initial_seed))
        size_index = sequence_gen // n_interval
        interval_index = sequence_gen % n_interval
        pair_traces.append(get_trace(i, size_index, item, interval_index))
    # Concatenate once instead of growing the frame inside the loop (quadratic copying).
    trace = pd.concat(pair_traces, ignore_index=True) if pair_traces else pd.DataFrame()
    save_trace_data(trace, save_file)
    return trace

In [7]:
def generate_encore(app, gru_path, cvae_path, save_file=None):
    """Generate a trace with the sequential model, using decoder-produced distributions.

    ``gen_dists_encore`` turns the decoder loaded by ``load_cvae`` and the normalised
    loads from the metadata file into one size and one interval distribution per pair.
    For every metadata row those distributions are given to
    ``generate_sequence_encore`` together with the model loaded by ``load_gru``, and
    each element of the result is split into a size index (``value // n_interval``)
    and an interval index (``value % n_interval``).

    Args:
        app: Application name; selects ``<app>.csv`` metadata.
        gru_path: Checkpoint location passed to ``load_gru``.
        cvae_path: Checkpoint location passed to ``load_cvae``.
        save_file: Optional output path handed to ``save_trace_data``.

    Returns:
        The combined DataFrame, sorted by time by ``save_trace_data``.
    """
    sequential = load_gru(gru_path, device)
    decoder = load_cvae(cvae_path, n_size, n_interval, device)
    metadata = pd.read_csv(metadata_dir + app + '.csv')
    loads = get_loads(metadata_dir + app + '.csv', normalize=True)
    size_recon, interval_recon = gen_dists_encore(decoder, loads, device)
    seq_len = 1000
    initial_seed = 0
    pair_traces = []
    # iterrows() is a generator with no length; pass total so tqdm can show progress.
    for i, item in tqdm(metadata.iterrows(), total=len(metadata)):
        np.random.seed(i)
        sequence_gen = np.array(generate_sequence_encore(sequential, size_recon[i], interval_recon[i], block_size, device, seq_len, initial_seed))
        size_index = sequence_gen // n_interval
        interval_index = sequence_gen % n_interval
        pair_traces.append(get_trace(i, size_index, item, interval_index))
    # Concatenate once instead of growing the frame inside the loop (quadratic copying).
    trace = pd.concat(pair_traces, ignore_index=True) if pair_traces else pd.DataFrame()
    save_trace_data(trace, save_file)
    return trace

In [8]:
# Application to generate traces for and the checkpoint tag of the trained models.
app = 'app_182'
model_date = '2024-5-14-20'
gru_path = 'gru-{}/{}'.format(model_date, app)
cvae_path = 'cvae-{}/{}'.format(model_date, app)

# One output file per generation method, all in the simulator's trace directory.
real_trace_file = './simulation/data/trace/real_{}.txt'.format(app)
common_practice_file = './simulation/data/trace/common_{}.txt'.format(app)
sample_file = './simulation/data/trace/sample_{}.txt'.format(app)
size_sample_file = './simulation/data/trace/size_sample_{}.txt'.format(app)
gru_file = './simulation/data/trace/gru_{}.txt'.format(app)
cvae_file = './simulation/data/trace/cvae_{}.txt'.format(app)
encore_file = './simulation/data/trace/encore_{}.txt'.format(app)
lomas_file = './simulation/data/trace/lomas_{}.txt'.format(app)

# Recorded trace and the data-driven baselines.
real_trace = generate_real_trace(app, save_file=real_trace_file)
common_practice = generate_common_practice(app, save_file=common_practice_file)
sample = generate_sample(app, save_file=sample_file)
size_sample = generate_size_sample(app, save_file=size_sample_file)
# Model-based generators (these need the checkpoints referenced above).
lomas = generate_lomas(app, save_file=lomas_file)
gru = generate_gru(app, gru_path, save_file=gru_file)
cvae = generate_cvae(app, cvae_path, save_file=cvae_file)
encore = generate_encore(app, gru_path, cvae_path, save_file=encore_file)

115it [01:47,  1.07it/s]
115it [01:48,  1.06it/s]
