In [None]:
# Personally I had to add the root folder of the repo to the sys.path.  If certain imports do not work you should uncomment and set the following.
# import sys
# sys.path.append('/root/of/repo/folder/')

# The Density Experiment

The main reason for introducing different strategies is an issue noted in previous work: due to high content churn and a small population per edge node, reactive caching becomes infeasible on smaller nodes.

In this experiment we evaluate 4 different setups of edge nodes while keeping the number of users equal.  This means that in setups with more nodes the amount of users per node will decrease as will the number of requests.  We will use the LRU and Belady strategies to prove that this is indeed a problem.  To improve our confidence in the results we only plot the results after 10 runs with a confidence interval based on the standard deviation.

In [None]:
from .utils import load_or_generate_trace, read_resource_map, setup_stats_file_writers, make_dir, read_node_map, TraceIteratorProxy, calc_variance
from simulation.evaluator.strategy.runner import StrategyRunner
from simulation.evaluator.strategy.lru import LRUStrategy

# Input datasets: per-resource statistics for the Zipf generator and the page map
# for the page-map generator.
resource_file = "../dataset/out/dataset-resources-stats.csv"
pagemap_file = "../dataset/out/page-map-clean.csv"

# Edge-node layouts keyed by the number of nodes in the setup file.
node_maps = {
    1: read_node_map('./node_setups/1node.json'),
    3: read_node_map('./node_setups/3nodes.json'),
    8: read_node_map('./node_setups/8nodes.json'),
    14: read_node_map('./node_setups/14nodes.json')
}

out_dir = make_dir('./out/experiment-hybrid/')

# Per-strategy output directories. All three are joined to `out_dir` with an explicit
# "/" so the LRU directory is built the same way as its siblings and ends up inside
# `out_dir` whether or not the value returned by `make_dir` carries a trailing slash.
lru_out_dir = make_dir(f"{out_dir}/lru/")
cooplru_out_dir = make_dir(f"{out_dir}/cooplru/")
profiles_out_dir = make_dir(f"{out_dir}/profiles/")

In [None]:
# Size of the experiment: users and iterations per generated trace, and the number
# of independent runs.
no_users = 1000
no_iterations = 5000
no_runs = 8
# One string seed per run: "0" .. str(no_runs - 1).
trace_seeds = list(map(str, range(no_runs)))

## Traces
We generate our traces using the largest `node_map`.  Then later on we use a `TraceIteratorProxy` to make sure that we map the right nodes to match the smaller edge setups.  The traces are generated using 1000 users over 5000 iterations for three different generators: _zipf of 0.75_, _zipf of 1.3_ and _page map_.

In [None]:
from simulation.generator.main_zipf import TraceConfig, Simulation
from simulation.generator.main_page_map import UserTraceConfig, UserSimulation

def generate_zipf_trace(seed: str, zipf_exponent: float):
    """Return the Zipf trace for ``seed`` and ``zipf_exponent``.

    The trace is configured on the 14-node map with the notebook-wide
    ``no_users`` and ``no_iterations`` and simulated from ``resource_file``.
    It is stored under ``out_dir`` using the file name derived from the config.
    Presumably an already stored trace is loaded instead of being regenerated;
    confirm against ``load_or_generate_trace``.
    """
    config = TraceConfig(
        node_map=node_maps[14],
        seed=seed,
        no_users=no_users,
        no_iterations=no_iterations,
        zipf_exponent=zipf_exponent,
    )
    zipf_simulation = Simulation(config, resource_file)
    trace_path = f"{out_dir}/{config.to_filename()}.trace.gz"
    return load_or_generate_trace(trace_path, simulation=zipf_simulation)

def generate_page_map_trace(seed: str):
    """Return the page-map trace for ``seed``.

    The trace is configured on the 14-node map with the notebook-wide
    ``no_users`` and ``no_iterations`` and simulated from ``pagemap_file``.
    It is stored under ``out_dir`` using the file name derived from the config.
    Presumably an already stored trace is loaded instead of being regenerated;
    confirm against ``load_or_generate_trace``.
    """
    config = UserTraceConfig(
        node_map=node_maps[14],
        seed=seed,
        no_users=no_users,
        no_iterations=no_iterations,
    )
    page_map_simulation = UserSimulation(config, pagemap_file)
    trace_path = f"{out_dir}/{config.to_filename()}.trace.gz"
    return load_or_generate_trace(trace_path, simulation=page_map_simulation)

## Proxy Setups
We set up a proxy map that matches the grouping of our nodes outlined in the Hybrid Experiment in the associated publication.

In [None]:
# Maps each of the 14 node names used in the generated traces onto one of the three
# grouped nodes (cdn1, cdn2, cdn3).
proxy_map = {
    "cdn1": "cdn1",
    "cdn2": "cdn1",
    "cdn3": "cdn3",
    "cdn4": "cdn1",
    "cdn5": "cdn2",
    "cdn6": "cdn1",
    "cdn7": "cdn3",
    "cdn8": "cdn3",
    "cdn9": "cdn2",
    "cdn10": "cdn1",
    "cdn11": "cdn2",
    "cdn12": "cdn2",
    "cdn13": "cdn3",
    "cdn14": "cdn3",
}

## Strategies

In [None]:
from .utils import setup_stats_file_writers, read_resource_map
from simulation.evaluator.strategy.runner import StrategyRunner
from simulation.evaluator.strategy.strategy import CacheStrategy
from simulation.evaluator.strategy.lru import LRUStrategy
from simulation.evaluator.strategy.cooperative_lru import CooperativeLRUStrategy
from simulation.evaluator.strategy.profiles import ProfilesStrategy
from typing import Callable

def create_lru_setup(nodes):
    """Return ``(LRUStrategy(nodes), lru_out_dir)``: the strategy and where its stats go."""
    return LRUStrategy(nodes), lru_out_dir


def create_cooplru_setup(nodes):
    """Return the cooperative LRU strategy (node trail length 3) and its output directory."""
    return CooperativeLRUStrategy(nodes, node_trail_length=3), cooplru_out_dir


def create_profiles_setup(nodes):
    """Return the profiles strategy (ranking timeout 5, profile size 10000) and its output directory."""
    return ProfilesStrategy(nodes, ranking_timeout=5, profile_size=10000), profiles_out_dir


# Strategy factories evaluated for every trace, in this order.
setups = [create_lru_setup, create_cooplru_setup, create_profiles_setup]

# Base capacity unit: 1024 * 1024 * 1024.
node_capacity = 1024 ** 3
# Capacity of each grouped node, expressed as a multiple of the base unit.
node_setup = {
    "cdn1": {"capacity": node_capacity * 5},
    "cdn2": {"capacity": node_capacity * 4},
    "cdn3": {"capacity": node_capacity * 5},
}

def run_strategy_experiment(trace, strategy_setup: Callable[[dict[str, dict[str, int]]], CacheStrategy], marker: str = ""):
    """Run one strategy over one trace and write its statistics.

    Args:
        trace: Object exposing ``instructions``; these are wrapped in a
            ``TraceIteratorProxy`` configured with ``proxy_map``.
        strategy_setup: Factory called with ``node_setup``.
            NOTE(review): the annotation declares a bare ``CacheStrategy`` return,
            but the result is unpacked as ``(strategy, output directory)``.
        marker: Suffix for the stats marker, which becomes
            ``n<number of nodes in node_setup>-<marker>``.
    """
    proxied_trace = TraceIteratorProxy(trace.instructions, proxy_map=proxy_map)
    cache_strategy, stats_dir = strategy_setup(node_setup)
    node_count = len(node_setup)
    writers = setup_stats_file_writers(node_setup, stats_dir, marker=f"n{node_count}-{marker}")
    # The resource map is read after the writers are set up, as in the single-call form.
    resource_map = read_resource_map(resource_file)
    runner = StrategyRunner(cache_strategy, proxied_trace, resource_map, stats_writers=writers)
    runner.perform()

## The Experiment

In [None]:
# Build every trace up front; the returned traces are discarded here and requested
# again by the trace loaders when the experiments run.
print("Pre-generating Traces")
for seed in trace_seeds:
    generate_zipf_trace(zipf_exponent=0.75, seed=seed)
    generate_page_map_trace(seed)
    generate_zipf_trace(zipf_exponent=1.30, seed=seed)
print("All traces generated")

In [None]:
# The loaders are passed as task arguments to `multiprocessing.Pool.starmap`, so they
# must be picklable. The standard library pickles functions by name, which works for
# module-level `def`s but not for lambdas.

def load_075_trace(seed):
    """Return ``(trace, '075')`` for the Zipf trace with exponent 0.75 and ``seed``."""
    return generate_zipf_trace(seed=seed, zipf_exponent=0.75), '075'


def load_130_trace(seed):
    """Return ``(trace, '130')`` for the Zipf trace with exponent 1.30 and ``seed``."""
    return generate_zipf_trace(seed=seed, zipf_exponent=1.30), '130'


def load_page_map_trace(seed):
    """Return ``(trace, 'page-map')`` for the page-map trace with ``seed``."""
    return generate_page_map_trace(seed=seed), 'page-map'


# Every trace loader evaluated per seed.
trace_options = [load_075_trace, load_130_trace, load_page_map_trace]

def run_experiment(trace_seed: str, trace_loader):
    """Run every strategy in ``setups`` against the trace loaded for ``trace_seed``.

    ``trace_loader`` is called with the seed and must return ``(trace, trace_marker)``.
    Each strategy run is tagged ``<trace_marker>-<trace_seed>``; a STARTED and a DONE
    line are printed around the whole batch.
    """
    trace, trace_marker = trace_loader(trace_seed)
    run_marker = f"{trace_marker}-{trace_seed}"
    print(trace_seed, trace_marker, 'STARTED')
    for strategy_setup in setups:
        run_strategy_experiment(trace, strategy_setup, marker=run_marker)
    print(trace_seed, trace_marker, 'DONE')

In [None]:
# Make use of multiprocess (over multiprocessing) if an "AttributeError" says it couldn't find `run_experiment`.
from multiprocessing import Pool

if __name__ == '__main__':
    # One task per (seed, trace loader) combination, seeds in the outer loop.
    options = [ (trace_seed, loader)
                for trace_seed in trace_seeds
                for loader in trace_options ]
    print(f"Executing {len(options)} experiments...")
    # Eight worker processes; chunksize=1 submits each experiment as its own task.
    with Pool(processes=8) as pool:
        pool.starmap(run_experiment, options, chunksize=1)

## Plots
To visualise the results we create two plots that show the main performance metrics: hit ratio and backhaul cost.

But first we have to load in all the data from the runs above.

In [None]:
from .utils import load_runs_in_dir, calc_ratio

def filter_runs_by(runs, match: str):
    return [ r for r in runs if match in str(r["source"]) ]

# Hybrid runs of the plain LRU strategy produced by this notebook.
lru_runs = load_runs_in_dir(lru_out_dir)

# Non-hybrid baselines come from the storage experiment; only the runs whose source
# contains '-134217728b-' (128 MiB) are kept for comparison.
cooplru_nh_runs = filter_runs_by(
    load_runs_in_dir("./out/experiment-storage/cooplru/"),
    '-134217728b-',
)
profiles_nh_runs = filter_runs_by(
    load_runs_in_dir("./out/experiment-storage/profiles/"),
    '-134217728b-',
)

# Hybrid runs of the cooperative LRU and profiles strategies produced by this notebook.
cooplru_runs = load_runs_in_dir(cooplru_out_dir)
profiles_runs = load_runs_in_dir(profiles_out_dir)

In [None]:
def calc_over_setups(runs, strategy: str, calculation) -> list[float]:
    """Apply ``calculation`` to every run whose source contains ``strategy``.

    Runs are selected with ``filter_runs_by`` and the results are returned in the
    order of the selected runs.
    """
    return list(map(calculation, filter_runs_by(runs, strategy)))

# Every metric below reads the last sample ([-1]) of the named series of a run.

def calc_average_hit_ratio(run):
    """``calc_ratio`` of the final ``hits_total`` and ``misses_total`` samples."""
    return calc_ratio(run['hits_total'][-1], run['misses_total'][-1])


def calc_average_byte_ratio(run):
    """``calc_ratio`` of the final ``cache_bytes_total`` and ``origin_bytes_total`` samples."""
    return calc_ratio(run['cache_bytes_total'][-1], run['origin_bytes_total'][-1])


def calc_average_neighbour_to_total(run):
    """Final ``requests_to_neighbours`` divided by the final hits plus misses."""
    return run['requests_to_neighbours'][-1] / (run['hits_total'][-1] + run['misses_total'][-1])


def calc_average_neighbour_bytes(run):
    """Final ``neighbour_bytes_total`` divided by the final cache plus origin bytes."""
    return run['neighbour_bytes_total'][-1] / (run['cache_bytes_total'][-1] + run['origin_bytes_total'][-1])


def calc_average_neighbour_ratio(run):
    """``calc_ratio`` of the final ``requests_to_neighbours_success`` and ``requests_to_neighbours`` samples."""
    return calc_ratio(run['requests_to_neighbours_success'][-1], run['requests_to_neighbours'][-1])


def norm_neighbour_success(run):
    """Neighbour ratio multiplied by the neighbour-to-total request fraction."""
    return calc_average_neighbour_ratio(run) * calc_average_neighbour_to_total(run)

In [None]:
from typing import Tuple

def pretty_print_variance(variance: Tuple[float, float]) -> str:
    """Format a pair of numbers as ``<first>±<second>``, each rounded to 3 decimals.

    Presumably the pair is the ``(centre, spread)`` result of ``calc_variance``;
    confirm against that helper.
    """
    centre = round(variance[0], 3)
    spread = round(variance[1], 3)
    return "{}±{}".format(centre, spread)

# Display name of each trace generator mapped to the marker that identifies its runs
# when matched against a run's source.
generators = {
    "ZipF-0.75": "-075-",
    "Page-Map": "-page-map-",
    "ZipF-1.30": "-130-",
}

# Metrics reported per generator: display name mapped to the function applied to each
# run. The commented-out entries are optional extra metrics.
calculations = {
    "Average Hit Ratios": calc_average_hit_ratio,
    # "Internal Requests": calc_average_neighbour_to_total,
    "Average Bandwidth Savings": calc_average_byte_ratio,
    # "Internal Bandwidth": calc_average_neighbour_bytes,
}

# For every generator and metric, print a heading followed by one row containing the
# formatted `calc_variance` result per strategy, joined with " & ".
for gen, generator in generators.items():
    for calc, calculation in calculations.items():
        # Column order of the printed row: non-hybrid baselines first, then hybrid runs.
        strategies = {
            label: calc_over_setups(strategy_runs, generator, calculation)
            for label, strategy_runs in (
                ("Co-Op LRU NonHybrid", cooplru_nh_runs),
                ("Profiles NonHybrid", profiles_nh_runs),
                ("LRU", lru_runs),
                ("Co-Op LRU", cooplru_runs),
                ("Profiles", profiles_runs),
            )
        }
        print(f"{calc} {gen}")
        print(" & ".join(
            pretty_print_variance(calc_variance(values))
            for values in strategies.values()
        ))