In [None]:
# Personally I had to add the root folder of the repo to the sys.path.  If certain imports do not work you should uncomment and set the following.
# import sys
# sys.path.append('/root/of/repo/folder/')

# The Density Experiment

The main reason for introducing different strategies is an issue, noted in previous work, where a large content churn combined with a small population per edge node makes reactive caching infeasible on smaller nodes.

In this experiment we evaluate 4 different setups of edge nodes while keeping the total number of users equal.  This means that in setups with more nodes the number of users per node decreases, as does the number of requests per node.  We use the LRU and Belady strategies to show that this is indeed a problem.  To improve our confidence in the results we only plot them after repeating every setup over multiple runs (`no_runs`, currently 8), with a confidence interval based on the standard deviation.

In [None]:
from .utils import load_or_generate_trace, read_resource_map, setup_nodes, setup_stats_file_writers, make_dir, read_node_map, TraceIteratorProxy, calc_variance
from simulation.evaluator.strategy.runner import StrategyRunner
from simulation.evaluator.strategy.lru import LRUStrategy

# Input datasets: per-resource statistics and the page map.
resource_file = "../dataset/out/dataset-resources-stats.csv"
pagemap_file = "../dataset/out/page-map-clean.csv"

# Edge setups keyed by their number of nodes, in ascending order.
node_maps = {
    node_count: read_node_map(setup_file)
    for node_count, setup_file in (
        (1, './node_setups/1node.json'),
        (3, './node_setups/3nodes.json'),
        (8, './node_setups/8nodes.json'),
        (14, './node_setups/14nodes.json'),
    )
}

# Root output directory of this experiment.
out_dir = make_dir('./out/experiment-density/')

# One sub-directory per evaluated strategy.
belady_out_dir = make_dir(f"{out_dir}beladys/")
lru_out_dir = make_dir(f"{out_dir}lru/")
cooplru_out_dir = make_dir(f"{out_dir}cooplru/")
profiles_out_dir = make_dir(f"{out_dir}profiles/")
profiles_size100_out_dir = make_dir(f"{out_dir}profiles-size100/")
federated_out_dir = make_dir(f"{out_dir}federated/")

In [None]:
# Size of every generated trace and the number of repetitions of the experiment.
no_users = 1000
no_iterations = 5000
no_runs = 8
# One seed per run: the run index rendered as a string.
trace_seeds = list(map(str, range(no_runs)))

## Traces
We generate our traces using the largest `node_map`.  Later on we use a `TraceIteratorProxy` to map the nodes in those traces onto the smaller edge setups.  The traces are generated using 1000 users over 5000 iterations for three different generators: _zipf of 0.75_, _zipf of 1.3_ and _page map_.

In [None]:
from simulation.generator.main_zipf import TraceConfig, Simulation
from simulation.generator.main_page_map import UserTraceConfig, UserSimulation

def generate_zipf_trace(seed: str, zipf_exponent: float):
    """Obtain the zipf trace for ``seed`` and ``zipf_exponent``.

    The trace is always configured for the 14-node setup and for the
    module-level ``no_users`` / ``no_iterations``.  The work is delegated to
    ``load_or_generate_trace`` with a file name derived from the trace
    configuration; judging by its name, that helper reuses an existing file
    when there is one.

    Args:
        seed: Seed handed to the trace configuration.
        zipf_exponent: Exponent handed to the trace configuration.

    Returns:
        Whatever ``load_or_generate_trace`` returns for this configuration.
    """
    config = TraceConfig(
        node_map=node_maps[14],
        seed=seed,
        no_users=no_users,
        no_iterations=no_iterations,
        zipf_exponent=zipf_exponent,
    )
    zipf_simulation = Simulation(config, resource_file)
    trace_path = f"{out_dir}/{config.to_filename()}.trace.gz"
    return load_or_generate_trace(trace_path, simulation=zipf_simulation)

def generate_page_map_trace(seed: str):
    """Obtain the page-map trace for ``seed``.

    The trace is always configured for the 14-node setup and for the
    module-level ``no_users`` / ``no_iterations``.  The work is delegated to
    ``load_or_generate_trace`` with a file name derived from the trace
    configuration; judging by its name, that helper reuses an existing file
    when there is one.

    Args:
        seed: Seed handed to the trace configuration.

    Returns:
        Whatever ``load_or_generate_trace`` returns for this configuration.
    """
    config = UserTraceConfig(
        node_map=node_maps[14],
        seed=seed,
        no_users=no_users,
        no_iterations=no_iterations,
    )
    page_map_simulation = UserSimulation(config, pagemap_file)
    trace_path = f"{out_dir}/{config.to_filename()}.trace.gz"
    return load_or_generate_trace(trace_path, simulation=page_map_simulation)

## Proxy Setups
We set up proxy maps that fold the nodes of the 14-node traces onto the 1-node, 3-node and 8-node setups; the entry for 14 nodes is an identity mapping so that all setups can be handled the same way.

In [None]:
# For every setup size: which node of that setup stands in for each node name
# that occurs in the 14-node traces.
proxy_map = {
    # Single node: everything is folded onto cdn1.
    1: dict.fromkeys((f"cdn{index}" for index in range(1, 15)), "cdn1"),
    3: {
        "cdn1": "cdn1",
        "cdn2": "cdn2",
        "cdn3": "cdn3",
        "cdn4": "cdn2",
        "cdn5": "cdn2",
        "cdn6": "cdn1",
        "cdn7": "cdn3",
        "cdn8": "cdn3",
        "cdn9": "cdn1",
        "cdn10": "cdn2",
        "cdn11": "cdn1",
        "cdn12": "cdn3",
        "cdn13": "cdn3",
        "cdn14": "cdn3",
    },
    8: {
        "cdn1": "cdn1",
        "cdn2": "cdn2",
        "cdn3": "cdn3",
        "cdn4": "cdn4",
        "cdn5": "cdn5",
        "cdn6": "cdn6",
        "cdn7": "cdn7",
        "cdn8": "cdn8",
        "cdn9": "cdn5",
        "cdn10": "cdn4",
        "cdn11": "cdn1",
        "cdn12": "cdn8",
        "cdn13": "cdn7",
        "cdn14": "cdn3",
    },
    # Full setup: every node maps onto itself.
    14: {name: name for name in (f"cdn{index}" for index in range(1, 15))},
}

## Strategies

In [None]:
from simulation.evaluator.strategy.belady_min import run_belady
from .utils import read_resource_map

def run_belady_experiment(trace, no_nodes: int, marker: str = ""):
    """Run ``run_belady`` on ``trace`` for the setup with ``no_nodes`` nodes.

    Args:
        trace: Trace object; only its ``instructions`` attribute is read here.
        no_nodes: Setup size; selects the proxy map and divides the size budget.
        marker: Suffix of the marker passed on to ``run_belady``; the marker
            is prefixed with ``n<no_nodes>-``.
    """
    proxied_trace = TraceIteratorProxy(trace.instructions, proxy_map=proxy_map[no_nodes])
    resources = read_resource_map(resource_file)
    # 1024**3 split evenly over the nodes and truncated
    # (presumably a per-node capacity in bytes, i.e. 1 GiB in total - TODO confirm).
    size_per_node = int(1024*1024*1024 / no_nodes)
    run_belady(proxied_trace, resources, size_per_node, belady_out_dir, marker=f"n{no_nodes}-{marker}")

In [None]:
from .utils import setup_nodes, setup_stats_file_writers, read_resource_map
from simulation.evaluator.strategy.runner import StrategyRunner
from simulation.evaluator.strategy.strategy import CacheStrategy
from simulation.evaluator.strategy.lru import LRUStrategy
from simulation.evaluator.strategy.cooperative_lru import CooperativeLRUStrategy
from simulation.evaluator.strategy.profiles import ProfilesStrategy
from typing import Callable

def create_lru_setup(nodes):
    """Return an ``LRUStrategy`` built on ``nodes`` and the LRU output directory."""
    return LRUStrategy(nodes), lru_out_dir


def create_cooplru_setup(nodes):
    """Return a ``CooperativeLRUStrategy`` (node trail length 3) and its output directory."""
    return CooperativeLRUStrategy(nodes, node_trail_length=3), cooplru_out_dir


def create_profiles_setup(nodes):
    """Return a ``ProfilesStrategy`` with profile size 10000 and its output directory."""
    return ProfilesStrategy(nodes, ranking_timeout=5, profile_size=10000), profiles_out_dir


def create_profiles_setup_100(nodes):
    """Return a ``ProfilesStrategy`` with profile size 100 and its output directory."""
    return ProfilesStrategy(nodes, ranking_timeout=5, profile_size=100), profiles_size100_out_dir


# Factories evaluated for every trace, in this order.
setups = [
    create_lru_setup,
    create_cooplru_setup,
    create_profiles_setup_100,
    create_profiles_setup,
]

def run_strategy_experiment(trace, strategy_setup: Callable[[dict[str, dict[str, int]]], tuple[CacheStrategy, str]], no_nodes: int, marker: str = ""):
    """Evaluate one caching strategy on ``trace`` for the setup with ``no_nodes`` nodes.

    Args:
        trace: Trace object; only its ``instructions`` attribute is read here.
        strategy_setup: Factory that receives the freshly set up nodes and
            returns a ``(strategy, output_directory)`` pair.  The output
            directory is whatever ``make_dir`` returned for that strategy
            (presumably a ``str`` path - TODO confirm).
        no_nodes: Setup size; selects the proxy map and divides the size budget.
        marker: Suffix of the marker used for the stats writers; the marker
            is prefixed with ``n<number of nodes>-``.
    """
    trace_proxy = TraceIteratorProxy(trace.instructions,
                                     proxy_map=proxy_map[no_nodes])
    # 1024**3 split evenly over the nodes and truncated
    # (presumably a per-node capacity in bytes - TODO confirm).
    nodes = setup_nodes(no_nodes, int(1024*1024*1024 / no_nodes))
    # The factory yields two values, not a bare strategy.
    strategy, strat_out_dir = strategy_setup(nodes)
    stats_writers = setup_stats_file_writers(nodes, strat_out_dir, marker=f"n{len(nodes)}-{marker}")
    StrategyRunner(strategy, trace_proxy, read_resource_map(resource_file), stats_writers=stats_writers).perform()

## The Experiment

Pregenerate the traces and save them to file so that they are available to each worker process.

In [None]:
print("Pre-generating Traces")
for seed in trace_seeds:
    # The returned traces are dropped on purpose: this pass only makes sure
    # that every trace exists before the experiment runs.
    generate_zipf_trace(zipf_exponent=0.75, seed=seed)
    generate_page_map_trace(seed)
    generate_zipf_trace(zipf_exponent=1.30, seed=seed)
print("All traces generated")

In [None]:
def load_page_map_trace(seed):
    """Return the page-map trace for ``seed`` together with its marker.

    This is a named function rather than a lambda because the entries of
    ``trace_options`` are passed as task arguments to a process pool.  The
    pool pickles its arguments, and the standard pickler can only serialise
    functions that it can look up by name, which excludes lambdas.

    Args:
        seed: Seed forwarded to ``generate_page_map_trace``.

    Returns:
        A ``(trace, 'page-map')`` tuple.
    """
    return generate_page_map_trace(seed=seed), 'page-map'


# Trace loaders to run the experiment with.
trace_options = [ load_page_map_trace ]

def run_experiment(trace_seed: str, trace_loader, no_nodes: int):
    """Run Belady's and every strategy in ``setups`` for one trace and setup size.

    Args:
        trace_seed: Seed handed to ``trace_loader``; also part of the run marker.
        trace_loader: Callable returning a ``(trace, trace_marker)`` pair for a seed.
        no_nodes: Setup size to evaluate.
    """
    trace, trace_marker = trace_loader(trace_seed)
    run_marker = f"{trace_marker}-{trace_seed}"
    print(trace_seed, trace_marker, no_nodes, 'STARTED')
    run_belady_experiment(trace, no_nodes, marker=run_marker)
    for strategy_setup in setups:
        run_strategy_experiment(trace, strategy_setup, no_nodes, marker=run_marker)
    print(trace_seed, trace_marker, no_nodes, 'DONE')

In [None]:
# Make use of multiprocess (over multiprocessing) if an "AttributeError" says it couldn't find `run_experiment`.
from multiprocessing import Pool

if __name__ == '__main__':
    # One task per (seed, trace loader, setup size) combination; the setup
    # size varies fastest.
    options = [
        (seed, trace, no_nodes)
        for seed in trace_seeds
        for trace in trace_options
        for no_nodes in node_maps
    ]
    print(f"Executing {len(options)} experiments...")
    # Four worker processes; chunksize=1 submits the tasks one at a time.
    with Pool(4) as pool:
        pool.starmap(run_experiment, options, chunksize=1)

## Plots
To visualise the results we create two plots that show the main performance metrics: hit ratio and backhaul cost.

But first we have to load in all the data from the runs above.

In [None]:
from .utils import load_runs_in_dir, calc_ratio
import matplotlib.pyplot as plt
from palettable.colorbrewer.diverging import PuOr_4
# Take the palette's matplotlib colours and exchange the entries at index 2 and 3.
# NOTE(review): both right-hand sides read `PuOr_4.mpl_colors` again, so the
# exchange is only correct if each access yields a fresh, unmodified list.  If
# the same list object were returned, index 2 would be assigned the value that
# was just written to index 3 - TODO confirm against palettable.
puor_4 = PuOr_4.mpl_colors
puor_4[3] = PuOr_4.mpl_colors[2]
puor_4[2] = PuOr_4.mpl_colors[3]
import experiments.plotter.neat_plotter

# Load the recorded runs of every strategy from its output directory.
(
    belady_runs,
    lru_runs,
    cooplru_runs,
    profiles_runs,
    profiles_runs_size100,
) = [
    load_runs_in_dir(run_dir)
    for run_dir in (
        belady_out_dir,
        lru_out_dir,
        cooplru_out_dir,
        profiles_out_dir,
        profiles_size100_out_dir,
    )
]

In [None]:
def filter_runs_by(runs, match: str):
    """Keep the runs whose ``"source"`` entry, rendered as a string, contains ``match``.

    Args:
        runs: Iterable of mappings that each have a ``"source"`` key.
        match: Substring to look for.

    Returns:
        A new list with the matching runs, in their original order.
    """
    selected = []
    for run in runs:
        if match in str(run["source"]):
            selected.append(run)
    return selected

def calc_over_setups(runs, strategy: str, calculation) -> list[float]:
    """Summarise ``calculation`` over the runs of each node setup.

    For each of the markers ``-n1-``, ``-n3-``, ``-n8-`` and ``-n14-`` (in that
    order) the runs whose source contains both that marker and ``strategy``
    are selected, ``calculation`` is applied to every selected run, and the
    resulting values are condensed with ``calc_variance``.

    Args:
        runs: Runs to select from (see ``filter_runs_by``).
        strategy: Additional substring the run's source must contain.
        calculation: Callable mapping a single run to a value.

    Returns:
        One ``calc_variance`` result per setup marker.
    """
    def summarise(setup_marker: str):
        matching_runs = filter_runs_by(filter_runs_by(runs, setup_marker), strategy)
        return calc_variance([calculation(run) for run in matching_runs])

    return [summarise(setup_marker) for setup_marker in ("-n1-", "-n3-", "-n8-", "-n14-")]

# Every metric below reads the last entry of the run's counter series.

def calc_average_hit_ratio(run):
    """``calc_ratio`` of the final hit counter and the final miss counter."""
    return calc_ratio(run['hits_total'][-1], run['misses_total'][-1])


def calc_average_byte_ratio(run):
    """``calc_ratio`` of the final cache byte counter and the final origin byte counter."""
    return calc_ratio(run['cache_bytes_total'][-1], run['origin_bytes_total'][-1])


def calc_average_neighbour_to_total(run):
    """Final number of requests to neighbours divided by final hits plus misses."""
    neighbour_requests = run['requests_to_neighbours'][-1]
    all_requests = run['hits_total'][-1] + run['misses_total'][-1]
    return neighbour_requests / all_requests


def calc_average_neighbour_bytes(run):
    """Final neighbour byte counter divided by final cache bytes plus origin bytes."""
    neighbour_bytes = run['neighbour_bytes_total'][-1]
    all_bytes = run['cache_bytes_total'][-1] + run['origin_bytes_total'][-1]
    return neighbour_bytes / all_bytes


def calc_average_neighbour_ratio(run):
    """``calc_ratio`` of successful requests to neighbours and all requests to neighbours."""
    return calc_ratio(run['requests_to_neighbours_success'][-1], run['requests_to_neighbours'][-1])


def norm_neighbour_success(run):
    """Neighbour success ratio multiplied by the share of requests sent to neighbours."""
    return calc_average_neighbour_ratio(run) * calc_average_neighbour_to_total(run)

In [None]:
from typing import Tuple
from .utils import generate_comparison_plot

from palettable.colorbrewer.qualitative import Dark2_8
from palettable.colorbrewer.sequential import Greys_4
# Matplotlib colour lists of the two palettes used for the plots.
greys_4 = Greys_4.mpl_colors
dark_8 = Dark2_8.mpl_colors
# One grey followed by the first three Dark2 colours; the third Dark2 colour
# is repeated, so the last two entries are identical.
colors = [ greys_4[2], *dark_8[:3], dark_8[2] ]

def generate_plot(title: str, ylabel, strategies: dict[str, list[Tuple[float, float]]], colors, markers: list[str] = None, linestyles: list[str] = None):
    """Draw and show one comparison plot over all node setups.

    The x positions are the users per node (``no_users`` divided by every key
    of ``node_maps``, rounded to one decimal).  The y axis is fixed to the
    range 0 to 1.

    Args:
        title: Title of the figure.
        ylabel: Label of the y axis.
        strategies: Data series keyed by label, one datapoint per node setup.
        colors: Colours forwarded to ``generate_comparison_plot``.
        markers: Markers forwarded to ``generate_comparison_plot``; defaults
            to ``'.'`` for every series.
        linestyles: Line styles forwarded to ``generate_comparison_plot``;
            defaults to ``'solid'`` for every series.
    """
    # Identity checks: `== None` would be dispatched to the argument's own
    # `__eq__`, which is not guaranteed to return a plain bool.
    if markers is None:
        markers = ['.'] * len(strategies)
    if linestyles is None:
        linestyles = ['solid'] * len(strategies)
    x_labels = [ round(no_users / no_nodes, 1) for no_nodes in node_maps.keys() ]
    plt.figure(num=None, figsize=(3, 4), dpi=300)
    generate_comparison_plot(plt, x_labels, strategies, colors=colors, markers=markers, linestyles=linestyles)
    plt.ylim(bottom=0, top=1.0)
    plt.ylabel(ylabel)
    plt.xlabel('Users per Node')
    plt.title(title)
    plt.xticks(x_labels, rotation=60)
    plt.legend()
    plt.show()

In [None]:
# Plot title suffix -> substring identifying the trace generator in a run's source.
generators = {
    'Page-Map': '-page-map-',
}

# Plot title prefix -> (y-axis label, per-run metric).
calculations = {
    'Average Hit Ratios': ('Hit Ratio', calc_average_hit_ratio),
    'Average Bandwidth Savings': ('Bandwidth Savings', calc_average_byte_ratio),
}

# Legend label -> loaded runs, in plotting order.
runs_per_strategy = {
    "Belady's": belady_runs,
    "LRU": lru_runs,
    "Co-Op LRU": cooplru_runs,
    "Profiles (Size=10000)": profiles_runs,
    "Profiles (Size=100)": profiles_runs_size100,
}

# One figure per combination of generator and metric.
for gen, generator in generators.items():
    for calc, (ylabel, calculation) in calculations.items():
        generate_plot(
            title=f"{calc} {gen}",
            ylabel=ylabel,
            strategies={
                label: calc_over_setups(strategy_runs, generator, calculation)
                for label, strategy_runs in runs_per_strategy.items()
            },
            colors=colors,
            markers=['.', 'v', 's', 'p', 'p', 'P'],
            linestyles=['dashed', 'solid', 'solid', 'solid', 'dashed', 'solid'],
        )

In [None]:
from palettable.colorbrewer.diverging import PuOr_4
puor_4 = PuOr_4.mpl_colors

# Share of requests sent to neighbours, and that share scaled by the
# neighbour success ratio, for the two strategies plotted below.
internal_request_series = {
    "Co-Op LRU": (cooplru_runs, calc_average_neighbour_to_total),
    "Co-Op LRU Success": (cooplru_runs, norm_neighbour_success),
    "Profiles": (profiles_runs, calc_average_neighbour_to_total),
    "Profiles Success": (profiles_runs, norm_neighbour_success),
}

generate_plot(
    title="Internal Requests Page Map",
    ylabel='Internal Request Ratio',
    strategies={
        label: calc_over_setups(series_runs, '-page-map-', metric)
        for label, (series_runs, metric) in internal_request_series.items()
    },
    markers=['s', 's', 'p', 'p', 'P', 'P'],
    colors=[ dark_8[1], greys_4[1], dark_8[2], greys_4[2], dark_8[3] ],
    linestyles=['solid', 'dashed', 'solid', 'dashed', 'solid', 'dashed'],
)