In [None]:
# Personally I had to add the root folder of the repo to the sys.path.  If certain imports do not work you should uncomment and set the following.
# import sys
# sys.path.append('/root/of/repo/folder/')

# The relationship with storage capacity

In this experiment we evaluate the relationship between storage capacity and performance for the different generators and strategies.  Each setup is repeated over several independently seeded runs (see `no_runs` in the configuration below) to improve our confidence in the results.

The setup uses the worst-performing edge-node configuration (14 nodes) to see what kind of improvements can be gained by increasing the storage capacity.  In addition to the different strategies, we also plot the performance of Belady's MIN as a comparison.

In [None]:
from experiments.utils import make_dir, read_node_map

import os

# Input datasets handed to the trace generators and the evaluators below.
resource_file = "../dataset/out/dataset-resources-stats.csv"
pagemap_file = "../dataset/out/page-map-clean.csv"

# Node layout shared by every run in this notebook.
node_map_14 = read_node_map('./node_setups/14nodes.json')
out_dir = make_dir('./out/experiment-storage/')

# One output directory per evaluated strategy.  os.path.join yields exactly one
# separator between the parts whether or not `out_dir` ends with a slash; the
# trailing "" keeps a trailing separator on the resulting directory path.
belady_out_dir = make_dir(os.path.join(out_dir, "beladys", ""))
lru_out_dir = make_dir(os.path.join(out_dir, "lru", ""))
cooplru_out_dir = make_dir(os.path.join(out_dir, "cooplru", ""))
profiles_out_dir = make_dir(os.path.join(out_dir, "profiles", ""))
federated_out_dir = make_dir(os.path.join(out_dir, "federated", ""))

Some general configuration on the experiment.

In [None]:
# Size of every generated trace and the number of seeded repetitions per setup.
no_users = 1_000
no_iterations = 5_000
no_runs = 10

# Seeds are handled as strings: "0", "1", ... - one per run.
trace_seeds = list(map(str, range(no_runs)))

def mib_to_bytes(mib: int):
    """Convert a size in mebibytes to bytes (1 MiB = 1024 * 1024 bytes)."""
    bytes_per_mib = 1024 ** 2
    return mib * bytes_per_mib

# Cache capacities under test, in bytes: 64 MiB doubling up to 2048 MiB.
capacities = [ mib_to_bytes(size_mib) for size_mib in (64, 128, 256, 512, 1024, 2048) ]

## Traces
In this experiment we want to look at the performance differences gained by increasing the storage capacity of our nodes to aid with the limited scope effect.  We use the worst performing setup with 14 edge nodes and evaluate the setup with different storage capacities per node.

In [None]:
from experiments.utils import load_or_generate_trace
from simulation.generator.main_zipf import TraceConfig, Simulation
from simulation.generator.main_page_map import UserTraceConfig, UserSimulation

def generate_zipf_trace(seed: str, zipf_exponent: float):
    """Obtain the Zipf trace for one seed and exponent on the 14-node map.

    Builds a ``TraceConfig`` from the notebook-wide ``no_users`` and
    ``no_iterations`` and hands a ``Simulation`` over ``resource_file`` to
    ``load_or_generate_trace``.  The trace file lives under ``out_dir`` and is
    named after ``trace_config.to_filename()``.

    Returns whatever ``load_or_generate_trace`` returns.
    """
    trace_config = TraceConfig(
        node_map=node_map_14,
        seed=seed,
        no_users=no_users,
        no_iterations=no_iterations,
        zipf_exponent=zipf_exponent,
    )
    simulation = Simulation(trace_config, resource_file)
    trace_path = f"{out_dir}/{trace_config.to_filename()}.trace.gz"
    return load_or_generate_trace(trace_path, simulation=simulation)

def generate_page_map_trace(seed: str):
    """Obtain the page-map trace for one seed on the 14-node map.

    Builds a ``UserTraceConfig`` from the notebook-wide ``no_users`` and
    ``no_iterations`` and hands a ``UserSimulation`` over ``pagemap_file`` to
    ``load_or_generate_trace``.  The trace file lives under ``out_dir`` and is
    named after ``trace_config.to_filename()``.

    Returns whatever ``load_or_generate_trace`` returns.
    """
    trace_config = UserTraceConfig(
        node_map=node_map_14,
        seed=seed,
        no_users=no_users,
        no_iterations=no_iterations,
    )
    user_simulation = UserSimulation(trace_config, pagemap_file)
    trace_path = f"{out_dir}/{trace_config.to_filename()}.trace.gz"
    return load_or_generate_trace(trace_path, simulation=user_simulation)

## Strategies

### Belady's MIN

In [None]:
from simulation.evaluator.strategy.belady_min import run_belady
from experiments.utils import read_resource_map

def run_belady_experiment(trace, cache_capacity: int, marker: str = ""):
    """Evaluate Belady's MIN on `trace` with the given cache capacity.

    The resource map is read from ``resource_file`` on every call and results
    go to ``belady_out_dir``.  The marker passed on to ``run_belady`` is
    prefixed with ``n<node count>-`` so outputs carry the node-setup size.
    """
    resource_map = read_resource_map(resource_file)
    node_count = len(node_map_14)
    run_belady(trace, resource_map, cache_capacity, belady_out_dir, marker=f"n{node_count}-{marker}")

### Custom Strategies

In [None]:
from experiments.utils import setup_nodes, setup_stats_file_writers, read_resource_map
from simulation.evaluator.strategy.runner import StrategyRunner
from simulation.evaluator.strategy.strategy import CacheStrategy
from simulation.evaluator.strategy.lru import LRUStrategy
from simulation.evaluator.strategy.cooperative_lru import CooperativeLRUStrategy
from simulation.evaluator.strategy.profiles import ProfilesStrategy
from simulation.evaluator.strategy.federated import FederatedStrategy
from typing import Callable

# Each factory takes the node setup and returns a (strategy, output directory)
# pair.  They are named functions rather than lambdas so that they carry a
# docstring and show up under their own name in tracebacks.

def create_lru_setup(nodes):
    """Return an `LRUStrategy` over `nodes` together with its output directory."""
    return (LRUStrategy(nodes), lru_out_dir)


def create_cooplru_setup(nodes):
    """Return a `CooperativeLRUStrategy` (node_trail_length=3) and its output directory."""
    return (CooperativeLRUStrategy(nodes, node_trail_length=3), cooplru_out_dir)


def create_profiles_setup(nodes):
    """Return a `ProfilesStrategy` (ranking_timeout=5, profile_size=10000) and its output directory."""
    return (ProfilesStrategy(nodes, ranking_timeout=5, profile_size=10000), profiles_out_dir)


def create_federated_setup(nodes):
    """Return a `FederatedStrategy` over `nodes` together with its output directory."""
    return (FederatedStrategy(nodes), federated_out_dir)


# Every strategy that is evaluated for each trace/capacity combination.
setups = [ create_lru_setup, create_cooplru_setup, create_profiles_setup, create_federated_setup ]

def run_strategy_experiment(trace, strategy_setup: Callable[[dict[str, dict[str, int]]], tuple[CacheStrategy, str]], cache_capacity: int, marker: str = ""):
    """Run one caching strategy over `trace` and write its statistics.

    Args:
        trace: The request trace handed to `StrategyRunner`.
        strategy_setup: Factory that receives the freshly created nodes and
            returns a ``(strategy, output_directory)`` pair.  The directory is
            whatever `make_dir` returned - presumably a path string.
        cache_capacity: Capacity passed to `setup_nodes` for every node.
        marker: Suffix of the output marker; it is prefixed with
            ``n<node count>-``.

    The resource map is read from ``resource_file`` on every call.
    """
    nodes = setup_nodes(len(node_map_14), cache_capacity)
    # The factory returns a pair, not a bare strategy.
    strategy, strat_out_dir = strategy_setup(nodes)
    stats_writers = setup_stats_file_writers(nodes, strat_out_dir, marker=f"n{len(nodes)}-{marker}")
    resource_map = read_resource_map(resource_file)
    StrategyRunner(strategy, trace, resource_map, stats_writers=stats_writers).perform()

## The Experiment

In [None]:
print("Pre-generating Traces")
for seed in trace_seeds:
    # The returned traces are intentionally not bound to a name: nothing later
    # in the notebook reads them, and binding them would keep the last seed's
    # traces referenced for the rest of the kernel session.  Presumably these
    # calls exist so the trace files are in place before the experiments run.
    generate_zipf_trace(seed=seed, zipf_exponent=0.75)
    generate_page_map_trace(seed=seed)
    generate_zipf_trace(seed=seed, zipf_exponent=1.30)
print("All traces generated")

In [None]:
# The loaders are passed to the worker pool as task arguments, so they have to
# be picklable.  The standard pickle module serialises functions by qualified
# name, which works for module-level `def`s but not for lambdas.

def load_075_trace(seed):
    """Return the Zipf-0.75 trace for `seed` together with its marker '075'."""
    return (generate_zipf_trace(seed=seed, zipf_exponent=0.75), '075')


def load_130_trace(seed):
    """Return the Zipf-1.30 trace for `seed` together with its marker '130'."""
    return (generate_zipf_trace(seed=seed, zipf_exponent=1.30), '130')


def load_page_map_trace(seed):
    """Return the page-map trace for `seed` together with its marker 'page-map'."""
    return (generate_page_map_trace(seed=seed), 'page-map')


# Every trace type that is evaluated for each seed/capacity combination.
trace_options = [ load_075_trace, load_130_trace, load_page_map_trace ]

def run_experiment(trace_seed: str, trace_loader, capacity: int):
    """Run Belady's MIN and every strategy in `setups` for one trace and capacity.

    `trace_loader` is called with `trace_seed` and must return a
    ``(trace, trace_marker)`` pair.  All outputs of this call share the marker
    ``<trace_marker>-<capacity>b-<trace_seed>``.  Progress is printed before
    and after the runs.
    """
    trace, trace_marker = trace_loader(trace_seed)
    print(trace_seed, trace_marker, capacity)

    run_marker = f"{trace_marker}-{capacity}b-{trace_seed}"
    run_belady_experiment(trace, capacity, marker=run_marker)
    for strategy_setup in setups:
        run_strategy_experiment(trace, strategy_setup, capacity, marker=run_marker)

    print(trace_seed, trace_marker, capacity, 'DONE')

In [None]:
# Make use of multiprocess (over multiprocessing) if an "AttributeError" says it couldn't find `run_experiment`.
from multiprocessing import Pool

if __name__ == '__main__':
    # One task per (seed, trace loader, capacity) combination.
    options = [ (trace_seed, trace_loader, cache_capacity)
                for trace_seed in trace_seeds
                for trace_loader in trace_options
                for cache_capacity in capacities ]
    print(f"Executing {len(options)} experiments...")

    worker_count = 8
    with Pool(worker_count) as pool:
        # chunksize=1 submits the tasks to the workers one at a time.
        pool.starmap(run_experiment, options, chunksize=1)

## Plots


In [None]:
from experiments.utils import load_runs_in_dir
from palettable.colorbrewer.qualitative import Dark2_8
from palettable.colorbrewer.sequential import Greys_4
# Matplotlib-ready colour lists taken from the two palettable colour maps.
dark_8, greys_4 = Dark2_8.mpl_colors, Greys_4.mpl_colors
import experiments.plotter.neat_plotter

# Load the recorded runs of every strategy from its own output directory.
belady_runs, lru_runs, cooplru_runs, profiles_runs, federated_runs = [
    load_runs_in_dir(strategy_dir)
    for strategy_dir in (
        belady_out_dir,
        lru_out_dir,
        cooplru_out_dir,
        profiles_out_dir,
        federated_out_dir,
    )
]

In [None]:
from experiments.utils import calc_ratio, calc_variance

def filter_runs_by(runs, match: str):
    """Return the runs whose ``"source"`` entry, as a string, contains `match`.

    The input order is preserved and a new list is always returned.
    """
    def source_contains_match(run) -> bool:
        return match in str(run["source"])

    return list(filter(source_contains_match, runs))

def calc_over_setups(runs, strategy: str, calculation) -> list[float]:
    """Summarise `calculation` over the runs of every capacity in `capacities`.

    For each capacity the runs are narrowed down to those whose source contains
    ``-<capacity>b-`` and also contains `strategy`, `calculation` is applied to
    each remaining run, and the values are passed to `calc_variance`.

    Args:
        runs: Loaded runs, each exposing a ``"source"`` entry.
        strategy: Substring the run source must contain.  NOTE(review): callers
            in this notebook pass trace markers such as ``'-075-'`` here, not a
            strategy name.
        calculation: Callable mapping one run to a value.

    Returns:
        One `calc_variance` result per capacity, in the order of `capacities`.
    """
    def summarise(capacity_marker: str):
        matching_runs = filter_runs_by(filter_runs_by(runs, capacity_marker), strategy)
        return calc_variance([ calculation(run) for run in matching_runs ])

    return [ summarise(f"-{capacity}b-") for capacity in capacities ]

def calc_average_hit_ratio(run):
    """Hit ratio of a run, from the last `hits_total` and `misses_total` entries.

    The two values are combined by `calc_ratio` - presumably
    hits / (hits + misses); confirm against `experiments.utils`.
    """
    return calc_ratio(run['hits_total'][-1], run['misses_total'][-1])


def calc_average_byte_ratio(run):
    """Byte ratio of a run, from the last `cache_bytes_total` and `origin_bytes_total` entries.

    The two values are combined by `calc_ratio` - presumably the fraction of
    bytes served from cache; confirm against `experiments.utils`.
    """
    return calc_ratio(run['cache_bytes_total'][-1], run['origin_bytes_total'][-1])

In [None]:
def calc_average_capacity_used(run):
    """Last `cache_total` entry of a run divided by 1024 twice.

    Presumably `cache_total` is in bytes, making the result MiB - confirm
    against the stats writer.
    """
    return run['cache_total'][-1] / 1024 / 1024

# Look at Belady's runs for the largest capacity that this notebook actually
# simulates.  The marker is derived from `capacities` so it cannot drift away
# from the configured list and silently match no runs.
largest_capacity_marker = f"-{max(capacities)}b-"

datapoints = []
for setup in [ '-075-', '-page-map-', '-130-' ]:
    filtered_runs = filter_runs_by(filter_runs_by(belady_runs, setup), largest_capacity_marker)
    datapoints.append(calc_variance([ calc_average_capacity_used(run) for run in filtered_runs ]))
print(datapoints)

In [None]:
import matplotlib.pyplot as plt
from experiments.utils import generate_comparison_plot
from typing import Tuple
from palettable.colorbrewer.qualitative import Dark2_8
from palettable.colorbrewer.sequential import Greys_4
# Matplotlib-ready colour lists taken from the two palettable colour maps.
dark_8, greys_4 = Dark2_8.mpl_colors, Greys_4.mpl_colors
# Plot colours: one grey first, followed by the eight Dark2 colours.
colors = [ greys_4[2], *dark_8 ]

_BYTES_PER_MIB = 1024 * 1024
_PLOT_MARKERS = ('.', 'v', 's', 'p', 'P', '*', 'D')
_PLOT_LINESTYLES = ('dashed', 'solid', 'solid', 'solid', 'solid')


def _plot_capacity_comparison(title: str, ylabel, strategies, ylim, x_values):
    """Draw and show one comparison figure for the capacities in `x_values` (bytes)."""
    plt.figure(num=None, figsize=(3, 4), dpi=300)
    # Fresh lists on every call so the shared defaults can never be mutated.
    generate_comparison_plot(plt, x_values, strategies, colors=colors,
                             markers=list(_PLOT_MARKERS), linestyles=list(_PLOT_LINESTYLES))
    plt.ylim(ylim)
    plt.ylabel(ylabel)
    plt.xlabel('Cache Capacity (MiB, log base-2)')
    plt.title(title)
    plt.xscale('log', base=2)
    # Ticks sit at the byte values but are labelled in MiB.  The labels are
    # derived from the plotted values so both always have the same length.
    tick_labels = [ f"{capacity / _BYTES_PER_MIB:g}" for capacity in x_values ]
    plt.xticks(x_values, labels=tick_labels, rotation=90)
    plt.tick_params(labelsize='10')
    plt.legend()
    plt.show()


def generate_plot(title: str, ylabel, strategies: dict[str, list[Tuple[float, float]]], ylim=(0, 1.0)):
    """Plot every strategy over all configured `capacities`.

    Args:
        title: Figure title.
        ylabel: Label of the y axis.
        strategies: Maps a name to its datapoints, one per entry of
            `capacities`; handed to `generate_comparison_plot` unchanged
            (presumably the names become the legend labels).
        ylim: Limits of the y axis.
    """
    _plot_capacity_comparison(title, ylabel, strategies, ylim, list(capacities))


def generate_plot_zipf130(title: str, ylabel, strategies: dict[str, list[Tuple[float, float]]], ylim=(0, 1.0)):
    """Plot every strategy over all configured `capacities` except the last one.

    Takes the same arguments as `generate_plot`; `strategies` must therefore
    hold one datapoint fewer per strategy than there are `capacities`.
    """
    _plot_capacity_comparison(title, ylabel, strategies, ylim, list(capacities)[:-1])

In [None]:
# Hit ratio per capacity on the Zipf-0.75 traces, one line per strategy.
generate_plot(
    title="Average Hit Ratios Zipf-0.75",
    ylabel='Hit Ratio',
    strategies={
        label: calc_over_setups(runs, '-075-', calc_average_hit_ratio)
        for label, runs in [
            ("Belady's", belady_runs),
            ("LRU", lru_runs),
            ("Co-Op LRU", cooplru_runs),
            ("Profiles", profiles_runs),
            ("Federated", federated_runs),
        ]
    },
)

In [None]:
# Hit ratio per capacity on the page-map traces, one line per strategy.
generate_plot(
    title="Average Hit Ratios Page Map",
    ylabel='Hit Ratio',
    strategies={
        label: calc_over_setups(runs, '-page-map-', calc_average_hit_ratio)
        for label, runs in [
            ("Belady's", belady_runs),
            ("LRU", lru_runs),
            ("Co-Op LRU", cooplru_runs),
            ("Profiles", profiles_runs),
            ("Federated", federated_runs),
        ]
    },
)

In [None]:
# Hit ratio per capacity on the Zipf-1.30 traces; the last capacity is left out.
generate_plot_zipf130(
    title="Average Hit Ratios Zipf-1.30",
    ylabel='Hit Ratio',
    strategies={
        label: calc_over_setups(runs, '-130-', calc_average_hit_ratio)[:-1]
        for label, runs in [
            ("Belady's", belady_runs),
            ("LRU", lru_runs),
            ("Co-Op LRU", cooplru_runs),
            ("Profiles", profiles_runs),
            ("Federated", federated_runs),
        ]
    },
    ylim=(0.5, 1.0),
)

In [None]:
# Byte ratio per capacity on the Zipf-0.75 traces, one line per strategy.
generate_plot(
    title="Average Bandwidth Saved Zipf-0.75",
    ylabel='Fraction of Bandwidth Saved',
    strategies={
        label: calc_over_setups(runs, '-075-', calc_average_byte_ratio)
        for label, runs in [
            ("Belady's", belady_runs),
            ("LRU", lru_runs),
            ("Co-Op LRU", cooplru_runs),
            ("Profiles", profiles_runs),
            ("Federated", federated_runs),
        ]
    },
)

In [None]:
# Byte ratio per capacity on the page-map traces, one line per strategy.
generate_plot(
    title="Average Bandwidth Saved Page-Map",
    ylabel='Fraction of Bandwidth Saved',
    strategies={
        label: calc_over_setups(runs, '-page-map-', calc_average_byte_ratio)
        for label, runs in [
            ("Belady's", belady_runs),
            ("LRU", lru_runs),
            ("Co-Op LRU", cooplru_runs),
            ("Profiles", profiles_runs),
            ("Federated", federated_runs),
        ]
    },
)

In [None]:
# Byte ratio per capacity on the Zipf-1.30 traces; the last capacity is left out.
generate_plot_zipf130(
    title="Average Bandwidth Saved Zipf-1.30",
    ylabel='Fraction of Bandwidth Saved',
    strategies={
        label: calc_over_setups(runs, '-130-', calc_average_byte_ratio)[:-1]
        for label, runs in [
            ("Belady's", belady_runs),
            ("LRU", lru_runs),
            ("Co-Op LRU", cooplru_runs),
            ("Profiles", profiles_runs),
            ("Federated", federated_runs),
        ]
    },
    ylim=(0.5, 1.0),
)