In [None]:
# Personally I had to add the root folder of the repo to the sys.path.  If certain imports do not work you should uncomment and set the following.
# import sys
# sys.path.append('/root/of/repo/folder/')

# The workings of the Cooperative LRU Strategy

In this small-scale experiment we try to expose the effect of different values of `node_trail_length`. We compare each configuration against a baseline LRU strategy. We will be experimenting with a `node_trail_length` of `1`, `2`, and `3`, as well as an adapted version that uses all neighbouring nodes.

### Experiment Configuration

In [None]:
# Parameters handed to the trace generators.
no_users = 1000
no_iterations = 5000
# Number of independent runs; one trace seed is derived per run.
no_runs = 10
# Seeds are kept as strings: "0" .. str(no_runs - 1).
trace_seeds = list(map(str, range(no_runs)))
# Values of `node_trail_length` evaluated for the cooperative LRU strategy.
trail_lengths = [1, 2, 3]

In [None]:
from experiments.utils import make_dir, read_node_map

# Input files: the resource statistics are used by the Zipf simulation and the
# strategy runner, the page map by the page-map simulation.
resource_file = "../dataset/out/dataset-resources-stats.csv"
pagemap_file = "../dataset/out/page-map-clean.csv"

# Node map shared by every trace and strategy in this notebook
# (14 nodes, judging by the file name).
node_map_14 = read_node_map('./node_setups/14nodes.json')
# NOTE(review): `make_dir` is assumed to return the directory path it was
# given, since its result is used as a path prefix below — confirm.
out_dir = make_dir('./out/experiment-cooplru/')

# One output directory per strategy. Every path is joined with an explicit "/"
# so the result does not depend on whether `out_dir` ends with a separator;
# a doubled "/" is harmless in a path, a missing one is not.
lru_out_dir = make_dir(f"{out_dir}/lru/")
cooplru_out_dir = make_dir(f"{out_dir}/cooplru/")
neighbouring_lru_out_dir = make_dir(f"{out_dir}/neighbouringlru/")

## Traces

For this experiment we will use both Zipf and Page-Map traces.

In [None]:
from experiments.utils import load_or_generate_trace
from simulation.generator.main_zipf import TraceConfig, Simulation
from simulation.generator.main_page_map import UserTraceConfig, UserSimulation

def generate_zipf_trace(seed: str, zipf_exponent: float):
    """Return the Zipf trace for `seed` and `zipf_exponent`.

    The trace is configured from the notebook-level `node_map_14`, `no_users`
    and `no_iterations`. Its file lives under `out_dir` and is named after the
    configuration; whether it is read from that file or generated first is
    decided by `load_or_generate_trace`.
    """
    config = TraceConfig(
        node_map=node_map_14,
        seed=seed,
        no_users=no_users,
        no_iterations=no_iterations,
        zipf_exponent=zipf_exponent,
    )
    zipf_simulation = Simulation(config, resource_file)
    trace_path = f"{out_dir}/{config.to_filename()}.trace.gz"
    return load_or_generate_trace(trace_path, simulation=zipf_simulation)

def generate_page_map_trace(seed: str):
    """Return the page-map trace for `seed`.

    The trace is configured from the notebook-level `node_map_14`, `no_users`
    and `no_iterations`. Its file lives under `out_dir` and is named after the
    configuration; whether it is read from that file or generated first is
    decided by `load_or_generate_trace`.
    """
    config = UserTraceConfig(
        node_map=node_map_14,
        seed=seed,
        no_users=no_users,
        no_iterations=no_iterations,
    )
    page_map_simulation = UserSimulation(config, pagemap_file)
    trace_path = f"{out_dir}/{config.to_filename()}.trace.gz"
    return load_or_generate_trace(trace_path, simulation=page_map_simulation)

## Cooperative Strategy

In [None]:
from experiments.utils import setup_nodes, setup_stats_file_writers, read_resource_map
from simulation.evaluator.strategy.runner import StrategyRunner
from simulation.evaluator.strategy.strategy import CacheStrategy
from simulation.evaluator.strategy.lru import LRUStrategy
from simulation.evaluator.strategy.neighbouring_lru import NeighbouringLRUStrategy
from simulation.evaluator.strategy.cooperative_lru import CooperativeLRUStrategy
from typing import Callable

# Create every per-trail-length output directory up front, so that parallel
# workers never race to create the same directory.
for node_trail in trail_lengths:
    node_trail_dir = f"{cooplru_out_dir}/node_{node_trail}"
    make_dir(node_trail_dir)

def create_lru_setup(nodes):
    """Build the baseline LRU strategy and pair it with its output directory."""
    return LRUStrategy(nodes), lru_out_dir


def create_neighbouring_lru_setup(nodes):
    """Build the neighbouring LRU strategy and pair it with its output directory.

    The strategy receives a mapping from each node identifier in `node_map_14`
    to that node's neighbours.
    """
    neighbours_by_node = {node.identifier: node.neighbours for node in node_map_14.values()}
    return NeighbouringLRUStrategy(nodes, node_map=neighbours_by_node), neighbouring_lru_out_dir


def create_cooplru_setup(node_trail_length):
    """Return a setup function for cooperative LRU with the given trail length.

    The returned function takes `nodes` and returns the strategy together with
    the output directory reserved for this `node_trail_length`.
    """
    def setup(nodes):
        strategy = CooperativeLRUStrategy(nodes, node_trail_length=node_trail_length)
        return strategy, f"{cooplru_out_dir}/node_{node_trail_length}"

    return setup


# Every setup maps `nodes` to a `(strategy, output_directory)` pair: the two
# baselines first, then one cooperative setup per configured trail length.
setups = [create_lru_setup, create_neighbouring_lru_setup] + [
    create_cooplru_setup(trail_length) for trail_length in trail_lengths
]

def run_strategy_experiment(trace, strategy_setup: Callable[[dict[str, dict[str, int]]], tuple[CacheStrategy, str]], marker: str = "") -> None:
    """Run one strategy over `trace` and write its statistics to file.

    Args:
        trace: The trace handed to `StrategyRunner`.
        strategy_setup: Called with the freshly created nodes; must return a
            `(strategy, output_directory)` pair, as the entries of `setups` do.
            NOTE(review): the directory is annotated as `str`; confirm that
            `make_dir` returns a string for the non-cooperative setups.
        marker: Suffix for the stats-writer marker, which is built as
            `n<number of nodes>-<marker>`.
    """
    # One node per entry of the node map; the second argument evaluates to
    # 1024**3 (1 GiB if it is a byte capacity — TODO confirm the unit).
    nodes = setup_nodes(len(node_map_14), 1024 * 1024 * 1024)
    strategy, strat_out_dir = strategy_setup(nodes)
    stats_writers = setup_stats_file_writers(nodes, strat_out_dir, marker=f"n{len(nodes)}-{marker}")
    StrategyRunner(strategy, trace, read_resource_map(resource_file), stats_writers=stats_writers).perform()

If the traces do not exist yet, pre-generate them and save them to file so that they are available to each worker process later.

In [None]:
print("Pre-generating Traces")
for seed in trace_seeds:
    # Only the side effect matters here (the trace file ends up on disk), so
    # the returned traces are deliberately not bound to a name: keeping them
    # referenced would hold large traces in memory for no reason.
    generate_zipf_trace(seed=seed, zipf_exponent=0.75)
    generate_page_map_trace(seed=seed)
    generate_zipf_trace(seed=seed, zipf_exponent=1.30)
print("All traces generated")

In [None]:
# The loaders are passed as arguments to `Pool.starmap`, which pickles them.
# The standard `pickle` module can only pickle functions by name, so these are
# named module-level functions rather than lambdas.

def load_075_trace(seed):
    """Return the Zipf trace with exponent 0.75 for `seed` and its marker '075'."""
    return generate_zipf_trace(seed=seed, zipf_exponent=0.75), '075'


def load_130_trace(seed):
    """Return the Zipf trace with exponent 1.30 for `seed` and its marker '130'."""
    return generate_zipf_trace(seed=seed, zipf_exponent=1.30), '130'


def load_page_map_trace(seed):
    """Return the page-map trace for `seed` and its marker 'page-map'."""
    return generate_page_map_trace(seed=seed), 'page-map'


trace_options = [load_075_trace, load_130_trace, load_page_map_trace]

def run_experiment(trace_seed: str, trace_loader):
    """Run every entry of `setups` against a single trace.

    `trace_loader` is called with `trace_seed` and must return a
    `(trace, marker)` pair. Each strategy run is tagged with
    `<marker>-<trace_seed>`. The seed and marker are printed when the
    experiment starts and again, followed by 'DONE', when it finishes.
    """
    loaded = trace_loader(trace_seed)
    trace, trace_marker = loaded
    print(trace_seed, trace_marker)
    run_marker = f"{trace_marker}-{trace_seed}"
    for strategy_setup in setups:
        run_strategy_experiment(trace, strategy_setup, marker=run_marker)
    print(trace_seed, trace_marker, 'DONE')

Run the different experiments. You should set the pool size to the number of experiments your machine can support at the same time. In many cases this is limited by memory (rather than core count) due to the large size of the traces.

In [None]:
# Make use of multiprocess (over multiprocessing) if an "AttributeError" says it couldn't find `run_experiment`.
from multiprocessing import Pool

# Number of experiments executed in parallel. Tune this to what the machine
# can hold: every worker loads a full trace.
pool_size = 4

if __name__ == '__main__':
    # One experiment per (seed, trace loader) combination.
    options = [
        (seed, trace_loader)
        for seed in trace_seeds
        for trace_loader in trace_options
    ]
    print(f"Executing {len(options)} experiments...")
    with Pool(pool_size) as p:
        # chunksize=1 hands the experiments to the workers one at a time.
        p.starmap(run_experiment, options, chunksize=1)

## Plots

In [None]:
from experiments.utils import load_runs_in_dir

# Collect the recorded runs of each strategy from its output directory.
lru_runs = load_runs_in_dir(lru_out_dir)
neighbouring_lru_runs = load_runs_in_dir(neighbouring_lru_out_dir)
# One entry per trail length, in the same order as `trail_lengths`.
cooplru_runs = [
    load_runs_in_dir(f"{cooplru_out_dir}/node_{length}")
    for length in trail_lengths
]

In [None]:
from experiments.utils import calc_ratio, calc_variance

# Display name of each trace type -> the substring that is searched for in a
# run's `source` (see `filter_runs_by`) to select the runs of that trace type.
run_names = dict([
    ('ZipF-0.75', '-075-'),
    ('Page-Map', '-page-map-'),
    ('ZipF-1.30', '-130-'),
])

def filter_runs_by(runs, match: str):
    """Return the runs whose `source`, converted to a string, contains `match`.

    The order of `runs` is preserved.
    """
    selected = []
    for run in runs:
        if match in str(run["source"]):
            selected.append(run)
    return selected

def calc_over_setups(runs, strategy: str, calculation) -> list[float]:
    """Apply `calculation` to every run whose source contains `strategy`.

    The runs are selected with `filter_runs_by`; the results are returned in
    the order of the selected runs.
    """
    return list(map(calculation, filter_runs_by(runs, strategy)))

# Every metric below reads only the last entry (`[-1]`) of the run's series,
# presumably the totals at the end of the run (the `_total` names suggest
# cumulative counters — TODO confirm).

def calc_average_hit_ratio(run):
    """Return `calc_ratio` of the final `hits_total` and `misses_total`."""
    return calc_ratio(run['hits_total'][-1], run['misses_total'][-1])


def calc_average_byte_ratio(run):
    """Return `calc_ratio` of the final `cache_bytes_total` and `origin_bytes_total`."""
    return calc_ratio(run['cache_bytes_total'][-1], run['origin_bytes_total'][-1])


def calc_average_neighbour_ratio(run):
    """Return `calc_ratio` of the final `requests_to_neighbours_success` and `requests_to_neighbours`."""
    return calc_ratio(run['requests_to_neighbours_success'][-1], run['requests_to_neighbours'][-1])


def calc_average_neighbour_to_total(run):
    """Return the final `requests_to_neighbours` divided by final hits plus misses."""
    return run['requests_to_neighbours'][-1] / (run['hits_total'][-1] + run['misses_total'][-1])


def calc_average_neighbour_bytes(run):
    """Return the final `neighbour_bytes_total` divided by final cache plus origin bytes."""
    return run['neighbour_bytes_total'][-1] / (run['cache_bytes_total'][-1] + run['origin_bytes_total'][-1])

In [None]:
from collections import defaultdict
from typing import Tuple
# NOTE(review): `table` is not referenced by any other cell visible in this
# notebook — confirm whether it is still needed.
table = defaultdict(list)

def pretty_print_variance(variance: Tuple[float, float]) -> str:
    """Format a pair of numbers as `<first>±<second>`, each rounded to 3 decimals."""
    first = round(variance[0], 3)
    second = round(variance[1], 3)
    return f"{first}±{second}"

## Average Hit Ratios

In [None]:
# Report the hit ratio of every strategy, once per trace type.
for name, generator_identifier in run_names.items():
    strategies = {
        "LRU": calc_over_setups(lru_runs, generator_identifier, calc_average_hit_ratio),
        # One entry per trail length; zip keeps each label next to its own runs.
        **{
            f"Co-Op N={trail_length}": calc_over_setups(runs, generator_identifier, calc_average_hit_ratio)
            for trail_length, runs in zip(trail_lengths, cooplru_runs)
        },
        "Neighbours": calc_over_setups(neighbouring_lru_runs, generator_identifier, calc_average_hit_ratio),
    }
    # Format each strategy once and reuse it for the listing and the joined row.
    formatted = {
        key: pretty_print_variance(calc_variance(values))
        for key, values in strategies.items()
    }
    print(f"{name} Average Hit Ratio")
    for key, text in formatted.items():
        print(f"\t{key}:\t{text}")
    # Same values on one line, separated by " & ".
    print(" & ".join(formatted.values()))

## Average Bandwidth Savings

In [None]:
# Report the byte ratio of every strategy, once per trace type.
calculation = calc_average_byte_ratio

for name, generator_identifier in run_names.items():
    strategies = {
        "LRU": calc_over_setups(lru_runs, generator_identifier, calculation),
        # One entry per trail length; zip keeps each label next to its own runs.
        **{
            f"Co-Op N={trail_length}": calc_over_setups(runs, generator_identifier, calculation)
            for trail_length, runs in zip(trail_lengths, cooplru_runs)
        },
        "Neighbours": calc_over_setups(neighbouring_lru_runs, generator_identifier, calculation),
    }
    # Format each strategy once and reuse it for the listing and the joined row.
    formatted = {
        key: pretty_print_variance(calc_variance(values))
        for key, values in strategies.items()
    }
    print(f"{name} Average Bandwidth Savings")
    for key, text in formatted.items():
        print(f"\t{key}:\t{text}")
    # Same values on one line, separated by " & ".
    print(" & ".join(formatted.values()))

## Average fraction of requests forwarded internally

In [None]:
# Report, per trace type, the requests sent to neighbours relative to all
# requests (hits plus misses) for every strategy.
for name, generator_identifier in run_names.items():
    strategies = {
        "LRU": calc_over_setups(lru_runs, generator_identifier, calc_average_neighbour_to_total),
        # One entry per trail length; zip keeps each label next to its own runs.
        **{
            f"Co-Op N={trail_length}": calc_over_setups(runs, generator_identifier, calc_average_neighbour_to_total)
            for trail_length, runs in zip(trail_lengths, cooplru_runs)
        },
        "Neighbours": calc_over_setups(neighbouring_lru_runs, generator_identifier, calc_average_neighbour_to_total),
    }
    # Format each strategy once and reuse it for the listing and the joined row.
    formatted = {
        key: pretty_print_variance(calc_variance(values))
        for key, values in strategies.items()
    }
    print(f"{name} Fraction of Internal Requests")
    for key, text in formatted.items():
        print(f"\t{key}:\t{text}")
    # Same values on one line, separated by " & ".
    print(" & ".join(formatted.values()))

## Average fraction of bandwidth used internally

In [None]:
# Report, per trace type, the neighbour bytes relative to cache plus origin
# bytes for every strategy.
for name, generator_identifier in run_names.items():
    strategies = {
        "LRU": calc_over_setups(lru_runs, generator_identifier, calc_average_neighbour_bytes),
        # One entry per trail length; zip keeps each label next to its own runs.
        **{
            f"Co-Op N={trail_length}": calc_over_setups(runs, generator_identifier, calc_average_neighbour_bytes)
            for trail_length, runs in zip(trail_lengths, cooplru_runs)
        },
        "Neighbours": calc_over_setups(neighbouring_lru_runs, generator_identifier, calc_average_neighbour_bytes),
    }
    # Format each strategy once and reuse it for the listing and the joined row.
    formatted = {
        key: pretty_print_variance(calc_variance(values))
        for key, values in strategies.items()
    }
    print(f"{name} Fraction of Internal Bandwidth")
    for key, text in formatted.items():
        print(f"\t{key}:\t{text}")
    # Same values on one line, separated by " & ".
    print(" & ".join(formatted.values()))