In [None]:
# Personally I had to add the root folder of the repo to the sys.path.  If certain imports do not work you should uncomment and set the following.
# import sys
# sys.path.append('/root/of/repo/folder/')

# The workings of the Profiles Strategy

In this small-scale experiment we try to expose the effect of different `profile size`s.  We compare each of them against a baseline LRU strategy. We will be experimenting with a `profile size` of `10`, `100`, `1000`, and `10000`.

In [None]:
from .utils import make_dir, read_node_map

# Input data sets: the resource statistics are handed to the Zipf simulation and the
# strategy runner, the page map to the user simulation.
resource_file = "../dataset/out/dataset-resources-stats.csv"
pagemap_file = "../dataset/out/page-map-clean.csv"

# Node layout used for every trace generated in this notebook.
node_map_14 = read_node_map('./node_setups/14nodes.json')
out_dir = make_dir('./out/experiment-profiles/')

# Always put an explicit separator between `out_dir` and the sub-directory, exactly as the
# other paths derived from `out_dir` do.  Without it the LRU directory is only correct when
# `make_dir` happens to return a path ending in a slash; a doubled slash is harmless.
lru_out_dir = make_dir(f"{out_dir}/lru/")
profiles_out_dir = make_dir(f"{out_dir}/profiles/")

In [None]:
# Size of every generated trace.
no_users = 1_000
no_iterations = 5_000

# Each run uses its own trace seed; seeds are the run indices rendered as strings.
no_runs = 10
trace_seeds = list(map(str, range(no_runs)))

# Profile sizes compared against the LRU baseline.
profile_sizes = [10, 100, 1_000, 10_000]

## Traces

In [None]:
from .utils import load_or_generate_trace
from simulation.generator.main_zipf import TraceConfig, Simulation
from simulation.generator.main_page_map import UserTraceConfig, UserSimulation

def generate_zipf_trace(seed: str, zipf_exponent: float):
    """Return the Zipf trace for the given seed and exponent.

    Builds a `TraceConfig` from the notebook-wide settings (14-node map, `no_users`,
    `no_iterations`) and hands the matching `Simulation` to `load_or_generate_trace`,
    together with a `.trace.gz` path under `out_dir` named after the configuration.

    Args:
        seed: Seed forwarded to the trace configuration.
        zipf_exponent: Exponent forwarded to the trace configuration.

    Returns:
        Whatever `load_or_generate_trace` returns for that path and simulation.
    """
    trace_config = TraceConfig(
        node_map=node_map_14,
        seed=seed,
        no_users=no_users,
        no_iterations=no_iterations,
        zipf_exponent=zipf_exponent,
    )
    zipf_simulation = Simulation(trace_config, resource_file)
    trace_path = f"{out_dir}/{trace_config.to_filename()}.trace.gz"
    return load_or_generate_trace(trace_path, simulation=zipf_simulation)

def generate_page_map_trace(seed: str):
    """Return the page-map based user trace for the given seed.

    Builds a `UserTraceConfig` from the notebook-wide settings (14-node map, `no_users`,
    `no_iterations`) and hands the matching `UserSimulation` to `load_or_generate_trace`,
    together with a `.trace.gz` path under `out_dir` named after the configuration.

    Args:
        seed: Seed forwarded to the trace configuration.

    Returns:
        Whatever `load_or_generate_trace` returns for that path and simulation.
    """
    trace_config = UserTraceConfig(
        node_map=node_map_14,
        seed=seed,
        no_users=no_users,
        no_iterations=no_iterations,
    )
    page_map_simulation = UserSimulation(trace_config, pagemap_file)
    trace_path = f"{out_dir}/{trace_config.to_filename()}.trace.gz"
    return load_or_generate_trace(trace_path, simulation=page_map_simulation)

## Profile Strategy

In [None]:
from .utils import setup_nodes, setup_stats_file_writers, read_resource_map
from simulation.evaluator.strategy.runner import StrategyRunner
from simulation.evaluator.strategy.strategy import CacheStrategy
from simulation.evaluator.strategy.lru import LRUStrategy
from simulation.evaluator.strategy.profiles import ProfilesStrategy
from typing import Callable

# Create every per-size output directory up front, so that parallel workers never
# try to create the same directory at the same time.
for size in profile_sizes:
    size_out_dir = f"{profiles_out_dir}/size_{size}"
    make_dir(size_out_dir)

# The setups below are shipped to worker processes as task arguments of
# `multiprocessing.Pool.starmap`, which pickles them.  The standard pickler can only
# serialise functions defined with `def` at module top level (and `functools.partial`
# objects wrapping them), not lambdas or closures, hence the explicit definitions.
from functools import partial


def create_lru_setup(nodes):
    """Build the baseline setup: an `LRUStrategy` over `nodes` and the LRU output directory."""
    return LRUStrategy(nodes), lru_out_dir


def _profiles_setup(nodes, profile_size):
    """Build a `ProfilesStrategy` over `nodes` and the output directory for `profile_size`."""
    return ProfilesStrategy(nodes, profile_size=profile_size), f"{profiles_out_dir}/size_{profile_size}"


def create_profiles_setup(profile_size):
    """Return a picklable callable `nodes -> (strategy, out_dir)` bound to `profile_size`."""
    return partial(_profiles_setup, profile_size=profile_size)


# LRU baseline first, followed by one profiles setup per configured profile size.
setups = [create_lru_setup] + [create_profiles_setup(size) for size in profile_sizes]

def run_strategy_experiment(trace, strategy_setup: Callable[[dict[str, dict[str, int]]], tuple[CacheStrategy, str]], marker: str = ""):
    """Run one caching strategy over one trace and write its statistics to disk.

    Args:
        trace: The trace handed to `StrategyRunner`.
        strategy_setup: Callable that receives the freshly created nodes and returns a
            `(strategy, output_directory)` pair.
        marker: Suffix for the statistics writers' marker; it is prefixed with the
            number of nodes (`n<count>-`).
    """
    # One node per entry of the 14-node map.
    # NOTE(review): the second argument is presumably the per-node capacity (1 GiB if it is in bytes) — confirm against `setup_nodes`.
    nodes = setup_nodes(len(node_map_14), 1024 * 1024 * 1024)
    strategy, strat_out_dir = strategy_setup(nodes)
    stats_writers = setup_stats_file_writers(nodes, strat_out_dir, marker=f"n{len(nodes)}-{marker}")
    StrategyRunner(strategy, trace, read_resource_map(resource_file), stats_writers=stats_writers).perform()

In [None]:
# Generate (or load) every trace once, sequentially, before the parallel experiments start.
# The return values are deliberately discarded: the experiments fetch the traces again
# through the same generator functions, and keeping them bound here would only hold the
# last traces in memory.
print("Pre-generating Traces")
for seed in trace_seeds:
    generate_zipf_trace(seed=seed, zipf_exponent=0.75)
    generate_page_map_trace(seed=seed)
    generate_zipf_trace(seed=seed, zipf_exponent=1.30)
print("All traces generated")

In [None]:
# The loaders are passed to worker processes as task arguments of
# `multiprocessing.Pool.starmap`, which pickles them.  The standard pickler cannot
# serialise lambdas, so they are defined as top-level functions.  Each loader returns the
# trace together with the marker that ends up in the statistics file names.

def load_075_trace(seed):
    """Return the Zipf trace with exponent 0.75 for `seed`, paired with the marker `'075'`."""
    return generate_zipf_trace(seed=seed, zipf_exponent=0.75), '075'


def load_130_trace(seed):
    """Return the Zipf trace with exponent 1.30 for `seed`, paired with the marker `'130'`."""
    return generate_zipf_trace(seed=seed, zipf_exponent=1.30), '130'


def load_page_map_trace(seed):
    """Return the page-map trace for `seed`, paired with the marker `'page-map'`."""
    return generate_page_map_trace(seed=seed), 'page-map'


trace_options = [load_075_trace, load_130_trace, load_page_map_trace]

def run_experiment(trace_seed: str, trace_loader, setup):
    """Run a single (seed, trace, strategy setup) combination and report progress.

    Args:
        trace_seed: Seed passed to `trace_loader`; also part of the statistics marker.
        trace_loader: Callable returning a `(trace, trace_marker)` pair for a seed.
        setup: Strategy setup forwarded to `run_strategy_experiment`.
    """
    loaded_trace, trace_marker = trace_loader(trace_seed)
    print(trace_seed, trace_marker)
    experiment_marker = f"{trace_marker}-{trace_seed}"
    run_strategy_experiment(loaded_trace, setup, marker=experiment_marker)
    print(trace_seed, trace_marker, 'DONE')

In [None]:
# Make use of multiprocess (over multiprocessing) if an "AttributeError" says it couldn't find `run_experiment`.
from multiprocessing import Pool

if __name__ == '__main__':
    # Every (seed, trace loader, strategy setup) combination is one independent experiment.
    options = [
        (trace_seed, trace_loader, strategy_setup)
        for trace_seed in trace_seeds
        for trace_loader in trace_options
        for strategy_setup in setups
    ]
    print(f"Executing {len(options)} experiments...")
    # Four worker processes; chunksize=1 submits each experiment as its own task.
    with Pool(4) as pool:
        pool.starmap(run_experiment, options, chunksize=1)

## Plots

First we take a quick detour and determine the average size for a single profile entry by taking the average length of our unique identifiers.

In [None]:
from .utils import read_resource_map
from sys import getsizeof

resource_map = read_resource_map(resource_file)

# Length of every resource URI in bytes once encoded as UTF-8.
uri_byte_lengths = [len(uri.encode('utf-8')) for uri in resource_map.keys()]
average_resource_uri_len = sum(uri_byte_lengths) / len(resource_map)
print(f"Average resource uri byte length: {average_resource_uri_len}")

In [None]:
# Show how the size of a list grows while resource identifiers are appended to it.
# Note that `getsizeof` accounts only for the list object itself, not for the strings it refers to.
entries = []
print(f"Byte size of empty list: {getsizeof(entries)}")

resource_keys = list(resource_map.keys())
example_resource = resource_keys[0]
example_resource_2 = resource_keys[1]

entries.append(example_resource)
print(f"Byte size of list with {len(example_resource.encode('utf-8'))}: {getsizeof(entries)}")

entries.append(example_resource_2)
print(f"Byte size of list with {len(example_resource_2.encode('utf-8'))}: {getsizeof(entries)}")

In [None]:
from .utils import load_runs_in_dir

# Load the recorded runs from the directories the experiments wrote their statistics to.
# `load_runs_in_dir` (imported in this cell) is the loader; `simulation.evaluator` is a
# package path that is never bound as a name in this notebook and is not callable.
lru_runs = load_runs_in_dir(lru_out_dir)
# One entry per profile size, in the same order as `profile_sizes`.
profiles_runs = [ load_runs_in_dir(f"{profiles_out_dir}/size_{size}/") for size in profile_sizes ]

In [None]:
from .utils import calc_ratio, calc_variance

# Display name of each trace generator -> substring identifying its runs in a run's `source`.
# The dashes on both sides match the marker as it is embedded between the node count and the seed.
run_names = {
    "ZipF-0.75": "-075-",
    "Page-Map": "-page-map-",
    "ZipF-1.30": "-130-",
}

def filter_runs_by(runs, match: str):
    """Return the runs whose stringified `source` contains `match`, in their original order."""
    selected = []
    for run in runs:
        if match in str(run["source"]):
            selected.append(run)
    return selected

def calc_over_setups(runs, strategy: str, calculation) -> list[float]:
    """Apply `calculation` to every run selected by `filter_runs_by(runs, strategy)`.

    Args:
        runs: Runs to select from.
        strategy: Substring used to select runs via `filter_runs_by`.
        calculation: Callable taking one run and returning a number.

    Returns:
        One result per selected run, in selection order.
    """
    results = []
    for run in filter_runs_by(runs, strategy):
        results.append(calculation(run))
    return results

def calc_average_hit_ratio(run):
    """Pass the last `hits_total` and `misses_total` samples of `run` to `calc_ratio`."""
    final_hits = run['hits_total'][-1]
    final_misses = run['misses_total'][-1]
    return calc_ratio(final_hits, final_misses)


def calc_average_byte_ratio(run):
    """Pass the last `cache_bytes_total` and `origin_bytes_total` samples of `run` to `calc_ratio`."""
    final_cache_bytes = run['cache_bytes_total'][-1]
    final_origin_bytes = run['origin_bytes_total'][-1]
    return calc_ratio(final_cache_bytes, final_origin_bytes)

In [None]:
from collections import defaultdict
from typing import Tuple
# NOTE(review): `table` is not referenced by any other cell visible in this notebook — confirm whether it is still needed.
table = defaultdict(list)

def pretty_print_variance(variance: Tuple[float, float]) -> str:
    """Format the first two elements of `variance` as `first±second`, each rounded to three decimals."""
    left = round(variance[0], 3)
    right = round(variance[1], 3)
    return "{}±{}".format(left, right)

In [None]:
# Hit ratio per trace generator: LRU baseline first, then one entry per profile size.
for name, generator_identifier in run_names.items():
    strategies = {"LRU": calc_over_setups(lru_runs, generator_identifier, calc_average_hit_ratio)}
    for i, runs_for_size in enumerate(profiles_runs):
        strategies[f"Profiles Size={profile_sizes[i]}"] = calc_over_setups(runs_for_size, generator_identifier, calc_average_hit_ratio)

    print(f"{name} Average Hit Ratio")
    for key, values in strategies.items():
        print(f"\t{key}:\t{pretty_print_variance(calc_variance(values))}")

    # The same values once more on a single line, separated by " & ".
    row_cells = [pretty_print_variance(calc_variance(values)) for values in strategies.values()]
    print(" & ".join(row_cells))

In [None]:
calculation = calc_average_byte_ratio

# Byte ratio per trace generator: LRU baseline first, then one entry per profile size.
for name, generator_identifier in run_names.items():
    strategies = {"LRU": calc_over_setups(lru_runs, generator_identifier, calculation)}
    for i, runs_for_size in enumerate(profiles_runs):
        strategies[f"Profiles Size={profile_sizes[i]}"] = calc_over_setups(runs_for_size, generator_identifier, calculation)

    print(f"{name} Average Bandwidth Savings")
    for key, values in strategies.items():
        print(f"\t{key}:\t{pretty_print_variance(calc_variance(values))}")

    # The same values once more on a single line, separated by " & ".
    row_cells = [pretty_print_variance(calc_variance(values)) for values in strategies.values()]
    print(" & ".join(row_cells))

In [None]:
def calc_average_neighbour_ratio(run):
    """Pass the last `requests_to_neighbours_success` and `requests_to_neighbours` samples of `run` to `calc_ratio`."""
    successful = run['requests_to_neighbours_success'][-1]
    attempted = run['requests_to_neighbours'][-1]
    return calc_ratio(successful, attempted)


def calc_average_neighbour_to_total(run):
    """Divide the last `requests_to_neighbours` sample by the sum of the last `hits_total` and `misses_total` samples."""
    neighbour_requests = run['requests_to_neighbours'][-1]
    total_requests = run['hits_total'][-1] + run['misses_total'][-1]
    return neighbour_requests / total_requests


def calc_average_neighbour_bytes(run):
    """Divide the last `neighbour_bytes_total` sample by the sum of the last `cache_bytes_total` and `origin_bytes_total` samples."""
    neighbour_bytes = run['neighbour_bytes_total'][-1]
    total_bytes = run['cache_bytes_total'][-1] + run['origin_bytes_total'][-1]
    return neighbour_bytes / total_bytes


In [None]:
# Share of requests sent to neighbours per trace generator: LRU baseline first, then one entry per profile size.
for name, generator_identifier in run_names.items():
    strategies = {"LRU": calc_over_setups(lru_runs, generator_identifier, calc_average_neighbour_to_total)}
    for i, runs_for_size in enumerate(profiles_runs):
        strategies[f"Profiles Size={profile_sizes[i]}"] = calc_over_setups(runs_for_size, generator_identifier, calc_average_neighbour_to_total)

    print(f"{name} Fraction of Internal Requests")
    for key, values in strategies.items():
        print(f"\t{key}:\t{pretty_print_variance(calc_variance(values))}")

    # The same values once more on a single line, separated by " & ".
    row_cells = [pretty_print_variance(calc_variance(values)) for values in strategies.values()]
    print(" & ".join(row_cells))

In [None]:
# Share of bytes served by neighbours per trace generator: LRU baseline first, then one entry per profile size.
for name, generator_identifier in run_names.items():
    strategies = {"LRU": calc_over_setups(lru_runs, generator_identifier, calc_average_neighbour_bytes)}
    for i, runs_for_size in enumerate(profiles_runs):
        strategies[f"Profiles Size={profile_sizes[i]}"] = calc_over_setups(runs_for_size, generator_identifier, calc_average_neighbour_bytes)

    print(f"{name} Fraction of Internal Bandwidth")
    for key, values in strategies.items():
        print(f"\t{key}:\t{pretty_print_variance(calc_variance(values))}")

    # The same values once more on a single line, separated by " & ".
    row_cells = [pretty_print_variance(calc_variance(values)) for values in strategies.values()]
    print(" & ".join(row_cells))