In [1]:
# Import Libraries
import math
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt

In [2]:
# Seaborn Styling
# Global plot appearance for every figure produced later in the notebook.
# sns.set() is called first; the two calls below then adjust the plotting
# context and the axes style on top of it.
sns.set()
# "poster" context, with font elements scaled by 1.25.
sns.set_context("poster", font_scale = 1.25)
# "ticks" axes style.
sns.set_style("ticks")

In [6]:
# Experiment constants

# Names of the experiment modes.
modes = [
    'once-per-minute',
    'once-per-five-minutes',
    'once-per-hour',
]

# Platform name -> benchmark names evaluated on that platform.
platforms = {
    "pypy": [
        'bfs',
        'dfs',
        'mst',
        'dynamic-html',
        'pagerank',
        'compress',
        'upload',
        'thumbnail',
        'video',
    ],
    "jvm": [
        'matrix-multiplication',
        'word-count',
        'simple-hash',
        'html-rendering',
    ],
}

# Strategy identifiers exactly as they appear in the 'strategy' CSV column.
strategies = [
    'cold',
    'fixed&request_to_checkpoint=1',
    'request_centric&max_capacity=12',
]

# Values of the 'rate' CSV column that the analysis iterates over.
eviction_rates = [1, 4, 20]

# Mutability levels that the analysis iterates over.
mutabilities = [1]

# Column names handed to pd.read_csv(names=...) for the evaluation CSV files.
df_columns = [
    'request_number',
    'benchmark',
    'mutability',
    'strategy',
    'rate',
    'client',
    'server',
    'overhead',
]

In [7]:
def convergence(platform: str, benchmark: str, eviction_rate: np.int64):
    """Find where the request-centric strategy's client latency settles.

    Loads the evaluation CSV for ``platform``, keeps the rows of the
    'request_centric&max_capacity=12' strategy for the given ``benchmark`` and
    ``eviction_rate``, and slides a fixed-size window over the 'client'
    latencies. The first window whose median lies within 2% of the target
    latency marks the convergence point.

    The target latency is the median 'client' value of the rows whose
    'request_number' is at least 0.8 * 500 (i.e. 400).

    Args:
        platform: "pypy" selects '../data/python-evaluation.csv'; any other
            value selects '../data/java-evaluation.csv'.
        benchmark: value matched against the 'benchmark' column.
        eviction_rate: value matched against the 'rate' column.

    Returns:
        The position (0-based, within the filtered latency array) of the first
        full window whose median is within the tolerance band, or ``None`` when
        there is no such window or no rows to derive a target from.
    """
    # Both files share the same column layout; only the path differs.
    if platform == "pypy":
        csv_path = '../data/python-evaluation.csv'
    else:
        csv_path = '../data/java-evaluation.csv'
    df = pd.read_csv(csv_path, names=df_columns)

    # Request-centric strategy only, for the requested rate and benchmark.
    selected = df[
        (df['strategy'] == 'request_centric&max_capacity=12')
        & (df['rate'] == eviction_rate)
        & (df['benchmark'] == benchmark)
    ]

    # Client-side latencies, in file order.
    latencies = selected['client'].to_numpy()

    # Target latency: median over the tail of the run.
    # NOTE(review): 500 looks like the number of requests per run - confirm.
    steady_state = selected[selected['request_number'] >= (0.8 * 500)]['client'].to_numpy()
    if steady_state.size == 0:
        # No tail samples: a target cannot be computed, so nothing can converge.
        return None
    target = np.median(steady_state)

    # Acceptable deviation from target: target +/- 2%.
    target_l = target * (0.98)
    target_h = target * (1.02)

    window_size = 20

    # The search skips the first 100 positions on pypy and the first 200
    # otherwise (presumably a warm-up period - TODO confirm).
    first_index = 100 if platform == "pypy" else 200

    # Only full windows are examined: stopping at len - window_size keeps the
    # trailing, shorter slices (down to a single sample) from being mistaken
    # for a converged window.
    for index in range(first_index, len(latencies) - window_size + 1):
        window_median = np.median(latencies[index : index + window_size])
        if target_l <= window_median <= target_h:
            return index

    return None

In [8]:
# Convergence index for every combination, nested as
# table[platform][benchmark][mutability][eviction_rate].
table = {
    platform: {
        benchmark: {
            mutability: {
                eviction_rate: convergence(platform, benchmark, eviction_rate)
                for eviction_rate in eviction_rates
            }
            for mutability in mutabilities
        }
        for benchmark in benchmark_names
    }
    for platform, benchmark_names in platforms.items()
}

# Report one line per benchmark, restricted to eviction rate 4.
for platform, benchmark_names in platforms.items():
    for benchmark in benchmark_names:
        for mutability in mutabilities:
            for eviction_rate in eviction_rates:
                if eviction_rate != 4:
                    continue
                converged_at = table[platform][benchmark][mutability][eviction_rate]
                print(f"{benchmark}: {converged_at}")

bfs: 113
dfs: 113
mst: 135
dynamic-html: 210
pagerank: 126
compress: 100
upload: 144
thumbnail: 100
video: 165
matrix-multiplication: 202
word-count: 213
simple-hash: 201
html-rendering: 203


# Performance Numbers (CDFs)

In [9]:
import pandas as pd

In [10]:
# Benchmark identifier (as stored in the CSV files) -> display title.
function_titles = {
    "bfs": 'BFS',
    "dfs": 'DFS',
    "dynamic-html": 'DynamicHTML',
    "mst": 'MST',
    "pagerank": 'PageRank',
    "compress": 'Compression',
    "upload": 'Uploader',
    "thumbnail": 'Thumbnailer',
    "video": 'Video',
    "matrix-multiplication": 'MatrixMult',
    "simple-hash": 'Hash',
    "html-rendering": 'HTML Rendering',
    "word-count": 'WordCount',
}

# NOTE(review): this rebinds `platforms` from the dict defined in the
# experiment-constants cell to a plain list, so the convergence-table cell
# cannot be re-run after this cell without re-running the constants first.
platforms = ["python", "java"]

eviction_rates = [1, 4, 20]

strategies = [
    'cold',
    'fixed&request_to_checkpoint=1',
    'request_centric&max_capacity=12',
]

## Orchestration Strategy

In [16]:
# Load both evaluation files with the shared column layout.
python_df = pd.read_csv("../data/python-evaluation.csv", names=df_columns)
java_df = pd.read_csv("../data/java-evaluation.csv", names=df_columns)

# Stack the two platforms and keep only the rows recorded at rate 1.
df = pd.concat([python_df, java_df]).loc[lambda frame: frame['rate'] == 1]

In [17]:
# Median client latency for every (benchmark, strategy) pair.
# 'client' is selected *before* aggregating so that only this column is
# reduced; taking the median of the whole grouped frame first does needless
# work and raises on non-numeric columns in pandas >= 2.0.
df_grouped = df.groupby(["benchmark", "strategy"])["client"].median().reset_index()

# One row per benchmark, one column per strategy.
df_pivot = df_grouped.pivot(index='benchmark', columns='strategy', values='client')

# Calculating improvement: relative reduction (%) in median client latency of
# the request-centric strategy compared with the fixed strategy.
df_pivot['improvement'] = (df_pivot['fixed&request_to_checkpoint=1'] - df_pivot['request_centric&max_capacity=12']) / df_pivot['fixed&request_to_checkpoint=1'] * 100

# Keep only benchmarks whose improvement exceeds 5%
df_pivot_positive = df_pivot[df_pivot['improvement'] > 5]

# Find the benchmark with the smallest improvement among those above 5%
min_positive_improvement_benchmark = df_pivot_positive['improvement'].idxmin()

print(f"The benchmark with the minimum positive improvement is: {min_positive_improvement_benchmark}")

# The maximum is taken over all benchmarks, not just the filtered ones.
max_improvement_benchmark = df_pivot['improvement'].idxmax()

print(f"The benchmark with the maximum improvement is: {max_improvement_benchmark}")

# print improvement of each benchmark
print(df_pivot['improvement'])

The benchmark with the minimum positive improvement is: compress
The benchmark with the maximum improvement is: simple-hash
benchmark
bfs                      24.957781
compress                  7.525384
dfs                      48.870966
dynamic-html              4.443416
html-rendering           55.755759
matrix-multiplication    13.745837
mst                      45.654389
pagerank                 14.634416
simple-hash              60.221936
thumbnail               -19.721264
upload                  -23.354426
video                    -2.645932
word-count               50.232988
Name: improvement, dtype: float64


In [19]:
# Geometric Mean

from scipy.stats import gmean

# Benchmarks included in the summary
benchmarks_list = ["bfs", "dfs", "dynamic-html", "mst", "pagerank", "compress", "thumbnail", "upload", "video", "matrix-multiplication", "simple-hash", "word-count", "html-rendering"]
print(f"Number of benchmarks: {len(benchmarks_list)}")

# Filter df for these benchmarks
df_filtered = df[df["benchmark"].isin(benchmarks_list)]

# Calculate the median client latency for each benchmark and strategy.
# 'client' is selected before aggregating so that only this column is reduced;
# taking the median of the whole grouped frame first does needless work and
# raises on non-numeric columns in pandas >= 2.0.
df_grouped = df_filtered.groupby(["benchmark", "strategy"])["client"].median().reset_index()

# Pivot the table to have each strategy as a separate column
df_pivot = df_grouped.pivot(index='benchmark', columns='strategy', values='client')

# Calculate the improvement: relative reduction (%) in median client latency
# of the request-centric strategy compared with the fixed strategy.
df_pivot['improvement'] = (df_pivot['fixed&request_to_checkpoint=1'] - df_pivot['request_centric&max_capacity=12']) / df_pivot['fixed&request_to_checkpoint=1'] * 100

# Define thresholds for different categories
threshold_positive = 5.0  # 5% improvement threshold
threshold_negative = -5.0  # -5% improvement threshold

# Improved: above +5%; on par: within [-5%, +5%]; worsened: below -5%.
# The index here holds benchmark names only (the pivot has no rate level).
improved_benchmarks = df_pivot[df_pivot['improvement'] > threshold_positive].index
on_par_benchmarks = df_pivot[(df_pivot['improvement'] >= threshold_negative) & (df_pivot['improvement'] <= threshold_positive)].index
worsened_benchmarks = df_pivot[df_pivot['improvement'] < threshold_negative].index

print(f"Benchmarks (and corresponding rates) that improved: {improved_benchmarks.tolist()}")
print(f"Number of benchmarks that improved: {len(improved_benchmarks)}")
print(f"Benchmarks (and corresponding rates) that are on par: {on_par_benchmarks.tolist()}")
print(f"Number of benchmarks that are on par: {len(on_par_benchmarks)}")
print(f"Benchmarks (and corresponding rates) that worsened: {worsened_benchmarks.tolist()}")
print(f"Number of benchmarks that worsened: {len(worsened_benchmarks)}")

# Keep only rows whose improvement exceeds threshold_positive; the geometric
# mean below is taken over these strictly positive values.
df_pivot_positive = df_pivot[df_pivot['improvement'] > threshold_positive]

# Calculate geometric mean of the median improvements
geo_mean_improvement = gmean(df_pivot_positive['improvement'])

print(f"The geometric mean of the median percentage improvements for the benchmarks with positive improvements is: {geo_mean_improvement:.2f}%")


Number of benchmarks: 13
Benchmarks (and corresponding rates) that improved: ['bfs', 'compress', 'dfs', 'html-rendering', 'matrix-multiplication', 'mst', 'pagerank', 'simple-hash', 'word-count']
Number of benchmarks that improved: 9
Benchmarks (and corresponding rates) that are on par: ['dynamic-html', 'video']
Number of benchmarks that are on par: 2
Benchmarks (and corresponding rates) that worsened: ['thumbnail', 'upload']
Number of benchmarks that worsened: 2
The geometric mean of the median percentage improvements for the benchmarks with positive improvements is: 28.94%


## Request Rates

In [20]:
# Reload both evaluation files with the shared column layout; `df` then holds
# the two platforms stacked, with every rate included.
python_df, java_df = (
    pd.read_csv(csv_path, names=df_columns)
    for csv_path in ("../data/python-evaluation.csv", "../data/java-evaluation.csv")
)
df = pd.concat([python_df, java_df])

In [21]:
from scipy.stats import gmean

# Calculate the median client times for each benchmark, strategy, and rate.
# 'client' is selected before aggregating so that only this column is reduced;
# taking the median of the whole grouped frame first does needless work and
# raises on non-numeric columns in pandas >= 2.0.
df_grouped = df.groupby(["benchmark", "strategy", "rate"])["client"].median().reset_index()

# Pivot the table to have each strategy as a separate column, while maintaining benchmark and rate in the index
df_pivot = df_grouped.pivot(index=['benchmark', 'rate'], columns='strategy', values='client')

# Calculate the improvement: relative reduction (%) in median client latency
# of the request-centric strategy compared with the fixed strategy.
df_pivot['improvement'] = (df_pivot['fixed&request_to_checkpoint=1'] - df_pivot['request_centric&max_capacity=12']) / df_pivot['fixed&request_to_checkpoint=1'] * 100

# Filter out rows where improvement is less than or equal to 5%;
# reset_index turns 'benchmark' and 'rate' back into columns for the groupby.
df_pivot_positive = df_pivot[df_pivot['improvement'] > 5].reset_index()

# Calculate geometric mean of the median improvements for each rate
geo_mean_improvement_per_rate = df_pivot_positive.groupby("rate")['improvement'].apply(gmean)

print(f"The geometric mean of the median improvements per rate is:\n{geo_mean_improvement_per_rate}")

The geometric mean of the median improvements per rate is:
rate
1     28.935272
4     21.320460
20    16.622072
Name: improvement, dtype: float64


In [22]:
from scipy.stats import gmean

# Calculate the median client times for each benchmark, strategy, and rate.
# 'client' is selected before aggregating so that only this column is reduced;
# taking the median of the whole grouped frame first does needless work and
# raises on non-numeric columns in pandas >= 2.0.
df_grouped = df.groupby(["benchmark", "strategy", "rate"])["client"].median().reset_index()

# Pivot the table to have each strategy as a separate column, while maintaining benchmark and rate in the index
df_pivot = df_grouped.pivot(index=['benchmark', 'rate'], columns='strategy', values='client')

# Calculate the percentage improvement: relative reduction in median client
# latency of the request-centric strategy compared with the fixed strategy.
df_pivot['improvement'] = (df_pivot['fixed&request_to_checkpoint=1'] - df_pivot['request_centric&max_capacity=12']) / df_pivot['fixed&request_to_checkpoint=1'] * 100

# Define thresholds for different categories
threshold_positive = 5.0  # 5% improvement threshold
threshold_negative = -5.0  # -5% improvement threshold

# Find (benchmark, rate) pairs that improve (> +5%), are on par (within
# [-5%, +5%]), or worsen (< -5%).
improved_benchmarks = df_pivot[df_pivot['improvement'] > threshold_positive].index
on_par_benchmarks = df_pivot[(df_pivot['improvement'] >= threshold_negative) & (df_pivot['improvement'] <= threshold_positive)].index
worsened_benchmarks = df_pivot[df_pivot['improvement'] < threshold_negative].index

print(f"Benchmarks (and corresponding rates) that improved: {improved_benchmarks.tolist()}")
print(f"Number of benchmarks that improved: {len(improved_benchmarks)}")
print(f"Benchmarks (and corresponding rates) that are on par: {on_par_benchmarks.tolist()}")
print(f"Number of benchmarks that are on par: {len(on_par_benchmarks)}")
print(f"Benchmarks (and corresponding rates) that worsened: {worsened_benchmarks.tolist()}")
print(f"Number of benchmarks that worsened: {len(worsened_benchmarks)}")

# Keep only rows whose improvement exceeds threshold_positive; the geometric
# mean below is taken over these strictly positive values.
df_pivot_positive = df_pivot[df_pivot['improvement'] > threshold_positive]

# Calculate geometric mean of the median improvements
geo_mean_improvement = gmean(df_pivot_positive['improvement'])

print(f"The geometric mean of the median percentage improvements for the benchmarks with positive improvements is: {geo_mean_improvement:.2f}%")

Benchmarks (and corresponding rates) that improved: [('bfs', 1), ('bfs', 4), ('bfs', 20), ('compress', 1), ('compress', 4), ('dfs', 1), ('dfs', 4), ('dfs', 20), ('dynamic-html', 4), ('dynamic-html', 20), ('html-rendering', 1), ('html-rendering', 4), ('html-rendering', 20), ('matrix-multiplication', 1), ('matrix-multiplication', 4), ('matrix-multiplication', 20), ('mst', 1), ('mst', 4), ('mst', 20), ('pagerank', 1), ('pagerank', 4), ('pagerank', 20), ('simple-hash', 1), ('simple-hash', 4), ('simple-hash', 20), ('thumbnail', 20), ('upload', 20), ('word-count', 1), ('word-count', 4), ('word-count', 20)]
Number of benchmarks that improved: 30
Benchmarks (and corresponding rates) that are on par: [('compress', 20), ('dynamic-html', 1), ('thumbnail', 4), ('video', 1), ('video', 20)]
Number of benchmarks that are on par: 5
Benchmarks (and corresponding rates) that worsened: [('thumbnail', 1), ('upload', 1), ('upload', 4), ('video', 4)]
Number of benchmarks that worsened: 4
The geometric mean 