# Content Delivery Network

Author: Sławomir Górawski

This notebook contains code supplementing my Master's thesis, "Exploring cloud application architectures: how architectural choices impact the scale-cost dynamics". It is used to calculate the costs of cloud architectures, depending on various parameters that can be customized.

This notebook corresponds to case study 4.3, "Content Delivery Network". For explanations of how the calculations work, please refer to the thesis.

---

How to run (in Google Colab):

1. Click "Connect" > "Connect to a hosted runtime" in the top-right corner. (You may be asked to log in to your Google account; this is fine, and the service should be free.)
2. Select "Runtime" > "Run everything". If it doesn't work, run every cell one by one, top to bottom, using the ▶ button.
3. At the bottom, there should be inputs for the parameters. Adjust them to your liking and click "Run Interact". This should give you the results as a table and a chart. You can change the parameters and click the same button again to re-run with the new values.

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import pandas as pd
import ipywidgets as widgets
from IPython.display import display, Markdown

In [2]:
# Origin
# Taken from https://cloud.google.com/vpc/network-pricing
# Default tier; region: Europe, Warsaw; transfer within Europe only
# All in USD/GB, monthly. Each constant is the per-GB price of one usage tier;
# the tier boundaries named here are applied in calculate_origin_cost.
ORIGIN_BANDWIDTH_COST_BELOW_1_TB: float = 0.12
ORIGIN_BANDWIDTH_COST_FROM_1_TB_TO_10_TB: float = 0.11
ORIGIN_BANDWIDTH_COST_ABOVE_10_TB: float = 0.08

# CDN
# Taken from https://cloud.google.com/cdn/pricing
# Region: Europe
# The following are all in USD/GB, monthly. The three bandwidth constants are
# per-GB prices of successive usage tiers (applied in
# calculate_cdn_bandwidth_cost); the cache fill price is charged per GB of
# cache fill traffic (see calculate_cdn_cost).
CDN_BANDWIDTH_COST_BELOW_10_TB: float = 0.08
CDN_BANDWIDTH_COST_FROM_10_TO_150_TB: float = 0.055
CDN_BANDWIDTH_COST_ABOVE_150_TB: float = 0.03
CDN_CACHE_FILL_COST: float = 0.01

# This one is in USD per 10 000 requests (not per GB, unlike the prices above)
CDN_REQUEST_COST_PER_10K: float = 0.0075

In [5]:
def calculate_origin_cost(bandwidth_gb: int) -> float:
    """Return the origin bandwidth cost in USD for ``bandwidth_gb`` of traffic.

    The price is tiered: the first 1000 GB are billed at
    ``ORIGIN_BANDWIDTH_COST_BELOW_1_TB``, the next 9000 GB at
    ``ORIGIN_BANDWIDTH_COST_FROM_1_TB_TO_10_TB``, and everything beyond
    10 000 GB at ``ORIGIN_BANDWIDTH_COST_ABOVE_10_TB``.

    Args:
        bandwidth_gb: Transferred volume in GB; must be non-negative.

    Returns:
        The sum of the per-tier costs.
    """
    assert bandwidth_gb >= 0

    first_tier_limit_gb = 1000
    second_tier_limit_gb = 10_000

    # Fill the tiers from cheapest-threshold upwards; whatever does not fit
    # into a tier spills over into the next one.
    first_tier_gb = min(bandwidth_gb, first_tier_limit_gb)
    second_tier_gb = min(
        bandwidth_gb - first_tier_gb,
        second_tier_limit_gb - first_tier_limit_gb,
    )
    third_tier_gb = bandwidth_gb - first_tier_gb - second_tier_gb

    total_cost = first_tier_gb * ORIGIN_BANDWIDTH_COST_BELOW_1_TB
    total_cost += second_tier_gb * ORIGIN_BANDWIDTH_COST_FROM_1_TB_TO_10_TB
    total_cost += third_tier_gb * ORIGIN_BANDWIDTH_COST_ABOVE_10_TB
    return total_cost


def calculate_cdn_bandwidth_cost(bandwidth_gb: int) -> float:
    """Return the CDN bandwidth cost in USD for ``bandwidth_gb`` of traffic.

    The price is tiered: the first 10 000 GB are billed at
    ``CDN_BANDWIDTH_COST_BELOW_10_TB``, the next 140 000 GB at
    ``CDN_BANDWIDTH_COST_FROM_10_TO_150_TB``, and everything beyond
    150 000 GB at ``CDN_BANDWIDTH_COST_ABOVE_150_TB``.

    Args:
        bandwidth_gb: Transferred volume in GB; must be non-negative.

    Returns:
        The sum of the per-tier costs.
    """
    assert bandwidth_gb >= 0

    # (tier capacity in GB, price per GB); the last tier is unbounded.
    tiers = (
        (10_000, CDN_BANDWIDTH_COST_BELOW_10_TB),
        (150_000 - 10_000, CDN_BANDWIDTH_COST_FROM_10_TO_150_TB),
        (float('inf'), CDN_BANDWIDTH_COST_ABOVE_150_TB),
    )

    remaining_gb = bandwidth_gb
    total_cost = 0.0
    for capacity_gb, price_per_gb in tiers:
        # Bill as much as fits into this tier, carry the rest to the next one.
        billed_gb = min(remaining_gb, capacity_gb)
        total_cost += billed_gb * price_per_gb
        remaining_gb -= billed_gb

    return total_cost


def calculate_cdn_cost(bandwidth_gb: int, cache_hit_ratio: float, avg_request_size_gb: float) -> float:
    """Return the total CDN cost in USD: bandwidth + cache fill + requests.

    Args:
        bandwidth_gb: Volume served by the CDN in GB; must be non-negative.
        cache_hit_ratio: Fraction of traffic served from cache, in [0, 1].
        avg_request_size_gb: Average size of one request in GB; must be
            positive because it is used to derive the number of requests.

    Returns:
        The sum of the three cost components.
    """
    assert bandwidth_gb >= 0
    assert 0 <= cache_hit_ratio <= 1
    assert avg_request_size_gb > 0

    # Tiered price for the traffic served by the CDN.
    bandwidth_cost = calculate_cdn_bandwidth_cost(bandwidth_gb)

    # Only the cache misses are billed as cache fill.
    miss_ratio = 1 - cache_hit_ratio
    cache_fill_cost = miss_ratio * bandwidth_gb * CDN_CACHE_FILL_COST

    # Requests are billed in units of 10 000; the request count is derived
    # from the total volume and the average request size.
    request_count = bandwidth_gb / avg_request_size_gb
    request_cost = (request_count / 10_000) * CDN_REQUEST_COST_PER_10K

    return bandwidth_cost + cache_fill_cost + request_cost

In [7]:
def calculate(bandwidths_tb_csv: str, cdn_cache_hit_ratio: float, avg_request_size_gb: float, log_scale: bool):
    """Compute origin vs CDN monthly costs and show them as a table and a bar chart.

    Args:
        bandwidths_tb_csv: Comma-separated list of integer bandwidth values in
            TB (e.g. ``'1,10,100,1000'``). Blank entries, including
            whitespace-only ones such as a trailing ``', '``, are ignored.
        cdn_cache_hit_ratio: CDN cache hit ratio, passed to calculate_cdn_cost.
        avg_request_size_gb: Average request size in GB, passed to
            calculate_cdn_cost.
        log_scale: If true, the chart's y-axis uses a logarithmic scale.

    Raises:
        ValueError: If a non-blank entry of ``bandwidths_tb_csv`` is not an
            integer.
    """
    # Strip before filtering, so that whitespace-only tokens are skipped
    # instead of reaching int('') and raising.
    tokens = (raw.strip() for raw in bandwidths_tb_csv.split(','))
    index = [int(token) for token in tokens if token]

    column_descs = {
        'origin': 'Total origin cost [$/mo]',
        'cdn': 'Total CDN cost [$/mo]',
    }

    # Calculate the results and put them into a DataFrame

    df = pd.DataFrame(columns=list(column_descs.keys()), index=index)

    for bandwidth_tb in index:
        # The cost functions work in GB; the user input is in TB.
        bandwidth_gb = bandwidth_tb * 1000
        origin_cost = calculate_origin_cost(bandwidth_gb)
        cdn_cost = calculate_cdn_cost(bandwidth_gb, cdn_cache_hit_ratio, avg_request_size_gb)

        df.loc[bandwidth_tb] = [origin_cost, cdn_cost]

    display(df.rename_axis('Bandwidth [TB]').rename(columns=column_descs))

    # Plot the results

    # Define the width of the bars
    bar_width = 0.2

    # Set the positions of the bars on the x-axis
    index_positions = np.arange(len(df))

    # Create the figure and axes
    plt.figure(figsize=(8, 6))

    # Plot the bars for both columns, side by side for each bandwidth value
    plt.bar(index_positions, df['origin'], bar_width, label=column_descs['origin'], color='lightgray', edgecolor='black', hatch='/')
    plt.bar(index_positions + bar_width, df['cdn'], bar_width, label=column_descs['cdn'], color='gray', edgecolor='black', hatch='.')

    # Add labels and title
    plt.xlabel('Bandwidth [TB]')
    plt.ylabel('Total costs [$/mo]' + (' (log scale)' if log_scale else ''))
    plt.title('Origin vs CDN: monthly costs comparison')

    # Put each tick midway between the two bars of a pair
    plt.xticks(index_positions + bar_width / 2, df.index)

    if log_scale:
        # Set the y-axis to logarithmic scale
        plt.yscale('log')

    # Use plain decimal format for the y-axis labels; this must come after
    # the scale is set, because changing the scale resets the formatter.
    ax = plt.gca()  # Get current axis
    ax.yaxis.set_major_formatter(ticker.ScalarFormatter())
    ax.yaxis.get_major_formatter().set_scientific(False)
    ax.ticklabel_format(axis='y', style='plain')  # Ensure plain decimal format

    # Add legend
    plt.legend()

    # Display the chart
    plt.show()


# Input widgets, pre-filled with the default scenario.
bandwidths_tb_csv_widget = widgets.Text(
    value='1,10,100,1000',
    description='Bandwidths',
    placeholder='Add values, comma separated',
)
cdn_cache_hit_ratio_widget = widgets.BoundedFloatText(
    value=0.9,
    min=0,
    max=1,
    description='CDN hit ratio',
)
avg_request_size_gb_widget = widgets.BoundedFloatText(
    value=0.1,
    min=0,
    description='Avg req [GB]',
)
chart_log_scale_widget = widgets.Checkbox(
    value=True,
    description='Log scale (for the chart)',
)

# Render the usage instructions above the input form.
display(Markdown('''
## Inputs

Adjust the values below and click "Run Interact" to run (or re-run) the calculation.

Parameters:

* Bandwidths: Bandwidth values to run the calculation for, in TB, as a comma-separated list (e.g. `1,10,100,1000`).
* CDN hit ratio: How often is the requested asset already cached? (E.g. `0.9` for 90%.)
* Avg req [GB]: Average requested asset size, in GB.

Warning: The inputs may be locale-dependent, so you can try with a comma if a dot doesn't seem to work (`0,9` instead of `0.9`).
'''))

# Bind each widget to the matching parameter of calculate; the calculation
# only runs when the user clicks the button. The trailing semicolon keeps the
# notebook from echoing the call's return value.
widgets.interact_manual(
    calculate,
    bandwidths_tb_csv=bandwidths_tb_csv_widget,
    cdn_cache_hit_ratio=cdn_cache_hit_ratio_widget,
    avg_request_size_gb=avg_request_size_gb_widget,
    log_scale=chart_log_scale_widget,
);


## Inputs

Adjust the values below and click "Run Interact" to run (or re-run) the calculation.

Parameters:

* Bandwidths: Bandwidth values to run the calculation for, in TB, as a comma-separated list (e.g. `1,10,100,1000`).
* CDN hit ratio: How often is the requested asset already cached? (E.g. `0.9` for 90%.)
* Avg req [GB]: Average requested asset size, in GB.

Warning: The inputs may be locale-dependent, so you can try with a comma if a dot doesn't seem to work (`0,9` instead of `0.9`).


interactive(children=(Text(value='1,10,100,1000', description='Bandwidths', placeholder='Add values, comma sep…