# Autoscaling vs fixed provisioning

Author: Sławomir Górawski

This notebook contains code supplementing my Master's thesis, "Exploring cloud application architectures: how architectural choices impact the scale-cost dynamics". It is used to calculate the costs of cloud architectures, depending on various parameters that can be customized.

This notebook corresponds to case study 4.1, "Autoscaling vs fixed provisioning". For explanations of how the calculations work, please refer to the thesis.

---

How to run (in Google Colab):

1. Click "Connect" in the top-right corner. (You may be asked to log in to your Google account; this is fine, and the service should be free.)
2. Select "Runtime" > "Run all". If it doesn't work, run every cell one by one, top to bottom, using the ▶ button.
3. At the bottom, there should be inputs for the parameters. Adjust them to your liking and click "Run Interact". This should give you the results as a table and a chart. You can change the parameters and click the same button again to re-run with the new values.

In [None]:
import math

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import pandas as pd
import ipywidgets as widgets
from IPython.display import display, Markdown

In [None]:
# Taken from https://cloud.google.com/compute/all-pricing
# All prices in USD.
# Machine: c4-standard-4 (4 vCPUs, 15 GB RAM), region Belgium.

# Price of one server for one hour; read by calculate_autoscaling_cost.
SERVER_PRICE_PER_HOUR = 0.217654

# Price of one server for a whole month; read by calculate_fixed_cost.
# Numerically this equals 730 * SERVER_PRICE_PER_HOUR.
# NOTE(review): presumably the pricing page assumes a 730-hour month -- confirm.
SERVER_PRICE_PER_MONTH = 158.88742

In [None]:
def calculate_fixed_cost(
    dau: int,
    reqs_per_user: int,
    activity_window_h: int,
    peak_load_factor: float,
    server_capacity_reqs_per_s: int,
) -> float:
    """Return the monthly cost [USD] of a fixed fleet sized for the peak load.

    The daily request volume is spread evenly over the activity window to get
    the average load in requests per second, which is then multiplied by the
    peak load factor. The fleet is the smallest whole number of servers whose
    combined capacity covers that peak, and every server is billed at
    ``SERVER_PRICE_PER_MONTH``.

    Args:
        dau: Number of daily active users; must be positive.
        reqs_per_user: Requests made by one user per day; must be positive.
        activity_window_h: Hours per day over which the requests are spread;
            must be in the range (0, 24].
        peak_load_factor: Ratio of peak load to average load; must be >= 1.
        server_capacity_reqs_per_s: Requests per second one server can
            handle; must be positive.

    Returns:
        Number of servers needed multiplied by the monthly server price.

    Raises:
        AssertionError: If any argument is outside its allowed range.
    """
    # Validate the inputs up front so the arithmetic below is well defined.
    assert dau > 0
    assert reqs_per_user > 0
    assert 0 < activity_window_h <= 24
    assert peak_load_factor >= 1
    assert server_capacity_reqs_per_s > 0

    requests_per_day = dau * reqs_per_user
    window_seconds = activity_window_h * 3600

    # Average requests per second within the window, scaled up to the peak.
    peak_reqs_per_s = (requests_per_day / window_seconds) * peak_load_factor

    # A partially used server still has to be paid for in full, hence ceil.
    server_count = math.ceil(peak_reqs_per_s / server_capacity_reqs_per_s)
    return server_count * SERVER_PRICE_PER_MONTH


def calculate_autoscaling_cost(
    dau: int,
    reqs_per_user: int,
    autoscaling_inefficiency_factor: float,
    server_capacity_reqs_per_s: int,
) -> float:
    """Return the monthly cost [USD] of serving the load with autoscaling.

    The daily request volume is converted into the number of server-hours
    needed to process it at full server capacity. That figure is inflated by
    the inefficiency factor, priced at ``SERVER_PRICE_PER_HOUR`` and
    multiplied by 30 days to obtain a monthly amount.

    Args:
        dau: Number of daily active users; must be positive.
        reqs_per_user: Requests made by one user per day; must be positive.
        autoscaling_inefficiency_factor: Multiplier (>= 1) applied to the
            ideal number of server-hours.
        server_capacity_reqs_per_s: Requests per second one server can
            handle; must be positive.

    Returns:
        The estimated monthly cost as a float.

    Raises:
        AssertionError: If any argument is outside its allowed range.
    """
    assert dau > 0
    assert reqs_per_user > 0
    assert autoscaling_inefficiency_factor >= 1
    assert server_capacity_reqs_per_s > 0

    requests_per_day = dau * reqs_per_user
    hourly_capacity = server_capacity_reqs_per_s * 3600

    # Server-hours per day if every running server were fully utilised.
    ideal_server_hours = requests_per_day / hourly_capacity
    billed_server_hours = ideal_server_hours * autoscaling_inefficiency_factor
    daily_cost = billed_server_hours * SERVER_PRICE_PER_HOUR

    # A month is taken as 30 days here.
    # NOTE(review): SERVER_PRICE_PER_MONTH equals 730 * SERVER_PRICE_PER_HOUR,
    # i.e. a 730-hour month, while 30 days is 720 hours -- confirm that this
    # small difference between the two cost models is intended.
    return daily_cost * 30

In [None]:
def _parse_daus(daus_csv: str) -> list:
    """Parse a comma-separated string of DAU values into a list of ints.

    Entries that are empty or contain only whitespace are skipped, so inputs
    such as ``'1, ,2'`` or ``'1,2, '`` do not fail on ``int('')``. Underscore
    digit separators (``10_000``) are accepted by ``int``. Any other
    non-numeric entry still raises ``ValueError``.
    """
    stripped_parts = (part.strip() for part in daus_csv.split(','))
    return [int(part) for part in stripped_parts if part]


def calculate(daus_csv: str, reqs_per_user: int, server_capacity_reqs_per_s: int, activity_window_h: int, peak_load_factor: float, autoscaling_inefficiency_factor: float, log_scale: bool):
    """Compute, tabulate and plot fixed vs autoscaling monthly costs.

    For every DAU value in ``daus_csv`` the fixed-provisioning and the
    autoscaling cost are calculated, shown as a table and drawn as a grouped
    bar chart.

    Args:
        daus_csv: Comma-separated DAU values, e.g. ``'10_000,100_000'``.
            Blank entries are ignored.
        reqs_per_user: Average number of requests one user makes daily.
        server_capacity_reqs_per_s: Requests per second one server handles.
        activity_window_h: Hours per day in which the activity happens.
        peak_load_factor: How many times the peak load exceeds the average.
        autoscaling_inefficiency_factor: Multiplier for autoscaling overhead.
        log_scale: If true, the chart's y-axis uses a logarithmic scale.

    Raises:
        ValueError: If a non-blank entry of ``daus_csv`` is not an integer.
        AssertionError: Propagated from the cost functions when a parameter
            is outside its allowed range.
    """
    index = _parse_daus(daus_csv)

    column_descs = {
        'fixed': 'Total fixed cost [$/mo]',
        'autoscaling': 'Total autoscaling cost [$/mo]',
    }

    # Calculate the results and put them into a DataFrame

    df = pd.DataFrame(columns=list(column_descs.keys()), index=index)

    for dau in index:
        fixed_cost = calculate_fixed_cost(dau, reqs_per_user, activity_window_h, peak_load_factor, server_capacity_reqs_per_s)
        autoscaling_cost = calculate_autoscaling_cost(dau, reqs_per_user, autoscaling_inefficiency_factor, server_capacity_reqs_per_s)

        df.loc[dau] = [fixed_cost, autoscaling_cost]

    # Show the table with human-readable headers; the short keys stay in `df`
    # because the plotting code below looks the columns up by them.
    display(df.rename_axis('DAU').rename(columns=column_descs))

    # Plot the results

    # Define the width of the bars
    bar_width = 0.2

    # Set the positions of the bars on the x-axis
    index_positions = np.arange(len(df))

    # Create the figure and axes
    plt.figure(figsize=(8,6))

    # Plot the bars for both columns; the second series is shifted by one bar
    # width so the two bars of a DAU value sit side by side.
    plt.bar(index_positions, df['fixed'], bar_width, label=column_descs['fixed'], color='lightgray', edgecolor='black', hatch='/')
    plt.bar(index_positions + bar_width, df['autoscaling'], bar_width, label=column_descs['autoscaling'], color='gray', edgecolor='black', hatch='.')

    # Add labels and title
    plt.xlabel('Daily Active Users')
    plt.ylabel('Total costs [$/mo]' + (' (log scale)' if log_scale else ''))
    plt.title('Autoscaling vs fixed provisioning: monthly costs comparison')

    # Add tick marks for the index, centred between the two bars of a group
    plt.xticks(index_positions + bar_width / 2, df.index)

    if log_scale:
        # Set the y-axis to logarithmic scale
        plt.yscale('log')

    # Use plain decimal format for the y-axis labels
    ax = plt.gca()  # Get current axis
    ax.yaxis.set_major_formatter(ticker.ScalarFormatter())
    ax.yaxis.get_major_formatter().set_scientific(False)
    ax.ticklabel_format(axis='y', style='plain')  # Ensure plain decimal format

    # Add legend
    plt.legend()

    # Display the chart
    plt.show()


# Input widgets, one per parameter of `calculate`.
daus_csv_widget = widgets.Text(
    value='10_000,100_000,1_000_000,10_000_000',
    description='DAU',
    placeholder='Add values, comma separated',
)
reqs_per_user_widget = widgets.BoundedIntText(
    value=50,
    min=1,
    description='Reqs/user',
)
server_capacity_reqs_per_s_widget = widgets.BoundedIntText(
    value=100,
    min=1,
    description='Server req/s',
)
activity_window_h_widget = widgets.BoundedIntText(
    value=10,
    min=1,
    max=24,
    description='Activity w. [h]',
)
peak_load_factor_widget = widgets.BoundedFloatText(
    value=2,
    min=1,
    description='Peak load f.',
)
autoscaling_inefficiency_factor_widget = widgets.BoundedFloatText(
    value=2,
    min=1,
    description='Auto in. f.',
)
chart_log_scale_widget = widgets.Checkbox(
    value=True,
    description='Log scale (for the chart)',
)

# Help text rendered above the controls.
_inputs_help_md = '''
## Inputs

Adjust the values below and click "Run Interact" to run (or re-run) the calculation.

Parameters:

* DAU: Daily Active User values to run the calculation for, as a comma-separated list (e.g. `10_000,100_000,1_000_000,10_000_000`).
* Reqs/user: Request per user; an average number of requests that one user makes daily.
* Server req/s: Server capacity, in the number of handled requests per second.
* Activity w. [h]: Users activity window; the number of hours in a day where most of the user activity happens, from 1 to 24.
* Peak load f.: Peak load factor; how many times is the peak load higher than the average load?
* Auto in. f.: Autoscaling inefficiency factor; an arbitrary number to factor in autoscaler delays and half-used servers.

Warning: The inputs may be locale-dependent, so you can try with a comma if a dot doesn't seem to work (`0,9` instead of `0.9`).
'''
display(Markdown(_inputs_help_md))

# Bind each widget to the matching keyword argument of `calculate`.
# The trailing semicolon suppresses the notebook's echo of the return value.
widgets.interact_manual(
    calculate,
    daus_csv=daus_csv_widget,
    reqs_per_user=reqs_per_user_widget,
    server_capacity_reqs_per_s=server_capacity_reqs_per_s_widget,
    activity_window_h=activity_window_h_widget,
    peak_load_factor=peak_load_factor_widget,
    autoscaling_inefficiency_factor=autoscaling_inefficiency_factor_widget,
    log_scale=chart_log_scale_widget,
);


## Inputs

Adjust the values below and click "Run Interact" to run (or re-run) the calculation.

Parameters:

* DAU: Daily Active User values to run the calculation for, as a comma-separated list (e.g. `10_000,100_000,1_000_000,10_000_000`).
* Reqs/user: Request per user; an average number of requests that one user makes daily.
* Server req/s: Server capacity, in the number of handled requests per second.
* Activity w. [h]: Users activity window; the number of hours in a day where most of the user activity happens, from 1 to 24.
* Peak load f.: Peak load factor; how many times is the peak load higher than the average load?
* Auto in. f.: Autoscaling inefficiency factor; an arbitrary number to factor in autoscaler delays and half-used servers.

Warning: The inputs may be locale-dependent, so you can try with a comma if a dot doesn't seem to work (`0,9` instead of `0.9`).


interactive(children=(Text(value='10_000,100_000,1_000_000,10_000_000', description='DAU', placeholder='Add va…