# Storage class choices

Author: Sławomir Górawski

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import pandas as pd
import ipywidgets as widgets
from IPython.display import display, Markdown

In [2]:
# Google Cloud Storage prices, taken from https://cloud.google.com/storage/pricing
# Data for region Warsaw (europe-central2) as of October 2024.
# All prices are in USD per GB per month.
STANDARD_STORAGE_PRICE = 0.023  # Standard storage class
COLDLINE_STORAGE_PRICE = 0.006  # Coldline storage class
ARCHIVAL_STORAGE_PRICE = 0.0025  # Archive storage class

In [8]:
def _parse_data_sizes(total_data_tb: str) -> list:
    """Parse a comma-separated string of data sizes [TB] into a list of ints.

    Empty and whitespace-only entries (trailing comma, ``"10, ,100"``) are
    skipped; any other entry must be a valid integer or ``ValueError`` is raised.
    """
    stripped_parts = (part.strip() for part in total_data_tb.split(','))
    return [int(part) for part in stripped_parts if part]


def _monthly_costs(sizes_tb, coldline_ratio, archival_ratio):
    """Return a float DataFrame of monthly costs [$] per option, indexed by size [TB]."""
    # Decimal units: 1 TB = 1000 GB. Work on a float array so every row is
    # computed in one vectorised step and the columns get a numeric dtype.
    total_gb = np.asarray(sizes_tb, dtype=float) * 1000
    standard_cost = total_gb * (1 - coldline_ratio) * STANDARD_STORAGE_PRICE

    # Option 1: everything stays in standard storage.
    opt1 = total_gb * STANDARD_STORAGE_PRICE
    # Option 2: `coldline_ratio` of the data moves to coldline storage.
    opt2 = standard_cost + total_gb * coldline_ratio * COLDLINE_STORAGE_PRICE
    # Option 3: same standard share as option 2, but `archival_ratio` of the
    # total data is taken out of the coldline share and kept in archival storage.
    opt3 = (
        standard_cost
        + total_gb * (coldline_ratio - archival_ratio) * COLDLINE_STORAGE_PRICE
        + total_gb * archival_ratio * ARCHIVAL_STORAGE_PRICE
    )

    return pd.DataFrame({'opt1': opt1, 'opt2': opt2, 'opt3': opt3}, index=sizes_tb)


def _plot_costs(df, column_descs, log_scale):
    """Draw a grouped bar chart with one group per data size and one bar per option."""
    bar_width = 0.2
    positions = np.arange(len(df))
    bar_styles = {
        'opt1': {'color': 'lightgray', 'hatch': '/'},
        'opt2': {'color': 'gray', 'hatch': '.'},
        'opt3': {'color': 'darkgray', 'hatch': 'x'},
    }

    _, ax = plt.subplots(figsize=(8, 6))

    # Shift each option by one bar width so the bars of a group sit side by side.
    for slot, (key, style) in enumerate(bar_styles.items()):
        ax.bar(
            positions + slot * bar_width,
            df[key],
            bar_width,
            label=column_descs[key],
            edgecolor='black',
            **style,
        )

    ax.set_xlabel('Total data [TB]')
    ax.set_ylabel('Total costs [$/mo]' + (' (log scale)' if log_scale else ''))
    ax.set_title('Storage classes: monthly costs comparison')

    # Centre each tick under the middle bar of its group.
    ax.set_xticks(positions + bar_width)
    ax.set_xticklabels(df.index)

    if log_scale:
        ax.set_yscale('log')

    # Must happen after set_yscale(), which installs its own formatter.
    # Plain decimal labels instead of scientific notation.
    formatter = ticker.ScalarFormatter()
    formatter.set_scientific(False)
    ax.yaxis.set_major_formatter(formatter)

    ax.legend()
    plt.show()


def calculate(total_data_tb: str, coldline_ratio: float, archival_ratio: float, log_scale: bool) -> None:
    """Compare monthly storage costs of three storage-class mixes and show them.

    Displays a table and a grouped bar chart with the monthly cost of:
    standard storage only; standard + coldline; standard + coldline + archival.

    Args:
        total_data_tb: Comma-separated total data sizes in TB, e.g. ``'10,100,1000'``.
            Blank entries are ignored.
        coldline_ratio: Fraction of the total data that is not kept in standard
            storage (all of it goes to coldline in the second option).
        archival_ratio: Fraction of the total data kept in archival storage in
            the third option; it is taken out of the coldline share, so it
            must not exceed ``coldline_ratio``.
        log_scale: Whether the chart's y-axis uses a logarithmic scale.

    Raises:
        ValueError: If a size entry is not an integer, or if the ratios do not
            satisfy ``0 <= archival_ratio <= coldline_ratio <= 1``.
    """
    # Reject ratios that would yield negative cost components (also catches NaN).
    if not 0 <= archival_ratio <= coldline_ratio <= 1:
        raise ValueError(
            'Ratios must satisfy 0 <= archival_ratio <= coldline_ratio <= 1, '
            f'got coldline_ratio={coldline_ratio}, archival_ratio={archival_ratio}'
        )

    index = _parse_data_sizes(total_data_tb)

    column_descs = {
        'opt1': 'Standard storage only cost [$/mo]',
        'opt2': 'Standard and coldline storage cost [$/mo]',
        'opt3': 'Standard, coldline and archival storage cost [$/mo]',
    }

    df = _monthly_costs(index, coldline_ratio, archival_ratio)

    display(df.rename_axis('Total data [TB]').rename(columns=column_descs))

    _plot_costs(df, column_descs, log_scale)

# Input widgets for the interactive calculator; their values are passed to calculate().
total_data_tb_widget = widgets.Text(value='10,100,1000', description='Data [TB]', placeholder='Add values, comma separated')
coldline_data_ratio_widget = widgets.FloatText(value=0.9, description='Coldline ratio')
archival_data_ratio_widget = widgets.FloatText(value=0.5, description='Archival ratio')
chart_log_scale_widget = widgets.Checkbox(value=True, description='Log scale (for the chart)')

# interact_manual adds a button and runs calculate() only when it is clicked,
# rather than on every widget change. The trailing semicolon suppresses the
# notebook's echo of the call's return value.
widgets.interact_manual(
    calculate,
    total_data_tb=total_data_tb_widget,
    coldline_ratio=coldline_data_ratio_widget,
    archival_ratio=archival_data_ratio_widget,
    log_scale=chart_log_scale_widget,
);

interactive(children=(Text(value='10,100,1000', continuous_update=False, description='Data [TB]', placeholder=…