In [11]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.stats import norm
import seaborn as sns
import os

# Apply seaborn's default theme once up front; the plotting helpers further
# down call sns.set(...) again to adjust figure size and font scale.
sns.set_theme()

In [12]:
# Latency columns read from every benchmark CSV, in plotting order.
csv_headers = [
    "new_order",
    "payment",
    "order_status",
    "delivery",
    "stock_level",
    "total_time",
]

# Transaction counts aligned index-by-index with csv_headers; the throughput
# plots divide these by the measured latency of the matching column.
avg_transactions = [47, 8, 13, 261, 3, 332]

# Metadata columns parsed from the file name, followed by the CSV columns.
dataframe_headers = ["framework", "warehouse", "cpu", "ram", *csv_headers]

# Directory scanned for benchmark result files.
path = './data/'

In [13]:
def change_lang_to_framework(lang):
    """Translate a language tag into the display name of its web framework.

    Args:
        lang: language tag; 'python', 'js' and 'rust' are recognised.

    Returns:
        'Django', 'ExpressJS' or 'Actix-Web' for a recognised tag, and an
        empty string for anything else.
    """
    known_frameworks = (
        ('python', 'Django'),
        ('js', 'ExpressJS'),
        ('rust', 'Actix-Web'),
    )
    # Compare with == (rather than hashing) so any input type is accepted.
    for tag, framework in known_frameworks:
        if lang == tag:
            return framework
    return ''

def build_dataframe_by_lang():
    """Load every benchmark result file under ``path`` into one DataFrame.

    Each file name is split on '-' into exactly five parts. Parts two to four
    are a one-character prefix followed by an integer (warehouse, cpu, ram);
    the last part is ``<lang>.<extension>``. The language tag is mapped to a
    framework name with ``change_lang_to_framework``.

    Hidden entries (names starting with '.') and anything that is not a
    regular file are skipped, so notebook checkpoint folders or OS metadata
    files inside the data directory do not break loading. Files are processed
    in sorted name order so the resulting row order is reproducible.

    Returns:
        DataFrame with columns ``dataframe_headers``: the four metadata
        columns followed by the ``csv_headers`` columns of every row of every
        file, on a fresh 0..n-1 index. Empty (but with the right columns) when
        no file contributes rows.

    Raises:
        ValueError: if a file name does not have the expected shape.
        KeyError: if a file lacks one of the ``csv_headers`` columns.
    """
    frames = []
    # os.listdir() order is arbitrary; sort for a deterministic result.
    for fname in sorted(os.listdir(path)):
        full_path = os.path.join(path, fname)
        if fname.startswith('.') or not os.path.isfile(full_path):
            continue
        _, warehouse, cpu, ram, tail = fname.split('-')
        lang, _ = tail.split('.')
        file_df = pd.read_csv(full_path)
        if file_df.empty:
            # Header-only file: contributes no rows, and keeping it out of
            # concat() avoids dtype pollution from empty frames.
            continue
        # Attach the metadata as whole columns instead of rebuilding the
        # table row by row; this also preserves the per-column dtypes.
        frame = file_df.loc[:, csv_headers].assign(
            framework=change_lang_to_framework(lang),
            warehouse=int(warehouse[1:]),
            cpu=int(cpu[1:]),
            ram=int(ram[1:]),
        )
        frames.append(frame.loc[:, dataframe_headers])
    if not frames:
        return pd.DataFrame(columns=dataframe_headers)
    return pd.concat(frames, ignore_index=True)


def default_setup(dataset, cpu=4, ram=8):
    """Keep only the runs recorded with one cpu/ram configuration.

    Called with just ``dataset`` (as the plotting helpers do) it selects the
    baseline configuration cpu == 4 and ram == 8; the keyword arguments allow
    choosing another baseline without editing the function.

    Args:
        dataset: DataFrame with ``cpu`` and ``ram`` columns.
        cpu: value the ``cpu`` column must equal (units not visible here;
            presumably cores - confirm against the benchmark setup).
        ram: value the ``ram`` column must equal (presumably GB - confirm).

    Returns:
        The matching rows of ``dataset``; all other columns are untouched.
    """
    return dataset.loc[(dataset['cpu'] == cpu) & (dataset['ram'] == ram)]


def difference_cpu(dataset, warehouse=10, ram=8):
    """Keep the runs where only the cpu setting varies.

    Warehouse and ram are pinned (10 and 8 when called with just
    ``dataset``, as the plotting helpers do), leaving ``cpu`` free to compare.

    Args:
        dataset: DataFrame with ``warehouse`` and ``ram`` columns.
        warehouse: value the ``warehouse`` column must equal.
        ram: value the ``ram`` column must equal.

    Returns:
        The matching rows of ``dataset``.
    """
    return dataset.loc[(dataset['warehouse'] == warehouse) & (dataset['ram'] == ram)]


def difference_ram(dataset, warehouse=10, cpu=4):
    """Keep the runs where only the ram setting varies.

    Warehouse and cpu are pinned (10 and 4 when called with just
    ``dataset``, as the plotting helpers do), leaving ``ram`` free to compare.

    Args:
        dataset: DataFrame with ``warehouse`` and ``cpu`` columns.
        warehouse: value the ``warehouse`` column must equal.
        cpu: value the ``cpu`` column must equal.

    Returns:
        The matching rows of ``dataset``.
    """
    return dataset.loc[(dataset['warehouse'] == warehouse) & (dataset['cpu'] == cpu)]

In [14]:
# Load every benchmark run once, then carve out one frame per framework.
df = build_dataframe_by_lang()
python_df, js_df, rust_df = (
    df.loc[df['framework'] == framework_name]
    for framework_name in ('Django', 'ExpressJS', 'Actix-Web')
)

# (frame, display name) pairs iterated by the per-framework helpers.
datas = [(python_df, 'Django'), (js_df, 'ExpressJS'), (rust_df, 'Actix-Web')]

# Font scale handed to seaborn by every plotting helper.
fscale = 2

In [15]:
def describe_data(get_data):
    """Display summary statistics of the latency columns, once per framework.

    Args:
        get_data: callable that receives a framework's DataFrame and returns
            the rows to summarise.

    For every (frame, name) pair in ``datas`` the ``csv_headers`` columns of
    the selected rows are passed through ``describe()`` and shown as a styled
    table captioned with the framework name.
    """
    for framework_frame, framework_name in datas:
        selected = get_data(framework_frame)
        summary = selected.loc[:, csv_headers].describe()
        # Inline display lets consecutive tables sit next to each other.
        styled = summary.style.set_table_attributes("style='display:inline'")
        display(styled.set_caption(framework_name))


def plot_latency(xs, get_data):
    """Draw one latency box plot per framework and per transaction column.

    Args:
        xs: name of the column placed on the x axis.
        get_data: callable that receives a framework's DataFrame and returns
            the rows to plot.

    Iterates over the (frame, name) pairs in ``datas`` and the columns in
    ``csv_headers``; every combination gets its own figure.
    """
    for framework_frame, framework_name in datas:
        selected = get_data(framework_frame)
        for column in csv_headers:
            subset = selected.loc[:, [xs, column]]
            # Figure size and font scale are re-applied before each figure.
            sns.set(rc={"figure.figsize": (15, 10)}, font_scale=fscale)
            axes = sns.boxplot(x=xs, y=column, data=subset)
            axes.set(title=f'{framework_name} {column} latency',
                     ylabel='Response Time (seconds)')
            plt.show()


def plot_throughput(xs, get_data):
    """Draw one throughput box plot per framework and per transaction column.

    Throughput is the entry of ``avg_transactions`` at the column's position
    in ``csv_headers`` divided by the measured latency of that column.

    Args:
        xs: name of the column placed on the x axis.
        get_data: callable that receives a framework's DataFrame and returns
            the rows to plot.
    """
    for framework_frame, framework_name in datas:
        selected = get_data(framework_frame)
        for position, column in enumerate(csv_headers):
            count = avg_transactions[position]
            subset = selected.loc[:, [xs, column]]
            # Replace the latency column by count / latency on a fresh frame,
            # leaving the caller's data untouched.
            subset = subset.assign(**{column: count / subset[column]})
            sns.set(rc={"figure.figsize": (15, 10)}, font_scale=fscale)
            axes = sns.boxplot(x=xs, y=column, data=subset)
            axes.set(title=f'{framework_name} {column} throughput',
                     ylabel='Throughput (transaction per seconds)')
            plt.show()


def plot_framework_latency(xs, get_data):
    """Draw one latency box plot per transaction column, comparing frameworks.

    Args:
        xs: name of the column placed on the x axis.
        get_data: callable that receives the combined frame ``df`` and returns
            the rows to plot.

    Boxes are grouped by the ``framework`` column through seaborn's ``hue``.
    """
    selected = get_data(df)
    for column in csv_headers:
        subset = selected.loc[:, ['framework', xs, column]]
        # Figure size and font scale are re-applied before each figure.
        sns.set(rc={"figure.figsize": (15, 10)}, font_scale=fscale)
        axes = sns.boxplot(x=xs, y=column, hue='framework', data=subset)
        axes.set(title=f'{column} latency',
                 ylabel='Response Time (seconds)')
        plt.show()
        
def plot_framework_latency_pair_js_rust(xs, get_data):
    """Latency comparison restricted to the non-Django frameworks.

    Produces the same figures as ``plot_framework_latency`` after dropping
    every row whose ``framework`` is 'Django'. The plotting itself is
    delegated to ``plot_framework_latency`` instead of being duplicated, so
    both views cannot drift apart.

    Args:
        xs: name of the column placed on the x axis.
        get_data: callable that receives the combined frame and returns the
            rows to plot; the Django rows are removed from its result.
    """
    def without_django(dataset):
        # Apply the caller's selection first, then drop the Django rows.
        selected = get_data(dataset)
        return selected.loc[selected['framework'] != 'Django']

    plot_framework_latency(xs, without_django)


def plot_framework_throughput(xs, get_data):
    """Draw one throughput box plot per transaction column, comparing frameworks.

    Throughput is the entry of ``avg_transactions`` at the column's position
    in ``csv_headers`` divided by the measured latency of that column.

    Args:
        xs: name of the column placed on the x axis.
        get_data: callable that receives the combined frame ``df`` and returns
            the rows to plot.
    """
    selected = get_data(df)
    for position, column in enumerate(csv_headers):
        count = avg_transactions[position]
        subset = selected.loc[:, ['framework', xs, column]]
        # Replace the latency column by count / latency on a fresh frame.
        subset = subset.assign(**{column: count / subset[column]})
        sns.set(rc={"figure.figsize": (15, 10)}, font_scale=fscale)
        axes = sns.boxplot(x=xs, y=column, hue='framework', data=subset)
        axes.set(title=f'{column} throughput',
                 ylabel='Throughput (transaction per seconds)')
        plt.show()
        
def plot_framework_throughput_pair_js_rust(xs, get_data):
    """Throughput comparison restricted to the non-Django frameworks.

    Produces the same figures as ``plot_framework_throughput`` after dropping
    every row whose ``framework`` is 'Django'. The plotting itself is
    delegated to ``plot_framework_throughput`` instead of being duplicated,
    so both views cannot drift apart.

    Args:
        xs: name of the column placed on the x axis.
        get_data: callable that receives the combined frame and returns the
            rows to plot; the Django rows are removed from its result.
    """
    def without_django(dataset):
        # Apply the caller's selection first, then drop the Django rows.
        selected = get_data(dataset)
        return selected.loc[selected['framework'] != 'Django']

    plot_framework_throughput(xs, without_django)