In [1]:
import datetime
import os
import matplotlib
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from notebook_shared import utils

from statsmodels.tsa.seasonal import seasonal_decompose
from pandas import Series

import seaborn as sns
# Apply seaborn's default theme first, then layer the colour-blind-friendly
# matplotlib style sheet on top of it.
sns.set()
# matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8-*"
# and newer releases no longer ship the old names, so use whichever spelling
# this installation provides. If neither is listed, fall back to the legacy
# name so the failure mode is the same as before.
_COLORBLIND_STYLE = 'seaborn-v0_8-colorblind'
if _COLORBLIND_STYLE not in matplotlib.style.available:
    _COLORBLIND_STYLE = 'seaborn-colorblind'
matplotlib.style.use(_COLORBLIND_STYLE)

# Arguments passed to utils.get_dataset_path to locate the parquet file.
# NOTE(review): presumably the dataset base name and its size variant --
# confirm against notebook_shared.utils.
INPUT_FILE = "dataset"
FSIZE = "full"

# Resolve the path first, then read the whole parquet file into memory.
dataset_path = utils.get_dataset_path(INPUT_FILE, FSIZE)
df = pd.read_parquet(dataset_path)

In [2]:
# Per-provider statistic of `runtime` computed by utils.cov (presumably the
# coefficient of variation, given the 'Runtime CV' column name -- confirm in
# notebook_shared.utils). Providers whose result is NaN are dropped, and the
# result is a one-column frame indexed by provider.
# observed=True matches the other groupbys in this notebook and keeps
# categorical levels that never occur in the data out of the result.
runtime_cov_df = (
    df.groupby(['provider'], observed=True)['runtime']
    .apply(utils.cov)
    .dropna()
    .reset_index(name='Runtime CV')
    .set_index(['provider'])
)

# Number of rows for every (provider, driver_invocation) pair.
# Without observed=True, categorical keys would expand to the full cartesian
# product of their levels and add zero-count rows for pairs that never
# happened, which would distort the response-count distribution.
resp_df = (
    df.groupby(['provider', 'driver_invocation'], observed=True)
    .size()
    .reset_index(name='counts')
)

In [None]:
sns.set(font_scale=1.4)

# Per-provider overview: one row of four panels that share the x axis.
#   [0] runtime box plot      [1] runtime CV bars
#   [2] round trip box plot   [3] response count box plot
# NOTE(review): with sharex=True the bar chart and the box plots share tick
# locations; verify that utils.boxplot places its boxes where the bar chart
# puts its bars. Statement order below is kept as-is because later plots can
# override the shared x-axis settings.
fig, axes = plt.subplots(figsize=(16,6), ncols=4, nrows=1, frameon=False, sharex=True)

runtime_bx_ax = axes[0]
runtime_cov_ax = axes[1]
roundtrip_bx_ax = axes[2]
respcount_bx_ax = axes[3]

# Runtime CV: one bar per provider, no legend, horizontal tick labels.
runtime_cov_df.plot(kind='bar', ax=runtime_cov_ax)
runtime_cov_ax.set_xlabel('')
runtime_cov_ax.set_ylabel('Runtime CV')
runtime_cov_ax.get_legend().remove()
plt.setp(runtime_cov_ax.get_xticklabels(), rotation=0)

# Runtime box plot, grouped by provider (sorted so groups appear in provider order).
df_boxplot = df[['provider', 'runtime']].sort_values(by=['provider']).groupby(['provider'], observed=True)
utils.boxplot(runtime_bx_ax, df_boxplot, 'Runtime', utils.tick_get_1st)

# Round trip time box plot, grouped by provider.
df_boxplot = df[['provider', 'roundTripTime']].sort_values(by=['provider']).groupby(['provider'], observed=True)
utils.boxplot(roundtrip_bx_ax, df_boxplot, 'Round Trip Time', utils.tick_get_1st)

# Response count box plot: distribution of per-invocation row counts per provider.
df_boxplot = resp_df[['provider', 'counts']].sort_values(by=['provider']).groupby(['provider'], observed=True)
utils.boxplot(respcount_bx_ax, df_boxplot, 'Response Count', utils.tick_get_1st)

fig.tight_layout()

# Descriptive statistics that accompany the figure. Every groupby passes
# observed=True so the three summary tables cover the same set of providers
# (previously only `data` did).
data = df[['provider', 'runtime', 'roundTripTime']].groupby(['provider'], observed=True).describe()
# The tables are handed to utils.plot under the given name parts; what it does
# with them is defined in notebook_shared.utils.
utils.plot(
    ['provider_analysis', 'pa_provider_mean_cov'],
    data=data,
    respdata=resp_df.groupby(['provider'], observed=True).describe(),
    runtime_cv=runtime_cov_df.groupby(['provider'], observed=True).describe(),
)

In [None]:
# Round trip time grouped by provider, restricted to providers present in the data.
rtt_by_provider = df.groupby(['provider'], observed=True)['roundTripTime']

# Apply utils.cov to each provider's values, drop NaN results, and return a
# one-column frame ('Round Trip Time CV') indexed by provider.
rtt_cov_df = (
    rtt_by_provider
    .apply(utils.cov)
    .dropna()
    .reset_index(name='Round Trip Time CV')
    .set_index(['provider'])
)
# Last expression of the cell: shows the table in the notebook output.
rtt_cov_df

In [None]:
# Bar chart of the per-provider round trip time CV on its own small figure.
# (The former leading `rtt_cov_df.groupby('provider')` statement built a
# groupby and discarded it, so it has been removed.)
fig, axes = plt.subplots(figsize=(5,5))

# NOTE(review): bxdf is not read anywhere in this cell; it is kept only in case
# a later cell relies on it -- delete it if nothing does.
bxdf = rtt_cov_df.reset_index().groupby(['provider'], observed=True)
rtt_cov_df.plot.bar(ax=axes)
# Label the y axis the same way as the runtime CV panel so the figure stands alone.
axes.set_ylabel('Round Trip Time CV')

# Save under the existing name parts; the string had no placeholders, so the
# f-prefix was dropped without changing its value.
utils.savefig(fig, ['provider_analysis', 'rtt_cv_boxplot'], data=rtt_cov_df)