In [1]:
# %%javascript
# IPython.OutputArea.auto_scroll_threshold = 9999;

In [2]:
import datetime
import os
import matplotlib
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pickle
import os

from notebook_shared import decomposeutils as dcu
from notebook_shared import utils

from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller

from pandas import Series

import seaborn as sns
sns.set()
# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*' and
# later releases dropped the old names, so pick whichever spelling this
# installation actually ships instead of hard-coding the legacy one.
matplotlib.style.use(
    'seaborn-v0_8-colorblind'
    if 'seaborn-v0_8-colorblind' in matplotlib.style.available
    else 'seaborn-colorblind'
)

# Dataset selection: both values are handed to utils.get_dataset_path below.
INPUT_FILE = "dataset"
FSIZE = "full" #"tiny", "small" or "full"

# Load the parquet dataset that every later cell works on.
df = pd.read_parquet(utils.get_dataset_path(INPUT_FILE, FSIZE))

In [3]:
from IPython.display import display

def output_decompose(provider, region, dt_rounding, decompose_result):
    """Show one decomposition result inline in the notebook.

    Displays the decomposition and seasonal-analysis figures, prints the
    stationarity test summary (case description, then the ADF and KPSS result
    texts) and finally displays the correlation and distribution figures.

    Args:
        provider: Not used by this function; kept for call compatibility.
        region: Not used by this function; kept for call compatibility.
        dt_rounding: Not used by this function; kept for call compatibility.
        decompose_result: Mapping with the keys 'decomposition',
            'seasonal_analysis' and 'stats_test' as read below.
    """
    # Figures that do not depend on the statistical tests come first.
    for section in ('decomposition', 'seasonal_analysis'):
        display(decompose_result[section]['fig'])

    stats = decompose_result['stats_test']

    # Textual summary of the tests.
    print(stats['case_desc'])
    for test_name in ('adfuller', 'kpss'):
        print(stats[test_name]['result']['text'])

    # Diagnostic figures of the tests.
    for fig_key in ('fig_corr', 'fig_dist'):
        display(stats[fig_key])
    

In [4]:
def save_decompose(label, decompose_result):
    """Save all figures of one decomposition result through ``utils.savefig``.

    Every figure is saved under the path components
    ``['temporal_analysis', 'decomposition', <decompose_col>, <provider>,
    '<region>_<label><suffix>']``. The decomposition figure uses no suffix;
    the distribution, correlation and seasonal figures use ``_dist``,
    ``_corr`` and ``_seasonal``.

    Args:
        label: Name of the time resolution (e.g. 'hourly'); becomes part of
            the file name.
        decompose_result: Mapping with the keys 'parameters', 'decomposition',
            'seasonal_analysis' and 'stats_test'. When 'stats_test' is None
            only the decomposition and seasonal figures are saved, and no
            test texts are attached to the decomposition figure.
    """
    params = decompose_result['parameters']
    provider = params['provider']
    region = params['region']
    decompose_col = params['decompose_col']
    stats_test = decompose_result['stats_test']

    def target(suffix=''):
        # Path components shared by all figures of this result.
        return ['temporal_analysis', 'decomposition', decompose_col, provider, f'{region}_{label}{suffix}']

    if stats_test is None:
        utils.savefig(decompose_result['decomposition']['fig'], target())
    else:
        # Attach the test outcome texts to the decomposition figure.
        utils.savefig(
            decompose_result['decomposition']['fig'],
            target(),
            case=stats_test['case_desc'],
            adfuller=stats_test['adfuller']['result']['text'],
            kpss=stats_test['kpss']['result']['text'])

        utils.savefig(stats_test['fig_dist'], target('_dist'))

        # The '_corr' file must hold the correlation figure; it previously
        # received a second copy of the distribution figure.
        utils.savefig(stats_test['fig_corr'], target('_corr'))

    utils.savefig(decompose_result['seasonal_analysis']['fig'], target('_seasonal'))

# Decomposition of Runtime: Half-Hourly, Hourly, Daily

In [5]:
# One entry per time resolution to decompose at. Each entry holds, in this
# order: label, dt_rounding, adflag, dtfmt, providers. Keep the key order
# stable, since consumers may read the values positionally.
decompositions_params = [
    {
        "label": resolution_label,
        "dt_rounding": rounding,
        "adflag": lag,
        'dtfmt': '%d/%m (%a)',
        "providers": ['AWS', 'AZURE', 'GCP'],
    }
    for resolution_label, rounding, lag in (
        ('half-hourly', '30min', 48),
        ('hourly', 'H', 24),
        ('daily', 'D', 7),
    )
]

In [6]:
%matplotlib agg
from concurrent.futures import ThreadPoolExecutor 
import warnings
from statsmodels.tools.sm_exceptions import InterpolationWarning
warnings.simplefilter('ignore', InterpolationWarning)
import multiprocessing

decompose_runtime_results = {}
cpus = 4 # otherwise memory dies

sns.set(font_scale=1.5)
decomp_dataset = df
for decomp_param in decompositions_params:
    label, dt_rounding, adflag, dtfmt, providers = decomp_param.values()
    decompositions = []
    for provider in providers:
        regions = list(decomp_dataset[decomp_dataset['provider'] == provider].groupby(['provider'], observed=True)['region'].unique().values[0])
        
        def process(region):
            decomp = dcu.decompose(decomp_dataset, provider, region, 'runtime', dt_rounding, adflag=adflag, test_regression='ct', dtfmt=dtfmt)
            decompositions.append(decomp)
            return region
        
        with ThreadPoolExecutor(max_workers=cpus) as executor:
            results = list(executor.map(process, regions))
            
        executor.shutdown(wait=True)
        print('processed:', provider, results)
    decompose_runtime_results[label] = decompositions
%matplotlib inline

processed: AWS ['ap-northeast-1', 'ap-south-1', 'ap-southeast-2', 'ca-central-1', 'eu-central-1', 'eu-west-2', 'sa-east-1', 'us-east-1', 'us-west-1', 'us-west-2']
processed: AZURE ['australiaeast', 'brazilsouth', 'centralindia', 'eastus', 'germanywestcentral', 'japaneast', 'uksouth', 'westus', 'westus2']
processed: GCP ['asia-northeast1', 'asia-south1', 'australia-southeast1', 'europe-west2', 'europe-west3', 'northamerica-northeast1', 'southamerica-east1', 'us-east4', 'us-west2', 'us-west4']
processed: AWS ['ap-northeast-1', 'ap-south-1', 'ap-southeast-2', 'ca-central-1', 'eu-central-1', 'eu-west-2', 'sa-east-1', 'us-east-1', 'us-west-1', 'us-west-2']
processed: AZURE ['australiaeast', 'brazilsouth', 'centralindia', 'eastus', 'germanywestcentral', 'japaneast', 'uksouth', 'westus', 'westus2']
processed: GCP ['asia-northeast1', 'asia-south1', 'australia-southeast1', 'europe-west2', 'europe-west3', 'northamerica-northeast1', 'southamerica-east1', 'us-east4', 'us-west2', 'us-west4']
proces

In [7]:
# Save the figures of every decomposition result and pickle each result.
fdir = os.path.join('pickles', 'temporal_analysis')
os.makedirs(fdir, exist_ok=True)

labels = ['half-hourly', 'hourly', 'daily']
for label in labels:
    for result in decompose_runtime_results[label]:
        params = result['parameters']
        decompose_col = params['decompose_col']
        provider = params['provider']
        region = params['region']
        # output_decompose(provider, region, label, result)
        save_decompose(label, result)
        # Provider and region are part of the file name so that each result
        # gets its own pickle. Without them every result of a label was
        # written to the same path and only the last one survived.
        # NOTE(review): anything loading the old
        # 'decomposition-<col>-<size>-<label>.p' files must be pointed at the
        # new per-region names.
        fname = f'decomposition-{decompose_col}-{provider}-{region}-{FSIZE}-{label}.p'
        with open(os.path.join(fdir, fname), 'wb') as f:
            pickle.dump(result, f, pickle.HIGHEST_PROTOCOL)

# Decomposition of Number of CPU Types per Invocation: Half-Hourly, Hourly, Daily

In [8]:
# Create CPU counts: for every (driver_invocation, provider, region, timezone,
# workload_invocation) group, count the distinct cpuType values and store the
# count in the column 'NoCpuTypeInvocation'.
df_cpus = df[['provider', 'region', 'timezone', 'cpuType', 'workload_invocation', 'driver_invocation']]
df_cpu_counts = (
    df_cpus
    .groupby(['driver_invocation', 'provider', 'region', 'timezone', 'workload_invocation'], observed=True)['cpuType']
    .nunique()
    .rename('NoCpuTypeInvocation')
    .reset_index()  # group keys become ordinary columns again
)
# Summary statistics of the counts per provider and region (cell output).
df_cpu_counts.groupby(['provider', 'region'], observed=True).describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,NoCpuTypeInvocation,NoCpuTypeInvocation,NoCpuTypeInvocation,NoCpuTypeInvocation,NoCpuTypeInvocation,NoCpuTypeInvocation,NoCpuTypeInvocation,NoCpuTypeInvocation
Unnamed: 0_level_1,Unnamed: 1_level_1,count,mean,std,min,25%,50%,75%,max
provider,region,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
AWS,ap-northeast-1,2895.0,1.014508,0.119592,1.0,1.0,1.0,1.0,2.0
AWS,ap-south-1,2894.0,1.045957,0.209428,1.0,1.0,1.0,1.0,2.0
AWS,ap-southeast-2,2896.0,1.03453,0.182619,1.0,1.0,1.0,1.0,2.0
AWS,ca-central-1,2897.0,1.072144,0.25877,1.0,1.0,1.0,1.0,2.0
AWS,eu-central-1,2895.0,1.011744,0.107752,1.0,1.0,1.0,1.0,2.0
AWS,eu-west-2,2893.0,1.002765,0.052522,1.0,1.0,1.0,1.0,2.0
AWS,sa-east-1,2894.0,1.019696,0.138977,1.0,1.0,1.0,1.0,2.0
AWS,us-east-1,2887.0,1.200208,0.400225,1.0,1.0,1.0,1.0,2.0
AWS,us-west-1,2893.0,1.092637,0.289974,1.0,1.0,1.0,1.0,2.0
AWS,us-west-2,2886.0,1.001733,0.041594,1.0,1.0,1.0,1.0,2.0


In [9]:
%matplotlib agg
from concurrent.futures import ThreadPoolExecutor 
import warnings
from statsmodels.tools.sm_exceptions import InterpolationWarning
warnings.simplefilter('ignore', InterpolationWarning)
import multiprocessing

decompose_runtime_results = {}
cpus = 4 # otherwise memory dies
sns.set(font_scale=1.5)

decomp_dataset = df_cpu_counts
for decomp_param in decompositions_params:
    label, dt_rounding, adflag, dtfmt, providers = decomp_param.values()
    decompositions = []
    for provider in providers:
        regions = list(decomp_dataset[decomp_dataset['provider'] == provider].groupby(['provider'], observed=True)['region'].unique().values[0])
        
        def process(region):
            decomp = dcu.decompose(decomp_dataset, provider, region, 'NoCpuTypeInvocation', dt_rounding, 
                                   adflag=adflag, test_regression='ct', dtfmt=dtfmt,
                                   statsTest = False, agg='mean')
            decompositions.append(decomp)
            return region
        
        with ThreadPoolExecutor(max_workers=cpus) as executor:
            results = list(executor.map(process, regions))
            
        executor.shutdown(wait=True)
        print('processed:', provider, results)
    decompose_runtime_results[label] = decompositions
%matplotlib inline

processed: AWS ['ap-northeast-1', 'ap-south-1', 'ap-southeast-2', 'ca-central-1', 'eu-central-1', 'eu-west-2', 'sa-east-1', 'us-east-1', 'us-west-1', 'us-west-2']
processed: AZURE ['australiaeast', 'brazilsouth', 'centralindia', 'eastus', 'germanywestcentral', 'japaneast', 'uksouth', 'westus', 'westus2']
processed: GCP ['asia-northeast1', 'asia-south1', 'australia-southeast1', 'europe-west2', 'europe-west3', 'northamerica-northeast1', 'southamerica-east1', 'us-east4', 'us-west2', 'us-west4']
processed: AWS ['ap-northeast-1', 'ap-south-1', 'ap-southeast-2', 'ca-central-1', 'eu-central-1', 'eu-west-2', 'sa-east-1', 'us-east-1', 'us-west-1', 'us-west-2']
processed: AZURE ['australiaeast', 'brazilsouth', 'centralindia', 'eastus', 'germanywestcentral', 'japaneast', 'uksouth', 'westus', 'westus2']
processed: GCP ['asia-northeast1', 'asia-south1', 'australia-southeast1', 'europe-west2', 'europe-west3', 'northamerica-northeast1', 'southamerica-east1', 'us-east4', 'us-west2', 'us-west4']
proces

In [10]:
# Save the figures of every decomposition result and pickle each result.
fdir = os.path.join('pickles', 'temporal_analysis')
os.makedirs(fdir, exist_ok=True)

labels = ['half-hourly', 'hourly', 'daily']
for label in labels:
    for result in decompose_runtime_results[label]:
        params = result['parameters']
        decompose_col = params['decompose_col']
        provider = params['provider']
        region = params['region']
        # output_decompose(provider, region, label, result)
        save_decompose(label, result)
        # Provider and region are part of the file name so that each result
        # gets its own pickle. Without them every result of a label was
        # written to the same path and only the last one survived.
        # NOTE(review): anything loading the old
        # 'decomposition-<col>-<size>-<label>.p' files must be pointed at the
        # new per-region names.
        fname = f'decomposition-{decompose_col}-{provider}-{region}-{FSIZE}-{label}.p'
        with open(os.path.join(fdir, fname), 'wb') as f:
            pickle.dump(result, f, pickle.HIGHEST_PROTOCOL)