In [None]:
import boto3
import botocore
from iterdub import iterdub as ib
from iterpop import iterpop as ip
import itertools as it
from matplotlib import pyplot as plt
import pandas as pd
from pandas.util import hash_pandas_object
import seaborn as sns
from teeplot import teeplot as tp


In [None]:
from dishpylib.pyhelpers import make_outattr_metadata
from dishpylib.pyhelpers import print_runtime


In [None]:
# Report runtime details via the dishpylib helper imported above.
# NOTE(review): what exactly gets printed is defined in dishpylib, which is
# not visible from this notebook -- confirm there if the output matters.
print_runtime()


# get data


In [None]:
# Open the bucket with unsigned requests, so no AWS credentials are needed.
s3_handle = boto3.resource(
    's3',
    region_name="us-east-2",
    config=botocore.config.Config(signature_version=botocore.UNSIGNED),
)
bucket_handle = s3_handle.Bucket('prq49')

# Single-element unpacking doubles as a sanity check: it raises ValueError
# unless exactly one object lives under this prefix.
[series_profiles] = bucket_handle.objects.filter(
    Prefix='endeavor=16/thread-profiles/stage=8+what=elaborated/',
)


In [None]:
# Read the xz-compressed CSV directly from the object located above.
df = pd.read_csv(f's3://prq49/{series_profiles.key}', compression='xz')

# Hexadecimal rendering of the summed per-row hashes; recorded alongside
# plots below so outputs can be tied back to the exact data they came from.
dfdigest = f'{hash_pandas_object(df).sum():x}'
dfdigest


In [None]:
# Expose one sub-frame per stint as a notebook-level variable named
# `df<stint>` (df0, df1, ... when stints are integers).
#
# The names are bound through globals() rather than by exec()-ing generated
# source: the stint value is compared as a real Python object instead of being
# pasted into code, so non-integer labels no longer produce broken or
# unintended statements.
# NOTE: these frames are snapshots taken at this point in the notebook; columns
# added to `df` in later cells do not appear in them.
for stint in df['Stint'].unique():
    globals()[f'df{stint}'] = df[df['Stint'] == stint]


In [None]:
# Per-(Series, Stint) statistics of 'Update', broadcast back to row level with
# transform(). The groupby is built once and every statistic is computed before
# any column is written, instead of re-grouping the frame for each term.
update_by_series_stint = df.groupby(['Series', 'Stint'])['Update']
update_mean = update_by_series_stint.transform('mean')
update_std = update_by_series_stint.transform('std')
update_max = update_by_series_stint.transform('max')
update_min = update_by_series_stint.transform('min')

# Each row's update count relative to its group's mean.
df['Normalized Elapsed Updates'] = df['Update'] / update_mean

# Group standard deviation expressed as a fraction of the group mean.
df['Elapsed Update Std'] = update_std / update_mean

# Group range (max - min) expressed as a fraction of the group mean.
df['Elapsed Update Span'] = (update_max - update_min) / update_mean


# case study series 16005


In [None]:
# Rows belonging to the case-study series only.
dfx = df.loc[df['Series'] == 16005]


In [None]:
# Relative standard deviation of elapsed updates per stint for series 16005.
# The outattrs describe the full data set (`df`), the filter applied, and the
# data digest; make_outattr_metadata() entries are merged last, so they win on
# any key collision.
tp.tee(
    sns.lineplot,
    data=dfx,
    x='Stint',
    y='Elapsed Update Std',
    teeplot_outattrs={
        'bucket': ib.dub(df['Bucket']),
        'endeavor': ib.dub(df['Endeavor'].astype(int)),
        'transform': 'filter-Series-16005',
        '_dfdigest': dfdigest,
        **make_outattr_metadata(),
    },
)


In [None]:
# Relative span (max - min over mean) of elapsed updates per stint for series
# 16005. The outattrs describe the full data set (`df`), the filter applied,
# and the data digest; make_outattr_metadata() entries are merged last, so they
# win on any key collision.
tp.tee(
    sns.lineplot,
    data=dfx,
    x='Stint',
    y='Elapsed Update Span',
    teeplot_outattrs={
        'bucket': ib.dub(df['Bucket']),
        'endeavor': ib.dub(df['Endeavor'].astype(int)),
        'transform': 'filter-Series-16005',
        '_dfdigest': dfdigest,
        **make_outattr_metadata(),
    },
)


In [None]:
def boxstrip(*args, **kwargs):
    """Draw a box plot overlaid with one dash marker per observation.

    Three layers are drawn on the current axes:

    1. ``sns.boxplot`` with ``whis=(0, 100)``, i.e. whiskers placed at the
       0th and 100th percentiles so they cover the full data range.
    2. A white, slightly thicker ``'_'`` scatter layer that acts as an
       outline for the markers drawn on top of it.
    3. A ``'_'`` scatter layer drawn with the caller's arguments (including
       ``hue``/``palette`` when given).

    Afterwards the legend, if one was created, is removed and every x tick
    label except each tenth one (indices 0, 10, 20, ...) is hidden.

    Args:
        *args: Positional arguments forwarded unchanged to ``sns.boxplot``
            and to both ``sns.scatterplot`` calls.
        **kwargs: Keyword arguments forwarded to the same three calls. Must
            not contain ``whis``, ``marker``, ``zorder``, ``linewidth`` or
            ``color``, which are set here.

    Returns:
        None. The plot is drawn on the current matplotlib axes.
    """
    sns.boxplot(
        *args,
        **kwargs,
        whis=(0, 100),
    )

    # The underlay has a fixed colour, so it takes no hue mapping. `palette`
    # is dropped together with `hue`: seaborn ignores (and warns about) a
    # palette that is supplied without a hue variable.
    underlay_kwargs = {
        key: value
        for key, value in kwargs.items()
        if key not in ('hue', 'palette')
    }
    sns.scatterplot(
        *args,
        **underlay_kwargs,
        marker='_',
        zorder=100,
        linewidth=3,
        color='white',
    )
    sns.scatterplot(
        *args,
        **kwargs,
        marker='_',
        zorder=100,
        linewidth=2,
    )

    ax = plt.gca()

    # get_legend() returns None when nothing created a legend (for example
    # when the caller passed no `hue`); only remove one that exists.
    legend = ax.get_legend()
    if legend is not None:
        legend.remove()

    # Thin out the x tick labels: keep indices 0, 10, 20, ... and hide the rest.
    for idx, label in enumerate(ax.xaxis.get_ticklabels()):
        if idx % 10:
            label.set_visible(False)

# Box-plus-dash plot of normalized elapsed updates per stint for series 16005.
# The palette repeats r, g, b with one entry per distinct stint, so
# neighbouring stints get different colours.
tp.tee(
    boxstrip,
    data=dfx,
    x='Stint',
    y='Normalized Elapsed Updates',
    palette=[
        ('r', 'g', 'b')[position % 3]
        for position in range(len(dfx['Stint'].unique()))
    ],
    hue='Stint',
    teeplot_outattrs={
        'bucket': ib.dub(df['Bucket']),
        'endeavor': ib.dub(df['Endeavor'].astype(int)),
        'transform': 'filter-Series-16005',
        '_dfdigest': dfdigest,
        **make_outattr_metadata(),
    },
)


In [None]:
# Mean of each relative-dispersion column over the case-study rows.
for label, column in (
    ('mean update std', 'Elapsed Update Std'),
    ('mean update span', 'Elapsed Update Span'),
):
    print(label, dfx[column].mean())


In [None]:
# Largest relative standard deviation in the case study and the stint where it
# occurs. NOTE(review): ip.pophomogeneous presumably requires all selected
# values to be identical and returns that value -- confirm in iterpop.
max_update_std = dfx['Elapsed Update Std'].max()
print(
    'max update std', max_update_std,
    '@ Stint', ip.pophomogeneous(
        dfx.loc[dfx['Elapsed Update Std'] == max_update_std, 'Stint']
    ),
)

# Same for the relative span.
max_update_span = dfx['Elapsed Update Span'].max()
print(
    'max update span', max_update_span,
    '@ Stint', ip.pophomogeneous(
        dfx.loc[dfx['Elapsed Update Span'] == max_update_span, 'Stint']
    ),
)
