In [None]:
import boto3
import botocore
from iterdub import iterdub as ib
from matplotlib import pyplot as plt
import pandas as pd
from pandas.util import hash_pandas_object
import seaborn as sns
from teeplot import teeplot as tp


In [None]:
from dishpylib.pyhelpers import make_outattr_metadata
from dishpylib.pyhelpers import print_runtime


In [None]:
# Call the runtime-reporting helper imported from dishpylib.pyhelpers before
# any data is touched; presumably it prints environment/timestamp details for
# provenance -- TODO confirm against dishpylib.
print_runtime()


# get data


In [None]:
# Unsigned S3 client configuration: requests are sent without credentials.
s3_handle = boto3.resource(
    's3',
    region_name="us-east-2",
    config=botocore.config.Config(
        signature_version=botocore.UNSIGNED,
    ),
)
bucket_handle = s3_handle.Bucket('prq49')

# The single-target unpacking doubles as a sanity check: it raises ValueError
# unless exactly one object sits under the prefix.
# (Plain string literal: the prefix has no placeholders, so no f-string.)
series_profiles, = bucket_handle.objects.filter(
    Prefix='endeavor=16/thread-profiles/stage=8+what=elaborated/',
)


In [None]:
# Build the URL from the listed object itself (bucket_name + key) so the bucket
# is not hard-coded a second time here.
df = pd.read_csv(
    f's3://{series_profiles.bucket_name}/{series_profiles.key}',
    compression='xz',
)
# Hex rendering of the summed per-row hashes of the loaded frame; passed to the
# tee calls below as `_dfdigest` so each plot carries a fingerprint of its data.
dfdigest = '{:x}'.format(hash_pandas_object( df ).sum())
dfdigest


In [None]:
# Bind one module-level frame per distinct stint value, named `df<stint>`
# (later cells refer to e.g. `df100`).  Assigning through globals() creates the
# names directly and avoids exec()-ing source text assembled from data values.
for stint in df['Stint'].unique():
    globals()[f'df{stint}'] = df[ df['Stint'] == stint ]


# unique phylogenetic roots over evolutionary time


In [None]:
def logx_lineplot(*args, **kwargs):
    """Draw a seaborn lineplot and switch its x axis to a log scale.

    All positional and keyword arguments are forwarded unchanged to
    `sns.lineplot`.  Returns None.
    """
    # Rescale the Axes seaborn actually drew on (its return value) rather than
    # whatever pyplot considers current, so a forwarded `ax=` is respected.
    drawn_ax = sns.lineplot(
        *args,
        **kwargs,
    )
    drawn_ax.set_xscale('log')


# Root counts against stint, one hue per series, on a log-scaled x axis.
# ci='sd' asks seaborn for standard-deviation error bands.
tp.tee(
    logx_lineplot,
    data=df,
    x='Stint',
    y='Number Phylogenetic Roots',
    hue='Series',
    ci='sd',
    teeplot_outattrs={
        # attributes describing the data behind the plot
        'bucket': ib.dub(df['Bucket']),
        'endeavor': ib.dub(df['Endeavor'].astype(int)),
        'transform': 'identity',
        '_dfdigest': dfdigest,
        # unpacked last, so its keys win on any collision with the ones above
        **make_outattr_metadata(),
    },
)


# within-stint phylogenetic roots over evolutionary time


In [None]:
# Pooled view: within-stint root counts against stint across the whole frame.
# NOTE(review): this is the only tee call in the notebook without
# teeplot_outattrs (bucket / endeavor / _dfdigest metadata) -- confirm whether
# the omission is intentional.
tp.tee(
    sns.lineplot,
    data=df, x='Stint', y='Number Stint Phylogenetic Roots',
)


In [None]:
# Same axes as the pooled plot, but split by series.  'Series' is cast to str
# before being used as the hue variable; the legend is suppressed.
tp.tee(
    sns.lineplot,
    data=df.astype({'Series': 'str'}),
    x='Stint',
    y='Number Stint Phylogenetic Roots',
    hue='Series',
    ci='sd',
    legend=False,
    teeplot_outattrs={
        # attributes describing the data behind the plot
        'bucket': ib.dub(df['Bucket']),
        'endeavor': ib.dub(df['Endeavor'].astype(int)),
        'transform': 'identity',
        '_dfdigest': dfdigest,
        # unpacked last, so its keys win on any collision with the ones above
        **make_outattr_metadata(),
    },
)


# explanatory factor: updates elapsed per stint


In [None]:
# Scatter of within-stint root counts against the 'Update' column, all rows.
tp.tee(
    sns.scatterplot,
    data=df,
    x='Update',
    y='Number Stint Phylogenetic Roots',
    teeplot_outattrs={
        # attributes describing the data behind the plot
        'bucket': ib.dub(df['Bucket']),
        'endeavor': ib.dub(df['Endeavor'].astype(int)),
        'transform': 'identity',
        '_dfdigest': dfdigest,
        # unpacked last, so its keys win on any collision with the ones above
        **make_outattr_metadata(),
    },
)


In [None]:
# Stint-100 snapshot of roots against 'Update', coloured by series.
# The rows are selected with an explicit filter instead of the dynamically
# created `df100` global, so this cell does not depend on the per-stint
# variable-binding loop having been run.  Output metadata still describes the
# full frame `df`.
tp.tee(
    sns.scatterplot,
    data=df[ df['Stint'] == 100 ].astype({'Series': 'str'}),
    x='Update',
    y='Number Stint Phylogenetic Roots',
    hue='Series',
    legend=False,
    teeplot_outattrs={
        **{
            'bucket' : ib.dub( df['Bucket'] ),
            'endeavor' : ib.dub( df['Endeavor'].astype(int) ),
            'transform' : 'identity',
            '_dfdigest' : dfdigest,
        },
        **make_outattr_metadata(),
    },
)


# explanatory factor: elapsed generations


In [None]:
# Stint-100 snapshot of roots against 'Elapsed Generations', coloured by series.
# The rows are selected with an explicit filter instead of the dynamically
# created `df100` global, so this cell does not depend on the per-stint
# variable-binding loop having been run.  Output metadata still describes the
# full frame `df`.
tp.tee(
    sns.scatterplot,
    data=df[ df['Stint'] == 100 ].astype({'Series': 'str'}),
    x='Elapsed Generations',
    y='Number Stint Phylogenetic Roots',
    hue='Series',
    legend=False,
    teeplot_outattrs={
        **{
            'bucket' : ib.dub( df['Bucket'] ),
            'endeavor' : ib.dub( df['Endeavor'].astype(int) ),
            'transform' : 'identity',
            '_dfdigest' : dfdigest,
        },
        **make_outattr_metadata(),
    },
)


In [None]:
# 'Elapsed Generations Delta' against stint, pooled over the whole frame.
tp.tee(
    sns.lineplot,
    data=df,
    x='Stint',
    y='Elapsed Generations Delta',
    teeplot_outattrs={
        # attributes describing the data behind the plot
        'bucket': ib.dub(df['Bucket']),
        'endeavor': ib.dub(df['Endeavor'].astype(int)),
        'transform': 'identity',
        '_dfdigest': dfdigest,
        # unpacked last, so its keys win on any collision with the ones above
        **make_outattr_metadata(),
    },
)


In [None]:
def dual_lineplot(*, data, x, y1, y2):
    """Overlay two lineplots that share an x axis but have separate y axes.

    `y1` is drawn in blue on the current axes; `y2` is drawn in red on a
    twin axes created with `twinx()`.  Each y-axis label is tinted to match
    its line.  All parameters are keyword-only.  Returns None.
    """
    # left-hand series: seaborn draws on (and returns) the current axes
    left_ax = sns.lineplot(data=data, x=x, y=y1, color='blue')
    left_ax.yaxis.label.set_color('blue')

    # right-hand series on a twin that shares the x axis
    right_ax = left_ax.twinx()
    sns.lineplot(data=data, x=x, y=y2, ax=right_ax, color='red')
    right_ax.yaxis.label.set_color('red')


# Two-axis overlay against stint: 'Elapsed Generations Delta' (y1) and
# 'Update' (y2).
tp.tee(
    dual_lineplot,
    data=df,
    x='Stint',
    y1='Elapsed Generations Delta',
    y2='Update',
    teeplot_outattrs={
        # attributes describing the data behind the plot
        'bucket': ib.dub(df['Bucket']),
        'endeavor': ib.dub(df['Endeavor'].astype(int)),
        'transform': 'identity',
        '_dfdigest': dfdigest,
        # unpacked last, so its keys win on any collision with the ones above
        **make_outattr_metadata(),
    },
)


In [None]:
def dual_lineplot(*, y1, y2, **kwargs):
    """Overlay two lineplots on shared x, with the second y axis inverted.

    `y1` is drawn in blue on the current axes; `y2` is drawn in red on a
    `twinx()` twin whose y axis is flipped.  Each y-axis label is tinted to
    match its line.  Remaining keyword arguments (e.g. `data`, `x`) are
    forwarded to both `sns.lineplot` calls, so they must not include `y`,
    `ax` or `color`.  Returns None.

    NOTE(review): this name is also defined in an earlier cell with a
    different signature; this definition replaces it from here on.
    """
    primary_ax = sns.lineplot(
        **kwargs,
        y=y1,
        color='blue',
    )
    primary_ax.yaxis.label.set_color('blue')

    # Keep a handle on the twin: `invert_yaxis()` returns None, so chaining it
    # into `ax=` would hand seaborn no axes at all and leave the plot target to
    # whichever axes pyplot happens to consider current.
    twin_ax = primary_ax.twinx()
    twin_ax.invert_yaxis()
    sns.lineplot(
        **kwargs,
        y=y2,
        ax=twin_ax,
        color='red',
    )
    twin_ax.yaxis.label.set_color('red')


# Two-axis overlay against stint: within-stint root counts (y1) and
# 'Elapsed Generations Delta' (y2).
tp.tee(
    dual_lineplot,
    data=df,
    x='Stint',
    y1='Number Stint Phylogenetic Roots',
    y2='Elapsed Generations Delta',
    teeplot_outattrs={
        # attributes describing the data behind the plot
        'bucket': ib.dub(df['Bucket']),
        'endeavor': ib.dub(df['Endeavor'].astype(int)),
        'transform': 'identity',
        '_dfdigest': dfdigest,
        # unpacked last, so its keys win on any collision with the ones above
        **make_outattr_metadata(),
    },
)


In [None]:
# 'Generations Per Update' against stint, pooled over the whole frame.
tp.tee(
    sns.lineplot,
    data=df,
    x='Stint',
    y='Generations Per Update',
    teeplot_outattrs={
        # attributes describing the data behind the plot
        'bucket': ib.dub(df['Bucket']),
        'endeavor': ib.dub(df['Endeavor'].astype(int)),
        'transform': 'identity',
        '_dfdigest': dfdigest,
        # unpacked last, so its keys win on any collision with the ones above
        **make_outattr_metadata(),
    },
)


# case study: series 16005


In [None]:
# Case study: within-stint root counts for series 16005 only.
# The plotted rows are filtered; the output metadata still describes the
# full frame `df`.
tp.tee(
    sns.lineplot,
    data=df[df['Series'] == 16005],
    x='Stint',
    y='Number Stint Phylogenetic Roots',
    teeplot_outattrs={
        # attributes describing the data behind the plot
        'bucket': ib.dub(df['Bucket']),
        'endeavor': ib.dub(df['Endeavor'].astype(int)),
        'transform': 'identity',
        '_dfdigest': dfdigest,
        # unpacked last, so its keys win on any collision with the ones above
        **make_outattr_metadata(),
    },
)


In [None]:
# Case study: within-stint root counts for series 16005, one hue per thread.
# NOTE(review): 'thread' is lowercase while every other column referenced in
# this notebook is capitalised -- confirm the column name against the CSV.
tp.tee(
    sns.lineplot,
    data=df[df['Series'] == 16005],
    x='Stint',
    y='Number Stint Phylogenetic Roots',
    hue='thread',
    teeplot_outattrs={
        # attributes describing the data behind the plot
        'bucket': ib.dub(df['Bucket']),
        'endeavor': ib.dub(df['Endeavor'].astype(int)),
        'transform': 'identity',
        '_dfdigest': dfdigest,
        # unpacked last, so its keys win on any collision with the ones above
        **make_outattr_metadata(),
    },
)


In [None]:
def logx_lineplot(*args, **kwargs):
    """Draw a seaborn lineplot and switch its x axis to a log scale.

    All positional and keyword arguments are forwarded unchanged to
    `sns.lineplot`.  Returns None.

    NOTE(review): a function with this name is also defined in an earlier
    cell; consider keeping a single definition.
    """
    # Rescale the Axes seaborn actually drew on (its return value) rather than
    # whatever pyplot considers current, so a forwarded `ax=` is respected.
    drawn_ax = sns.lineplot(
        *args,
        **kwargs,
    )
    drawn_ax.set_xscale('log')


# Case study: root counts for series 16005 on a log-scaled x axis.
# The plotted rows are filtered; the output metadata still describes the
# full frame `df`.
tp.tee(
    logx_lineplot,
    data=df[df['Series'] == 16005],
    x='Stint',
    y='Number Phylogenetic Roots',
    teeplot_outattrs={
        # attributes describing the data behind the plot
        'bucket': ib.dub(df['Bucket']),
        'endeavor': ib.dub(df['Endeavor'].astype(int)),
        'transform': 'identity',
        '_dfdigest': dfdigest,
        # unpacked last, so its keys win on any collision with the ones above
        **make_outattr_metadata(),
    },
)


In [None]:
# Case study: the 'Update' column against stint for series 16005.
# The plotted rows are filtered; the output metadata still describes the
# full frame `df`.
tp.tee(
    sns.lineplot,
    data=df[df['Series'] == 16005],
    x='Stint',
    y='Update',
    teeplot_outattrs={
        # attributes describing the data behind the plot
        'bucket': ib.dub(df['Bucket']),
        'endeavor': ib.dub(df['Endeavor'].astype(int)),
        'transform': 'identity',
        '_dfdigest': dfdigest,
        # unpacked last, so its keys win on any collision with the ones above
        **make_outattr_metadata(),
    },
)


In [None]:
# Case study: 'Elapsed Generations Delta' against stint for series 16005.
# The plotted rows are filtered; the output metadata still describes the
# full frame `df`.
tp.tee(
    sns.lineplot,
    data=df[df['Series'] == 16005],
    x='Stint',
    y='Elapsed Generations Delta',
    teeplot_outattrs={
        # attributes describing the data behind the plot
        'bucket': ib.dub(df['Bucket']),
        'endeavor': ib.dub(df['Endeavor'].astype(int)),
        'transform': 'identity',
        '_dfdigest': dfdigest,
        # unpacked last, so its keys win on any collision with the ones above
        **make_outattr_metadata(),
    },
)
