In [None]:
import boto3
import botocore
import functools
from IPython.core.display import display, HTML
from iterdub import iterdub as ib
from iterpop import iterpop as ip
import itertools as it
import json
import math
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pandas.util import hash_pandas_object
import seaborn as sns
from teeplot import teeplot as tp


In [None]:
from dishpylib.pyanalysis import calc_loglikelihoods_by_num_sets
from dishpylib.pyanalysis import calc_loglikelihoods_over_set_sizes
from dishpylib.pyanalysis import count_hands_with_k_or_more_sets
from dishpylib.pyanalysis import count_hands_without_k_or_more_sets
from dishpylib.pyanalysis import estimate_interpolation_complexity
from dishpylib.pyhelpers import get_control_t_distns
from dishpylib.pyhelpers import make_outattr_metadata
from dishpylib.pyhelpers import NumpyEncoder
from dishpylib.pyhelpers import preprocess_competition_fitnesses
from dishpylib.pyhelpers import print_runtime


In [None]:
# Emit run information for this notebook execution.
# NOTE(review): the exact output is defined by dishpylib.pyhelpers.print_runtime,
# which is not visible here -- presumably a timestamp/environment report; confirm.
print_runtime()


# get control data


In [None]:
# Load the control fits that are later handed to
# preprocess_competition_fitnesses together with the competition data.
# NOTE(review): the positional arguments look like (bucket, endeavor, stint):
# they match the 'prq49' bucket and the `endeavor=16` / `stint=40` components
# of the S3 prefix used in the data-loading cell -- confirm against
# dishpylib.pyhelpers.get_control_t_distns.
control_fits_df = get_control_t_distns('prq49', 16, 40)


# get data


In [None]:
# S3 resource whose requests are sent unsigned, i.e. without AWS credentials.
s3_handle = boto3.resource(
    's3',
    region_name="us-east-2",
    config=botocore.config.Config(
        signature_version=botocore.UNSIGNED,
    ),
)
bucket_handle = s3_handle.Bucket('prq49')

# Exactly one object is expected under this prefix: the one-element unpacking
# raises ValueError if the filter yields zero or several objects.
# (Plain string literal -- the prefix contains no interpolated fields.)
(series_profiles,) = bucket_handle.objects.filter(
    Prefix='endeavor=16/noncritical-phenotypeneutral-nopinterpolation-competitions/stage=6+what=collated/stint=40',
)


In [None]:
# Read the xz-compressed CSV object located above directly from S3.
df = pd.read_csv(f's3://prq49/{series_profiles.key}', compression='xz')

# Fingerprint of the loaded frame: the summed pandas row hashes, rendered as
# lowercase hexadecimal. It is attached to every saved plot further down.
dfdigest = format(hash_pandas_object(df).sum(), 'x')
dfdigest


# preprocess data


In [None]:
# Preprocess the raw competition data using the control fits loaded earlier.
# NOTE: this rebinds `df`, so the cell is not idempotent -- running it a second
# time feeds already-preprocessed data back into the helper. Re-run the
# data-loading cell first if this cell has to be executed again.
# NOTE(review): later cells read columns such as 'Fitness Differential' and
# 'Relative Fitness'; presumably they are added here -- confirm in dishpylib.
df = preprocess_competition_fitnesses(df, control_fits_df)


# summarize data and model fitting


In [None]:
def log_lineplot(*args, **kwargs):
    """Draw a seaborn line plot with a logarithmic y axis.

    All positional and keyword arguments are forwarded unchanged to
    ``sns.lineplot``.

    The log scale and the autoscaling are applied to the axes that
    ``sns.lineplot`` returns, i.e. the axes that were actually drawn on.
    This keeps the function correct when a caller forwards ``ax=...`` for an
    axes that is not pyplot's current one; without ``ax`` the behaviour is
    the same as operating on the current axes.

    Returns None.
    """
    ax = sns.lineplot(*args, **kwargs)
    ax.set_yscale('log')
    # Recompute the view limits now that the y axis is logarithmic.
    ax.autoscale()

def lineplot_scatterplot(*args, **kwargs):
    """Overlay a gray line plot with category-colored scatter points.

    Positional and keyword arguments are forwarded to both ``sns.lineplot``
    and ``sns.scatterplot``, except that ``hue`` is withheld from the line
    plot so the line is drawn in a single gray color beneath the points.

    The scatter points are colored through a fixed mapping from the three
    category labels to entries of the current seaborn color palette, and the
    legend is replaced by one patch per category using those same colors.
    """
    # Line layer: everything except `hue`, drawn underneath (zorder=1).
    line_kwargs = dict(kwargs)
    line_kwargs.pop('hue', None)
    sns.lineplot(*args, **line_kwargs, color='gray', zorder=1)

    # Single source of truth for category colors, shared by points and legend.
    base_colors = sns.color_palette()
    category_colors = {
        'Significantly Advantageous' : base_colors[2],
        'Neutral' : base_colors[0],
        'Significantly Deleterious' : base_colors[1],
    }

    # Point layer: drawn on top of the line (zorder=2).
    sns.scatterplot(*args, **kwargs, palette=category_colors, zorder=2)

    plt.legend(
        handles=[
            matplotlib.patches.Patch(color=swatch, label=category)
            for category, swatch in category_colors.items()
        ],
    )


In [None]:
# Optional notebook tweak (disabled): injects CSS that sets the height of
# `div.output_scroll` elements to 1000em.
# display(HTML("<style>div.output_scroll { height: 1000em; }</style>"))

# Output attributes shared by every figure saved below. They depend only on
# the whole frame and its digest, so they are computed once here instead of
# being rebuilt for both plots of every series.
# NOTE(review): none of these attributes identifies the series. If teeplot
# derives output file names only from these attributes and the plot kwargs,
# figures of different series may overwrite one another -- confirm.
shared_outattrs = {
    'bucket' : ib.dub( df['Treatment bucket'] ),
    'endeavor' : ib.dub( df['Competition Endeavor'] ),
    'transform' : 'filter-Stint-40',
    '_dfdigest' : dfdigest,
}

# One report section per genome series: competition plot, likelihood plot,
# then the fitted model summary as JSON.
for series in df['genome series'].unique():

    display(HTML(f'<h1>series {series}</h1>'))


    display(HTML('<h2>interpolation competition</h2>'))
    tp.tee(
        lineplot_scatterplot,
        x='genome nop_interpolation_num_nopped',
        y='Fitness Differential',
        hue='Relative Fitness',
        # only this series' rows that carry an interpolation step value
        data=df[df['genome series'] == series].dropna(subset=['genome nop_interpolation_num_nopped']),
        teeplot_outattrs={
            **shared_outattrs,
            **make_outattr_metadata(),
        },
    )
    plt.show()



    display(HTML('<h2>num_sets fitting</h2>'))
    tp.tee(
        log_lineplot,
        x='num_sets',
        y='likelihood',
        hue='set_size',
        data=calc_loglikelihoods_over_set_sizes(
            series=series,
            interpolation_competitions_df=df,
        ).astype({
            # so seaborn will color as categorical, not quantitative
            'set_size': 'str',
        }),
        teeplot_outattrs={
            **shared_outattrs,
            **make_outattr_metadata(),
        },
    )
    plt.show()

    display(HTML('<h2>model fit results</h2>'))
    # NumpyEncoder is passed so json can serialize the estimate's values.
    print(json.dumps(
        estimate_interpolation_complexity(
            series=series,
            interpolation_competitions_df=df,
        ),
        sort_keys=True,
        indent=4,
        cls=NumpyEncoder,
    ))
