In [1]:
import pandas as pd
import altair as alt
from pathlib import Path

In [2]:
def latest_file(path: Path, pattern: str = "*"):
    """Return the entry in ``path`` matching ``pattern`` with the largest ``st_ctime``.

    Note that ``st_ctime`` is the time of the last metadata change on Unix and
    the creation time on Windows, so "latest" is platform dependent.

    Parameters
    ----------
    path : Path
        Directory that is searched with ``Path.glob``.
    pattern : str, default "*"
        Glob pattern the candidates have to match.

    Returns
    -------
    Path
        The matching entry with the most recent ``st_ctime``.

    Raises
    ------
    ValueError
        If nothing in ``path`` matches ``pattern``.
    """
    newest = max(
        path.glob(pattern),
        key=lambda candidate: candidate.stat().st_ctime,
        default=None,
    )
    if newest is None:
        # Without this guard max() fails with a generic "empty sequence" error
        # that tells the user neither which folder nor which pattern was searched.
        raise ValueError(f"No files matching {pattern!r} found in {str(path)!r}")
    return newest

In [4]:
# Work with the newest particle results export found in the results folder
# and report which file that is.
particle_path = latest_file(
    Path('results_csv/'),
    'particle_results*.csv',
)
print(f'Particle file used is:   {particle_path.name}')

Particle file used is:   particle_results_06-12-20_17-01.csv


In [7]:
# Particle data to long format: Version B
# Result: `pam`, with one row per particle and measured property, holding the
# pre- and post-treatment value of that property side by side.

# The identifier columns get a leading "_" so that splitting every column name
# at its last "_" (below) files them under an empty first level,
# e.g. '_wafer' -> ('', 'wafer'), next to ('area', 'pre'), ('area', 'post'), ...
particle_results = pd.read_csv(particle_path).rename(columns={
    'Unnamed: 0': '_preIndex',
    'wafer': '_wafer',
    'polymer': '_polymer',
    'treatment': '_treatment',
    'postIndices': '_postIndex',
})

# `n` is passed by keyword: pandas >= 2.0 accepts only `pat` positionally in
# `.str.rsplit`, so `rsplit('_', 1, expand=True)` raises a TypeError there.
pa = pd.DataFrame(
    particle_results.values,
    columns=particle_results.columns.str.rsplit('_', n=1, expand=True),
)
# NOTE(review): for the columns handled in this cell no first-level label
# starts with "_", so this rename looks like a no-op - confirm before removing.
pa.rename(columns={'_area': 'area', '_perimeter': 'perimeter', '_mean_intensity': 'mean_intensity'}, level=0, inplace=True)

pam = pa.melt(
    id_vars=[
        ('', 'wafer'),
        ('', 'polymer'),
        ('', 'treatment'),
        ('', 'preIndex'),
        ('', 'postIndex'),
    ],
    value_vars=[
        ('area', 'pre'),
        ('area', 'post'),
        ('perimeter', 'pre'),
        ('perimeter', 'post'),
        ('mean_intensity', 'pre'),
        ('mean_intensity', 'post'),
    ],
)

# The melted frame holds a 'pre' and a 'post' block per property, in the same
# property order, so the 'post' values (taken in row order) line up one-to-one
# with the remaining 'pre' rows and can be attached positionally.
is_post = pam['variable_1'] == 'post'
postValuesSeries = pam.loc[is_post, 'value']
pam = pam.loc[~is_post].drop(columns='variable_1').reset_index(drop=True)
pam['postValue'] = postValuesSeries.values
pam.columns = ['wafer', 'polymer', 'treatment', 'preIndex', 'postIndex', 'prop', 'preValue', 'postValue']

In [49]:
# Wide-format particle table for the heatmap; the derived columns are melted to
# long form later inside the chart by Altair's "transform_fold".
particle_results = pd.read_csv(particle_path)

# Rows to exclude: the acid calibration sample (w20) and the wafers listed below.
bad_wafers = [32, 34, 37, 96, 97, 69, 70, 71, 72, 73, 94, 95, 99, 100, 44]
excluded_wafers = ['w20'] + ['w' + str(w) for w in bad_wafers]
particle_results = (
    particle_results
    .loc[~particle_results.wafer.isin(excluded_wafers)]
    .rename(columns={'Unnamed: 0': 'preIndex', 'postIndices': 'postIndex'})
)

# Relative pre -> post change of each particle property in percent,
# rounded to the nearest multiple of 10.
paw = particle_results.assign(**{
    'mean particle area':
        ((particle_results['area_post'] / particle_results['area_pre'] - 1) * 10).round() * 10,
    'mean particle perimeter':
        ((particle_results['perimeter_post'] / particle_results['perimeter_pre'] - 1) * 10).round() * 10,
    'mean particle brightness':
        ((particle_results['mean_intensity_post'] / particle_results['mean_intensity_pre'] - 1) * 10).round() * 10,
})

In [54]:
# ===================
# Altair result plots
# ===================
alt.data_transformers.disable_max_rows()  # lift Altair's row limit so DataFrames with more than 5000 rows can be plotted


# Interactive selections
# ----------------------
# `picked`: single-click selection on the x/y encodings of the chart it is added
# to. With empty='none', filters that use it pass no rows while nothing is clicked.
picked = alt.selection_single(encodings=['x', 'y'], empty='none')  # clicking one element in one plot filters the other plots
# Properties offered in the heatmap dropdown. 'particle_count' is calculated
# inside the heatmap chart; the other three are columns of the wide table `paw`.
derivedColumns = ['particle_count', 'mean particle area', 'mean particle perimeter', 'mean particle brightness']
heatmapprop_dropdown = alt.binding_select(options=derivedColumns, name='Heatmap property')
# Selection on the folded 'Property' field, bound to the dropdown above and
# starting at 'particle_count'.
# The event filter `event.shiftKey&!event.shiftKey` can never be true, so no
# click satisfies it, and clear=False disables clearing - presumably so that the
# dropdown is the only way to change this selection (confirm).
heatmapprop_select = alt.selection_single(fields=['Property'], 
                                          bind=heatmapprop_dropdown, 
                                          init={'Property': 'particle_count'}, 
                                          clear=False, 
                                          on="click[event.shiftKey&!event.shiftKey]"
                                         )
# Dropdown choosing which particle property (values of the 'prop' field) the
# scatter plot and the boxplot show; starts at 'area'.
particleprop_dropdown = alt.binding_select(options=['area', 'perimeter', 'mean_intensity'], name='Particle property')
particleprop_select = alt.selection_single(fields=['prop'], bind=particleprop_dropdown, init={'prop': 'area'})


# Quant Heatmap
# -------------
# Heatmap of treatment (x) against polymer (y), built from the wide table `paw`.
# The cell colour is the mean of the property chosen in the heatmap dropdown.
# Transform order: per-wafer counts -> particle_count -> fold to long form ->
# filter to the chosen property.
quantHM = alt.Chart(paw).transform_joinaggregate(
    # per wafer: number of valid preIndex / postIndex entries, joined back onto every row
    numPre='valid(preIndex)',
    numPost='valid(postIndex)',
    groupby=['wafer']
).transform_calculate(
    # relative change of the valid-entry count in percent, rounded to a multiple of 10
    particle_count='round((datum.numPost / datum.numPre -1) *10) *10'
).transform_fold(
    # melt the derived columns into 'Property' (column name) and 'value' (its content)
    derivedColumns,
    as_=['Property', 'value']
).mark_rect().encode(
    x=alt.X('treatment:N', axis=alt.Axis(title=None, orient="top", domain=False)),
    y=alt.Y('polymer:N', axis=alt.Axis(title=None, orient="left", domain=False)),
    color=alt.Color('mean(value):Q', scale=alt.Scale(scheme='lighttealblue'), legend=alt.Legend(title=['Change', '+/- [%]'])),
    tooltip = alt.Tooltip(['wafer','mean(value):Q','Property:N'])
).add_selection(
    heatmapprop_select
).transform_filter(
    # keep only the rows of the property chosen in the dropdown
    heatmapprop_select
).add_selection(
    # clicking a cell sets `picked`, which the other charts use as a filter
    picked
).properties(
    height=300
)


# Scatter plot
# ------------
# Post-treatment value against pre-treatment value from the long table `pam`.
# The property and heatmap-cell filters are applied further down, on the layered chart.
particleScatter = alt.Chart(pam).mark_circle().encode(
    x='preValue',
    y='postValue',
    tooltip=['preIndex', 'postIndex', 'preValue', 'postValue']
)

# Linear regression of postValue on preValue, drawn as an orange line and
# derived from the scatter chart so that it shares its data and encodings.
linReg = particleScatter.transform_regression('preValue', 'postValue',method="linear"
).mark_line(color="orange", clip=True)

# Disabled experiments: confidence band around the regression line.
# linReg.mark_errorband(extent='ci').encode(
#     x='preValue',
#     y='postValue'
#     )
# band = alt.Chart(pam).mark_errorband(extent='ci').encode(
#     x='preValue',
#     y='postValue'
# )

# NOTE(review): the layer below resolves the x and y scales as independent, so
# this line is laid out on its own 0-9999999 scales (axes hidden), not on the
# scatter's data scales. It therefore only coincides with y = x of the scatter
# if the scatter's x and y domains are equal - confirm this is intended.
identityDF = pd.DataFrame({'x': [0,9999999], 'y': [0, 9999999]})  # make a mock-up DF that is used to plot an x=y identity line
identityLine = alt.Chart(identityDF).mark_line(color= 'black', strokeDash=[3,8], clip=True).encode(
    x=alt.X('x', axis=alt.Axis(labels=False, title='', domain=False, ticks=False)),
    y=alt.Y('y', axis=alt.Axis(labels=False, title='', domain=False, ticks=False))
)

# Text label "<treatment> on <polymer>" for the picked heatmap cell, shifted
# 180 px upwards. The chart is created without data of its own; presumably it
# picks up `pam` from the layer it is added to below - confirm.
texts = alt.Chart().mark_text(dy=-180, size=12).encode(
    text='label:N'
).transform_calculate(label='datum.treatment + " on " + datum.polymer'
).transform_filter(
    picked
)

# Scatter + regression + label, restricted to the property chosen in the
# dropdown and to the heatmap cell that was clicked.
scatterReg = (particleScatter + linReg + texts
).add_selection(
    particleprop_select
).transform_filter(
    particleprop_select
).transform_filter(
    picked
)

# Overlay the dashed line on the filtered scatter; each layer keeps its own x and y scale.
scatterAll = alt.layer(scatterReg, identityLine).resolve_scale(y='independent', x='independent').properties(
    width=300,
    height=300
)


# Boxplot
# -------
# Distribution of the relative pre -> post change (in percent) of the property
# chosen in the dropdown, for the heatmap cell that is currently picked.
# Outlier points are not drawn.
boxPlot = (
    alt.Chart(pam)
    .mark_boxplot(outliers=False)
    .encode(y='Change_in_percent:Q')
    .transform_calculate(
        Change_in_percent='(datum.postValue / datum.preValue -1) *100'
    )
    .transform_filter(particleprop_select)
    .transform_filter(picked)
)

# Putting plots together
# ----------------------
# Heatmap, scatter plot and boxplot side by side, styled through the top-level
# chart configuration and written to a standalone HTML file.
dashboard = alt.hconcat(
    quantHM,
    scatterAll,
    boxPlot,
    padding={"left": 50, "top": 50, "right": 50, "bottom": 50},
    spacing=50,
)
dashboard = dashboard.configure_scale(bandPaddingInner=0.1)
dashboard = dashboard.configure_title(orient='bottom', offset=20)
dashboard = dashboard.configure_view(strokeWidth=0)  # get rid of chart box
dashboard = dashboard.configure_axis(
    domain=False,  # no axis line
    ticks=False,
)
dashboard = dashboard.configure_legend(
    gradientLength=250,
    orient='left',
    titlePadding=20,
    gradientLabelOffset=10,
)
dashboard.save('quant_results.html')


# Moving interactive selector boxes to better location
# ----------------------------------------------------
# from IPython.display import HTML
# display(HTML("""
# <style>
# .vega-bind {
#   text-align:right;
# }
# </style>
# """))

In [None]:
# TRY: Heatmap from "pam" (melted particle results instead of wide)

# Drop data to be excluded.
# The mask has to be built from `pam` itself: a boolean Series taken from
# `particle_results` has a different index and cannot be aligned with `pam`.
# Filtering by membership (instead of dropping in place) also makes this cell
# safe to re-run.
# NOTE(review): this list is shorter than the one used for the wide-format
# table (32, 34, 37, 96 and 97 are not excluded here) - confirm which is intended.
bad_wafers = [69, 70, 71, 72, 73, 94, 95, 99, 100, 44]
excluded_wafers = ['w20'] + ['w' + str(w) for w in bad_wafers]  # w20: acid calibration sample
pam = pam[~pam.wafer.isin(excluded_wafers)]


#waferprop_radio = alt.binding_radio(options=['countRatios', 'areaRatios'], name='Wafer property:   ', empty=None)
#waferprop_select = alt.selection_single(fields=['variable'], bind=waferprop_radio)

#brush = alt.selection_interval(encodings=['x', 'y'])  # selection of type "interval"


# Heatmap of treatment (x) against polymer (y), coloured by the relative change
# in the number of valid index entries per wafer and property (percent, rounded
# to a multiple of 10).
alt.Chart(pam).transform_joinaggregate(
    # per wafer and property: number of valid preIndex / postIndex entries
    numPre='valid(preIndex)',
    numPost='valid(postIndex)',
    groupby=['wafer', 'prop']
).transform_calculate(
    countRatios='round((datum.numPost / datum.numPre -1) *10) *10'
).mark_rect(size=18).encode(
    x=alt.X('treatment:N', axis=alt.Axis(title=None, orient="top", domain=False)),
    y=alt.Y('polymer:N', axis=alt.Axis(title=None, orient="left", domain=False)),
    color=alt.Color('countRatios:Q', scale=alt.Scale(scheme='lighttealblue'), legend=alt.Legend(title=['Change', '+/- [%]'])),
    tooltip = alt.Tooltip(['wafer','numPre:N','numPost:N','countRatios:Q'])
)#.add_selection(
#     brush
# ).add_selection(
#     waferprop_select
# ).transform_filter(
#     waferprop_select
# )

