In [1]:
import pandas as pd
import altair as alt
from altair_saver import save
from pathlib import Path

In [2]:
def latest_file(path: Path, pattern: str = "*"):
    files = path.glob(pattern)
    return max(files, key=lambda x: x.stat().st_ctime)

In [3]:
wafer_path = latest_file(Path('results_csv/'),'wafer_results*.csv')
print (f'Wafer file used is:   {wafer_path.name}')
wafer_results = pd.read_csv(wafer_path)

# OBS: something is wrong with w70 (PMMA, H2O2): huge area_ratio... so drop it for now...
wafer_results.drop(wafer_results[wafer_results.wafer == 'w70'].index,inplace=True)

particle_path = latest_file(Path('results_csv/'),'particle_results*.csv')
print (f'Particle file used is:   {particle_path.name}')
particle_results = pd.read_csv(particle_path)

wafer_results.replace({'Napoly':'SPT', '2c':'Acrylate', '4c':'Epoxy'}, inplace=True)
particle_results.replace({'Napoly':'SPT', '2c':'Acrylate', '4c':'Epoxy'}, inplace=True)

particle_results_matched_only = particle_results.dropna()

Wafer file used is:   wafer_results_26-11-20_03-15.csv
Particle file used is:   particle_results_26-11-20_03-15.csv


In [4]:
particle_wafer_groups = particle_results_matched_only.groupby('wafer')
for _, group_content in particle_wafer_groups:  # cycle through polymer groups
    pre_area_sum = group_content.area_x.sum()
    wafer_results.loc[(wafer_results.wafer == group_content.wafer.unique()[0]), 'pre_area'] = pre_area_sum
    post_area_sum = group_content.area_y.sum()
    wafer_results.loc[(wafer_results.wafer == group_content.wafer.unique()[0]), 'post_area'] = post_area_sum
    

In [5]:
wafer_results.dropna(inplace=True)  # drop any lines were there were no results
wafer_results.drop(wafer_results[wafer_results.treatment == 'HCl'].index, inplace=True)  # drop HCl treatment (this was only used for calibration)

wafer_results['count_ratios'] = abs(wafer_results.post_count / wafer_results.pre_count - 1) * 100  # Get normalised count ratios for each wafer: counts of particles in pre state divided by counts in post state. Subtract 1 to normalise. Take absolute value to treat particle loss and particle addition as the same. Multiply by 100 to get percent values.
wafer_results['area_ratios'] = abs(wafer_results.post_area / wafer_results.pre_area - 1) * 100  # same for area...

wafer_polymer_groups = wafer_results.groupby('polymer')  # group by polymers

wafer_results_wrangled =pd.DataFrame()  # create empty df to fill in results from loop
for _, group_content in wafer_polymer_groups:  # cycle through polymer groups
        
    group_content['count_error'] = group_content['count_ratios'] - group_content.loc[group_content.treatment == 'water', 'count_ratios'].iloc[0]  # subtract count ratio of water from count ratios of all other treatments to get the percentage point error (meaning loss OR addition) of particle numbers caused by each treatment 
    group_content['area_error'] = group_content['area_ratios'] - group_content.loc[group_content.treatment == 'water', 'area_ratios'].iloc[0]  # same for area
        
    group_content.drop(group_content[group_content.treatment == 'water'].index, inplace=True)  # the rows with the water treatments can now be deleted
        
    # following line is good example how to not get into "set with copy" warning. Also several bool conditions could be used like that: df.loc[(df['A'] == 'blue') & (df['B'] == 'red') & (df['C'] == 'square'),'D'] = 'M5' (found here: https://stackoverflow.com/questions/21263020/pandas-update-value-if-condition-in-3-columns-are-met)
    group_content.loc[(group_content['count_error'] < 0), 'count_error'] = 0  # set all ratios that were smaller than water ratio (and thus got now negative due to water ratio correction) to 0.
    group_content.loc[(group_content['area_error'] < 0), 'area_error'] = 0  # same for are
    
    wafer_results_wrangled = wafer_results_wrangled.append(group_content)  # save results to df


In [9]:
# useful for radio buttons: https://stackoverflow.com/questions/59025953/on-an-altair-plot-can-you-change-the-location-that-a-selection-e-g-dropdown
# interactive heatmap: https://towardsdatascience.com/altair-plot-deconstruction-visualizing-the-correlation-structure-of-weather-data-38fb5668c5b1
# alt.data_transformers.disable_max_rows()  # if MaxRowsError becomes a problem
quant_HM_counts = alt.Chart(wafer_results_wrangled).mark_rect().encode(
    x=alt.X('treatment:N', axis=alt.Axis(title=None, orient="top", domain=False)),
    y=alt.Y('polymer:N', axis=alt.Axis(title=None, orient="left", domain=False)),
    color=alt.Color('count_error:Q'),
    tooltip = alt.Tooltip('wafer:N')
).properties(
    title='Particle counts'
)



quant_HM_areas = alt.Chart(wafer_results_wrangled).mark_rect().encode(
    x=alt.X('treatment:N', axis=alt.Axis(title=None, orient="top", domain=False)),
    y=alt.Y('polymer:N', axis=alt.Axis(title=None, labels=False, orient="right", domain=False)),
    color=alt.Color('area_error:Q', scale=alt.Scale(scheme='lighttealblue'), legend=alt.Legend(title="Change [%]")),
    tooltip = alt.Tooltip('wafer:N')
).properties(
    title='Particle area'
)



qhm = alt.hconcat(quant_HM_counts, quant_HM_areas).configure_rect(
    size=18
).configure_title(
    orient='bottom',
    offset=20
).configure_view(
    strokeWidth=0  # get rid of chart box
).configure_axis(
    title=None,
    domain=False,  # no axis line
    ticks=False
).configure_legend(
    gradientLength=250,
    orient='left',
    titlePadding = 20,
    gradientLabelOffset=10
)

save(qhm,'figures/quant_HM.html')

#alt.Chart.show(quant_HM)

In [7]:
import ipywidgets as widgets
from ipywidgets import interact, interact_manual
import os
from IPython.display import Image

@interact
def show_images(file=os.listdir('.')):
    display(Image(file))

interactive(children=(Dropdown(description='file', options=('venv', '__pycache__', 'result_plots.ipynb', 'test…