In [1]:
import pandas as pd
import altair as alt
from pathlib import Path

In [2]:
def latest_file(path: Path, pattern: str = "*"):
    files = path.glob(pattern)
    return max(files, key=lambda x: x.stat().st_ctime)

In [3]:
wafer_path = latest_file(Path('results_csv/'),'wafer_results*.csv')
print (f'Wafer file used is:   {wafer_path.name}')
wafer_results = pd.read_csv(wafer_path)

# OBS: something is wrong with w70 (PMMA, H2O2): huge area_ratio... so drop it for now...
wafer_results.drop(wafer_results[wafer_results.wafer == 'w70'].index,inplace=True)

particle_path = latest_file(Path('results_csv/'),'particle_results*.csv')
print (f'Particle file used is:   {particle_path.name}')
particle_results = pd.read_csv(particle_path)
particle_results.rename(columns={'Unnamed: 0': 'ID_pre'}, inplace=True)

wafer_results.replace({'Napoly':'SPT', '2c':'Acrylate', '4c':'Epoxy'}, inplace=True)
particle_results.replace({'Napoly':'SPT', '2c':'Acrylate', '4c':'Epoxy'}, inplace=True)

particle_results_matched_only = particle_results.dropna()

particle_results_prepostlists = particle_results_matched_only[['ID_pre', 'ID_post', 'wafer', 'polymer']]
particle_results_prepostlists['area'] = particle_results_matched_only[['area_x','area_y']].values.tolist()
particle_results_prepostlists['perimeter'] = particle_results_matched_only[['perimeter_x','perimeter_y']].values.tolist()
particle_results_prepostlists['major_axis'] = particle_results_matched_only[['major_axis_length_x','major_axis_length_y']].values.tolist()
particle_results_prepostlists['minor_axis'] = particle_results_matched_only[['minor_axis_length_x','minor_axis_length_y']].values.tolist()
particle_results_prepostlists['intensity'] = particle_results_matched_only[['mean_intensity_x','mean_intensity_y']].values.tolist()

particle_results_melted = particle_results_prepostlists.melt(id_vars=['ID_pre', 'ID_post', 'wafer', 'polymer'], value_vars=['area', 'perimeter', 'major_axis', 'minor_axis', 'intensity'])
particle_results_melted[['pre', 'post']] = pd.DataFrame(particle_results_melted['value'].tolist(), index = particle_results_melted.index)
melted_particles = particle_results_melted.drop(columns='value')

Wafer file used is:   wafer_results_04-12-20_01-02.csv
Particle file used is:   particle_results_04-12-20_01-02.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  particle_results_prepostlists['area'] = particle_results_matched_only[['area_x','area_y']].values.tolist()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  particle_results_prepostlists['perimeter'] = particle_results_matched_only[['perimeter_x','perimeter_y']].values.tolist()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning

In [4]:
particle_wafer_groups = particle_results_matched_only.groupby('wafer')
for _, group_content in particle_wafer_groups:  # cycle through polymer groups
    pre_area_sum = group_content.area_x.sum()
    wafer_results.loc[(wafer_results.wafer == group_content.wafer.unique()[0]), 'pre_area'] = pre_area_sum
    post_area_sum = group_content.area_y.sum()
    wafer_results.loc[(wafer_results.wafer == group_content.wafer.unique()[0]), 'post_area'] = post_area_sum
    

In [5]:
wafer_results.dropna(inplace=True)  # drop any lines were there were no results
wafer_results.drop(wafer_results[wafer_results.treatment == 'HCl'].index, inplace=True)  # drop HCl treatment (this was only used for calibration)

wafer_results['count_ratios'] = abs(wafer_results.post_count / wafer_results.pre_count - 1) * 100  # Get normalised count ratios for each wafer: counts of particles in pre state divided by counts in post state. Subtract 1 to normalise. Take absolute value to treat particle loss and particle addition as the same. Multiply by 100 to get percent values.
wafer_results['area_ratios'] = abs(wafer_results.post_area / wafer_results.pre_area - 1) * 100  # same for area...

wafer_polymer_groups = wafer_results.groupby('polymer')  # group by polymers

wafer_results_wrangled =pd.DataFrame()  # create empty df to fill in results from loop
for _, group_content in wafer_polymer_groups:  # cycle through polymer groups
        
    group_content['count_error'] = group_content['count_ratios'] - group_content.loc[group_content.treatment == 'water', 'count_ratios'].iloc[0]  # subtract count ratio of water from count ratios of all other treatments to get the percentage point error (meaning loss OR addition) of particle numbers caused by each treatment 
    group_content['area_error'] = group_content['area_ratios'] - group_content.loc[group_content.treatment == 'water', 'area_ratios'].iloc[0]  # same for area
        
    group_content.drop(group_content[group_content.treatment == 'water'].index, inplace=True)  # the rows with the water treatments can now be deleted
        
    # following line is good example how to not get into "set with copy" warning. Also several bool conditions could be used like that: df.loc[(df['A'] == 'blue') & (df['B'] == 'red') & (df['C'] == 'square'),'D'] = 'M5' (found here: https://stackoverflow.com/questions/21263020/pandas-update-value-if-condition-in-3-columns-are-met)
    group_content.loc[(group_content['count_error'] < 0), 'count_error'] = 0  # set all ratios that were smaller than water ratio (and thus got now negative due to water ratio correction) to 0.
    group_content.loc[(group_content['area_error'] < 0), 'area_error'] = 0  # same for are
    
    wafer_results_wrangled = wafer_results_wrangled.append(group_content)  # save results to df
    melted_wafers = wafer_results_wrangled.melt(id_vars=['wafer','polymer','treatment'], value_vars=['count_error', 'area_error'])


In [19]:
alt.data_transformers.disable_max_rows()

waferprop_radio = alt.binding_radio(options=['count_error', 'area_error'], name='Wafer property:   ')
waferprop_select = alt.selection_single(fields=['variable'], bind=waferprop_radio)

particleprop_dropdown = alt.binding_select(options=['area', 'perimeter', 'major_axis', 'minor_axis', 'intensity'], name='Particle properties')
particleprop_select = alt.selection_single(fields=['variable'], bind=particleprop_dropdown)

brush = alt.selection_interval(encodings=['x', 'y'])  # selection of type "interval"

# useful for radio buttons: https://stackoverflow.com/questions/59025953/on-an-altair-plot-can-you-change-the-location-that-a-selection-e-g-dropdown
# interactive heatmap: https://towardsdatascience.com/altair-plot-deconstruction-visualizing-the-correlation-structure-of-weather-data-38fb5668c5b1
# alt.data_transformers.disable_max_rows()  # if MaxRowsError becomes a problem
quant_HM = alt.Chart(melted_wafers).mark_rect().encode(
    x=alt.X('treatment:N', axis=alt.Axis(title=None, orient="top", domain=False)),
    y=alt.Y('polymer:N', axis=alt.Axis(title=None, orient="left", domain=False)),
    color=alt.Color('value:Q', scale=alt.Scale(scheme='lighttealblue'), legend=alt.Legend(title=['Change', '+/- [%]'])),
    tooltip = alt.Tooltip('wafer:N')
).add_selection(
    waferprop_select
).transform_filter(
    waferprop_select
).add_selection(brush)



particle_scatter = alt.Chart(melted_particles).mark_circle().encode(
    x='pre',
    y='post',
    tooltip=['ID_pre', 'ID_post']
).add_selection(
    particleprop_select
).transform_filter(
    particleprop_select
).transform_filter(
    brush
)




alt.hconcat(quant_HM, particle_scatter).configure_rect(
    size=18
).configure_title(
    orient='bottom',
    offset=20
).configure_view(
    strokeWidth=0  # get rid of chart box
).configure_axis(
    title=None,
    domain=False,  # no axis line
    ticks=False
).configure_legend(
    gradientLength=250,
    orient='left',
    titlePadding = 20,
    gradientLabelOffset=10
).add_selection(brush)

#quant_HM_areas = alt.Chart(wafer_results_wrangled).mark_rect().encode(
#    x=alt.X('treatment:N', axis=alt.Axis(title=None, orient="top", domain=False)),
#    y=alt.Y('polymer:N', axis=alt.Axis(title=None, labels=False, orient="right", domain=False)),
#    color=alt.Color('area_error:Q', scale=alt.Scale(scheme='lighttealblue'), legend=alt.Legend(title=['Change', '+/- [%]'])),
#    tooltip = alt.Tooltip('wafer:N')
#).properties(
#    title='Particle area'
#)

#brush = alt.selection_interval()  # selection of type "interval"

#qhm = alt.hconcat(quant_HM_counts, quant_HM_areas).configure_rect(
#    size=18
#).configure_title(
#    orient='bottom',
#    offset=20
#).configure_view(
#    strokeWidth=0  # get rid of chart box
#).configure_axis(
#    title=None,
#    domain=False,  # no axis line
#    ticks=False
#).configure_legend(
#    gradientLength=250,
#    orient='left',
#    titlePadding = 20,
#    gradientLabelOffset=10
#).add_selection(brush)

#alt.Chart.save(qhm,'figures/quant_HM.html', embed_options={'renderer':'svg'})

#alt.Chart.show(quant_HM)

In [8]:
# useful for radio buttons: https://stackoverflow.com/questions/59025953/on-an-altair-plot-can-you-change-the-location-that-a-selection-e-g-dropdown
# interactive heatmap: https://towardsdatascience.com/altair-plot-deconstruction-visualizing-the-correlation-structure-of-weather-data-38fb5668c5b1
# alt.data_transformers.disable_max_rows()  # if MaxRowsError becomes a problem
quant_HM_counts = alt.Chart(wafer_results_wrangled).mark_rect().encode(
    x=alt.X('treatment:N', axis=alt.Axis(title=None, orient="top", domain=False)),
    y=alt.Y('polymer:N', axis=alt.Axis(title=None, orient="left", domain=False)),
    color=alt.Color('count_error:Q'),
    tooltip = alt.Tooltip('wafer:N')
).properties(
    title='Particle counts'
)



quant_HM_areas = alt.Chart(wafer_results_wrangled).mark_rect().encode(
    x=alt.X('treatment:N', axis=alt.Axis(title=None, orient="top", domain=False)),
    y=alt.Y('polymer:N', axis=alt.Axis(title=None, labels=False, orient="right", domain=False)),
    color=alt.Color('area_error:Q', scale=alt.Scale(scheme='lighttealblue'), legend=alt.Legend(title=['Change', '+/- [%]'])),
    tooltip = alt.Tooltip('wafer:N')
).properties(
    title='Particle area'
)



qhm = alt.hconcat(quant_HM_counts, quant_HM_areas).configure_rect(
    size=18
).configure_title(
    orient='bottom',
    offset=20
).configure_view(
    strokeWidth=0  # get rid of chart box
).configure_axis(
    title=None,
    domain=False,  # no axis line
    ticks=False
).configure_legend(
    gradientLength=250,
    orient='left',
    titlePadding = 20,
    gradientLabelOffset=10
)

alt.Chart.save(qhm,'figures/quant_HM.html', embed_options={'renderer':'svg'})

#alt.Chart.show(quant_HM)

In [None]:
import ipywidgets as widgets
from ipywidgets import interact, interact_manual
import os
from IPython.display import Image

@interact
def show_images(file=os.listdir('.')):
    display(Image(file))

In [None]:
import altair as alt
import pandas as pd

source = pd.DataFrame.from_records([
      {"img": "https://www.thecakedecoratingcompany.co.uk/images/the-cake-decorating-co-red-square-drum-cake-board-p9622-19420_image.jpg"}
])

alt.Chart(source).mark_image(
    width=50,
    height=100
).encode(
    url='img'
)

In [15]:
df = pd.DataFrame({'name_id': [1,2,3,4,5,6], 'house': ['red','yellow','blue','pink','red','blue'], 'school': ['abc','dps','gdf','abc','dps','gdf'], 'duration': [20,30,40,20,10,5]})

In [16]:
input_dropdown1 = alt.binding_select(options=['abc','dps','gdf'])
input_dropdown2 = alt.binding_select(options=['red','yello','blue','pink'])
selection1 = alt.selection_single(fields=['house'], bind=input_dropdown1, name='belonging ')
selection2 = alt.selection_single(fields=['school'], bind=input_dropdown2, name='origin ')

 
alt.Chart(df).mark_point().encode(
    x='name_id:Q',
    y='duration:Q',
    color='house',
    ##### added the opacity ,here
    opacity=alt.condition(
         selection1 & selection2,
         alt.value(1),
         alt.value(0.1)
)).add_selection(
    selection1,
    selection2
)

In [23]:
from vega_datasets import data

movies = alt.UrlData(
    data.movies.url,
    format=alt.DataFormat(parse={"Release_Date":"date"})
)
ratings = ['G', 'NC-17', 'PG', 'PG-13', 'R']
genres = ['Action', 'Adventure', 'Black Comedy', 'Comedy',
       'Concert/Performance', 'Documentary', 'Drama', 'Horror', 'Musical',
       'Romantic Comedy', 'Thriller/Suspense', 'Western']

base = alt.Chart(movies, width=200, height=200).mark_point(filled=True).transform_calculate(
    Rounded_IMDB_Rating = "floor(datum.IMDB_Rating)",
    Hundred_Million_Production =  "datum.Production_Budget > 100000000.0 ? 100 : 10",
    Release_Year = "year(datum.Release_Date)"
).transform_filter(
    alt.datum.IMDB_Rating > 0
).transform_filter(
    alt.FieldOneOfPredicate(field='MPAA_Rating', oneOf=ratings)
).encode(
    x=alt.X('Worldwide_Gross:Q', scale=alt.Scale(domain=(100000,10**9), clamp=True)),
    y='IMDB_Rating:Q',
    tooltip="Title:N"
)

# A slider filter
year_slider = alt.binding_range(min=1969, max=2018, step=1)
slider_selection = alt.selection_single(bind=year_slider, fields=['Release_Year'], name="Release Year_")


filter_year = base.add_selection(
    slider_selection
).transform_filter(
    slider_selection
).properties(title="Slider Filtering")

# A dropdown filter
genre_dropdown = alt.binding_select(options=genres)
genre_select = alt.selection_single(fields=['Major_Genre'], bind=genre_dropdown, name="Genre")

filter_genres = base.add_selection(
    genre_select
).transform_filter(
    genre_select
).properties(title="Dropdown Filtering")

#color changing marks
rating_radio = alt.binding_radio(options=ratings)

rating_select = alt.selection_single(fields=['MPAA_Rating'], bind=rating_radio, name="Rating")
rating_color_condition = alt.condition(rating_select,
                      alt.Color('MPAA_Rating:N', legend=None),
                      alt.value('lightgray'))

highlight_ratings = base.add_selection(
    rating_select
).encode(
    color=rating_color_condition
).properties(title="Radio Button Highlighting")

# Boolean selection for format changes
input_checkbox = alt.binding_checkbox()
checkbox_selection = alt.selection_single(bind=input_checkbox, name="Big Budget Films")

size_checkbox_condition = alt.condition(checkbox_selection,
                                        alt.SizeValue(25),
                                        alt.Size('Hundred_Million_Production:Q')
                                       )

budget_sizing = base.add_selection(
    checkbox_selection
).encode(
    size=size_checkbox_condition
).properties(title="Checkbox Formatting")

( filter_year | filter_genres) &  (highlight_ratings | budget_sizing  )