<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"></ul></div>

In [None]:
# Do once to get all libraries up to date
if False:
    !pip install -U pandas numpy process_improve plotly IPython

import pathlib
from process_improve.batch.preprocessing import find_reference_batch, batch_dtw
from process_improve.batch.data_input import melted_to_dict
from process_improve.batch.plotting  import plot__all_batches_per_tag
import plotly.graph_objs as go
from plotly.offline import iplot, init_notebook_mode
from IPython.core.display import display, HTML
import pandas as pd

In [None]:
# Notebook-wide settings

# Tags used for the alignment. Ideally, use more than 1 tag to align on. These columns
# must exist in all data frames for all batches, and there should be NO missing data
# in any of them.
columns_to_align = [
    "AgitatorPower",
    "AgitatorTorque",
    "JacketTemperature",
    "DryerTemp",
]
# Tag shown in the plots further down (index 3 is "DryerTemp" in the list above).
tag_to_plot = columns_to_align[3]

# Initialise plotly's notebook mode, then inject CSS so the notebook container
# uses the full page width.
init_notebook_mode(connected=True)
display(HTML("<style>.container { width:100% !important; }</style>"))

In [None]:
# Import the data: a dictionary of dataframes, built by `melted_to_dict` from the
# raw CSV using the `batch_id` column.
import process_improve.datasets.batch as batch_ds

# Locate the folder that holds the bundled datasets through the package's `__path__`.
# Plain indexing works both when `__path__` is an ordinary list (regular package) and
# when it is a namespace-package path object; the private `_recalculate()` method only
# exists on the latter, so calling it directly can raise AttributeError.
dataset_dir = pathlib.Path(batch_ds.__path__[0])
dryer_raw = pd.read_csv(dataset_dir / "dryer.csv")
dryer_df_dict = melted_to_dict(dryer_raw, batch_id_col="batch_id")

# Last expression of the cell: show which keys were loaded.
dryer_df_dict.keys()

In [None]:
# Quick look at the raw data for the selected tag, to get an idea of what is present
raw_tag_figure = plot__all_batches_per_tag(
    df_dict=dryer_df_dict,
    tag=tag_to_plot,
    time_column="ClockTime",
    x_axis_label="Time [hours]",
)
iplot(raw_tag_figure)

In [None]:
# Ask the library which batch is a good one to align all the others on
reference_search_settings = {"robust": False}
good_reference_candidate = find_reference_batch(
    dryer_df_dict,
    columns_to_align=columns_to_align,
    settings=reference_search_settings,
)

# Last expression of the cell: display the selected candidate.
good_reference_candidate

In [None]:
# Align the data based on the reference candidate, using the specified tags/columns.
dtw_settings = {
    "robust": False,
    # A high tolerance of around 1.0 will run the alignment only once.
    # Typically set to 0.1, so that at least 2 or 3 iterations of alignment take place.
    "tolerance": 0.05,
    # Show progress: shows the total "distance" of a batch relative to the reference.
    "show_progress": True,
}
aligned_out = batch_dtw(
    dryer_df_dict,
    columns_to_align=columns_to_align,
    reference_batch=good_reference_candidate,
    settings=dtw_settings,
)
    

In [None]:
# NOTE: this is still in progress. The library API will change in terms of what the
# output from `batch_dtw` is. For now a bit of post-processing on the result is needed:
# each batch's synced data is multiplied by the 'Range' column of `scale_df` and the
# 'Minimum' column is added (presumably undoing the scaling used during alignment --
# TODO confirm against the library).
synced = {
    batch_id: result.synced * aligned_out['scale_df']['Range'] + aligned_out['scale_df']['Minimum']
    for batch_id, result in aligned_out['aligned_batch_objects'].items()
}
    

In [None]:
# Caption first, then the weight history as a table with one column per aligned tag.
print('Weight history (the higher the weight, the greater the importance of that tag in the alignment process):')
weight_history_df = pd.DataFrame(aligned_out['weight_history'], columns=columns_to_align)
display(weight_history_df)

In [None]:
# Plot the aligned (synced) data for the same tag that was plotted before alignment
aligned_tag_figure = plot__all_batches_per_tag(
    synced,
    tag_to_plot,
    x_axis_label='Normalized duration',
    html_aspect_ratio_w_over_h=2,
)
iplot(aligned_tag_figure)