<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"></ul></div>

In [1]:
# Do once to get all libraries up to date
if False:
    !pip install -U pandas numpy process_improve plotly IPython

import pathlib
from process_improve.batch.preprocessing import find_reference_batch, batch_dtw
from process_improve.batch.data_input import melted_to_dict
from process_improve.batch.plotting  import plot__all_batches_per_tag
import plotly.graph_objs as go
from plotly.offline import iplot, init_notebook_mode
from IPython.core.display import display, HTML
import pandas as pd

In [2]:
# Notebook-wide settings: set up plotly's notebook mode and let the notebook
# container use the full width of the browser window.
init_notebook_mode(connected=True)
display(HTML("<style>.container { width:100% !important; }</style>"))

# Tags (columns) to align on; ideally more than one. Every one of these columns must
# exist in the data frame of every batch, and none of them may contain missing data.
columns_to_align = [
    "AgitatorPower",
    "AgitatorTorque",
    "JacketTemperature",
    "DryerTemp",
]

# The tag shown in the plots further down: the entry at index 3 of the list above.
tag_to_plot = columns_to_align[3]

In [3]:
# Import the data: read the "dryer.csv" file located in the `process_improve.datasets.batch`
# package directory and convert it into a dictionary of dataframes (split on "batch_id").
import process_improve.datasets.batch as batch_ds

# A package's `__path__` is an iterable of directory names. Take its first entry through the
# public iteration protocol instead of the private `_recalculate()` method, which exists
# only on namespace packages and is an implementation detail of `importlib`.
dataset_dir = pathlib.Path(next(iter(batch_ds.__path__)))
dryer_raw = pd.read_csv(dataset_dir / "dryer.csv")
dryer_df_dict = melted_to_dict(dryer_raw, batch_id_col="batch_id")

# Show the keys of the resulting dictionary as the cell output.
dryer_df_dict.keys()

dict_keys([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71])

In [4]:
# Quick visual check of the raw (not yet aligned) data: the selected tag, for all batches.
raw_batches_fig = plot__all_batches_per_tag(
    df_dict=dryer_df_dict,
    tag=tag_to_plot,
    time_column="ClockTime",
    x_axis_label="Time [hours]",
)
iplot(raw_batches_fig)

In [5]:
# Ask the library which batch is a good candidate to align all the other batches on.
reference_settings = {"robust": False}
good_reference_candidate = find_reference_batch(
    dryer_df_dict,
    columns_to_align=columns_to_align,
    settings=reference_settings,
)

# Display the chosen batch identifier as the cell output.
good_reference_candidate

22

In [10]:
# Align all batches on the reference candidate, using the specified tags/columns.
dtw_settings = {
    "robust": False,
    # A high tolerance (around 1.0) runs the alignment only once. A typical value is 0.1,
    # so that at least 2 or 3 iterations of alignment take place.
    "tolerance": 0.05,
    # Show progress: prints the total "distance" of each batch relative to the reference.
    "show_progress": True,
}
aligned_out = batch_dtw(
    dryer_df_dict,
    columns_to_align=columns_to_align,
    reference_batch=good_reference_candidate,
    settings=dtw_settings,
)
    

Iter = 0 and norm = 2.0
  * 1: distance = 176.24384509022656
  * 2: distance = 224.04238113284924
  * 3: distance = 158.52144513597128
  * 4: distance = 295.79315506108634
  * 5: distance = 168.97404323669178
  * 6: distance = 208.22362415899653
  * 7: distance = 118.23648410682776
  * 8: distance = 89.09836752907876
  * 9: distance = 69.77135985976605
  * 10: distance = 326.4299490442387
  * 11: distance = 63.272452501528015
  * 12: distance = 145.9553005184147
  * 13: distance = 132.47857348614343
  * 14: distance = 55.68663649942192
  * 15: distance = 22.22419188310919
  * 16: distance = 27.76633553056108
  * 17: distance = 27.012171837224944
  * 18: distance = 37.74677521921444
  * 19: distance = 37.384182328163234
  * 20: distance = 330.28088891328497
  * 21: distance = 16.623725528186544
  * 22: distance = 0.0
  * 23: distance = 245.8581348094801
  * 24: distance = 20.241472604717426
  * 25: distance = 46.913049921512176
  * 26: distance = 385.71948315768566
  * 27: distance = 47

In [7]:
# NOTE: this is still in progress. The library API will change in terms of what the output
# from `batch_dtw` is; for now the result needs a bit of post-processing.
# Each aligned batch's `synced` data is multiplied by the 'Range' column of `scale_df` and
# offset by its 'Minimum' column (presumably undoing the scaling applied before the
# alignment -- TODO confirm against the library).
tag_range = aligned_out['scale_df']['Range']
tag_minimum = aligned_out['scale_df']['Minimum']
synced = {
    batch_id: aligned.synced * tag_range + tag_minimum
    for batch_id, aligned in aligned_out['aligned_batch_objects'].items()
}
    

In [8]:
# Print a heading, then tabulate the recorded weight history with one column per
# alignment tag.
print("Weight history (the higher the weight, the greater the importance of that tag in the alignment process):")
weight_history_df = pd.DataFrame(
    aligned_out["weight_history"],
    columns=columns_to_align,
)
display(weight_history_df)

Weight history (the higher the weight, the greater the importance of that tag in the alignment process):


Unnamed: 0,AgitatorPower,AgitatorTorque,JacketTemperature,DryerTemp
0,1.0,1.0,1.0,1.0
1,0.524787,1.073823,1.539865,0.861526


In [9]:
# Plot the aligned (synced) data for the selected tag, for all batches.
aligned_batches_fig = plot__all_batches_per_tag(
    synced,
    tag_to_plot,
    x_axis_label="Normalized duration",
    html_aspect_ratio_w_over_h=2,
)
iplot(aligned_batches_fig)