In [10]:
#import graphviz 
#from IPython.display import Image
#import pydotplus


import pandas as pd #dataframe handling library
import numpy as np #math library

import os.path #path library


# Use bokeh to plot Interactive Plots on separate tab
from bokeh.models.annotations import Title
from bokeh.embed import components, file_html
from bokeh.resources import CDN
from bokeh.io import output_file, show
from bokeh.layouts import row, column, gridplot
from bokeh.plotting import figure
from bokeh.transform import factor_cmap
from bokeh.models import ColumnDataSource, Range1d, LabelSet, Label
from bokeh.palettes import Reds256, Category10_4,Spectral6
from bokeh.models import HoverTool, WheelZoomTool, PanTool, BoxZoomTool, ResetTool, TapTool, SaveTool
from collections import OrderedDict

import matplotlib
import matplotlib.pyplot as plt
#import seaborn as sns
#from mpl_toolkits import mplot3d



from sklearn.model_selection import RepeatedKFold, RepeatedStratifiedKFold
from sklearn.linear_model import LinearRegression

from scipy.optimize import fsolve
from scipy.signal import find_peaks #science library peak finder algorithm


import itertools
np.set_printoptions(precision=3, suppress=True)  # suppress scientific float notation

# SETTINGS FOR NOTEBOOK
# Option names are fully qualified on purpose: short names such as 'max_columns'
# are resolved by pattern matching and raise OptionError as soon as more than one
# registered option matches (e.g. 'display.max_columns' and 'styler.render.max_columns').
pd.set_option('display.max_columns', None)   # show all columns
pd.set_option('display.max_rows', None)      # show all rows
pd.set_option('display.max_colwidth', 10)    # cap the displayed width of each cell at 10 characters


import pickle
import joblib
#from sklearn.externals import joblib
from joblib import Memory

#from ipywidgets import interactive

In [11]:
# Check Versions 

import sys

# Report every already-imported module that advertises a __version__, in name order.
# sorted() snapshots sys.modules first, so the registry is not iterated while it may change.
versioned = ((name, module) for name, module in sorted(sys.modules.items())
             if hasattr(module, '__version__'))
for name, module in versioned:
  print(name, module.__version__)

IPython 7.27.0
IPython.core.release 7.27.0
PIL 8.3.1
PIL.Image 8.3.1
PIL._version 8.3.1
_csv 1.0
_ctypes 1.1.0
_curses b'2.2'
_decimal 1.70
argparse 1.1
backcall 0.2.0
bokeh 2.3.3
bokeh.document.document 2.3.3
bokeh.resources 2.3.3
bokeh.util.version 2.3.3
certifi 2021.05.30
cffi 1.14.6
charset_normalizer 2.0.4
charset_normalizer.version 2.0.4
colorama 0.4.4
csv 1.0
ctypes 1.1.0
cycler 0.10.0
dateutil 2.8.2
decimal 1.70
decorator 5.0.9
defusedxml 0.7.1
distutils 3.8.0
entrypoints 0.3
hdfs 2.5.8
idna 3.2
idna.idnadata 13.0.0
idna.package_data 3.2
ipykernel 4.10.0
ipykernel._version 4.10.0
ipython_genutils 0.2.0
ipython_genutils._version 0.2.0
ipywidgets 7.6.3
ipywidgets._version 7.6.3
jedi 0.18.0
jinja2 2.10.3
joblib 1.0.1
joblib.externals.cloudpickle 1.6.0
joblib.externals.loky 2.9.0
json 2.0.9
jupyter_client 7.0.1
jupyter_client._version 7.0.1
jupyter_core 4.7.1
jupyter_core.version 4.7.1
kiwisolver 1.3.2
logging 0.5.1.2
markupsafe 2.0.1
matplotlib 3.4.3
numpy 1.21.2
numpy.core 1.21.2

In [4]:
class Welding_model:
  """Rule-based estimator of the inner (IW) and outer (OW) weld values of one sample.

  The estimate is derived from three traces recorded for the sample: spindle
  position, spindle force and spindle torque. Landmarks are located on smoothed
  copies of the torque and force curves, and IW / OW are the distances (on the
  smoothed position curve) between those landmarks and a final position.
  """

  def __init__(self):
    #  Initialize parameters
    #  Replace with read statement for implementation

    # Settings shared by all heads
    self.fixed_params = {
      'Nm_prominence':1,               # find_peaks prominence for the smoothed torque curve
      'forward_torque_points':30,      # rows skipped after the main torque peak before force peaks are kept

      'min_gap': 5,                    # not read by predict()
      'max_gap': 20,                   # not read by predict()
      'end_loc': 50,                   # not read by predict()
      'look_forward_start': 400,       # start of the final-position window, in rows after the torque peak
      'look_forward_finish': 600,      # end of the final-position window (inclusive)
      'ow_ll': 0.4,                    # OW lower limit
      'ow_ul': 1.4,                    # OW upper limit
      'iw_ll': 1.2,                    # IW lower limit
      'iw_ul': 2.1                     # IW upper limit
      }

    # Per-head settings: entry [head-1] of each list is used
    self.model_params = {
      'force_start': [370,540],
      'mm_iw_offset': [0.1022,0.1022],
      'mm_ow_offset': [-0.0166,-0.0166],
      'force_prominence': [1,1]
      }

  def display_params(self):
    """Print the per-head model parameters, then the fixed parameters."""
    print(self.model_params)
    print(self.fixed_params)

  @staticmethod
  def _limit_flag(value, lower, upper):
    """Return 'LOW' / 'HIGH' when value is outside [lower, upper], otherwise 'PASS'."""
    if value < lower:
      return 'LOW'
    if value > upper:
      return 'HIGH'
    return 'PASS'

  def predict(self, Spindle_Position,Spindle_Force,Spindle_Torque, head, sample_num, start_time, end_time):
    '''
      Main function to compute the Inner and Outer weld estimates for one sample

      Spindle_Position(mm)   Array
      Spindle_Force(N)       Array
      Spindle_Torque(Nm)     Array
      head                   int    1-based; entry head-1 of each model_params list is used
      sample_num             int    written to the 'sample' column and echoed back
      start_time             time   echoed back unchanged
      end_time               time   echoed back unchanged

      The three arrays are read positionally and must have the same length
      (at least 1000 points).

      Returns a 15-item tuple:
        (iw, ow, iw_flag, ow_flag, sample_flag, iw_ll, iw_ul, ow_ll, ow_ul,
         sample_num, start_time, end_time, head, df, message)

      On success iw / ow are rounded to 4 decimals, the flags are
      'PASS' / 'LOW' / 'HIGH' (sample_flag is 'PASS' / 'FAIL'), df is the
      working frame with the tagged points, and message is 'ok' or a
      'WARNING ...' text.

      Exceptions raised while processing are caught: the result is then
      (0.0, 0.0, 'ERROR', 'ERROR', 'ERROR', ...) with message naming the last
      stage that was reached and df holding whatever was built up to that point
      (an empty frame if nothing was).
    '''

    error_msg="Error --  at Initialize"
    # Bound before the try block so the error return can always hand back a frame,
    # even when building the real frame is the step that fails.
    df = pd.DataFrame()
    pks_force = []
    try:

      # Create DataFrame from 3 input streams.
      # Plain arrays are used so row labels are always 0..n-1: find_peaks reports
      # positions, and those positions are used as row labels further down.
      df = pd.DataFrame({'mm': np.asarray(Spindle_Position),
                         'N': np.asarray(Spindle_Force),
                         'Nm': np.asarray(Spindle_Torque)})
      df['mm_avg']=df['mm'].rolling(center=True, window=10, min_periods=1).mean()
      df['Nm_avg']=df['Nm'].rolling(center=True, window=5, min_periods=1).mean()
      df['N_avg']=df['N'].rolling(center=True, window=5, min_periods=1).mean()
      df['points1']=0      # tags shown on the Force curve
      df['points2']=0      # tags shown on the Torque curve
      df['points3']=0      # other
      df['sample']= sample_num

      error_msg="Error --  After Initialize"

      if len(Spindle_Force) < 1000:
        error_msg="Error --  # of points is too low :  " + str(len(Spindle_Force))
        raise ValueError(error_msg)
      error_msg="Error --  After Initialize 2"

      # --- IW Estimate ---
      tsi=df[df['Nm_avg']>=.05].index[0]                 # first row where the smoothed torque reaches 0.05
      df.loc[tsi, 'points2']=1
      iws=df.loc[tsi,'mm_avg']                           # smoothed position at that row

      error_msg="Error --  After Initialize 3"
      # --- Alternative method for IW estimate: first row where raw force exceeds the head's threshold
      fst=df[df['N'] > self.model_params['force_start'][head-1]].index[0]
      df.loc[fst, 'points1']=1                           # force start position (tag only)

      error_msg="Error --  After Initialize 4"
      # Find peaks in the smoothed Torque and Force curves
      pks_torque=find_peaks(df['Nm_avg'],prominence= self.fixed_params['Nm_prominence'])[0]
      pks_force=find_peaks(df['N_avg'],prominence = self.model_params['force_prominence'][head-1])[0]

      error_msg="Error --  After Initialize 5"
      nm_avg=df['Nm_avg'].values
      n_avg=df['N_avg'].values

      # Tag every torque peak and remember which one is the highest
      max_torque=0
      max_point=0
      for u, pk in enumerate(pks_torque):
        df.loc[pk, 'points2']=1
        if nm_avg[pk] > max_torque:
          max_point=u
          max_torque=nm_avg[pk]
      pt=pks_torque[max_point]
      # The peak after the highest one is required; when the highest peak is the
      # last one this raises IndexError and the sample is reported as ERROR.
      pt2=pks_torque[max_point+1]

      # Keep the Force peaks that lie between the (shifted) main torque peak and the
      # following torque peak, and remember the highest of them
      max_force=0
      max_point_force=0
      ow_pk_index=[]
      first_allowed=pt + self.fixed_params['forward_torque_points']
      for pk in pks_force:
        if first_allowed <= pk <= pt2:
          ow_pk_index.append(pk)
          if n_avg[pk] > max_force:
            max_point_force=pk
            max_force=n_avg[pk]
      error_msg="Error --  After Initialize 6"

      # Final position: median of the smoothed position over a window after the
      # second torque peak.
      # NOTE(review): the window is anchored on pks_torque[1], while the "end position"
      # tag below is anchored on pt2; these differ when the highest torque peak is not
      # the first one -- confirm which anchor is intended.
      window_start=pks_torque[1] + self.fixed_params['look_forward_start']
      window_end=pks_torque[1] + self.fixed_params['look_forward_finish']
      final_mm=df.loc[window_start:window_end, 'mm_avg'].median()

      iwp=final_mm - iws
      error_msg="Error --  After IW estimate :" + str(iwp)

      # From here on pks_force holds only the kept Force peaks (used by the error path)
      pks_force=np.sort(ow_pk_index)

      # Among the kept peaks at or after the highest one, find the largest drop in
      # smoothed force from one peak to the next
      prev_N=0
      max_drop=-1
      max_drop_pos=0
      for u, pk in enumerate(ow_pk_index):
        if pk >= max_point_force:
          df.loc[pk, 'points1']=1
          drop=n_avg[pk] - prev_N
          if drop<max_drop:
            max_drop=drop
            max_drop_pos=ow_pk_index[u-1]    # take the previous point
          prev_N=n_avg[pk]

      if max_drop_pos==0:
        # no drop found: fall back to the last kept peak
        max_drop_pos=ow_pk_index[-1]
      error_msg='Error --  Peaks Identified Torque: ' + str(len(pks_torque)) + '\tForce Kept: ' + str(len(ow_pk_index))

      # Estimate OW from the last peak before the largest drop in the N curve
      ows=df.loc[max_drop_pos,'mm_avg']
      owp=final_mm-ows

      # apply Bias correction
      iwp = iwp +  self.model_params['mm_iw_offset'][head-1]
      owp = owp +  self.model_params['mm_ow_offset'][head-1]

      # A NaN estimate (e.g. the look-forward window lies past the end of the trace)
      # fails every limit comparison below and would otherwise be reported as PASS.
      if not (np.isfinite(iwp) and np.isfinite(owp)):
        error_msg='Error --  IW/OW estimate is not a finite number'
        raise ValueError(error_msg)

      # Mask form on purpose: these rows may lie past the end of the trace, and a
      # label assignment would append a new row instead of doing nothing.
      df.loc[df.index==pt2+self.fixed_params['look_forward_start'] , 'points2']=1                   # end position
      df.loc[df.index==pt+self.fixed_params['forward_torque_points'] , 'points2']=1                 # shifted start
      df.loc[max_drop_pos , 'points3']=1

      error_msg='Error --  during Tagging data'

      # --- Set IW, OW and Overall Flags -- if either IW or OW fail, the overall sample will fail also
      iw_flag=self._limit_flag(iwp, self.fixed_params['iw_ll'], self.fixed_params['iw_ul'])
      ow_flag=self._limit_flag(owp, self.fixed_params['ow_ll'], self.fixed_params['ow_ul'])
      sample_flag='PASS' if (iw_flag=='PASS' and ow_flag=='PASS') else 'FAIL'

      # Check to ensure the data that came into the function was not truncated
      if df.iloc[0]['N'] > self.model_params['force_start'][head-1] + 300:
        error_msg = 'WARNING - Initial Force too high ' + str(df.iloc[0]['N'])
      else:
        error_msg = 'ok'

      return np.round(iwp,4),np.round(owp,4),iw_flag, ow_flag, sample_flag,self.fixed_params['iw_ll'], self.fixed_params['iw_ul'], self.fixed_params['ow_ll'], self.fixed_params['ow_ul'], \
              sample_num,start_time,end_time, head, df, error_msg

    except Exception:
      # Exception (not a bare except) so KeyboardInterrupt / SystemExit still propagate.
      if 'Kept' in error_msg:
        # Failure after the Force peaks were filtered: tag all kept peaks for plotting.
        # (Every entry is tagged; the sentinel that once justified skipping the last
        # one is no longer appended.)
        df.loc[df.index.isin(pks_force), 'points1']=1

      return 0.0, 0.0, 'ERROR','ERROR','ERROR',self.fixed_params['iw_ll'], self.fixed_params['iw_ul'], self.fixed_params['ow_ll'], self.fixed_params['ow_ul'], \
              sample_num,start_time,end_time, head, df,error_msg



In [12]:
# Build the estimator with the parameter set hard-coded in Welding_model.__init__
iw_ow = Welding_model()

# Print the per-head model parameters followed by the fixed parameters
iw_ow.display_params()

{'force_start': [370, 540], 'mm_iw_offset': [0.1022, 0.1022], 'mm_ow_offset': [-0.0166, -0.0166], 'force_prominence': [1, 1]}
{'Nm_prominence': 1, 'forward_torque_points': 30, 'min_gap': 5, 'max_gap': 20, 'end_loc': 50, 'look_forward_start': 400, 'look_forward_finish': 600, 'ow_ll': 0.4, 'ow_ul': 1.4, 'iw_ll': 1.2, 'iw_ul': 2.1}


In [35]:
from dfml.python.dataset import Dataset
from dfml.utilities import dfml_utils

# Read both CSV files through the dfml Dataset wrapper and get each one as a DataFrame.
# dataset_1 is bound to lab_results and dataset_2 to detail_data in the cells below.
dataset_1 = Dataset("/P&G/Physical_Weld_Data.csv").get_dataframe()
dataset_2 = Dataset("/P&G/combined_2.csv").get_dataframe()

Dataframe created
Dataframe created


In [36]:
# Lab measurements, under the name the rest of the notebook uses
lab_results = dataset_1

# Structural summary: column dtypes, (rows, columns), container type
for summary in (lab_results.dtypes, lab_results.shape, type(lab_results)):
  print(summary)
lab_results.head()

Timestamp     object
Sample_ID      int64
Head           int64
IW_Avg_mm    float64
OW_Avg_mm    float64
dtype: object
(52, 5)
<class 'pandas.core.frame.DataFrame'>


Unnamed: 0,Timestamp,Sample_ID,Head,IW_Avg_mm,OW_Avg_mm
0,21/5/5...,1,1,1.357,0.367
1,21/5/6...,2,1,1.336,0.301
2,21/5/7...,3,1,1.324,0.311
3,21/5/9...,4,1,1.421,0.414
4,21/5/1...,5,1,1.352,0.354


In [37]:
# Per-point spindle traces for all samples, under the name the rest of the notebook uses
detail_data = dataset_2

# Structural summary: column dtypes, (rows, columns), container type
for summary in (detail_data.dtypes, detail_data.shape, type(detail_data)):
  print(summary)
detail_data.head()

time              float64
Spindle_Pos       float64
Spindle_Force     float64
Spindle_Torque    float64
sample              int64
head                int64
dtype: object
(693402, 6)
<class 'pandas.core.frame.DataFrame'>


Unnamed: 0,time,Spindle_Pos,Spindle_Force,Spindle_Torque,sample,head
0,0.0,0.0094,0.5905,0.0011,10001,1
1,1.0,0.0149,0.583,-0.0042,10001,1
2,2.0,0.017,0.656,-0.0033,10001,1
3,3.0,0.0133,0.6164,0.0011,10001,1
4,4.0,0.0091,0.6237,-0.0036,10001,1


In [16]:
#predict(self, Spindle_Position,Spindle_Force,Spindle_Torque, head, sample_num, start_time, end_time):

sample = 10001

# Trace rows of the chosen sample, renumbered from 0
# (reset_index keeps the previous row labels in an 'index' column)
selected_point = detail_data.loc[detail_data['sample'] == sample].reset_index()

# Head number and timestamp recorded by the lab for the same sample.
# NOTE(review): the lab_results summary printed earlier in this notebook lists a
# 'Sample_ID' column and no 'Weld_ID' -- confirm the column name against the
# current CSV before re-running this cell on a fresh kernel.
lab_row = lab_results.loc[lab_results['Weld_ID'] == sample]
hd = lab_row['Head'].values[0]
ts = lab_row['Timestamp'].values[0]
print(hd, ts)

selected_point.head()

1 21/5/5 17:07:45


Unnamed: 0,index,time,Spindle_Pos,Spindle_Force,Spindle_Torque,sample,head
0,0,0.0,0.0094,0.5905,0.0011,10001,1
1,1,1.0,0.0149,0.583,-0.0042,10001,1
2,2,2.0,0.017,0.656,-0.0033,10001,1
3,3,3.0,0.0133,0.6164,0.0011,10001,1
4,4,4.0,0.0091,0.6237,-0.0036,10001,1


In [17]:
# Run the estimator on the selected sample; the lab timestamp is passed as both start and end time
est = iw_ow.predict(
  Spindle_Position=selected_point['Spindle_Pos'],
  Spindle_Force=selected_point['Spindle_Force'],
  Spindle_Torque=selected_point['Spindle_Torque'],
  head=hd,
  sample_num=sample,
  start_time=ts,
  end_time=ts,
)

# Everything except the working DataFrame (position 13), followed by the status message
print(est[:13], est[14])

# Lab values for the same sample, for comparison
lab_match = lab_results['Weld_ID'] == sample
print(lab_results.loc[lab_match, ['IW_Avg_mm','OW_Avg_mm']])

(15.4599, 0.3032, 'HIGH', 'LOW', 'FAIL', 1.2, 2.1, 0.4, 1.4, 10001, '21/5/5 17:07:45', '21/5/5 17:07:45', 1) ok
   IW_Avg_mm  OW_Avg_mm
0      1.357      0.367


In [18]:
# NOTE(review): predict() as defined above returns 'ok', a 'WARNING ...' text or an
# 'Error ...' text, never 'aa', so this guard is always true -- confirm the intended condition.
if est[14]!='aa':
  df_out = est[13]

  # Untagged rows become NaN so that only tagged rows survive the multiplications below
  flag_cols = ('points1', 'points2', 'points3')
  for flag in flag_cols:
    df_out.loc[df_out[flag] == 0, flag] = np.nan

  # One overlay column per (curve, tag set): the curve value at tagged rows, NaN elsewhere
  for prefix, signal in (('N', 'N'), ('Nm', 'Nm'), ('mm', 'mm_avg')):
    for suffix, flag in zip(('', '_2', '_3'), flag_cols):
      df_out[prefix + '_points' + suffix] = df_out[signal] * df_out[flag]

  #What should have been the position based on the lab result
  end_mm = df_out['mm_points_2'].max()

  lab_row = lab_results.loc[lab_results['Weld_ID']==sample]
  iw_actual = lab_row['IW_Avg_mm'].values[0]
  ow_actual = lab_row['OW_Avg_mm'].values[0]
  iwsp = end_mm - iw_actual
  owsp = end_mm - ow_actual
  # first rows where the smoothed position reaches the lab-implied start positions
  iwi = df_out[df_out['mm_avg']>=iwsp].index[0]
  owi = df_out[df_out['mm_avg']>=owsp].index[0]

  # Fourth tag set: the two lab-implied rows
  df_out['points4'] = float("nan")
  df_out.loc[df_out.index.isin([iwi, owi]), 'points4'] = 1

  for prefix, signal in (('N', 'N'), ('mm', 'mm_avg'), ('Nm', 'Nm')):
    df_out[prefix + '_points_4'] = df_out[signal] * df_out['points4']

  # Lowest raw force among the lab-implied rows
  start_f = df_out['N_points_4'].min()
  print(start_f)


360.9189


In [27]:
# `df` is not assigned anywhere at notebook level, so on a fresh kernel this cell and
# the plotting cell below it fail with NameError. Bind it explicitly to the annotated
# frame built in the previous cell, and show a preview instead of the whole frame
# (display.max_rows is unlimited in this notebook).
df = df_out
df.head(10)

Unnamed: 0,mm,N,Nm,mm_avg,Nm_avg,N_avg,points1,points2,points3,sample,N_points,N_points_2,N_points_3,Nm_points,Nm_points_2,Nm_points_3,mm_points,mm_points_2,mm_points_3,points4,N_points_4,mm_points_4,Nm_points_4
0,0.0094,0.5905,0.0011,0.01274,-2.1333...,0.609833,,,,10001,,,,,,,,,,,,,
1,0.0149,0.583,-0.0042,0.01195,-1.3250...,0.611475,,,,10001,,,,,,,,,,,,,
2,0.017,0.656,-0.0033,0.011471,-1.7800...,0.613920,,,,10001,,,,,,,,,,,,,
3,0.0133,0.6164,0.0011,0.011075,-2.1600...,0.601680,,,,10001,,,,,,,,,,,,,
4,0.0091,0.6237,-0.0036,0.010622,-3.4400...,0.629740,,,,10001,,,,,,,,,,,,,
5,0.008,0.5293,-0.0008,0.00961,-3.5000...,0.620500,,,,10001,,,,,,,,,,,,,
6,0.0086,0.7233,-0.0106,0.00848,-5.1200...,0.609760,,,,10001,,,,,,,,,,,,,
7,0.0083,0.6098,-0.0036,0.00711,-5.4800...,0.628300,,,,10001,,,,,,,,,,,,,
8,0.007,0.5627,-0.007,0.0059,-5.6000...,0.648520,,,,10001,,,,,,,,,,,,,
9,0.0005,0.7164,-0.0054,0.00523,-2.1400...,0.617680,,,,10001,,,,,,,,,,,,,


In [28]:

def _trace_figure(frame, signal):
  """Build one bokeh figure for `signal`: raw curve, smoothed curve and the two tag overlays.

  Reads the columns signal, signal + '_avg', signal + '_points' and
  signal + '_points_2' of `frame`, plotted against the frame's index.
  """
  fig = figure(title=signal, x_axis_label='time', y_axis_label=signal)
  fig.line(frame.index, frame[signal], line_width=2, color="blue", legend_label=signal)
  fig.line(frame.index, frame[signal + '_avg'], line_width=2, color="green", line_dash='dotted', legend_label=signal + "_avg")
  fig.circle(frame.index, frame[signal + '_points'], size=5, color="red")
  fig.circle(frame.index, frame[signal + '_points_2'], size=7, color="green")
  fig.legend.location = "bottom_left"
  fig.legend.click_policy = "hide"
  return fig


# Torque, position and force panels, in that order
p1, p2, p3 = (_trace_figure(df, signal) for signal in ('Nm', 'mm', 'N'))

# Only the torque panel carries the sample number in its title
t = Title()
t.text = "Sample # " + str(sample) + "    Nm"
p1.title = t

grid = gridplot([[p1,p2,p3]], plot_width=500, plot_height=500)
show(grid)