In [31]:
import os
import matplotlib.pyplot as plt
import pandas as pd
import scipy.signal
from tqdm import tqdm
import seaborn as sns
from tsfresh import extract_features
from tsfresh import extract_relevant_features
from tsfresh.feature_extraction.settings import EfficientFCParameters

plt.rcParams['figure.figsize'] = (12, 8)

## <font color=orange>  Helper Functions </font>

In [2]:
def calculateBendingAngleAfterSpringback(df,
                                         threshold   = 1.0,
                                         section_dim = 200,
                                         median      = True):
    """Estimate the bending angle after springback from the end of a measurement.

    The 'Bending_Angle' column is scanned backwards from the last row in steps
    of ``section_dim`` rows. For each step the angle at the current row is
    compared with the angle ``section_dim`` rows earlier (or with the first row
    when fewer rows remain). As soon as the absolute difference exceeds
    ``threshold`` the scan stops; every row from the current row to the end is
    treated as the springback range and aggregated.

    param:
          df          : DataFrame with a 'Bending_Angle' column. Rows containing
                        any missing value are ignored; ``df`` itself is left
                        unmodified.
          threshold   : maximum change of the bending angle (in degrees) across
                        one section for it to still belong to the springback
                        range.
          section_dim : number of rows per section; must be positive.
          median      : True to aggregate the springback range with the median,
                        False to use the mean.
    return: bending angle after springback (NaN when no complete row is left).
    raises: ValueError if ``section_dim`` is not positive.
    """
    if section_dim <= 0:
        # a non-positive step would never move the scan towards the first row
        raise ValueError("section_dim must be a positive number of rows")

    # Drop incomplete rows on a copy so the caller's DataFrame is not mutated.
    angles = df.dropna()['Bending_Angle']

    row = len(angles) - 1
    while row > 0:
        # Clamp to the first row: a negative position would make .iloc wrap
        # around to the end of the series (or raise IndexError).
        section_start = max(row - section_dim, 0)
        if abs(angles.iloc[row] - angles.iloc[section_start]) > threshold:
            break
        row = section_start

    # max() guards the empty case (row == -1), where a negative start would
    # otherwise be interpreted as "last row".
    springback_angles = angles.iloc[max(row, 0):]
    if median:
        return springback_angles.median()
    return springback_angles.mean()

## <font color=orange>  1. Data preprocessing </font>

In [3]:
# Constants
# Maximum change of the bending angle (in degrees) across one section of
# `section_dim` rows; passed as `threshold` to
# calculateBendingAngleAfterSpringback when computing the springback below.
THRESHOLD_SPRINGBACK = 0.5 # in degrees/section_dim

In [52]:
# empty frame with the three columns that are filled from the measurement files
df_ML = pd.DataFrame(
    columns=[
        'Aluminum',
        'Thickness',
        'Springback',
    ],
)

In [53]:
# Derive material, sheet thickness and springback for every measurement file.
# NOTE(review): os.listdir() returns names in arbitrary order, and the row order
# produced here is later used positionally as the target for the force series
# read from a second directory - confirm both listings yield the same order.
data_dir = '1_Datensatz_Halbzeug_Blechdicke/Trainingsdaten/Messdaten gesamt/'
files = [name for name in os.listdir(data_dir) if name.endswith('.csv')]
springback, material, thickness = [], [], []
for file in tqdm(files):
    df = pd.read_csv(data_dir + file, sep=';', decimal=',')
    # the file name is split on '_' : first field -> material code,
    # second field -> thickness multiplied by 10
    name_parts = file.split('_')
    material_value = int(name_parts[0])
    material.append(material_value)
    thickness_value = int(name_parts[1]) / 10
    thickness.append(thickness_value)
    # springback = maximum bending angle minus the angle after springback
    max_bending_angle = df['Bending_Angle'].max()
    bending_angle_after_springback = calculateBendingAngleAfterSpringback(
        df,
        threshold=THRESHOLD_SPRINGBACK,
        section_dim=200,
        median=True,
    )
    springback_value = max_bending_angle - bending_angle_after_springback
    springback.append(springback_value)

100%|██████████| 363/363 [00:35<00:00, 10.22it/s]


In [54]:
# copy the per-file lists into the dataframe columns
df_ML['Thickness'] = thickness
df_ML['Springback'] = springback
# The material code is categorical; encode it as a binary flag (1 for material
# code 1, 0 for anything else) so that its numeric value cannot introduce a
# spurious correlation between features and prediction.
df_ML['Aluminum'] = [int(code == 1) for code in material]

In [55]:
df_ML

Unnamed: 0,Aluminum,Thickness,Springback
0,1,1.0,3.363434
1,1,1.0,3.305664
2,1,1.0,3.207978
3,1,1.0,3.411530
4,1,1.0,3.379150
...,...,...,...
358,0,2.0,1.536682
359,0,2.0,1.892334
360,0,2.0,1.146362
361,0,2.0,1.199310


In [42]:
# write df_ML to "df_ML.csv" in the current working directory, without the row index
df_ML.to_csv("df_ML.csv", index=False)

In [43]:
# Merge all force measurements into one long dataframe for tsfresh; every file
# gets its own integer id (its position in the directory listing).
# NOTE(review): os.listdir() returns names in arbitrary order, while the ids
# assigned here are later matched by position with the rows of df_ML - confirm
# that both directory listings yield the same file order.
forces_dir = '1_Datensatz_Halbzeug_Blechdicke/Trainingsdaten/Messdaten ML/'
force_columns = ['id', 'Time', 'Force1', 'Force2', 'Force3']
files = [f for f in os.listdir(forces_dir) if f.endswith('.csv')]

# Collect the per-file frames and concatenate once: calling pd.concat inside
# the loop re-copies all previously merged rows on every iteration (quadratic),
# and concatenating onto an empty frame degrades the column dtypes.
frames = []
for id_, file in enumerate(tqdm(files)):
    df_temp = pd.read_csv(forces_dir + file, sep=';', decimal=',')
    df_temp['Time'] = pd.to_datetime(df_temp['Time'])
    df_temp['id'] = id_
    frames.append(df_temp[force_columns])

if frames:
    df_forces = pd.concat(frames, ignore_index=True)
else:
    # no csv files found: keep the expected (empty) layout instead of failing
    df_forces = pd.DataFrame(columns=force_columns)


100%|██████████| 363/363 [02:29<00:00,  2.43it/s]


In [57]:
# remove every row that has a missing value in any column
df_forces.dropna(inplace=True)
# enforce numeric dtypes: integer ids, floating point forces
df_forces['id'] = df_forces['id'].astype(int)
for force_col in ('Force1', 'Force2', 'Force3'):
    df_forces[force_col] = df_forces[force_col].astype(float)

In [58]:
df_forces

Unnamed: 0,id,Time,Force1,Force2,Force3
0,0,2022-06-14 20:59:40.263000,0.557487,1.750093,2.703058
1,0,2022-06-14 20:59:40.268000,0.863571,2.28133,2.906976
2,0,2022-06-14 20:59:40.270000,0.74879,2.041008,2.805017
3,0,2022-06-14 20:59:40.271000,0.978353,2.19279,2.9707
4,0,2022-06-14 20:59:40.272000,0.723283,1.86393,2.728548
...,...,...,...,...,...
1600843,362,2022-06-14 15:24:26.434000,-33.173756,82.348546,149.705733
1600844,362,2022-06-14 15:24:26.435000,-33.033468,82.753298,149.731223
1600845,362,2022-06-14 15:24:26.436000,-33.22477,82.424437,149.680243
1600846,362,2022-06-14 15:24:26.438000,-32.9697,82.715352,149.731223


In [65]:
# Run tsfresh on the force series (grouped by 'id', ordered by 'Time') with the
# springback column of df_ML as the target.
features_filtered_direct = extract_relevant_features(
    df_forces,
    df_ML["Springback"],
    column_id='id',
    column_sort='Time',
    default_fc_parameters=EfficientFCParameters(),
    n_jobs=4,
)

Feature Extraction:   0%|          | 0/20 [00:00<?, ?it/s]