In [None]:
!pip install tsfresh

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting tsfresh
  Downloading tsfresh-0.20.0-py2.py3-none-any.whl (98 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.2/98.2 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
Collecting stumpy>=1.7.2 (from tsfresh)
  Downloading stumpy-1.11.1-py3-none-any.whl (136 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m136.2/136.2 kB[0m [31m8.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: stumpy, tsfresh
Successfully installed stumpy-1.11.1 tsfresh-0.20.0


In [None]:
import os
import pandas as pd
import numpy as np
from tsfresh import extract_features
from tsfresh.feature_extraction import EfficientFCParameters, MinimalFCParameters, extract_features
from tsfresh.utilities.dataframe_functions import make_forecasting_frame
from tsfresh import select_features
from tsfresh.utilities.dataframe_functions import impute

In [None]:
# Load the measurement table stored under the 'dataset' key of the HDF5 file
df = pd.read_hdf(
    path_or_buf=r"/content/drive/MyDrive/Dataset.h5",
    key='dataset',
)

In [None]:
# Preview the raw table (the printout is truncated to the first and last rows plus the shape)
print(df)

                  Source  Time (ms)          Type  Value
0          i030520235006      0.000  Nset (1/min)  0.000
1          i030520235068      0.000  Nset (1/min)  0.000
2          i050520238018      0.000   Angle (deg)  0.000
3          i030520237070      0.000   Angle (deg)  0.000
4          i280420232085      0.000   Angle (deg)  0.000
...                  ...        ...           ...    ...
33375222  t1005202314051   5001.519    TCP_y (mm)  2.600
33375223  t1005202314051   5001.519    TCP_x (mm)  0.000
33375224  t1005202314051   5001.519  TCP_rz (rad) -1.204
33375225  t1005202314051   5001.519    TCP_z (mm) -4.600
33375226  t1005202314051   5001.519  TCP_rx (rad)  1.234

[33375227 rows x 4 columns]


In [None]:
# NOTE(review): this binds df_intrinsic to the same object as df (no copy is made).
# The following cell reassigns df_intrinsic to a filtered subset of df, so on a
# top-to-bottom run this assignment is immediately superseded — confirm which of
# the two (full table vs. filtered subset) is the intended input.
df_intrinsic = df

In [None]:
# Keep only the rows whose Source identifier begins with 't'
df_intrinsic = df.loc[lambda frame: frame['Source'].str.startswith('t')]
print(df_intrinsic)

                  Source  Time (ms)          Type  Value
46        t1005202313030      0.000   Robot_I (A)  1.091
47        t1005202313030      0.000  TCP_rz (rad) -1.208
48        t1005202313030      0.000  TCP_ry (rad)  1.171
86         t280420233060      0.000    TCP_x (mm)  0.000
174       t1005202313030      0.000  TCP_rx (rad)  1.228
...                  ...        ...           ...    ...
33375222  t1005202314051   5001.519    TCP_y (mm)  2.600
33375223  t1005202314051   5001.519    TCP_x (mm)  0.000
33375224  t1005202314051   5001.519  TCP_rz (rad) -1.204
33375225  t1005202314051   5001.519    TCP_z (mm) -4.600
33375226  t1005202314051   5001.519  TCP_rx (rad)  1.234

[12905592 rows x 4 columns]


In [None]:
# The later extract_features call addresses the columns as "id", "time", "kind"
# and "value", so map the raw headers onto those names here.
df_intrinsic = df_intrinsic.rename(
    columns={
        'Source': 'id',
        'Time (ms)': 'time',
        'Type': 'kind',
        'Value': 'value',
    }
)

print(df_intrinsic)




                      id      time          kind  value
0          i030520235006     0.000  Nset (1/min)  0.000
1          i030520235068     0.000  Nset (1/min)  0.000
2          i050520238018     0.000   Angle (deg)  0.000
3          i030520237070     0.000   Angle (deg)  0.000
4          i280420232085     0.000   Angle (deg)  0.000
...                  ...       ...           ...    ...
33375222  t1005202314051  5001.519    TCP_y (mm)  2.600
33375223  t1005202314051  5001.519    TCP_x (mm)  0.000
33375224  t1005202314051  5001.519  TCP_rz (rad) -1.204
33375225  t1005202314051  5001.519    TCP_z (mm) -4.600
33375226  t1005202314051  5001.519  TCP_rx (rad)  1.234

[33375227 rows x 4 columns]


In [None]:
def _normalise_ids(ids):
    """Rewrite raw source identifiers into the ``id<rest>`` form.

    A leading ``t`` or ``i`` marker is dropped and ``id`` is prepended, e.g.
    ``t1005202313030`` -> ``id1005202313030``. Values that start with neither
    marker become ``id_<value>``. Missing values stay missing.

    Parameters
    ----------
    ids : pandas.Series of str
        Identifier column to convert.

    Returns
    -------
    pandas.Series
        Converted identifiers, aligned with ``ids``.
    """
    # Values that were already converted are passed through unchanged so that
    # re-running this cell is a no-op (a second run used to produce "id_id...").
    # NOTE(review): assumes raw identifiers never start with "id" — confirm.
    already_converted = ids.str.startswith('id', na=False)

    # Single anchored pass: only the leading marker is rewritten, never text
    # that happens to occur inside an identifier.
    converted = ('id_' + ids).str.replace(r'^id_[ti]', 'id', regex=True)

    return converted.mask(already_converted, ids)


df_intrinsic['id'] = _normalise_ids(df_intrinsic['id'])
print(df_intrinsic)

                       id      time          kind  value
0          id030520235006     0.000  Nset (1/min)  0.000
1          id030520235068     0.000  Nset (1/min)  0.000
2          id050520238018     0.000   Angle (deg)  0.000
3          id030520237070     0.000   Angle (deg)  0.000
4          id280420232085     0.000   Angle (deg)  0.000
...                   ...       ...           ...    ...
33375222  id1005202314051  5001.519    TCP_y (mm)  2.600
33375223  id1005202314051  5001.519    TCP_x (mm)  0.000
33375224  id1005202314051  5001.519  TCP_rz (rad) -1.204
33375225  id1005202314051  5001.519    TCP_z (mm) -4.600
33375226  id1005202314051  5001.519  TCP_rx (rad)  1.234

[33375227 rows x 4 columns]


In [None]:
# Feature-calculator configuration. The extraction call below reads this variable,
# so swapping in MinimalFCParameters() here (e.g. for a quick trial run) is a
# one-line change that actually takes effect. Previously the call built its own
# EfficientFCParameters() and ignored `settings`.
settings = EfficientFCParameters()

# Extract features from the long-format table: rows are grouped by "id", split
# per "kind", ordered by "time", and the calculators are applied to "value".
# The recorded run of this cell took about 1h47m.
extracted_features = extract_features(
    df_intrinsic,
    column_id="id",
    column_kind="kind",
    column_sort="time",
    column_value="value",
    default_fc_parameters=settings,
)


Feature Extraction: 100%|██████████| 16092/16092 [1:47:27<00:00,  2.50it/s]


In [None]:
# Step 2: Impute missing (NaN) values
# NOTE(review): tsfresh documents impute() as operating on its argument in place —
# if so, extracted_features is altered as well and both names hold imputed data; confirm.
imputed_features = impute(extracted_features)

 'Current (V)__query_similarity_count__query_None__threshold_0.0'
 'Depth (mm)__friedrich_coefficients__coeff_0__m_3__r_30'
 'Depth (mm)__friedrich_coefficients__coeff_1__m_3__r_30'
 'Depth (mm)__friedrich_coefficients__coeff_2__m_3__r_30'
 'Depth (mm)__friedrich_coefficients__coeff_3__m_3__r_30'
 'Depth (mm)__max_langevin_fixed_point__m_3__r_30'
 'Depth (mm)__query_similarity_count__query_None__threshold_0.0'
 'Nset (1/min)__friedrich_coefficients__coeff_0__m_3__r_30'
 'Nset (1/min)__friedrich_coefficients__coeff_1__m_3__r_30'
 'Nset (1/min)__friedrich_coefficients__coeff_2__m_3__r_30'
 'Nset (1/min)__friedrich_coefficients__coeff_3__m_3__r_30'
 'Nset (1/min)__max_langevin_fixed_point__m_3__r_30'
 'Nset (1/min)__query_similarity_count__query_None__threshold_0.0'
 'Robot_I (A)__friedrich_coefficients__coeff_0__m_3__r_30'
 'Robot_I (A)__friedrich_coefficients__coeff_1__m_3__r_30'
 'Robot_I (A)__friedrich_coefficients__coeff_2__m_3__r_30'
 'Robot_I (A)__friedrich_coefficients__coeff_3__m

In [None]:
print(imputed_features)

                Angle (deg)__variance_larger_than_standard_deviation  \
id030520234000                                                1.0      
id030520234001                                                1.0      
id030520234002                                                1.0      
id030520234003                                                1.0      
id030520234004                                                1.0      
...                                                           ...      
id280420233082                                                1.0      
id280420233083                                                1.0      
id280420233084                                                1.0      
id280420233085                                                1.0      
id280420233086                                                1.0      

                Angle (deg)__has_duplicate_max  \
id030520234000                             0.0   
id030520234001                     

In [None]:
# Turn +/-infinity into NaN, then discard every column that is NaN in all rows
imputed_features = (
    imputed_features
    .replace([np.inf, -np.inf], np.nan)
    .dropna(axis=1, how="all")
)

In [None]:
# Persist the imputed feature table as a CSV file (relative path, so it lands in
# the current working directory)
imputed_features.to_csv("tsfresh_efficient_features.csv")

In [None]:
# List the generated feature column names; the recorded output shows names of the
# form "<kind>__<feature>"
print(extracted_features.columns)

Index(['Angle (deg)__sum_values', 'Angle (deg)__median', 'Angle (deg)__mean',
       'Angle (deg)__length', 'Angle (deg)__standard_deviation',
       'Angle (deg)__variance', 'Angle (deg)__root_mean_square',
       'Angle (deg)__maximum', 'Angle (deg)__absolute_maximum',
       'Angle (deg)__minimum',
       ...
       'Torque (Nm)__sum_values', 'Torque (Nm)__median', 'Torque (Nm)__mean',
       'Torque (Nm)__length', 'Torque (Nm)__standard_deviation',
       'Torque (Nm)__variance', 'Torque (Nm)__root_mean_square',
       'Torque (Nm)__maximum', 'Torque (Nm)__absolute_maximum',
       'Torque (Nm)__minimum'],
      dtype='object', length=120)


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder

# Labels live in a separate CSV; its 'File Name' column is renamed to 'id' so it
# can be joined against the index of the feature table.
labels = pd.read_csv(r"/content/Labels_names.csv")
labels = labels.rename(columns={'File Name': 'id'})

# Create a LabelEncoder instance
encoder = LabelEncoder()

# Fit the encoder and replace the class names by their integer codes
labels['Label'] = encoder.fit_transform(labels['Label'])

# Print original labels and their encoded values
# (the code of a class is its position in encoder.classes_)
for encoded, original in enumerate(encoder.classes_):
    print(f'Original: {original}, Encoded: {encoded}')

# Merge features and labels. The imputed/cleaned table is used so that the
# imputation and column-cleanup cells actually feed the model; previously the
# raw `extracted_features` frame was merged and those steps were bypassed.
data = imputed_features.merge(labels, left_index=True, right_on='id')

# The merge is an inner join, so feature rows without a label vanish silently.
# Report them instead of losing them unnoticed.
n_unlabelled = len(imputed_features) - data['id'].nunique()
if n_unlabelled:
    print(f'Warning: {n_unlabelled} feature rows have no matching label and were dropped')

print(data)

Original: N, Encoded: 0
Original: NS, Encoded: 1
Original: OT, Encoded: 2
Original: P, Encoded: 3
Original: UT, Encoded: 4
      Angle (deg)__sum_values  Angle (deg)__median  Angle (deg)__mean  \
1033             2.495588e+06          1327.200000        1328.147099   
1034             2.158879e+06          1231.866700        1232.940567   
1035             2.345152e+06          1285.466670        1286.424502   
1036             1.558602e+06          1041.333370        1042.543322   
1037             1.867665e+06          1143.266665        1144.402777   
...                       ...                  ...                ...   
407              3.037567e+06          1465.866700        1466.715145   
408              3.314281e+06          1532.800000        1533.679346   
409              3.263832e+06          1520.666630        1521.600124   
410              3.064482e+06          1472.400000        1473.308782   
411              3.140530e+06          1490.933350        1491.938242   



In [None]:
# Split data into features and target variable.
# Both non-feature columns are dropped in a single call: previously the second
# drop started again from `data`, which left 'Label' inside X and leaked the
# target into the model inputs (inflating the reported accuracy).
X = data.drop(columns=['Label', 'id'])
y = data['Label']

# Split data into training and testing sets (80/20, fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the model
model = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the model
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model
print('Accuracy:', accuracy_score(y_test, y_pred))

Accuracy: 0.9776951672862454


In [None]:
from sklearn.metrics import classification_report

# Evaluate the model's performance: per-class precision, recall, F1 score and
# support on the held-out test split (classes appear under their encoded integers)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.99      1.00      0.99        84
           1       0.97      0.97      0.97        30
           2       1.00      0.94      0.97        67
           3       0.95      1.00      0.97        35
           4       0.96      0.98      0.97        53

    accuracy                           0.98       269
   macro avg       0.97      0.98      0.97       269
weighted avg       0.98      0.98      0.98       269

