In [23]:
import pandas as pd
import numpy as np
import os

In [24]:
# Feature-extraction helpers: each returns a dict of summary statistics for one column

def extract_msm(df, column_name):
    """Summarise one column by its mean, standard deviation and median.

    The values come from np.mean, np.std (called with its default arguments)
    and np.median applied to ``df[column_name]``. They are returned under the
    un-prefixed keys 'mean', 'std' and 'median', in that order.
    """
    values = df[column_name]
    summary = {}
    summary['mean'] = np.mean(values)
    summary['std'] = np.std(values)
    summary['median'] = np.median(values)
    return summary

def extract_all(df, column_name):
    """Compute eight summary statistics of ``df[column_name]``.

    Returns a dict whose keys are ``'<column_name>_<stat>'`` for, in order:
    mean, std, min, max, median, iqr (75th minus 25th percentile), skew and
    kurtosis. The first six are computed with NumPy functions; skew and
    kurtosis use the column's own ``.skew()`` / ``.kurtosis()`` methods.
    """
    series = df[column_name]
    stats = {
        'mean': np.mean(series),
        'std': np.std(series),
        'min': np.min(series),
        'max': np.max(series),
        'median': np.median(series),
        'iqr': np.percentile(series, 75) - np.percentile(series, 25),
        'skew': series.skew(),
        'kurtosis': series.kurtosis(),
    }
    # Prefix every statistic with the column name so that the results for
    # several columns can be merged into one flat dict without key clashes.
    return {f'{column_name}_{stat}': value for stat, value in stats.items()}

def extract_w(df, column_name):
    """Describe the spread and shape of ``df[column_name]``.

    Returns a dict with the un-prefixed keys 'iqr' (75th minus 25th
    percentile, via np.percentile), 'skew' and 'kurtosis' (via the column's
    own ``.skew()`` / ``.kurtosis()`` methods), in that order.
    """
    series = df[column_name]
    shape_stats = {}
    shape_stats['iqr'] = np.percentile(series, 75) - np.percentile(series, 25)
    shape_stats['skew'] = series.skew()
    shape_stats['kurtosis'] = series.kurtosis()
    return shape_stats

def extract_max(df, column_name):
    """Return the maximum of ``df[column_name]`` as a one-entry dict.

    The key is always 'Screwdriving time (ms)', whatever column is passed;
    feature_extraction calls this with the 'Time (ms)' column.
    """
    largest = np.max(df[column_name])
    return {'Screwdriving time (ms)': largest}

In [25]:
def feature_extraction(file_path, folder_path):
    """Build one row of features for a single recording stored as a CSV file.

    Parameters
    ----------
    file_path : str
        Path of the CSV file to read. Its base name without the extension is
        stored under 'name'.
    folder_path : str
        Directory that contains the file. Its last path component is stored
        under 'label'.

    Returns
    -------
    dict
        'name', 'label', 'Screwdriving time (ms)' (the maximum of the
        'Time (ms)' column), followed by the extract_all statistics of each
        TCP_* column and of 'Robot_I (A)'.

    Raises
    ------
    KeyError
        If one of the expected columns is missing from the CSV.
    """
    df = pd.read_csv(file_path, index_col=None, header=0)

    # File name without directory or extension identifies the recording.
    filename, _ = os.path.splitext(os.path.basename(file_path))

    # The label comes from the folder_path argument, not from a caller-side
    # global, so the function works however the caller names its variables.
    # normpath drops a trailing separator, which would otherwise make
    # basename return an empty string.
    label = os.path.basename(os.path.normpath(folder_path))

    features = {'name': filename, 'label': label}

    # Only the maximum of the time column is kept.
    features.update(extract_max(df, 'Time (ms)'))

    # Full set of summary statistics for every pose and current signal.
    for column_name in ['TCP_x (mm)', 'TCP_y (mm)', 'TCP_z (mm)', 'TCP_rx (mm)', 'TCP_ry (mm)', 'TCP_rz (mm)', 'Robot_I (A)']:
        features.update(extract_all(df, column_name))

    return features

In [35]:
# Root folder of the dataset; os.walk visits it and every sub-folder below it.
dataset_task = r"C:\Users\GHB\Desktop\Test dataset"

# One feature dict per matching CSV file, in the order os.walk yields them.
features_dataset_task = []

for folder_name, _, file_names in os.walk(dataset_task):
    print(f"Processing directory: {folder_name}")
    for file_name in file_names:
        # Only CSV files whose name starts with "t" are processed.
        if not (file_name.startswith("t") and file_name.endswith('.csv')):
            continue
        print(file_name)
        file_path = os.path.join(folder_name, file_name)
        file_features = feature_extraction(file_path, folder_name)
        features_dataset_task.append(file_features)
            


Processing directory: C:\Users\GHB\Desktop\Test dataset
Processing directory: C:\Users\GHB\Desktop\Test dataset\Extrinsic data
Processing directory: C:\Users\GHB\Desktop\Test dataset\Extrinsic data\B
Processing directory: C:\Users\GHB\Desktop\Test dataset\Extrinsic data\M
Processing directory: C:\Users\GHB\Desktop\Test dataset\Extrinsic data\N
Processing directory: C:\Users\GHB\Desktop\Test dataset\Extrinsic data\NS
Processing directory: C:\Users\GHB\Desktop\Test dataset\Extrinsic data\OT
Processing directory: C:\Users\GHB\Desktop\Test dataset\Extrinsic data\UT
Processing directory: C:\Users\GHB\Desktop\Test dataset\Intrinsic data
Processing directory: C:\Users\GHB\Desktop\Test dataset\Intrinsic data\B
Processing directory: C:\Users\GHB\Desktop\Test dataset\Intrinsic data\KXML
Processing directory: C:\Users\GHB\Desktop\Test dataset\Intrinsic data\M
Processing directory: C:\Users\GHB\Desktop\Test dataset\Intrinsic data\N
Processing directory: C:\Users\GHB\Desktop\Test dataset\Intrinsic 

In [36]:
# Sanity check: number of per-file feature dicts collected by the directory walk.
print(len(features_dataset_task))

222


In [37]:
# Build the feature table: one row per file, one column per feature-dict key.
feature_task = pd.DataFrame(features_dataset_task)

In [38]:
# Save the feature table with the labels as produced by feature_extraction.
# NOTE(review): to_csv's default index=True also writes the row index as an
# unnamed first column — confirm the consumer of this file expects it.
feature_task.to_csv(r"C:\Users\GHB\Desktop\SCREW PROJECT\Machine Learning\Data\testforSVM(task).csv")

In [41]:
# Make the classification binary: the label 'N' is kept as is and every
# other label is replaced by 'F'. The column is overwritten in place.
feature_task['label'] = feature_task['label'].where(feature_task['label'] == 'N', 'F')
print(feature_task)

              name label  Screwdriving time (ms)  TCP_x (mm)_mean  \
0    t270420231012     F                4255.624       115.012991   
1    t270420231014     F                4264.931       115.258553   
2    t270420231035     F                4267.581       -24.634574   
3    t280420232014     F                4256.104       115.136221   
4    t280420232024     F                4255.915        35.101700   
..             ...   ...                     ...              ...   
217  t280420233080     F                2275.398      -323.784193   
218  t280420233082     F                2258.525      -343.554079   
219  t280420233083     F                2357.379      -343.698483   
220  t280420233085     F                2272.819      -363.588496   
221  t280420233086     F                2302.808      -363.807623   

     TCP_x (mm)_std  TCP_x (mm)_min  TCP_x (mm)_max  TCP_x (mm)_median  \
0          0.496715           114.6           116.2              114.7   
1          0.370162    

In [43]:
# Save the table again, now with the labels collapsed to 'F' / 'N'.
# NOTE(review): to_csv's default index=True also writes the row index as an
# unnamed first column — confirm the consumer of this file expects it.
feature_task.to_csv(r"C:\Users\GHB\Desktop\SCREW PROJECT\Machine Learning\Data\testforSVM(task)(binary).csv")