In [1]:
import pandas as pd
import numpy as np
import os

In [2]:
# Helper functions: each computes summary statistics (features) for one DataFrame column

def extract_msm(df, column_name):
    """Return the mean, standard deviation and median of one column.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding the data.
    column_name : str
        Name of the column to summarise.

    Returns
    -------
    dict
        Keys ``'mean'``, ``'std'`` and ``'median'`` (in that order), each
        computed with the NumPy function of the same name using its default
        arguments.
    """
    values = df[column_name]
    # (result key, NumPy reducer) pairs; the order fixes the key order.
    reducers = (
        ('mean', np.mean),
        ('std', np.std),
        ('median', np.median),
    )
    return {key: reducer(values) for key, reducer in reducers}

def extract_all(df, column_name):
    """Compute eight summary statistics for one column.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding the data.
    column_name : str
        Name of the column to summarise; it is also used as the prefix of
        every key in the result.

    Returns
    -------
    dict
        Keys ``'<column_name>_mean'``, ``'_std'``, ``'_min'``, ``'_max'``,
        ``'_median'``, ``'_iqr'``, ``'_skew'`` and ``'_kurtosis'`` (in that
        order). The first five come from the matching NumPy functions, the
        IQR is the 75th minus the 25th percentile, and skew/kurtosis come
        from the pandas Series methods with default arguments.
    """
    values = df[column_name]

    # Interquartile range: distance between the 75th and 25th percentiles.
    q75 = np.percentile(values, 75)
    q25 = np.percentile(values, 25)

    stats = (
        ('mean', np.mean(values)),
        ('std', np.std(values)),
        ('min', np.min(values)),
        ('max', np.max(values)),
        ('median', np.median(values)),
        ('iqr', q75 - q25),
        ('skew', values.skew()),
        ('kurtosis', values.kurtosis()),
    )
    # Prefix each statistic with the column name so that results for several
    # columns can be merged into a single flat dict.
    return {f'{column_name}_{suffix}': value for suffix, value in stats}

def extract_w(df, column_name):
    """Return the interquartile range, skew and kurtosis of one column.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding the data.
    column_name : str
        Name of the column to summarise.

    Returns
    -------
    dict
        Keys ``'iqr'`` (75th minus 25th percentile), ``'skew'`` and
        ``'kurtosis'`` (pandas Series methods with default arguments).
    """
    values = df[column_name]
    upper_quartile = np.percentile(values, 75)
    lower_quartile = np.percentile(values, 25)
    return dict(
        iqr=upper_quartile - lower_quartile,
        skew=values.skew(),
        kurtosis=values.kurtosis(),
    )

def extract_max(df, column_name):
    """Return the maximum of one column under a fixed key.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding the data.
    column_name : str
        Name of the column whose maximum is taken.

    Returns
    -------
    dict
        A single entry ``'Screwdriving time (ms)'`` holding the column's
        maximum value; the key does not depend on ``column_name``.
    """
    largest = np.max(df[column_name])
    return {'Screwdriving time (ms)': largest}

In [3]:
def feature_extraction(file_path, folder_path):
    """Build one feature record from a single CSV recording.

    Parameters
    ----------
    file_path : str
        Path of the CSV file to read. The first row is used as the header.
        The file's base name without its extension is stored under ``'name'``.
    folder_path : str
        Path of the folder the file was found in. Its last path component is
        stored under ``'label'``.

    Returns
    -------
    dict
        ``'name'`` and ``'label'``, followed by the ``extract_max`` result for
        the ``'Time (ms)'`` column and the ``extract_all`` results for each of
        the TCP and robot-current columns listed below.

    Raises
    ------
    KeyError
        If the CSV file lacks one of the expected columns.
    """
    df = pd.read_csv(file_path, index_col=None, header=0)

    # File name without directory or extension identifies the recording.
    filename, _ = os.path.splitext(os.path.basename(file_path))

    # Derive the label from the folder handed in by the caller. Previously a
    # module-level ``folder_name`` variable was read here, so the function
    # only worked while the calling loop happened to define that global.
    # normpath drops a trailing separator, which would otherwise make
    # basename return an empty string.
    label = os.path.basename(os.path.normpath(folder_path))

    features = {'name': filename, 'label': label}

    # Duration feature: the largest value of the time column.
    features.update(extract_max(df, 'Time (ms)'))

    # Full set of summary statistics for every signal column.
    signal_columns = [
        'TCP_x (mm)', 'TCP_y (mm)', 'TCP_z (mm)',
        'TCP_rx (mm)', 'TCP_ry (mm)', 'TCP_rz (mm)',
        'Robot_I (A)',
    ]
    for column_name in signal_columns:
        features.update(extract_all(df, column_name))

    return features

In [15]:
# Root folder that is walked recursively; each sub-folder's path is handed to
# feature_extraction together with the files found in it.
dataset_task = r"C:\Users\GHB\Desktop\Task data(test)"

# One feature dict per processed CSV file, in the order os.walk visits them.
features_dataset_task = []

for folder_name, _, file_names in os.walk(dataset_task):
    print(f"Processing directory: {folder_name}")
    for file_name in file_names:
        # Only CSV files whose name starts with "t" are processed.
        if not (file_name.startswith("t") and file_name.endswith('.csv')):
            continue
        print(file_name)
        file_path = os.path.join(folder_name, file_name)
        file_features = feature_extraction(file_path, folder_name)
        features_dataset_task.append(file_features)
            


Processing directory: C:\Users\GHB\Desktop\Task data(test)
Processing directory: C:\Users\GHB\Desktop\Task data(test)\B
t030520234020.csv
t030520235027.csv
t030520235062.csv
t030520235066.csv
t030520235079.csv
t030520235082.csv
t030520235086.csv
t030520236004.csv
t030520236012.csv
t030520236019.csv
t030520236034.csv
t030520236044.csv
t030520236080.csv
t030520237003.csv
t030520237014.csv
t030520237047.csv
t030520237048.csv
t030520237056.csv
t030520237073.csv
t0505202310053.csv
t050520238043.csv
t050520238045.csv
t050520238052.csv
t050520238080.csv
t050520239008.csv
t050520239017.csv
t050520239045.csv
t050520239053.csv
t050520239066.csv
t050520239077.csv
t270420231012.csv
t270420231014.csv
t270420231035.csv
t280420232014.csv
t280420232024.csv
t280420232028.csv
t280420232031.csv
t280420232050.csv
t280420232063.csv
t280420232069.csv
t280420232080.csv
t280420233013.csv
t280420233045.csv
t280420233069.csv
Processing directory: C:\Users\GHB\Desktop\Task data(test)\M
t030520234012.csv
t0305202

t280420233065.csv
t280420233067.csv
t280420233068.csv
t280420233070.csv
t280420233071.csv
t280420233072.csv
t280420233073.csv
t280420233075.csv
t280420233076.csv
t280420233077.csv
t280420233078.csv
t280420233079.csv
t280420233080.csv
t280420233081.csv
t280420233082.csv
t280420233083.csv
t280420233084.csv
t280420233085.csv
t280420233086.csv
Processing directory: C:\Users\GHB\Desktop\Task data(test)\NS
t030520235038.csv
t030520235039.csv
t030520235070.csv
t0505202310038.csv
t050520239073.csv
t050520239074.csv
t050520239075.csv
t270420231016.csv
t270420231017.csv
t270420231018.csv
t270420231019.csv
t270420231020.csv
t270420231021.csv
t280420232020.csv
t280420232021.csv
Processing directory: C:\Users\GHB\Desktop\Task data(test)\OT
t0505202310000.csv
t0505202310002.csv
t0505202310003.csv
t0505202310004.csv
t0505202310005.csv
t0505202310006.csv
t0505202310007.csv
t0505202310008.csv
t0505202310009.csv
t0505202310011.csv
t0505202310012.csv
t0505202310013.csv
t0505202310014.csv
t0505202310015.c

In [16]:
# Number of feature records collected (one per processed CSV file).
print(len(features_dataset_task))

781


In [17]:
# Turn the list of feature dicts into a table: one row per dict, with the
# dict keys as columns.
feature_task = pd.DataFrame(features_dataset_task)

In [18]:
# Write the feature table to CSV using the to_csv defaults.
feature_task.to_csv(r"C:\Users\GHB\Desktop\SCREW PROJECT\Machine Learning\Data\testforSVM(task).csv")

In [19]:
# Make the classification binary: the label 'N' is kept unchanged and every
# other label is replaced by 'F'. The 'label' column is overwritten in place.

feature_task['label'] = feature_task['label'].apply(
    lambda label: label if label == 'N' else 'F'
)
print(feature_task)

              name label  Screwdriving time (ms)  TCP_x (mm)_mean  \
0    t030520234020     F                1698.230        -0.413268   
1    t030520235027     F                1854.826        -0.509463   
2    t030520235062     F                1839.971        -0.363449   
3    t030520235066     F                4264.803         0.271444   
4    t030520235079     F                1693.609         0.221680   
..             ...   ...                     ...              ...   
776  t030520235080     F                1855.377         0.241602   
777  t030520235081     F                1839.732         0.295855   
778  t030520235083     F                1846.298         0.500881   
779  t030520235084     F                1621.677         0.534599   
780  t030520235085     F                2031.907         0.168182   

     TCP_x (mm)_std  TCP_x (mm)_min  TCP_x (mm)_max  TCP_x (mm)_median  \
0          0.468630            -1.2             0.5               -0.3   
1          0.464590    

In [20]:
# Write the current feature table to a separate "(binary)" CSV file, again
# using the to_csv defaults.
feature_task.to_csv(r"C:\Users\GHB\Desktop\SCREW PROJECT\Machine Learning\Data\testforSVM(task)(binary).csv")