In [71]:
import pandas as pd
import numpy as np
import os

In [72]:
# Feature helpers: each returns a dict of summary statistics for one DataFrame column.

def extract_msm(df, column_name):
    """Return the mean, population standard deviation and median of one column.

    Args:
        df: pandas DataFrame holding the measurements.
        column_name: name of the column in ``df`` to summarise.

    Returns:
        dict with the keys ``'mean'``, ``'std'`` and ``'median'``.
        ``'std'`` uses NumPy's default ``ddof=0`` (population std).
    """
    values = df[column_name]
    return {
        # On a pandas Series np.mean / np.std hand off to the Series' own
        # methods, which skip NaN.
        'mean': np.mean(values),
        'std': np.std(values),
        # np.median would propagate a single NaN and disagree with the two
        # statistics above; np.nanmedian ignores missing samples and returns
        # the same value as np.median when there are none.
        'median': np.nanmedian(values),
    }

def extract_all(df, column_name):
    """Return eight summary statistics of one column, keyed by column name.

    Args:
        df: pandas DataFrame holding the measurements.
        column_name: name of the column in ``df`` to summarise; it is also
            used as the prefix of every key in the result.

    Returns:
        dict mapping ``'<column_name>_<stat>'`` to its value, in the order
        mean, std, min, max, median, iqr, skew, kurtosis. ``std`` uses
        NumPy's default ``ddof=0``; ``skew`` and ``kurtosis`` are the pandas
        Series estimators.
    """
    values = df[column_name]
    # One call for both quartiles. The nan-aware variant keeps the IQR in line
    # with the other statistics, which already ignore missing samples on a
    # Series; on NaN-free data it equals np.percentile.
    q25, q75 = np.nanpercentile(values, [25, 75])
    return {
        f'{column_name}_mean': np.mean(values),
        f'{column_name}_std': np.std(values),
        f'{column_name}_min': np.min(values),
        f'{column_name}_max': np.max(values),
        # np.median would turn the feature into NaN as soon as one sample is
        # missing, while mean/std/min/max skip it.
        f'{column_name}_median': np.nanmedian(values),
        f'{column_name}_iqr': q75 - q25,
        f'{column_name}_skew': values.skew(),
        f'{column_name}_kurtosis': values.kurtosis(),
    }

def extract_w(df, column_name):
    """Return the interquartile range, skewness and kurtosis of one column.

    Args:
        df: pandas DataFrame holding the measurements.
        column_name: name of the column in ``df`` to summarise.

    Returns:
        dict with the keys ``'iqr'``, ``'skew'`` and ``'kurtosis'``.
    """
    values = df[column_name]
    # Series.skew / Series.kurtosis skip NaN by default; use the nan-aware
    # percentile so the IQR does the same instead of collapsing to NaN.
    # Both quartiles come from a single call.
    q25, q75 = np.nanpercentile(values, [25, 75])
    return {
        'iqr': q75 - q25,
        'skew': values.skew(),
        'kurtosis': values.kurtosis(),
    }

def extract_max(df, column_name):
    """Report the largest value of a column as the screwdriving time.

    Args:
        df: pandas DataFrame holding the measurements.
        column_name: name of the column whose maximum is taken.

    Returns:
        dict with the single key ``'Screwdriving time (ms)'``.
    """
    peak = np.max(df[column_name])
    result = {'Screwdriving time (ms)': peak}
    return result

In [73]:
def feature_extraction(file_path, folder_path):
    """Build the feature row for a single CSV recording.

    Args:
        file_path: path of the CSV file to read. It must contain the columns
            'Time (ms)', 'Nset (1/min)', 'Torque (Nm)', 'Current (V)',
            'Angle (°)' and 'Depth (mm)'.
        folder_path: path of the directory that contains the file; its last
            component becomes the sample's label.

    Returns:
        dict with 'name' (file name without extension), 'label',
        'Screwdriving time (ms)' (maximum of 'Time (ms)') and the
        ``extract_all`` statistics of every signal column.
    """
    df = pd.read_csv(file_path, index_col=None, header=0)

    # File name without directory and extension identifies the sample.
    filename, _ = os.path.splitext(os.path.basename(file_path))

    # Take the label from the folder_path argument rather than from a global
    # loop variable, so the function works on its own. normpath drops a
    # trailing separator, which would otherwise make basename return ''.
    label = os.path.basename(os.path.normpath(folder_path))

    features = {'name': filename, 'label': label}
    features.update(extract_max(df, 'Time (ms)'))

    for column_name in ['Nset (1/min)', 'Torque (Nm)', 'Current (V)', 'Angle (°)', 'Depth (mm)']:
        features.update(extract_all(df, column_name))

    return features

In [79]:
dataset = r"C:\Users\GHB\Desktop\Test dataset"

# Collects one feature dict per matching CSV file, in os.walk order.
features_dataset = []

for folder_name, _, file_names in os.walk(dataset):
    print(f"Processing directory: {folder_name}")
    for file_name in file_names:
        # Only files named i*.csv are processed; everything else is skipped.
        if not (file_name.startswith("i") and file_name.endswith('.csv')):
            continue
        print(file_name)
        file_path = os.path.join(folder_name, file_name)
        file_features = feature_extraction(file_path, folder_name)
        features_dataset.append(file_features)

Processing directory: C:\Users\GHB\Desktop\Test dataset
Processing directory: C:\Users\GHB\Desktop\Test dataset\Extrinsic data
Processing directory: C:\Users\GHB\Desktop\Test dataset\Extrinsic data\B
Processing directory: C:\Users\GHB\Desktop\Test dataset\Extrinsic data\M
Processing directory: C:\Users\GHB\Desktop\Test dataset\Extrinsic data\N
Processing directory: C:\Users\GHB\Desktop\Test dataset\Extrinsic data\NS
Processing directory: C:\Users\GHB\Desktop\Test dataset\Extrinsic data\OT
Processing directory: C:\Users\GHB\Desktop\Test dataset\Extrinsic data\Task data
Processing directory: C:\Users\GHB\Desktop\Test dataset\Extrinsic data\Task data\B
Processing directory: C:\Users\GHB\Desktop\Test dataset\Extrinsic data\Task data\M
Processing directory: C:\Users\GHB\Desktop\Test dataset\Extrinsic data\Task data\N
Processing directory: C:\Users\GHB\Desktop\Test dataset\Extrinsic data\Task data\NS
Processing directory: C:\Users\GHB\Desktop\Test dataset\Extrinsic data\Task data\OT
Processi

In [84]:
# Sanity check: number of feature rows collected (one per processed CSV file).
print(len(features_dataset))

222


In [85]:
# Turn the list of per-file feature dicts into a table: one row per file,
# one column per dict key.
feature_df = pd.DataFrame(features_dataset)

In [91]:
# Save the feature table with the original folder-name labels.
# NOTE(review): index=False is not passed, so the row index is written as an
# unnamed first column — confirm the downstream reader expects that.
feature_df.to_csv(r"C:\Users\GHB\Desktop\SCREW PROJECT\Machine Learning\Data\testforSVM.csv")

In [92]:
# Reduce the labels to two classes: 'N' is kept as is, every other label becomes 'F'.

feature_df['label'] = feature_df['label'].apply(
    lambda label: label if label == 'N' else 'F'
)
print(feature_df)

              name label  Screwdriving time (ms)  Nset (1/min)_mean  \
0    i270420231012     F                  4018.0         246.967405   
1    i270420231014     F                  4019.0         246.905970   
2    i270420231035     F                  4019.0         246.905970   
3    i280420232014     F                  4017.0         247.028870   
4    i280420232024     F                  4019.0         246.905970   
..             ...   ...                     ...                ...   
217  i280420233080     F                  2074.0         243.885301   
218  i280420233082     F                  2070.0         243.873491   
219  i280420233083     F                  2160.0         244.128644   
220  i280420233085     F                  2079.0         243.900000   
221  i280420233086     F                  2104.0         243.972447   

     Nset (1/min)_std  Nset (1/min)_min  Nset (1/min)_max  \
0           24.227496               0.0             250.0   
1           24.535569    

In [93]:
# Save feature_df again under a separate file name; run after the relabelling
# cell so the 'label' column holds only 'N' / 'F'.
# NOTE(review): the row index is written too (no index=False) — confirm intended.
feature_df.to_csv(r"C:\Users\GHB\Desktop\SCREW PROJECT\Machine Learning\Data\testforSVM(binary).csv")