In [1]:
import pandas as pd
import numpy as np
import os

In [2]:
#defining features

def extract_msm(df, column_name):
    """Summarise one column of ``df`` by mean, standard deviation and median.

    Parameters
    ----------
    df : pandas.DataFrame
        Table that is indexed as ``df[column_name]``.
    column_name : str
        Name of the column to summarise.

    Returns
    -------
    dict
        Keys ``'mean'``, ``'std'`` and ``'median'`` (not prefixed with the
        column name), produced by ``np.mean``, ``np.std`` and ``np.median``
        applied to the selected column.
    """
    values = df[column_name]
    # (result key, reducer) pairs, applied in this order.
    reducers = (
        ('mean', np.mean),
        ('std', np.std),
        ('median', np.median),
    )
    return {key: fn(values) for key, fn in reducers}

def extract_all(df, column_name):
    """Compute eight summary statistics for one column of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table that is indexed as ``df[column_name]``.
    column_name : str
        Name of the column to summarise; also used as the prefix of every
        key in the result.

    Returns
    -------
    dict
        Keys ``'<column_name>_mean'``, ``'_std'``, ``'_min'``, ``'_max'``,
        ``'_median'``, ``'_iqr'``, ``'_skew'`` and ``'_kurtosis'``, in that
        order. The IQR is the 75th minus the 25th percentile; skew and
        kurtosis come from the column's own ``.skew()`` / ``.kurtosis()``
        methods, the rest from the matching NumPy functions.
    """
    values = df[column_name]
    # (key suffix, reducer) pairs; order defines the key order of the result.
    reducers = (
        ('mean', np.mean),
        ('std', np.std),
        ('min', np.min),
        ('max', np.max),
        ('median', np.median),
        ('iqr', lambda col: np.percentile(col, 75) - np.percentile(col, 25)),
        ('skew', lambda col: col.skew()),
        ('kurtosis', lambda col: col.kurtosis()),
    )
    return {f'{column_name}_{suffix}': fn(values) for suffix, fn in reducers}

def extract_w(df, column_name):
    """Describe the shape of one column's distribution.

    Parameters
    ----------
    df : pandas.DataFrame
        Table that is indexed as ``df[column_name]``.
    column_name : str
        Name of the column to summarise.

    Returns
    -------
    dict
        Keys ``'iqr'`` (75th minus 25th percentile), ``'skew'`` and
        ``'kurtosis'`` (not prefixed with the column name).
    """
    values = df[column_name]

    upper_quartile = np.percentile(values, 75)
    lower_quartile = np.percentile(values, 25)

    shape_stats = {}
    shape_stats['iqr'] = upper_quartile - lower_quartile
    shape_stats['skew'] = values.skew()
    shape_stats['kurtosis'] = values.kurtosis()
    return shape_stats

def extract_max(df, column_name):
    """Return the maximum of one column under a fixed result key.

    Parameters
    ----------
    df : pandas.DataFrame
        Table that is indexed as ``df[column_name]``.
    column_name : str
        Name of the column whose maximum is taken.

    Returns
    -------
    dict
        A single entry ``'Screwdriving time (ms)'`` holding
        ``np.max(df[column_name])``. The key is the same whatever
        ``column_name`` is passed.
    """
    largest = np.max(df[column_name])
    return {'Screwdriving time (ms)': largest}

In [3]:
def feature_extraction(file_path, folder_path):
    """Build one feature row (a dict) from a single CSV recording.

    Parameters
    ----------
    file_path : str
        Path of the CSV file to read. The first row is used as the header.
    folder_path : str
        Path of the directory that contains the file. Its last path
        component becomes the ``'label'`` of the row.

    Returns
    -------
    dict
        In insertion order: ``'name'`` (file name without directory or
        extension), ``'label'`` (last component of ``folder_path``), the
        entry produced by ``extract_max`` for ``'Time (ms)'``, then the
        entries produced by ``extract_all`` for each signal column.

    Raises
    ------
    KeyError
        If the CSV lacks one of the expected columns (raised by the column
        lookup inside the extractors).
    """
    df = pd.read_csv(file_path, index_col=None, header=0)

    # Recording id: the file name stripped of its directory and extension.
    filename, _ = os.path.splitext(os.path.basename(file_path))

    # The label must come from the ``folder_path`` argument, not from a
    # notebook-global variable, so the function works no matter what the
    # caller names its own variables. normpath drops a trailing separator,
    # which would otherwise make basename return ''.
    label = os.path.basename(os.path.normpath(folder_path))

    features = {'name': filename, 'label': label}

    # Maximum of the time column, stored under the key extract_max chooses.
    features.update(extract_max(df, 'Time (ms)'))

    # Full set of summary statistics for every signal column.
    for column_name in ['Nset (1/min)', 'Torque (Nm)', 'Current (V)', 'Angle (°)', 'Depth (mm)']:
        features.update(extract_all(df, column_name))

    return features

In [4]:
# Root directory of the dataset; every directory below it is visited.
dataset = r"C:\Users\GHB\Desktop\Screwcell dataset"

# One feature dict per processed CSV, in the order the files are visited.
features_dataset = []

# NOTE(review): `folder_name` is also visible as a notebook global to code in
# other cells; check for such uses before renaming the loop variable.
for folder_name, _, file_names in os.walk(dataset):
    print(f"Processing directory: {folder_name}")
    for file_name in file_names:
        # Only CSV files whose name starts with "i" are processed
        # (presumably the intrinsic recordings - confirm).
        if not (file_name.startswith("i") and file_name.endswith('.csv')):
            continue
        print(file_name)
        file_path = os.path.join(folder_name, file_name)
        file_features = feature_extraction(file_path, folder_name)
        features_dataset.append(file_features)

Processing directory: C:\Users\GHB\Desktop\Screwcell dataset
Processing directory: C:\Users\GHB\Desktop\Screwcell dataset\Extrinsic data
Processing directory: C:\Users\GHB\Desktop\Screwcell dataset\Extrinsic data\B
Processing directory: C:\Users\GHB\Desktop\Screwcell dataset\Extrinsic data\M
Processing directory: C:\Users\GHB\Desktop\Screwcell dataset\Extrinsic data\N
Processing directory: C:\Users\GHB\Desktop\Screwcell dataset\Extrinsic data\NS
Processing directory: C:\Users\GHB\Desktop\Screwcell dataset\Extrinsic data\OT
Processing directory: C:\Users\GHB\Desktop\Screwcell dataset\Extrinsic data\UT
Processing directory: C:\Users\GHB\Desktop\Screwcell dataset\Intrinsic data
Processing directory: C:\Users\GHB\Desktop\Screwcell dataset\Intrinsic data\B
i030520234020.csv
i030520235027.csv
i030520235062.csv
i030520235066.csv
i030520235079.csv
i030520235082.csv
i030520235086.csv
i030520236004.csv
i030520236012.csv
i030520236019.csv
i030520236034.csv
i030520236044.csv
i030520236080.csv
i030

i280420233081.csv
i280420233082.csv
i280420233083.csv
i280420233084.csv
i280420233085.csv
i280420233086.csv
Processing directory: C:\Users\GHB\Desktop\Screwcell dataset\Intrinsic data\NS
i030520235038.csv
i030520235039.csv
i030520235070.csv
i270420231016.csv
i270420231017.csv
i270420231018.csv
i270420231019.csv
i270420231020.csv
i270420231021.csv
i280420232020.csv
i280420232021.csv
Processing directory: C:\Users\GHB\Desktop\Screwcell dataset\Intrinsic data\OT
i280420233050.csv
i280420233053.csv
i280420233059.csv
i280420233064.csv
Processing directory: C:\Users\GHB\Desktop\Screwcell dataset\Intrinsic data\UT
i030520234000.csv
i030520234001.csv
i030520234002.csv
i030520234003.csv
i030520234004.csv
i030520234005.csv
i030520234006.csv
i030520234007.csv
i030520234008.csv
i030520234009.csv
i030520234010.csv
i030520234011.csv
i030520234013.csv
i030520234014.csv
i030520234015.csv
i030520234017.csv
i030520234018.csv
i030520234019.csv
i030520234021.csv
i030520234023.csv
i030520234028.csv
i030520

In [5]:
# Sanity check: number of feature rows collected (one dict is appended per processed CSV).
print(len(features_dataset))

540


In [6]:
# Turn the list of per-recording feature dicts into one table:
# one row per dict, one column per dict key.
feature_df = pd.DataFrame(features_dataset)

In [7]:
# Save the feature table while 'label' still holds the original folder names
# (this runs before the labels are collapsed to two classes further down).
# NOTE(review): no index= argument is passed, so pandas' default applies -
# presumably the row index is written as an extra first column; confirm that is wanted.
feature_df.to_csv(r"C:\Users\GHB\Desktop\SCREW PROJECT\Machine Learning\Data\testforSVM1.csv")

In [8]:
# Collapse the labels to two classes: 'N' stays 'N', every other label becomes 'F'.
# This overwrites the 'label' column of feature_df in place.

feature_df['label'] = feature_df['label'].where(feature_df['label'].eq('N'), 'F')
print(feature_df)

              name label  Screwdriving time (ms)  Nset (1/min)_mean  \
0    i030520234020     F                  1523.0         241.838583   
1    i030520235027     F                  1674.0         242.574328   
2    i030520235062     F                  1658.0         242.502712   
3    i030520235066     F                  4017.0         247.028870   
4    i030520235079     F                  1539.0         242.248052   
..             ...   ...                     ...                ...   
535  i030520235080     F                  1664.0         242.830030   
536  i030520235081     F                  1649.0         242.764848   
537  i030520235083     F                  1663.0         242.825721   
538  i030520235084     F                  1439.0         241.883333   
539  i030520235085     F                  1842.0         243.658166   

     Nset (1/min)_std  Nset (1/min)_min  Nset (1/min)_max  \
0           39.326584               0.0             250.0   
1           37.584845    

In [9]:
# Save the table again, now that the 'label' column has been reduced to 'N' / 'F'.
# NOTE(review): no index= argument is passed, so pandas' default applies -
# presumably the row index is written as an extra first column; confirm that is wanted.
feature_df.to_csv(r"C:\Users\GHB\Desktop\SCREW PROJECT\Machine Learning\Data\testforSVM1(binary).csv")