In [1]:
# Collect the names of all .xlsx workbooks found in the 'cleaned-data' directory
import os
excel_files = [
    entry
    for entry in os.listdir('cleaned-data')
    if entry.endswith('.xlsx')
]
print(excel_files)


['cleaned-lp1.xls( failure1).xlsx', 'cleaned-lp2.xls(failures in transfer of a part).xlsx', 'cleaned-lp3.xls(position of part after a transfer failure).xlsx', 'cleaned-lp4.txt(failures in approach to ungrasp position).xlsx', 'cleaned-lp5.txt( failures in motion with part).xlsx']


In [2]:
import pandas as pd
import numpy as np
# LP4: failures in approach to ungrasp position
# The path has no placeholders, so a plain string literal is used (no f-string needed)
file_path = 'cleaned-data/cleaned-lp4.txt(failures in approach to ungrasp position).xlsx'

# Load the Excel file into a DataFrame and preview its first rows
df = pd.read_excel(file_path)
print(df.head())



  class  Fx  Fy  Fz  Tx  Ty  Tz  class-oridnal
0    ok  -2  -1  81   0  -5   0              2
1    ok  -2  -1  79   0  -4   0              2
2    ok  -2  -1  79   0  -4   0              2
3    ok  -2  -1  80   0  -4   0              2
4    ok  -3  -1  79   1  -5   1              2


In [3]:
# Add a 'time' column: the position (0-14) of each row inside its 15-row window,
# restarting at 0 every 15 rows. Computed in one vectorized step rather than by
# appending to a Python list row by row.
df['time'] = np.arange(len(df)) % 15
print(df.head())



  class  Fx  Fy  Fz  Tx  Ty  Tz  class-oridnal  time
0    ok  -2  -1  81   0  -5   0              2     0
1    ok  -2  -1  79   0  -4   0              2     1
2    ok  -2  -1  79   0  -4   0              2     2
3    ok  -2  -1  80   0  -4   0              2     3
4    ok  -3  -1  79   1  -5   1              2     4


In [4]:
# Add an 'id' column: the index of the 15-row window each row belongs to.
# Computed in one vectorized step; this also avoids creating a global list
# named `id`, which shadowed Python's builtin id().
df['id'] = np.arange(len(df)) // 15
print(df.head())


  class  Fx  Fy  Fz  Tx  Ty  Tz  class-oridnal  time  id
0    ok  -2  -1  81   0  -5   0              2     0   0
1    ok  -2  -1  79   0  -4   0              2     1   0
2    ok  -2  -1  79   0  -4   0              2     2   0
3    ok  -2  -1  80   0  -4   0              2     3   0
4    ok  -3  -1  79   1  -5   1              2     4   0


In [5]:
# Drop rows with NaN values in the relevant columns.
# .copy() makes cleaned_df an independent frame, so the column assignments
# performed on it in the following cells do not trigger SettingWithCopyWarning.
# NOTE(review): Fz and Tz are cast and scaled later but are not part of this subset — confirm intentional.
cleaned_df = df.dropna(subset=['Fx', 'Fy', 'Tx', 'Ty']).copy()

In [6]:
# Cast the six force/torque columns to float
force_torque_cols = ['Fx', 'Fy', 'Fz', 'Tx', 'Ty', 'Tz']
cleaned_df[force_torque_cols] = cleaned_df[force_torque_cols].astype(float)

In [7]:
# Normalize the six force/torque columns with min-max scaling
# NOTE(review): the scaler is fitted on all rows, before the train/test split made
# further down — confirm this is acceptable for the evaluation.
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
scaled_cols = ['Fx', 'Fy', 'Fz', 'Tx', 'Ty', 'Tz']
cleaned_df[scaled_cols] = scaler.fit_transform(cleaned_df[scaled_cols])
print(cleaned_df.head())

  class        Fx        Fy        Fz        Tx        Ty        Tz  \
0    ok  0.407643  0.444079  0.202279  0.501185  0.269674  0.575472   
1    ok  0.407643  0.444079  0.196581  0.501185  0.270633  0.575472   
2    ok  0.407643  0.444079  0.196581  0.501185  0.270633  0.575472   
3    ok  0.407643  0.444079  0.199430  0.501185  0.270633  0.575472   
4    ok  0.404459  0.444079  0.196581  0.502370  0.269674  0.580189   

   class-oridnal  time  id  
0              2     0   0  
1              2     1   0  
2              2     2   0  
3              2     3   0  
4              2     4   0  


In [8]:
# Group the rows by ('id', 'class') and summarize each group: mean, median and std of
# every force/torque column, plus the mean of the ordinal class label.
# The 'time' and 'class' columns are not aggregated.
agg_spec = {
    channel: ['mean', 'median', 'std']
    for channel in ['Fx', 'Fy', 'Fz', 'Tx', 'Ty', 'Tz']
}
agg_spec['class-oridnal'] = ['mean']
grouped_df = cleaned_df.groupby(['id', 'class']).agg(agg_spec).reset_index()
print(grouped_df.head())

  id class        Fx                            Fy                      \
                mean    median       std      mean    median       std   
0  0    ok  0.407431  0.407643  0.001458  0.444079  0.444079  0.000000   
1  1    ok  0.417410  0.404459  0.037479  0.440351  0.444079  0.019256   
2  2    ok  0.404459  0.404459  0.008075  0.442544  0.444079  0.018973   
3  3    ok  0.406582  0.407643  0.009605  0.436184  0.427632  0.021275   
4  4    ok  0.407856  0.407643  0.009665  0.437939  0.437500  0.020387   

         Fz            ...        Tx                            Ty            \
       mean    median  ...      mean    median       std      mean    median   
0  0.198101  0.196581  ...  0.501422  0.501185  0.000491  0.270377  0.270633   
1  0.197531  0.196581  ...  0.501817  0.498815  0.013381  0.272297  0.268714   
2  0.195252  0.193732  ...  0.499842  0.500000  0.014696  0.264811  0.263916   
3  0.202089  0.202279  ...  0.499605  0.504739  0.014102  0.261996  0.262956   
4

In [9]:
# Flatten the two-level column index into single names such as 'Fx_mean'.
# The group keys have an empty second level, so they come out as 'id_' and 'class_'.
grouped_df.columns = grouped_df.columns.map('_'.join)
print(grouped_df.head())


   id_ class_   Fx_mean  Fx_median    Fx_std   Fy_mean  Fy_median    Fy_std  \
0    0     ok  0.407431   0.407643  0.001458  0.444079   0.444079  0.000000   
1    1     ok  0.417410   0.404459  0.037479  0.440351   0.444079  0.019256   
2    2     ok  0.404459   0.404459  0.008075  0.442544   0.444079  0.018973   
3    3     ok  0.406582   0.407643  0.009605  0.436184   0.427632  0.021275   
4    4     ok  0.407856   0.407643  0.009665  0.437939   0.437500  0.020387   

    Fz_mean  Fz_median  ...   Tx_mean  Tx_median    Tx_std   Ty_mean  \
0  0.198101   0.196581  ...  0.501422   0.501185  0.000491  0.270377   
1  0.197531   0.196581  ...  0.501817   0.498815  0.013381  0.272297   
2  0.195252   0.193732  ...  0.499842   0.500000  0.014696  0.264811   
3  0.202089   0.202279  ...  0.499605   0.504739  0.014102  0.261996   
4  0.195442   0.190883  ...  0.495656   0.495261  0.013822  0.260269   

   Ty_median    Ty_std   Tz_mean  Tz_median    Tz_std  class-oridnal_mean  
0   0.270633  0.

In [10]:
# Show the first three aggregated rows of each ordinal class (0 through 4)
for class_code in range(5):
    print(grouped_df[grouped_df['class-oridnal_mean'] == class_code].head(3))

    id_ class_   Fx_mean  Fx_median    Fx_std   Fy_mean  Fy_median    Fy_std  \
35   35   lost  0.373036   0.391720  0.084439  0.428070   0.430921  0.044025   
41   41   lost  0.434607   0.414013  0.120502  0.470175   0.450658  0.160658   
43   43   lost  0.406582   0.433121  0.114198  0.480263   0.457237  0.140086   

     Fz_mean  Fz_median  ...   Tx_mean  Tx_median    Tx_std   Ty_mean  \
35  0.170940   0.165242  ...  0.503555   0.496445  0.027873  0.237876   
41  0.169041   0.168091  ...  0.471564   0.485782  0.094382  0.252591   
43  0.187274   0.170940  ...  0.484913   0.501185  0.083057  0.279207   

    Ty_median    Ty_std   Tz_mean  Tz_median    Tz_std  class-oridnal_mean  
35   0.254319  0.050791  0.551258   0.542453  0.087321                 0.0  
41   0.255278  0.047916  0.496855   0.523585  0.106191                 0.0  
43   0.287908  0.042947  0.551258   0.584906  0.152315                 0.0  

[3 rows x 21 columns]
    id_ class_   Fx_mean  Fx_median    Fx_std   Fy_mean

In [11]:
# Separate features from the target and split them into train and test sets
from mlfromscratch import train_test_split_2
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# Target: the per-window ordinal class; features: every remaining aggregated statistic
y = grouped_df['class-oridnal_mean']
X = grouped_df.drop(columns=['id_', 'class_', 'class-oridnal_mean'])
# scikit-learn alternative, kept for reference:
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = train_test_split_2(X, y, random_state=50)


In [12]:
# Train the from-scratch multiclass logistic regression and report its
# confusion matrix and accuracy on the test split
from mlfromscratch import LogisticRegressionMultiClass
from sklearn.metrics import confusion_matrix,accuracy_score

model = LogisticRegressionMultiClass()
# The target column holds floats (it is a group mean), so it is cast to int labels for fitting
model.fit(X_train, y_train.to_numpy().astype(int))
ypred = model.predict(X_test)

cm = confusion_matrix(y_test, ypred)
acc = accuracy_score(y_test, ypred)
print(cm)
print(acc)

[[2 0 0]
 [0 5 0]
 [0 2 0]]
0.7777777777777778


In [13]:
# Fit scikit-learn's LogisticRegression (constructed with default arguments) on the training split.
# `model` is rebound here; before this cell it held the from-scratch LogisticRegressionMultiClass.
from sklearn.linear_model import LogisticRegression
model = LogisticRegression()
model.fit(X_train, y_train)


In [14]:
# Evaluate the model fitted in the previous cell: accuracy of its predictions on the test split

y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')


Accuracy: 0.7777777777777778


In [15]:
# Fit scikit-learn's decision tree on the training split.
# random_state is fixed so that repeated runs build the same tree and report the same
# accuracy (without it, ties between equally good splits are broken randomly).
from sklearn.tree import DecisionTreeClassifier
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)


In [16]:
# Evaluate the model fitted in the previous cell: accuracy of its predictions on the test split
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')

Accuracy: 0.8888888888888888


In [17]:
# Train and evaluate the from-scratch decision tree on NumPy float64 feature matrices.

from mlfromscratch import DecisionTree
# np.array(...) accepts both the original DataFrames and already-converted ndarrays, so
# this cell can be re-run safely. The previous `.values` call failed on a second run
# because X_train/X_test had already been replaced by ndarrays.
# The cells below receive these arrays instead of the DataFrames.
X_train = np.array(X_train, dtype=np.float64)
X_test = np.array(X_test, dtype=np.float64)
DT_cl = DecisionTree()
# NOTE(review): y_train is still passed as a pandas Series here, whereas the from-scratch
# logistic regression received y_train.to_numpy() — confirm DecisionTree handles a Series.
DT_cl.fit(X_train, y_train)
y_pred_2 = DT_cl.predict(X_test)
acc = accuracy_score(y_test, y_pred_2)
acc

0.7777777777777778

In [18]:
# Fit scikit-learn's random forest on the training split.
# random_state is fixed so that the bootstrap samples and feature subsets — and therefore
# the reported accuracy — are the same on every run.
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)


In [19]:
# Evaluate the model fitted in the previous cell: accuracy of its predictions on the test split
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')


Accuracy: 0.7777777777777778


In [20]:
# Fit scikit-learn's KNeighborsClassifier (constructed with default arguments) on the training split.
# `model` is rebound here; before this cell it held the random forest.
from sklearn.neighbors import KNeighborsClassifier
model = KNeighborsClassifier()
model.fit(X_train, y_train)


In [21]:
# NOTE(review): numpy and pandas are already imported near the top of the notebook, so the
# two imports below only re-bind the same modules; the upgrade command is left disabled.
# %pip install --upgrade numpy pandas
import numpy as np
import pandas as pd

# Evaluate the model fitted in the previous cell: accuracy of its predictions on the test split
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')

Accuracy: 0.5555555555555556


In [22]:
# Train the from-scratch KNN classifier on the training split and display its test accuracy.
# NOTE(review): y_train is passed as a pandas Series — confirm KNNClassifier handles that.
from mlfromscratch import KNNClassifier
knn_cl=KNNClassifier()
knn_cl.fit(X_train,y_train)
ypred=knn_cl.predict(X_test)
acc=accuracy_score(y_test,ypred)
acc

0.6666666666666666

In [23]:
# Fit scikit-learn's Gaussian naive Bayes classifier on the training split.
# `model` is rebound here; before this cell it held the KNN classifier.
from sklearn.naive_bayes import GaussianNB
model = GaussianNB()
model.fit(X_train, y_train)


In [24]:
# Evaluate the model fitted in the previous cell: accuracy of its predictions on the test split
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')


Accuracy: 0.7777777777777778


In [25]:
# Train the from-scratch Gaussian naive Bayes classifier and print its test accuracy.
from sklearn.metrics import accuracy_score
# Per the original note, the data must be NumPy arrays before this model is used.
# X_train and X_test were already converted in the from-scratch decision tree cell,
# so the conversion below stays disabled:
# X_train = X_train.values.astype(np.float64)
# X_test = X_test.values.astype(np.float64)
from mlfromscratch import GaussianNaiveBayes
NB = GaussianNaiveBayes()
NB.fit(X_train, y_train)
y_pred = NB.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')


Accuracy: 0.7777777777777778


  likelihood = np.sum(np.log(self._pdf(idx, x)))


In [26]:
import numpy as np

class SVMClassifier:
    """
    Binary linear SVM trained with per-sample (stochastic) sub-gradient descent
    on the L2-regularized hinge loss.

    This is a two-class model: during training every label <= 0 is treated as the
    negative class and every label > 0 as the positive class, and predict() only
    ever returns 0 or 1. Multiclass targets are therefore collapsed to
    "label <= 0" versus "label > 0".
    """

    def __init__(self, learning_rate=0.01, lambda_param=0.01, n_iters=4000):
        """
        Initialize the SVM Classifier.

        Parameters:
        - learning_rate: Step size for gradient descent.
        - lambda_param: Regularization parameter.
        - n_iters: Number of passes over the training set.
        """
        self.learning_rate = learning_rate
        self.lambda_param = lambda_param
        self.n_iters = n_iters
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        """
        Train the SVM Classifier using Stochastic Gradient Descent (SGD).

        Parameters:
        - X: Training features (n_samples, n_features). Any 2-D array-like
          (NumPy array, pandas DataFrame, nested list).
        - y: Target labels (n_samples,). Any 1-D array-like (NumPy array,
          pandas Series, list). Labels <= 0 are the negative class.

        Raises:
        - ValueError: if X is not 2-D or X and y disagree on the number of samples.
        """
        # np.asarray handles DataFrames/Series/lists uniformly, so the class does not
        # depend on pandas having been imported by an earlier notebook cell.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y).ravel()

        if X.ndim != 2:
            raise ValueError(f"X must be 2-dimensional, got {X.ndim} dimension(s)")
        n_samples, n_features = X.shape
        if y.shape[0] != n_samples:
            raise ValueError(
                f"X and y have inconsistent lengths: {n_samples} != {y.shape[0]}"
            )

        # Initialize weights and bias
        self.weights = np.zeros(n_features)
        self.bias = 0

        # Convert labels to -1 and 1 for SVM
        y_ = np.where(y <= 0, -1, 1)

        # Stochastic Gradient Descent: one parameter update per sample, n_iters passes
        for _ in range(self.n_iters):
            for x_i, y_i in zip(X, y_):
                # True when the sample lies on the correct side with margin >= 1
                margin_satisfied = y_i * (np.dot(x_i, self.weights) - self.bias) >= 1

                if margin_satisfied:
                    # Only the regularization term contributes to the gradient
                    self.weights -= self.learning_rate * (
                        2 * self.lambda_param * self.weights
                    )
                else:
                    # Regularization plus hinge-loss sub-gradient
                    self.weights -= self.learning_rate * (
                        2 * self.lambda_param * self.weights - x_i * y_i
                    )
                    self.bias -= self.learning_rate * y_i

    def predict(self, X):
        """
        Predict the class labels for the test data.

        Parameters:
        - X: Test features (n_samples, n_features). Any 2-D array-like.

        Returns:
        - Predicted class labels (n_samples,) as 0 or 1.
        """
        X = np.asarray(X, dtype=float)

        # Signed distance-like score; note the bias is subtracted, matching fit()
        decision_function = np.dot(X, self.weights) - self.bias

        # Convert predictions to 0 or 1
        return np.where(decision_function <= 0, 0, 1)

In [27]:
# Train the SVMClassifier defined in the previous cell and display its test accuracy.
# NOTE(review): SVMClassifier is binary — fit() maps labels <= 0 to -1 and all other labels
# to +1, and predict() returns only 0 or 1 — while the confusion matrix printed earlier has
# three classes, so this accuracy compares binary predictions against multiclass labels.
svm=SVMClassifier()
svm.fit(X_train,y_train)
ypred=svm.predict(X_test)
acc3=accuracy_score(y_test,ypred)
acc3

0.2222222222222222

In [28]:
# Scale the features with the from-scratch StandardScaler2: fit_transform is called on the
# training split and transform (no refit) on the test split.
# `scaler` is rebound here; before this cell it held the MinMaxScaler.
from mlfromscratch import StandardScaler2
scaler = StandardScaler2()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
