In [1]:
#get all excel files in the data directory
import os
excel_files = [f for f in os.listdir('cleaned-data') if f.endswith('.xlsx')]
print(excel_files)


['cleaned-lp1.xls( failure1).xlsx', 'cleaned-lp3.xls(position of part after a transfer failure).xlsx', 'cleaned-lp4.txt(failures in approach to ungrasp position).xlsx', 'cleaned-lp5.txt( failures in motion with part).xlsx', 'lp2_cleaned.xlsx']


In [2]:
import pandas as pd
import numpy as np
#LP1: Failures in Approach to Grasp Position
# Method 1: Using a raw string
file_path = f'cleaned-data/{excel_files[0]}'

# Load the Excel file
df = pd.read_excel(file_path)
print(df.head())



    class  Fx  Fy  Fz  Tx  Ty  Tz  class-oridnal
0  normal  -1  -1  63  -3  -1   0              2
1  normal   0   0  62  -3  -1   0              2
2  normal  -1  -1  61  -3   0   0              2
3  normal  -1  -1  63  -2  -1   0              2
4  normal  -1  -1  63  -3  -1   0              2


In [3]:
#add time column where it restarts each 15 samples
time = []
for i in range(len(df)):
    time.append(i%15)
df['time'] = time
print(df.head())



    class  Fx  Fy  Fz  Tx  Ty  Tz  class-oridnal  time
0  normal  -1  -1  63  -3  -1   0              2     0
1  normal   0   0  62  -3  -1   0              2     1
2  normal  -1  -1  61  -3   0   0              2     2
3  normal  -1  -1  63  -2  -1   0              2     3
4  normal  -1  -1  63  -3  -1   0              2     4


In [4]:
#add id column where it identifies each 15 samples
id = []
for i in range(len(df)):
    id.append(i//15)
df['id'] = id
print(df.head())


    class  Fx  Fy  Fz  Tx  Ty  Tz  class-oridnal  time  id
0  normal  -1  -1  63  -3  -1   0              2     0   0
1  normal   0   0  62  -3  -1   0              2     1   0
2  normal  -1  -1  61  -3   0   0              2     2   0
3  normal  -1  -1  63  -2  -1   0              2     3   0
4  normal  -1  -1  63  -3  -1   0              2     4   0


In [5]:
# Drop rows with NaN values in the relevant columns
cleaned_df = df.dropna(subset=['Fx', 'Fy', 'Tx', 'Ty'])

In [6]:
# Convert columns to float
cleaned_df[['Fx', 'Fy', 'Fz', 'Tx', 'Ty', 'Tz']] = cleaned_df[['Fx', 'Fy', 'Fz', 'Tx', 'Ty', 'Tz']].astype(float)

In [7]:
#normalizing the data
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
cleaned_df[['Fx', 'Fy','Fz', 'Tx', 'Ty','Tz']] = scaler.fit_transform(cleaned_df[['Fx', 'Fy', 'Fz', 'Tx', 'Ty', 'Tz']])
print(cleaned_df.head())

    class        Fx        Fy        Fz        Tx        Ty        Tz  \
0  normal  0.430233  0.597623  0.944836  0.492636  0.517241  0.526923   
1  normal  0.431894  0.599321  0.944249  0.492636  0.517241  0.526923   
2  normal  0.430233  0.597623  0.943662  0.492636  0.518043  0.526923   
3  normal  0.430233  0.597623  0.944836  0.493373  0.517241  0.526923   
4  normal  0.430233  0.597623  0.944836  0.492636  0.517241  0.526923   

   class-oridnal  time  id  
0              2     0   0  
1              2     1   0  
2              2     2   0  
3              2     3   0  
4              2     4   0  


In [8]:
# I want to group the data by the 'id','class' column and calculate the mean, median, std, etc of each group without including the 'time' ans 'class columns in the calculation 
grouped_df = cleaned_df.groupby(['id','class']).agg({'Fx': ['mean', 'median', 'std'],
                                                        'Fy': ['mean', 'median', 'std'],
                                                        'Fz': ['mean', 'median', 'std'],
                                                        'Tx': ['mean', 'median', 'std'],
                                                        'Ty': ['mean', 'median', 'std'],
                                                        'Tz': ['mean', 'median', 'std'],
                                                        'class-oridnal':['mean']}).reset_index()
print(grouped_df.head())

  id   class        Fx                            Fy                      \
                  mean    median       std      mean    median       std   
0  0  normal  0.430343  0.430233  0.000429  0.597849  0.597623  0.000597   
1  1  normal  0.430454  0.430233  0.001645  0.598189  0.597623  0.003778   
2  2  normal  0.430786  0.430233  0.001025  0.598415  0.599321  0.002712   
3  3  normal  0.431229  0.431894  0.001637  0.599547  0.601019  0.003507   
4  4  normal  0.430897  0.430233  0.001512  0.598868  0.599321  0.003042   

         Fz            ...        Tx                            Ty            \
       mean    median  ...      mean    median       std      mean    median   
0  0.944562  0.944836  ...  0.492734  0.492636  0.000259  0.517509  0.517241   
1  0.944327  0.944836  ...  0.492243  0.492636  0.002609  0.516974  0.517241   
2  0.943740  0.943662  ...  0.491900  0.491163  0.002007  0.516493  0.516439   
3  0.944366  0.944836  ...  0.490280  0.490427  0.002687  0.517188 

In [9]:
#flat the columns names
grouped_df.columns = ['_'.join(col) for col in grouped_df.columns]
print(grouped_df.head())


   id_  class_   Fx_mean  Fx_median    Fx_std   Fy_mean  Fy_median    Fy_std  \
0    0  normal  0.430343   0.430233  0.000429  0.597849   0.597623  0.000597   
1    1  normal  0.430454   0.430233  0.001645  0.598189   0.597623  0.003778   
2    2  normal  0.430786   0.430233  0.001025  0.598415   0.599321  0.002712   
3    3  normal  0.431229   0.431894  0.001637  0.599547   0.601019  0.003507   
4    4  normal  0.430897   0.430233  0.001512  0.598868   0.599321  0.003042   

    Fz_mean  Fz_median  ...   Tx_mean  Tx_median    Tx_std   Ty_mean  \
0  0.944562   0.944836  ...  0.492734   0.492636  0.000259  0.517509   
1  0.944327   0.944836  ...  0.492243   0.492636  0.002609  0.516974   
2  0.943740   0.943662  ...  0.491900   0.491163  0.002007  0.516493   
3  0.944366   0.944836  ...  0.490280   0.490427  0.002687  0.517188   
4  0.943427   0.942488  ...  0.489691   0.488954  0.002101  0.515798   

   Ty_median    Ty_std   Tz_mean  Tz_median    Tz_std  class-oridnal_mean  
0   0.5172

In [10]:
# view a raw from each class_
print(grouped_df[grouped_df['class-oridnal_mean']==0].head(3))
print(grouped_df[grouped_df['class-oridnal_mean']==1].head(3))
print(grouped_df[grouped_df['class-oridnal_mean']==2].head(3))
print(grouped_df[grouped_df['class-oridnal_mean']==3].head(3))
print(grouped_df[grouped_df['class-oridnal_mean']==4].head(3))

    id_     class_   Fx_mean  Fx_median    Fx_std   Fy_mean  Fy_median  \
18   18  collision  0.442857   0.433555  0.032791  0.604527   0.602716   
19   19  collision  0.449059   0.430233  0.080469  0.610187   0.599321   
20   20  collision  0.412957   0.430233  0.053543  0.594228   0.597623   

      Fy_std   Fz_mean  Fz_median  ...   Tx_mean  Tx_median    Tx_std  \
18  0.027319  0.944249   0.944249  ...  0.481934   0.490427  0.038639   
19  0.046508  0.946009   0.943662  ...  0.482769   0.491163  0.034018   
20  0.023717  0.946518   0.945423  ...  0.485223   0.494109  0.034241   

     Ty_mean  Ty_median    Ty_std   Tz_mean  Tz_median    Tz_std  \
18  0.528629   0.518845  0.030654  0.528462   0.526923  0.003791   
19  0.536808   0.514836  0.078283  0.529487   0.526923  0.012392   
20  0.524993   0.515638  0.062320  0.547436   0.526923  0.099647   

    class-oridnal_mean  
18                 0.0  
19                 0.0  
20                 0.0  

[3 rows x 21 columns]
    id_       

In [11]:
#build logistic regression model to predict the class
from mlfromscratch import train_test_split_2
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
X = grouped_df.drop(columns=['class-oridnal_mean','id_','class_'])
y = grouped_df['class-oridnal_mean']
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test=train_test_split_2(X,y,random_state=50)


In [12]:
from mlfromscratch import LogisticRegressionMultiClass

model = LogisticRegressionMultiClass()

model.fit(X_train, y_train.to_numpy().astype(int))
ypred=model.predict(X_test)
from sklearn.metrics import confusion_matrix,accuracy_score
cm=confusion_matrix(y_test,ypred)
acc=accuracy_score(y_test,ypred)
print(cm)
print(acc)

[[0 0 1 1]
 [0 0 1 2]
 [0 0 5 0]
 [0 0 0 7]]
0.7058823529411765


In [13]:
#build logistic regression model to predict the class
from sklearn.linear_model import LogisticRegression
model = LogisticRegression()
model.fit(X_train, y_train)


In [14]:
# Evaluate the model

y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')


Accuracy: 0.7058823529411765


In [15]:
#build decision tree model to predict the class
from sklearn.tree import DecisionTreeClassifier
model = DecisionTreeClassifier()
model.fit(X_train, y_train)


In [16]:
# Evaluate the model
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')

Accuracy: 1.0


In [17]:
# Convert to numpy array and ensure float type

from mlfromscratch import DecisionTree
X_train = X_train.values.astype(np.float64)
X_test = X_test.values.astype(np.float64)
DT_cl = DecisionTree()
DT_cl.fit(X_train, y_train)
y_pred_2 = DT_cl.predict(X_test)
acc = accuracy_score(y_test, y_pred_2)
acc

0.9411764705882353

In [18]:
#build random forest model to predict the class
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier()
model.fit(X_train, y_train)


In [19]:
# Evaluate the model
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')


Accuracy: 1.0


In [20]:
#build KNN model to predict the class
from sklearn.neighbors import KNeighborsClassifier
model = KNeighborsClassifier()
model.fit(X_train, y_train)


In [21]:
# Ensure numpy and pandas are up to date
# %pip install --upgrade numpy pandas
import numpy as np
import pandas as pd

# Evaluate the model
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')

Accuracy: 0.9411764705882353


In [22]:
from mlfromscratch import KNNClassifier
knn_cl=KNNClassifier()
knn_cl.fit(X_train,y_train)
ypred=knn_cl.predict(X_test)
acc=accuracy_score(y_test,ypred)
acc

0.9411764705882353

In [23]:
#build Bayesian model to predict the class
from sklearn.naive_bayes import GaussianNB
model = GaussianNB()
model.fit(X_train, y_train)


In [24]:
# Evaluate the model
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')


Accuracy: 1.0


In [25]:
from sklearn.metrics import accuracy_score
# Convert to numpy array and ensure float type
#its required to convert the data to numpy array before using the model
# X_train = X_train.values.astype(np.float64)
# X_test = X_test.values.astype(np.float64)
from mlfromscratch import GaussianNaiveBayes
NB = GaussianNaiveBayes()
NB.fit(X_train, y_train)
y_pred = NB.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')


Accuracy: 1.0


  likelihood = np.sum(np.log(self._pdf(idx, x)))


In [26]:
import numpy as np

class SVMClassifier:
    def __init__(self, learning_rate=0.01, lambda_param=0.01, n_iters=4000):
        """
        Initialize the SVM Classifier.

        Parameters:
        - learning_rate: Step size for gradient descent.
        - lambda_param: Regularization parameter.
        - n_iters: Number of iterations for training.
        """
        self.learning_rate = learning_rate
        self.lambda_param = lambda_param
        self.n_iters = n_iters
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        """
        Train the SVM Classifier using Stochastic Gradient Descent (SGD).

        Parameters:
        - X: Training features (n_samples, n_features). Can be a NumPy array or pandas DataFrame.
        - y: Target labels (n_samples,). Can be a NumPy array or pandas Series.
        """
        # Convert inputs to NumPy arrays if they are pandas DataFrames/Series
        if isinstance(X, pd.DataFrame):
            X = X.values
        if isinstance(y, pd.Series):
            y = y.values

        n_samples, n_features = X.shape

        # Initialize weights and bias
        self.weights = np.zeros(n_features)
        self.bias = 0

        # Convert labels to -1 and 1 for SVM
        y_ = np.where(y <= 0, -1, 1)

        # Stochastic Gradient Descent
        for _ in range(self.n_iters):
            for i in range(n_samples):
                # Check if the sample is correctly classified
                condition = y_[i] * (np.dot(X[i], self.weights) - self.bias) >= 1

                # Update weights and bias based on the hinge loss
                if condition:
                    self.weights -= self.learning_rate * (2 * self.lambda_param * self.weights)
                else:
                    self.weights -= self.learning_rate * (
                        2 * self.lambda_param * self.weights - np.dot(X[i], y_[i])
                    )
                    self.bias -= self.learning_rate * y_[i]

    def predict(self, X):
        """
        Predict the class labels for the test data.

        Parameters:
        - X: Test features (n_samples, n_features). Can be a NumPy array or pandas DataFrame.

        Returns:
        - Predicted class labels (n_samples,) as 0 or 1.
        """
        # Convert input to NumPy array if it is a pandas DataFrame
        if isinstance(X, pd.DataFrame):
            X = X.values

        # Compute the decision function
        decision_function = np.dot(X, self.weights) - self.bias

        # Convert predictions to 0 or 1
        return np.where(decision_function <= 0, 0, 1)

In [27]:
svm=SVMClassifier()
svm.fit(X_train,y_train)
ypred=svm.predict(X_test)
acc3=accuracy_score(y_test,ypred)
acc3

0.17647058823529413

In [28]:
from mlfromscratch import StandardScaler2
scaler = StandardScaler2()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
