In [1]:
#get all excel files in the data directory
import os
excel_files = [f for f in os.listdir('cleaned-data') if f.endswith('.xlsx')]
print(excel_files)


['cleaned-lp1.xls( failure1).xlsx', 'cleaned-lp2.xls(failures in transfer of a part).xlsx', 'cleaned-lp3.xls(position of part after a transfer failure).xlsx', 'cleaned-lp4.txt(failures in approach to ungrasp position).xlsx', 'cleaned-lp5.txt( failures in motion with part).xlsx']


In [2]:
import pandas as pd
import numpy as np
#LP1: Failures in Approach to Grasp Position
# Method 1: Using a raw string
file_path = f'cleaned-data/cleaned-lp5.txt( failures in motion with part).xlsx'

# Load the Excel file
df = pd.read_excel(file_path)
print(df.head())



    class  Fx  Fy  Fz  Tx  Ty  Tz  class-oridnal
0  normal  -2  -1  81   0  -5   0              4
1  normal  -2  -1  79   0  -4   0              4
2  normal  -2  -1  79   0  -4   0              4
3  normal  -2  -1  80   0  -4   0              4
4  normal  -3  -1  79   1  -5   1              4


In [3]:
#add time column where it restarts each 15 samples
time = []
for i in range(len(df)):
    time.append(i%15)
df['time'] = time
print(df.head())



    class  Fx  Fy  Fz  Tx  Ty  Tz  class-oridnal  time
0  normal  -2  -1  81   0  -5   0              4     0
1  normal  -2  -1  79   0  -4   0              4     1
2  normal  -2  -1  79   0  -4   0              4     2
3  normal  -2  -1  80   0  -4   0              4     3
4  normal  -3  -1  79   1  -5   1              4     4


In [4]:
#add id column where it identifies each 15 samples
id = []
for i in range(len(df)):
    id.append(i//15)
df['id'] = id
print(df.head())


    class  Fx  Fy  Fz  Tx  Ty  Tz  class-oridnal  time  id
0  normal  -2  -1  81   0  -5   0              4     0   0
1  normal  -2  -1  79   0  -4   0              4     1   0
2  normal  -2  -1  79   0  -4   0              4     2   0
3  normal  -2  -1  80   0  -4   0              4     3   0
4  normal  -3  -1  79   1  -5   1              4     4   0


In [5]:
# Drop rows with NaN values in the relevant columns
cleaned_df = df.dropna(subset=['Fx', 'Fy', 'Tx', 'Ty'])

In [6]:
# Convert columns to float
cleaned_df[['Fx', 'Fy', 'Fz', 'Tx', 'Ty', 'Tz']] = cleaned_df[['Fx', 'Fy', 'Fz', 'Tx', 'Ty', 'Tz']].astype(float)

In [7]:
#normalizing the data
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
cleaned_df[['Fx', 'Fy','Fz', 'Tx', 'Ty','Tz']] = scaler.fit_transform(cleaned_df[['Fx', 'Fy', 'Fz', 'Tx', 'Ty', 'Tz']])
print(cleaned_df.head())

    class        Fx        Fy        Fz        Tx        Ty        Tz  \
0  normal  0.655019  0.464634  0.916481  0.447826  0.486158  0.504065   
1  normal  0.655019  0.464634  0.915985  0.447826  0.486833  0.504065   
2  normal  0.655019  0.464634  0.915985  0.447826  0.486833  0.504065   
3  normal  0.655019  0.464634  0.916233  0.447826  0.486833  0.504065   
4  normal  0.654275  0.464634  0.915985  0.448370  0.486158  0.506098   

   class-oridnal  time  id  
0              4     0   0  
1              4     1   0  
2              4     2   0  
3              4     3   0  
4              4     4   0  


In [8]:
# I want to group the data by the 'id','class' column and calculate the mean, median, std, etc of each group without including the 'time' ans 'class columns in the calculation 
grouped_df = cleaned_df.groupby(['id','class']).agg({'Fx': ['mean', 'median', 'std'],
                                                        'Fy': ['mean', 'median', 'std'],
                                                        'Fz': ['mean', 'median', 'std'],
                                                        'Tx': ['mean', 'median', 'std'],
                                                        'Ty': ['mean', 'median', 'std'],
                                                        'Tz': ['mean', 'median', 'std'],
                                                        'class-oridnal':['mean']}).reset_index()
print(grouped_df.head())

  id   class        Fx                            Fy                      \
                  mean    median       std      mean    median       std   
0  0  normal  0.654969  0.655019  0.000340  0.464634  0.464634  0.000000   
1  1  normal  0.657299  0.654275  0.008750  0.463252  0.464634  0.007139   
2  2  normal  0.654275  0.654275  0.001885  0.464065  0.464634  0.007034   
3  3  normal  0.654771  0.655019  0.002242  0.461707  0.458537  0.007887   
4  4  normal  0.655068  0.655019  0.002256  0.462358  0.462195  0.007558   

         Fz            ...        Tx                            Ty            \
       mean    median  ...      mean    median       std      mean    median   
0  0.916117  0.915985  ...  0.447935  0.447826  0.000225  0.486653  0.486833   
1  0.916068  0.915985  ...  0.448116  0.446739  0.006138  0.488004  0.485483   
2  0.915869  0.915737  ...  0.447210  0.447283  0.006741  0.482737  0.482107   
3  0.916464  0.916481  ...  0.447101  0.449457  0.006469  0.480756 

In [9]:
#flat the columns names
grouped_df.columns = ['_'.join(col) for col in grouped_df.columns]
print(grouped_df.head())


   id_  class_   Fx_mean  Fx_median    Fx_std   Fy_mean  Fy_median    Fy_std  \
0    0  normal  0.654969   0.655019  0.000340  0.464634   0.464634  0.000000   
1    1  normal  0.657299   0.654275  0.008750  0.463252   0.464634  0.007139   
2    2  normal  0.654275   0.654275  0.001885  0.464065   0.464634  0.007034   
3    3  normal  0.654771   0.655019  0.002242  0.461707   0.458537  0.007887   
4    4  normal  0.655068   0.655019  0.002256  0.462358   0.462195  0.007558   

    Fz_mean  Fz_median  ...   Tx_mean  Tx_median    Tx_std   Ty_mean  \
0  0.916117   0.915985  ...  0.447935   0.447826  0.000225  0.486653   
1  0.916068   0.915985  ...  0.448116   0.446739  0.006138  0.488004   
2  0.915869   0.915737  ...  0.447210   0.447283  0.006741  0.482737   
3  0.916464   0.916481  ...  0.447101   0.449457  0.006469  0.480756   
4  0.915886   0.915489  ...  0.445290   0.445109  0.006340  0.479541   

   Ty_median    Ty_std   Tz_mean  Tz_median    Tz_std  class-oridnal_mean  
0   0.4868

In [10]:
# view a raw from each class_
print(grouped_df[grouped_df['class-oridnal_mean']==0].head(3))
print(grouped_df[grouped_df['class-oridnal_mean']==1].head(3))
print(grouped_df[grouped_df['class-oridnal_mean']==2].head(3))
print(grouped_df[grouped_df['class-oridnal_mean']==3].head(3))
print(grouped_df[grouped_df['class-oridnal_mean']==4].head(3))

     id_            class_   Fx_mean  Fx_median    Fx_std   Fy_mean  \
101  101  bottom_collision  0.654919   0.657993  0.011718  0.450325   
106  106  bottom_collision  0.667361   0.669888  0.021771  0.444146   
107  107  bottom_collision  0.664634   0.659480  0.018342  0.443496   

     Fy_median    Fy_std   Fz_mean  Fz_median  ...   Tx_mean  Tx_median  \
101   0.463415  0.032806  0.869294   0.898141  ...  0.432246   0.445652   
106   0.430488  0.045607  0.834895   0.856010  ...  0.453007   0.430435   
107   0.435366  0.026044  0.828335   0.848079  ...  0.467971   0.486413   

       Tx_std   Ty_mean  Ty_median    Ty_std   Tz_mean  Tz_median    Tz_std  \
101  0.037223  0.506415   0.493585  0.048821  0.503388   0.500000  0.007859   
106  0.038514  0.471168   0.481431  0.045982  0.494038   0.520325  0.084614   
107  0.047748  0.520414   0.512492  0.091965  0.487534   0.495935  0.027708   

     class-oridnal_mean  
101                 0.0  
106                 0.0  
107                

In [11]:
#build logistic regression model to predict the class
from mlfromscratch import train_test_split_2
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
X = grouped_df.drop(columns=['class-oridnal_mean','id_','class_'])
y = grouped_df['class-oridnal_mean']
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test=train_test_split_2(X,y,random_state=50)


In [12]:
from mlfromscratch import LogisticRegressionMultiClass

model = LogisticRegressionMultiClass()

model.fit(X_train, y_train.to_numpy().astype(int))
ypred=model.predict(X_test)
from sklearn.metrics import confusion_matrix,accuracy_score
cm=confusion_matrix(y_test,ypred)
acc=accuracy_score(y_test,ypred)
print(cm)
print(acc)

[[ 0  0  0  0  5]
 [ 0  0  0  0  4]
 [ 0  0  0  0 14]
 [ 0  0  0  0  5]
 [ 0  0  0  0  4]]
0.125


In [13]:
#build logistic regression model to predict the class
from sklearn.linear_model import LogisticRegression
model = LogisticRegression()
model.fit(X_train, y_train)


In [14]:
# Evaluate the model

y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')


Accuracy: 0.15625


In [15]:
#build decision tree model to predict the class
from sklearn.tree import DecisionTreeClassifier
model = DecisionTreeClassifier()
model.fit(X_train, y_train)


In [16]:
# Evaluate the model
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')

Accuracy: 0.4375


In [17]:
# Convert to numpy array and ensure float type

from mlfromscratch import DecisionTree
X_train = X_train.values.astype(np.float64)
X_test = X_test.values.astype(np.float64)
DT_cl = DecisionTree()
DT_cl.fit(X_train, y_train)
y_pred_2 = DT_cl.predict(X_test)
acc = accuracy_score(y_test, y_pred_2)
acc

0.46875

In [18]:
#build random forest model to predict the class
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier()
model.fit(X_train, y_train)


In [19]:
# Evaluate the model
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')


Accuracy: 0.5625


In [20]:
#build KNN model to predict the class
from sklearn.neighbors import KNeighborsClassifier
model = KNeighborsClassifier()
model.fit(X_train, y_train)


In [21]:
# Ensure numpy and pandas are up to date
# %pip install --upgrade numpy pandas
import numpy as np
import pandas as pd

# Evaluate the model
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')

Accuracy: 0.625


In [22]:
from mlfromscratch import KNNClassifier
knn_cl=KNNClassifier()
knn_cl.fit(X_train,y_train)
ypred=knn_cl.predict(X_test)
acc=accuracy_score(y_test,ypred)
acc

0.65625

In [23]:
#build Bayesian model to predict the class
from sklearn.naive_bayes import GaussianNB
model = GaussianNB()
model.fit(X_train, y_train)


In [24]:
# Evaluate the model
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')


Accuracy: 0.4375


In [25]:
from sklearn.metrics import accuracy_score
# Convert to numpy array and ensure float type
#its required to convert the data to numpy array before using the model
# X_train = X_train.values.astype(np.float64)
# X_test = X_test.values.astype(np.float64)
from mlfromscratch import GaussianNaiveBayes
NB = GaussianNaiveBayes()
NB.fit(X_train, y_train)
y_pred = NB.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')


Accuracy: 0.4375


  likelihood = np.sum(np.log(self._pdf(idx, x)))


In [26]:
import numpy as np

class SVMClassifier:
    def __init__(self, learning_rate=0.01, lambda_param=0.01, n_iters=4000):
        """
        Initialize the SVM Classifier.

        Parameters:
        - learning_rate: Step size for gradient descent.
        - lambda_param: Regularization parameter.
        - n_iters: Number of iterations for training.
        """
        self.learning_rate = learning_rate
        self.lambda_param = lambda_param
        self.n_iters = n_iters
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        """
        Train the SVM Classifier using Stochastic Gradient Descent (SGD).

        Parameters:
        - X: Training features (n_samples, n_features). Can be a NumPy array or pandas DataFrame.
        - y: Target labels (n_samples,). Can be a NumPy array or pandas Series.
        """
        # Convert inputs to NumPy arrays if they are pandas DataFrames/Series
        if isinstance(X, pd.DataFrame):
            X = X.values
        if isinstance(y, pd.Series):
            y = y.values

        n_samples, n_features = X.shape

        # Initialize weights and bias
        self.weights = np.zeros(n_features)
        self.bias = 0

        # Convert labels to -1 and 1 for SVM
        y_ = np.where(y <= 0, -1, 1)

        # Stochastic Gradient Descent
        for _ in range(self.n_iters):
            for i in range(n_samples):
                # Check if the sample is correctly classified
                condition = y_[i] * (np.dot(X[i], self.weights) - self.bias) >= 1

                # Update weights and bias based on the hinge loss
                if condition:
                    self.weights -= self.learning_rate * (2 * self.lambda_param * self.weights)
                else:
                    self.weights -= self.learning_rate * (
                        2 * self.lambda_param * self.weights - np.dot(X[i], y_[i])
                    )
                    self.bias -= self.learning_rate * y_[i]

    def predict(self, X):
        """
        Predict the class labels for the test data.

        Parameters:
        - X: Test features (n_samples, n_features). Can be a NumPy array or pandas DataFrame.

        Returns:
        - Predicted class labels (n_samples,) as 0 or 1.
        """
        # Convert input to NumPy array if it is a pandas DataFrame
        if isinstance(X, pd.DataFrame):
            X = X.values

        # Compute the decision function
        decision_function = np.dot(X, self.weights) - self.bias

        # Convert predictions to 0 or 1
        return np.where(decision_function <= 0, 0, 1)

In [27]:
svm=SVMClassifier()
svm.fit(X_train,y_train)
ypred=svm.predict(X_test)
acc3=accuracy_score(y_test,ypred)
acc3

0.125

In [28]:
from mlfromscratch import StandardScaler2
scaler = StandardScaler2()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
