In [1]:
# Collect the names of all .xlsx workbooks found in the 'cleaned-data' directory
import os
excel_files = list(filter(lambda name: name.endswith('.xlsx'), os.listdir('cleaned-data')))
print(excel_files)


['cleaned-lp1.xls( failure1).xlsx', 'cleaned-lp2.xls(failures in transfer of a part).xlsx', 'cleaned-lp3.xls(position of part after a transfer failure).xlsx', 'cleaned-lp4.txt(failures in approach to ungrasp position).xlsx', 'cleaned-lp5.txt( failures in motion with part).xlsx']


In [2]:
import pandas as pd
import numpy as np
# LP2: failures in transfer of a part (matches the workbook name loaded below)
# The path is a plain string literal; nothing is interpolated into it.
file_path = 'cleaned-data/cleaned-lp2.xls(failures in transfer of a part).xlsx'

# Load the Excel file into a DataFrame and preview the first rows
df = pd.read_excel(file_path)
print(df.head())



    class  Fx  Fy  Fz  Tx  Ty  Tz  class-oridnal
0  normal  -2  -1  81   0  -5   0              3
1  normal  -2  -1  79   0  -4   0              3
2  normal  -2  -1  79   0  -4   0              3
3  normal  -2  -1  80   0  -4   0              3
4  normal  -3  -1  79   1  -5   1              3


In [3]:
# Add a 'time' column: the row's position inside its 15-row window (0..14),
# restarting at 0 every 15 rows.
time = [row_number % 15 for row_number in range(len(df))]
df['time'] = time
print(df.head())



    class  Fx  Fy  Fz  Tx  Ty  Tz  class-oridnal  time
0  normal  -2  -1  81   0  -5   0              3     0
1  normal  -2  -1  79   0  -4   0              3     1
2  normal  -2  -1  79   0  -4   0              3     2
3  normal  -2  -1  80   0  -4   0              3     3
4  normal  -3  -1  79   1  -5   1              3     4


In [4]:
# Add an 'id' column that numbers each consecutive 15-row window (0, 1, 2, ...).
# The helper list is deliberately NOT called `id`, so the builtin id() stays
# usable for the rest of the kernel session.
window_ids = [row_number // 15 for row_number in range(len(df))]
df['id'] = window_ids
print(df.head())


    class  Fx  Fy  Fz  Tx  Ty  Tz  class-oridnal  time  id
0  normal  -2  -1  81   0  -5   0              3     0   0
1  normal  -2  -1  79   0  -4   0              3     1   0
2  normal  -2  -1  79   0  -4   0              3     2   0
3  normal  -2  -1  80   0  -4   0              3     3   0
4  normal  -3  -1  79   1  -5   1              3     4   0


In [5]:
# Drop rows with NaN values in the relevant columns.
# .copy() makes cleaned_df an independent frame, so the column assignments made
# to it in the following cells do not raise SettingWithCopyWarning.
# NOTE(review): Fz and Tz are not part of this NaN check although they are also
# converted and scaled below — confirm whether that is intentional.
cleaned_df = df.dropna(subset=['Fx', 'Fy', 'Tx', 'Ty']).copy()

In [6]:
# Cast the Fx, Fy, Fz, Tx, Ty and Tz columns to float
float_cols = ['Fx', 'Fy', 'Fz', 'Tx', 'Ty', 'Tz']
cleaned_df[float_cols] = cleaned_df[float_cols].astype(float)

In [7]:
# Min-max normalise the six measurement columns in place.
# NOTE(review): the scaler is fit on every row here, i.e. before the train/test
# split performed in a later cell, so test rows influence the scaling.
from sklearn.preprocessing import MinMaxScaler
scaled_cols = ['Fx', 'Fy', 'Fz', 'Tx', 'Ty', 'Tz']
scaler = MinMaxScaler()
cleaned_df[scaled_cols] = scaler.fit_transform(cleaned_df[scaled_cols])
print(cleaned_df.head())

    class        Fx        Fy        Fz        Tx        Ty        Tz  \
0  normal  0.407643  0.444079  0.202279  0.501185  0.269674  0.575472   
1  normal  0.407643  0.444079  0.196581  0.501185  0.270633  0.575472   
2  normal  0.407643  0.444079  0.196581  0.501185  0.270633  0.575472   
3  normal  0.407643  0.444079  0.199430  0.501185  0.270633  0.575472   
4  normal  0.404459  0.444079  0.196581  0.502370  0.269674  0.580189   

   class-oridnal  time  id  
0              3     0   0  
1              3     1   0  
2              3     2   0  
3              3     3   0  
4              3     4   0  


In [8]:
# Group the data by the 'id' and 'class' columns and compute the mean, median and
# std of each measurement column per group, plus the mean of the ordinal class
# label. The 'time' column is not aggregated.
agg_spec = {col: ['mean', 'median', 'std'] for col in ['Fx', 'Fy', 'Fz', 'Tx', 'Ty', 'Tz']}
agg_spec['class-oridnal'] = ['mean']
grouped_df = cleaned_df.groupby(['id', 'class']).agg(agg_spec).reset_index()
print(grouped_df.head())

  id   class        Fx                            Fy                      \
                  mean    median       std      mean    median       std   
0  0  normal  0.407431  0.407643  0.001458  0.444079  0.444079  0.000000   
1  1  normal  0.417410  0.404459  0.037479  0.440351  0.444079  0.019256   
2  2  normal  0.404459  0.404459  0.008075  0.442544  0.444079  0.018973   
3  3  normal  0.406582  0.407643  0.009605  0.436184  0.427632  0.021275   
4  4  normal  0.407856  0.407643  0.009665  0.437939  0.437500  0.020387   

         Fz            ...        Tx                            Ty            \
       mean    median  ...      mean    median       std      mean    median   
0  0.198101  0.196581  ...  0.501422  0.501185  0.000491  0.270377  0.270633   
1  0.197531  0.196581  ...  0.501817  0.498815  0.013381  0.272297  0.268714   
2  0.195252  0.193732  ...  0.499842  0.500000  0.014696  0.264811  0.263916   
3  0.202089  0.202279  ...  0.499605  0.504739  0.014102  0.261996 

In [9]:
# Flatten the two-level column labels into single names of the form
# "<column>_<statistic>". The grouping keys have an empty second level, so they
# come out as 'id_' and 'class_'.
# NOTE: run this cell once per grouped_df; on already-flat string labels the
# join would be applied to the characters of each name.
grouped_df.columns = list(map('_'.join, grouped_df.columns))
print(grouped_df.head())


   id_  class_   Fx_mean  Fx_median    Fx_std   Fy_mean  Fy_median    Fy_std  \
0    0  normal  0.407431   0.407643  0.001458  0.444079   0.444079  0.000000   
1    1  normal  0.417410   0.404459  0.037479  0.440351   0.444079  0.019256   
2    2  normal  0.404459   0.404459  0.008075  0.442544   0.444079  0.018973   
3    3  normal  0.406582   0.407643  0.009605  0.436184   0.427632  0.021275   
4    4  normal  0.407856   0.407643  0.009665  0.437939   0.437500  0.020387   

    Fz_mean  Fz_median  ...   Tx_mean  Tx_median    Tx_std   Ty_mean  \
0  0.198101   0.196581  ...  0.501422   0.501185  0.000491  0.270377   
1  0.197531   0.196581  ...  0.501817   0.498815  0.013381  0.272297   
2  0.195252   0.193732  ...  0.499842   0.500000  0.014696  0.264811   
3  0.202089   0.202279  ...  0.499605   0.504739  0.014102  0.261996   
4  0.195442   0.190883  ...  0.495656   0.495261  0.013822  0.260269   

   Ty_median    Ty_std   Tz_mean  Tz_median    Tz_std  class-oridnal_mean  
0   0.2706

In [10]:
# View up to three rows from each class (ordinal labels 0 through 4)
for class_code in range(5):
    print(grouped_df[grouped_df['class-oridnal_mean'] == class_code].head(3))

    id_    class_   Fx_mean  Fx_median    Fx_std   Fy_mean  Fy_median  \
20   20  back_col  0.491083   0.442675  0.221418  0.443860   0.440789   
36   36  back_col  0.403822   0.407643  0.023502  0.437939   0.440789   
40   40  back_col  0.508917   0.417197  0.206074  0.478728   0.440789   

      Fy_std   Fz_mean  Fz_median  ...   Tx_mean  Tx_median    Tx_std  \
20  0.033855  0.217854   0.205128  ...  0.482780   0.481043  0.025537   
36  0.023520  0.211396   0.210826  ...  0.483491   0.485782  0.018715   
40  0.100530  0.223552   0.210826  ...  0.496367   0.488152  0.084287   

     Ty_mean  Ty_median    Ty_std   Tz_mean  Tz_median    Tz_std  \
20  0.296353   0.271593  0.105645  0.535220   0.537736  0.024175   
36  0.255086   0.256238  0.011704  0.533962   0.533019  0.017212   
40  0.300448   0.261036  0.081654  0.535849   0.537736  0.040057   

    class-oridnal_mean  
20                 0.0  
36                 0.0  
40                 0.0  

[3 rows x 21 columns]
    id_     class_

In [11]:
# Build the feature matrix / target vector and split them into train and test sets
from mlfromscratch import train_test_split_2
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# Features are every aggregated statistic; the target and the two grouping keys
# are excluded from X.
non_feature_cols = ['class-oridnal_mean', 'id_', 'class_']
X = grouped_df.drop(columns=non_feature_cols)
y = grouped_df['class-oridnal_mean']
# The project-local splitter is used. The scikit-learn alternative that was tried:
# train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = train_test_split_2(X, y, random_state=50)


In [12]:
# Fit the from-scratch multiclass logistic regression and evaluate it on the test set
from sklearn.metrics import confusion_matrix,accuracy_score
from mlfromscratch import LogisticRegressionMultiClass

model = LogisticRegressionMultiClass()

# The targets are handed to the model as an integer numpy array
train_labels = y_train.to_numpy().astype(int)
model.fit(X_train, train_labels)
ypred = model.predict(X_test)

# NOTE(review): in the saved run every prediction falls in a single column of
# the confusion matrix (one class predicted for all test samples) — worth
# checking the model's training settings.
cm = confusion_matrix(y_test, ypred)
acc = accuracy_score(y_test, ypred)
print(cm, acc, sep='\n')

[[0 0 2 0]
 [0 0 1 0]
 [0 0 5 0]
 [0 0 1 0]]
0.5555555555555556


In [13]:
# Baseline: scikit-learn logistic regression with its default settings
from sklearn.linear_model import LogisticRegression
model = LogisticRegression()
model.fit(X=X_train, y=y_train)


In [14]:
# Score the most recently fitted `model` on the test set
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Accuracy: {accuracy}')


Accuracy: 0.5555555555555556


In [15]:
# Build a decision tree model to predict the class.
# random_state is fixed so the fitted tree (and the accuracy reported in the
# next cell) is the same on every Restart & Run All; without it scikit-learn
# permutes the candidate features randomly at each split.
from sklearn.tree import DecisionTreeClassifier
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)


In [16]:
# Score the most recently fitted `model` on the test set
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Accuracy: {accuracy}')

Accuracy: 1.0


In [17]:
# Train and evaluate the from-scratch decision tree.

from mlfromscratch import DecisionTree
# Convert the feature sets to float64 numpy arrays before fitting.
# np.array accepts a DataFrame as well as an ndarray, so this cell can be
# re-run safely (calling .values would fail on a re-run, when X_train is already
# an ndarray). From this cell on, X_train / X_test are plain arrays for every
# later cell.
X_train = np.array(X_train, dtype=np.float64)
X_test = np.array(X_test, dtype=np.float64)
DT_cl = DecisionTree()
DT_cl.fit(X_train, y_train)
y_pred_2 = DT_cl.predict(X_test)
acc = accuracy_score(y_test, y_pred_2)
acc

0.8888888888888888

In [18]:
# Build a random forest model to predict the class.
# random_state is fixed so the bootstrap samples and feature subsets — and
# therefore the accuracy reported in the next cell — are reproducible.
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)


In [19]:
# Score the most recently fitted `model` on the test set
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Accuracy: {accuracy}')


Accuracy: 0.8888888888888888


In [20]:
# Build a k-nearest-neighbours model (default settings) to predict the class
from sklearn.neighbors import KNeighborsClassifier
model = KNeighborsClassifier()
model.fit(X=X_train, y=y_train)


In [21]:
# numpy and pandas are re-imported here; to upgrade them first, uncomment:
# %pip install --upgrade numpy pandas
import numpy as np
import pandas as pd

# Score the most recently fitted `model` on the test set
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Accuracy: {accuracy}')

Accuracy: 0.7777777777777778


In [22]:
# Train and score the from-scratch KNN classifier on the same split
from mlfromscratch import KNNClassifier
knn_cl = KNNClassifier()
knn_cl.fit(X_train, y_train)
ypred = knn_cl.predict(X_test)
acc = accuracy_score(y_true=y_test, y_pred=ypred)
acc

0.8888888888888888

In [23]:
# Build a Gaussian naive Bayes model to predict the class
from sklearn.naive_bayes import GaussianNB
model = GaussianNB()
model.fit(X=X_train, y=y_train)


In [24]:
# Score the most recently fitted `model` on the test set
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Accuracy: {accuracy}')


Accuracy: 0.8888888888888888


In [25]:
# Train and score the from-scratch Gaussian naive Bayes classifier.
from sklearn.metrics import accuracy_score
from mlfromscratch import GaussianNaiveBayes
# The from-scratch model needs numpy-array inputs. X_train / X_test were already
# converted to float64 arrays in the from-scratch decision tree cell above, so
# the conversion is not repeated here.
NB = GaussianNaiveBayes()
NB.fit(X_train, y_train)
y_pred = NB.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Accuracy: {accuracy}')


Accuracy: 0.8888888888888888


  likelihood = np.sum(np.log(self._pdf(idx, x)))
