# Importing Libraries for Data Engineering

In [1]:
import time
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, f1_score, accuracy_score, classification_report

# Loading Dataset

In [2]:
train_data = pd.read_excel('train.xlsx')
train_data.sample(5)

Unnamed: 0,Feature 1,Feature 2,Feature 3,Feature 4,Classification
1188,0.26462,0.0,0.0,0.0,2
1789,0.26462,0.0,0.0,0.0,2
2566,0.26462,0.0,0.0,0.0,2
3351,2.646203,2.381053,0.773102,4.0482,2
2897,0.26462,0.0,0.0,0.0,2


In [3]:
print("Unique Dependent values and their counts :")
print(train_data["Classification"].value_counts())

Unique Dependent values and their counts :
2    3572
1     448
3     339
Name: Classification, dtype: int64


In [4]:
test_data = pd.read_excel('test.xlsx')
test_data.sample(5)

Unnamed: 0,Feature 1,Feature 2,Feature 3,Feature 4,Classification
307,1.0,0.0,0.0,0.0,2
608,8.0,13.398,0.851487,11.21915,2
243,2.0,1.96,0.866025,0.9604,2
287,11.0,15.366,0.968783,10.732453,2
851,86.0,59.198,0.879203,20.374437,2


In [5]:
print("Unique Dependent values and their counts :")
print(test_data["Classification"].value_counts())

Unique Dependent values and their counts :
2    925
1    113
3     88
Name: Classification, dtype: int64


In [6]:
train_data.shape

(4359, 5)

In [7]:
test_data.shape

(1126, 5)

In [8]:
import pandas as pd

# Assuming you have two DataFrames: train_data and test_data

# Concatenate the two DataFrames vertically (along rows) and reset the index
concatenated_data = pd.concat([train_data, test_data], axis=0).reset_index(drop=True)

# Now, concatenated_data contains both datasets with a new index


In [9]:
concatenated_data

Unnamed: 0,Feature 1,Feature 2,Feature 3,Feature 4,Classification
0,2092.087854,23.806033,0.572817,0.511848,1
1,2921.937020,11.507012,0.690520,0.085625,1
2,3726.911881,95.744377,0.664582,4.647553,1
3,2371.526859,90.953427,0.377896,6.591086,1
4,2900.767399,11.507012,0.690680,0.086250,1
...,...,...,...,...,...
5480,5393.754962,147.720561,0.430699,7.644297,3
5481,5394.813443,147.720561,0.433569,7.642798,3
5482,5422.069331,147.720561,0.433508,7.604379,3
5483,5410.955279,147.720561,0.432639,7.619998,3


In [10]:
# Assuming concatenated_data is your concatenated DataFrame
X = concatenated_data.iloc[:, :-1]  # All columns except the last one
y = concatenated_data.iloc[:, -1]   # The last column

# Now you have X as your features and y as your target variable


In [11]:
X

Unnamed: 0,Feature 1,Feature 2,Feature 3,Feature 4
0,2092.087854,23.806033,0.572817,0.511848
1,2921.937020,11.507012,0.690520,0.085625
2,3726.911881,95.744377,0.664582,4.647553
3,2371.526859,90.953427,0.377896,6.591086
4,2900.767399,11.507012,0.690680,0.086250
...,...,...,...,...
5480,5393.754962,147.720561,0.430699,7.644297
5481,5394.813443,147.720561,0.433569,7.642798
5482,5422.069331,147.720561,0.433508,7.604379
5483,5410.955279,147.720561,0.432639,7.619998


In [12]:
y

0       1
1       1
2       1
3       1
4       1
       ..
5480    3
5481    3
5482    3
5483    3
5484    3
Name: Classification, Length: 5485, dtype: int64

# Importing Libraries 

In [13]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from xgboost import XGBClassifier

import time
from sklearn.model_selection import KFold

# Converting Numpy Arrays to Pandas DataFrames for Sensor and Position Data

In [14]:
X = X
y = y

# convert numpy array into pandas dataframe
X = pd.DataFrame(X)
y = pd.DataFrame(y)

# Define models

In [15]:
models = [
    LogisticRegression(),
    DecisionTreeClassifier(),
    RandomForestClassifier(),
    ExtraTreesClassifier(),
    AdaBoostClassifier(),
    GradientBoostingClassifier(),
    SVC(),
    KNeighborsClassifier(),
    XGBClassifier()
]

# Define k-fold cross-validation

In [16]:
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

# Create empty dictionary to store the results

In [17]:
results_dict = {
    "Model": [],
    "Fold": [],
    "Accuracy": [],
    "F1 Score": [],
    "Recall": [],
    "Precision": [],
    "Time taken": []
}

# Train and evaluate each model using k-fold cross-validation

In [18]:
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score
import numpy as np
import time

# Initialize an empty dictionary to store results
results_dict = {
    "Model": [],
    "Fold": [],
    "Accuracy": [],
    "F1 Score": [],
    "Recall": [],
    "Precision": [],
    "Time taken": [],
}

for model in models:
    # Evaluate the performance of the model using k-fold cross-validation
    accuracy_scores = []
    f1_scores = []
    recall_scores = []
    precision_scores = []
    times = []
    
    for i, (train_index, test_index) in enumerate(kfold.split(X)):
        start_time = time.time()
        
        X_train_fold, X_val_fold = X.iloc[train_index], X.iloc[test_index]
        y_train_fold, y_val_fold = y.iloc[train_index], y.iloc[test_index]
        
        # Ensure that class labels in y_train_fold and y_val_fold are in [0, 1, 2]
        y_train_fold = y_train_fold - 1  # Subtract 1 to map [1, 2, 3] to [0, 1, 2]
        y_val_fold = y_val_fold - 1
        
        model.fit(X_train_fold, y_train_fold)
        y_pred_fold = model.predict(X_val_fold)
        
        accuracy = accuracy_score(y_val_fold, y_pred_fold)
        
        # Calculate F1 score with 'macro' average for multiclass classification
        f1 = f1_score(y_val_fold, y_pred_fold, average='macro')
        
        recall = recall_score(y_val_fold, y_pred_fold, average='macro')
        precision = precision_score(y_val_fold, y_pred_fold, average='macro')
        
        end_time = time.time()
        time_taken = end_time - start_time
        
        accuracy_scores.append(accuracy)
        f1_scores.append(f1)
        recall_scores.append(recall)
        precision_scores.append(precision)
        times.append(time_taken)
        
        # Add the results to the dictionary
        results_dict["Model"].append(type(model).__name__)
        results_dict["Fold"].append(i + 1)
        results_dict["Accuracy"].append(accuracy)
        results_dict["F1 Score"].append(f1)
        results_dict["Recall"].append(recall)
        results_dict["Precision"].append(precision)
        results_dict["Time taken"].append(time_taken)
        
        print(f"Model: {type(model).__name__}")
        print(f"Fold {i + 1} - Accuracy: {accuracy:.2f}")
        print(f"Fold {i + 1} - F1 Score: {f1:.2f}")
        print(f"Fold {i + 1} - Recall: {recall:.2f}")
        print(f"Fold {i + 1} - Precision: {precision:.2f}")
        print(f"Fold {i + 1} - Time taken: {time_taken:.2f} seconds")
    
    # Calculate mean time and results for all the folds
    mean_accuracy = np.mean(accuracy_scores)
    mean_f1 = np.mean(f1_scores)
    mean_recall = np.mean(recall_scores)
    mean_precision = np.mean(precision_scores)
    mean_time = np.mean(times)

    # Add the mean results to the dictionary
    results_dict["Model"].append(type(model).__name__)
    results_dict["Fold"].append("Mean")
    results_dict["Accuracy"].append(mean_accuracy)
    results_dict["F1 Score"].append(mean_f1)
    results_dict["Recall"].append(mean_recall)
    results_dict["Precision"].append(mean_precision)
    results_dict["Time taken"].append(mean_time)

    print(f"Model: {type(model).__name__}")
    print(f"Mean - Accuracy: {mean_accuracy:.2f}")
    print(f"Mean - F1 Score: {mean_f1:.2f}")
    print(f"Mean - Recall: {mean_recall:.2f}")
    print(f"Mean - Precision: {mean_precision:.2f}")
    print(f"Mean - Time taken: {mean_time:.2f} seconds")

# Convert the results dictionary into a pandas dataframe
import pandas as pd

results_df = pd.DataFrame.from_dict(results_dict)


  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/

Model: LogisticRegression
Fold 1 - Accuracy: 1.00
Fold 1 - F1 Score: 0.99
Fold 1 - Recall: 0.99
Fold 1 - Precision: 0.99
Fold 1 - Time taken: 0.04 seconds
Model: LogisticRegression
Fold 2 - Accuracy: 1.00
Fold 2 - F1 Score: 0.99
Fold 2 - Recall: 0.98
Fold 2 - Precision: 0.99
Fold 2 - Time taken: 0.04 seconds
Model: LogisticRegression
Fold 3 - Accuracy: 1.00
Fold 3 - F1 Score: 0.99
Fold 3 - Recall: 0.99
Fold 3 - Precision: 0.99
Fold 3 - Time taken: 0.04 seconds
Model: LogisticRegression
Fold 4 - Accuracy: 0.99
Fold 4 - F1 Score: 0.99
Fold 4 - Recall: 0.98
Fold 4 - Precision: 0.99
Fold 4 - Time taken: 0.04 seconds
Model: LogisticRegression
Fold 5 - Accuracy: 1.00
Fold 5 - F1 Score: 0.99
Fold 5 - Recall: 0.98
Fold 5 - Precision: 0.99
Fold 5 - Time taken: 0.04 seconds
Model: LogisticRegression
Mean - Accuracy: 1.00
Mean - F1 Score: 0.99
Mean - Recall: 0.99
Mean - Precision: 0.99
Mean - Time taken: 0.04 seconds
Model: DecisionTreeClassifier
Fold 1 - Accuracy: 1.00
Fold 1 - F1 Score: 1.00
Fo

  model.fit(X_train_fold, y_train_fold)
  model.fit(X_train_fold, y_train_fold)


Model: RandomForestClassifier
Fold 1 - Accuracy: 1.00
Fold 1 - F1 Score: 1.00
Fold 1 - Recall: 1.00
Fold 1 - Precision: 1.00
Fold 1 - Time taken: 0.13 seconds
Model: RandomForestClassifier
Fold 2 - Accuracy: 1.00
Fold 2 - F1 Score: 0.99
Fold 2 - Recall: 0.99
Fold 2 - Precision: 1.00
Fold 2 - Time taken: 0.12 seconds


  model.fit(X_train_fold, y_train_fold)
  model.fit(X_train_fold, y_train_fold)


Model: RandomForestClassifier
Fold 3 - Accuracy: 1.00
Fold 3 - F1 Score: 0.99
Fold 3 - Recall: 0.99
Fold 3 - Precision: 1.00
Fold 3 - Time taken: 0.12 seconds
Model: RandomForestClassifier
Fold 4 - Accuracy: 1.00
Fold 4 - F1 Score: 0.99
Fold 4 - Recall: 0.99
Fold 4 - Precision: 1.00
Fold 4 - Time taken: 0.12 seconds


  model.fit(X_train_fold, y_train_fold)
  model.fit(X_train_fold, y_train_fold)


Model: RandomForestClassifier
Fold 5 - Accuracy: 1.00
Fold 5 - F1 Score: 0.99
Fold 5 - Recall: 0.98
Fold 5 - Precision: 1.00
Fold 5 - Time taken: 0.12 seconds
Model: RandomForestClassifier
Mean - Accuracy: 1.00
Mean - F1 Score: 0.99
Mean - Recall: 0.99
Mean - Precision: 1.00
Mean - Time taken: 0.12 seconds
Model: ExtraTreesClassifier
Fold 1 - Accuracy: 1.00
Fold 1 - F1 Score: 1.00
Fold 1 - Recall: 1.00
Fold 1 - Precision: 1.00
Fold 1 - Time taken: 0.09 seconds
Model: ExtraTreesClassifier
Fold 2 - Accuracy: 1.00
Fold 2 - F1 Score: 0.99
Fold 2 - Recall: 0.99
Fold 2 - Precision: 1.00
Fold 2 - Time taken: 0.09 seconds


  model.fit(X_train_fold, y_train_fold)
  model.fit(X_train_fold, y_train_fold)
  model.fit(X_train_fold, y_train_fold)


Model: ExtraTreesClassifier
Fold 3 - Accuracy: 1.00
Fold 3 - F1 Score: 0.99
Fold 3 - Recall: 0.99
Fold 3 - Precision: 1.00
Fold 3 - Time taken: 0.09 seconds
Model: ExtraTreesClassifier
Fold 4 - Accuracy: 1.00
Fold 4 - F1 Score: 0.99
Fold 4 - Recall: 0.99
Fold 4 - Precision: 1.00
Fold 4 - Time taken: 0.09 seconds
Model: ExtraTreesClassifier
Fold 5 - Accuracy: 1.00
Fold 5 - F1 Score: 0.99
Fold 5 - Recall: 0.98
Fold 5 - Precision: 1.00
Fold 5 - Time taken: 0.09 seconds
Model: ExtraTreesClassifier
Mean - Accuracy: 1.00
Mean - F1 Score: 0.99
Mean - Recall: 0.99
Mean - Precision: 1.00
Mean - Time taken: 0.09 seconds


  model.fit(X_train_fold, y_train_fold)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Model: AdaBoostClassifier
Fold 1 - Accuracy: 0.97
Fold 1 - F1 Score: 0.93
Fold 1 - Recall: 0.96
Fold 1 - Precision: 0.91
Fold 1 - Time taken: 0.09 seconds
Model: AdaBoostClassifier
Fold 2 - Accuracy: 0.98
Fold 2 - F1 Score: 0.95
Fold 2 - Recall: 0.98
Fold 2 - Precision: 0.93
Fold 2 - Time taken: 0.09 seconds
Model: AdaBoostClassifier
Fold 3 - Accuracy: 0.91
Fold 3 - F1 Score: 0.58
Fold 3 - Recall: 0.66
Fold 3 - Precision: 0.53
Fold 3 - Time taken: 0.09 seconds


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Model: AdaBoostClassifier
Fold 4 - Accuracy: 0.99
Fold 4 - F1 Score: 0.96
Fold 4 - Recall: 0.96
Fold 4 - Precision: 0.97
Fold 4 - Time taken: 0.09 seconds
Model: AdaBoostClassifier
Fold 5 - Accuracy: 0.97
Fold 5 - F1 Score: 0.91
Fold 5 - Recall: 0.94
Fold 5 - Precision: 0.89
Fold 5 - Time taken: 0.09 seconds
Model: AdaBoostClassifier
Mean - Accuracy: 0.96
Mean - F1 Score: 0.87
Mean - Recall: 0.90
Mean - Precision: 0.85
Mean - Time taken: 0.09 seconds


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Model: GradientBoostingClassifier
Fold 1 - Accuracy: 1.00
Fold 1 - F1 Score: 1.00
Fold 1 - Recall: 1.00
Fold 1 - Precision: 1.00
Fold 1 - Time taken: 0.54 seconds


  y = column_or_1d(y, warn=True)


Model: GradientBoostingClassifier
Fold 2 - Accuracy: 1.00
Fold 2 - F1 Score: 0.99
Fold 2 - Recall: 0.99
Fold 2 - Precision: 1.00
Fold 2 - Time taken: 0.56 seconds


  y = column_or_1d(y, warn=True)


Model: GradientBoostingClassifier
Fold 3 - Accuracy: 1.00
Fold 3 - F1 Score: 0.99
Fold 3 - Recall: 0.99
Fold 3 - Precision: 1.00
Fold 3 - Time taken: 0.57 seconds


  y = column_or_1d(y, warn=True)


Model: GradientBoostingClassifier
Fold 4 - Accuracy: 1.00
Fold 4 - F1 Score: 0.99
Fold 4 - Recall: 0.99
Fold 4 - Precision: 1.00
Fold 4 - Time taken: 0.56 seconds


  y = column_or_1d(y, warn=True)


Model: GradientBoostingClassifier
Fold 5 - Accuracy: 1.00
Fold 5 - F1 Score: 0.99
Fold 5 - Recall: 0.98
Fold 5 - Precision: 1.00
Fold 5 - Time taken: 0.56 seconds
Model: GradientBoostingClassifier
Mean - Accuracy: 1.00
Mean - F1 Score: 0.99
Mean - Recall: 0.99
Mean - Precision: 1.00
Mean - Time taken: 0.56 seconds
Model: SVC
Fold 1 - Accuracy: 1.00
Fold 1 - F1 Score: 1.00
Fold 1 - Recall: 1.00
Fold 1 - Precision: 1.00
Fold 1 - Time taken: 0.01 seconds
Model: SVC
Fold 2 - Accuracy: 1.00
Fold 2 - F1 Score: 0.99
Fold 2 - Recall: 0.99
Fold 2 - Precision: 1.00
Fold 2 - Time taken: 0.01 seconds
Model: SVC
Fold 3 - Accuracy: 1.00
Fold 3 - F1 Score: 0.99
Fold 3 - Recall: 0.99
Fold 3 - Precision: 1.00
Fold 3 - Time taken: 0.01 seconds
Model: SVC
Fold 4 - Accuracy: 1.00
Fold 4 - F1 Score: 0.99
Fold 4 - Recall: 0.99
Fold 4 - Precision: 1.00
Fold 4 - Time taken: 0.01 seconds
Model: SVC
Fold 5 - Accuracy: 1.00
Fold 5 - F1 Score: 0.99
Fold 5 - Recall: 0.98
Fold 5 - Precision: 1.00
Fold 5 - Time take

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)


Model: XGBClassifier
Fold 1 - Accuracy: 1.00
Fold 1 - F1 Score: 1.00
Fold 1 - Recall: 1.00
Fold 1 - Precision: 1.00
Fold 1 - Time taken: 0.10 seconds
Model: XGBClassifier
Fold 2 - Accuracy: 1.00
Fold 2 - F1 Score: 0.99
Fold 2 - Recall: 0.99
Fold 2 - Precision: 1.00
Fold 2 - Time taken: 0.09 seconds
Model: XGBClassifier
Fold 3 - Accuracy: 1.00
Fold 3 - F1 Score: 0.99
Fold 3 - Recall: 0.99
Fold 3 - Precision: 1.00
Fold 3 - Time taken: 0.09 seconds
Model: XGBClassifier
Fold 4 - Accuracy: 1.00
Fold 4 - F1 Score: 0.99
Fold 4 - Recall: 0.99
Fold 4 - Precision: 1.00
Fold 4 - Time taken: 0.09 seconds
Model: XGBClassifier
Fold 5 - Accuracy: 1.00
Fold 5 - F1 Score: 0.99
Fold 5 - Recall: 0.98
Fold 5 - Precision: 1.00
Fold 5 - Time taken: 0.08 seconds
Model: XGBClassifier
Mean - Accuracy: 1.00
Mean - F1 Score: 0.99
Mean - Recall: 0.99
Mean - Precision: 1.00
Mean - Time taken: 0.09 seconds


# Saving the results to an Excel file

In [19]:
results_df.to_excel("Cross Validation Results.xlsx", index=False)

#Print a message indicating that the results have been saved to a file
print("Results saved to Cross Validation Results.xlsx file")

Results saved to Cross Validation Results.xlsx file
