In [17]:
import pandas as pd 
import os

root_dir = "./ampc"
csv_files = []
for csv_file in os.listdir(root_dir):
    csv_path = os.path.join(root_dir, csv_file)
    csv_files.append(csv_path)

In [18]:
# merging csv files 
df = pd.concat( 
    map(pd.read_csv, csv_files), ignore_index=True) 
print(df) 

       acc_mean_x_right  acc_mean_y_right  acc_mean_z_right  \
0             -0.173300           0.14864          0.981280   
1             -0.406180           0.24715          0.794710   
2             -0.496700           0.37167          0.702830   
3             -0.287800           0.15882          0.916880   
4             -0.561890           0.36946          0.686680   
...                 ...               ...               ...   
11624         -0.594850          -0.77552         -0.097547   
11625         -0.502790          -0.89310          0.166820   
11626         -0.363130          -0.86416          0.324550   
11627         -0.525420          -0.87994          0.153060   
11628          0.045011          -0.59333          0.684180   

       acc_mean_xyz_right  acc_mean_xy_right  acc_mean_yz_right  \
0                 1.10650            0.44735            1.06110   
1                 1.01780            0.52388            0.86595   
2                 1.04020            0.682

In [19]:
df = df.sample(frac = 1)

In [20]:
df.to_csv("all_data.csv")

In [21]:
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

X = df.iloc[:, :-1]
y = df.iloc[:, -1]
X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.3, random_state=1)
clf = svm.SVC()
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
accuracy_score(y_test,y_pred)

0.88134135855546

In [22]:
from sklearn.model_selection import cross_val_score
from sklearn import svm
clf = svm.SVC()
scores = cross_val_score(clf, X, y, cv=10)
print (scores)
ori_cv_scores = max(scores)

[0.87360275 0.89251935 0.89337919 0.90197764 0.89509888 0.88907997
 0.89853826 0.88134136 0.8916595  0.89931153]


In [23]:
from sklearn.model_selection import GridSearchCV

# Update SVC to use RBF kernel
param_grid = {
    'C': [0.1, 1, 10, 100],
    'gamma': [0.001, 0.01, 0.1, 1]
}
grid_search = GridSearchCV(svm.SVC(kernel='rbf'), param_grid, cv=10)
grid_search.fit(X_train, y_train)

# Get the best parameters
best_params = grid_search.best_params_
print("Best parameters:", best_params)

# Update SVM model with optimal parameters
clf = svm.SVC(kernel='rbf', C=best_params['C'], gamma=best_params['gamma'])
clf.fit(X_train, y_train)

# Train-test split accuracy
y_pred = clf.predict(X_test)
accuracy_split = accuracy_score(y_test, y_pred)
print("Train-test split accuracy:", accuracy_split)

# 10-fold cross-validation accuracy
scores = cross_val_score(clf, X, y, cv=10)
print("10-fold cross-validation scores:", scores)

Best parameters: {'C': 10, 'gamma': 0.001}
Train-test split accuracy: 0.8300372599598739
10-fold cross-validation scores: [0.83576956 0.83319003 0.83576956 0.84006879 0.83662941 0.83748925
 0.83404987 0.83404987 0.83576956 0.83562823]


In [24]:
hyperparam_cv_acc = max(scores)

In [25]:
from sklearn.feature_selection import SelectKBest, f_classif
import numpy as np

k = 100
selector = SelectKBest(score_func=f_classif, k=k)
X_new = selector.fit_transform(X, y)

# Train the final model with the best parameters
svm_model = svm.SVC(kernel='rbf', C=best_params['C'], gamma=best_params['gamma'])
svm_model.fit(X_train, y_train)

# Step 5: Evaluate the model on the test set
y_pred = svm_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Test Set Accuracy: {accuracy}")

# Step 6: Perform 10-fold cross-validation with hyperparameter tuning
cv_scores = cross_val_score(svm_model, X_new, y, cv=10)
print(f"10-Fold Cross-Validation Accuracy: {np.mean(cv_scores)}")


Test Set Accuracy: 0.8300372599598739
10-Fold Cross-Validation Accuracy: 0.8379911736369381


In [26]:
feature_selection_train_test_acc = accuracy
feature_selection_cv_acc = np.mean(cv_scores)

In [27]:
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import numpy as np

# Step 1: Perform PCA to reduce to 10 principal components
pca = PCA(n_components=10)
X_pca = pca.fit_transform(X)

# Step 2: Split the dataset into 70/30 train/test set
X_train, X_test, y_train, y_test = train_test_split(X_pca, y, test_size=0.3, random_state=42)

# Step 3: Hyperparameter tuning values (assuming these are obtained from a previous activity)
# Example hyperparameters
param_grid = {
    'C': [1, 10, 100],
    'gamma': [0.001, 0.01, 0.1],
    'kernel': ['rbf']
}

# Step 4: Train an SVM model with hyperparameter tuning on the training set
grid_search = GridSearchCV(SVC(), param_grid, cv=5)
grid_search.fit(X_train, y_train)

# Best parameters from grid search
best_params = grid_search.best_params_

# Train the final model with the best parameters
svm_model = SVC(**best_params)
svm_model.fit(X_train, y_train)

# Step 5: Evaluate the model on the test set
y_pred = svm_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Test Set Accuracy: {accuracy}")

# Step 6: Perform 10-fold cross-validation with hyperparameter tuning
cv_scores = cross_val_score(SVC(**best_params), X_pca, y, cv=10)
print(f"10-Fold Cross-Validation Accuracy: {np.mean(cv_scores)}")

Test Set Accuracy: 0.8452278589853827
10-Fold Cross-Validation Accuracy: 0.8361854246614266


In [28]:
pca_train_test_acc = accuracy
pca_cv_acc = np.mean(cv_scores)

In [31]:
import pandas as pd

# Assuming you have the accuracy values stored in variables
original_train_test_acc = 0.89
original_cv_acc = np.mean(ori_cv_scores)
hyperparam_train_test_acc = accuracy_split

# Create a dictionary with the accuracy values
data = {
    "SVM Model": [
        "Original features",
        "With hyperparameter tuning",
        "With feature selection and hyperparameter tuning",
        "With PCA and hyperparameter tuning"
    ],
    "Train-test split Accuracy": [
        f"{original_train_test_acc * 100:.2f}%",
        f"{hyperparam_train_test_acc * 100:.2f}%",
        f"{feature_selection_train_test_acc * 100:.2f}%",
        f"{pca_train_test_acc * 100:.2f}%"
    ],
    "Cross-validation Accuracy": [
        f"{original_cv_acc * 100:.2f}%",
        f"{hyperparam_cv_acc * 100:.2f}%",
        f"{feature_selection_cv_acc * 100:.2f}%",
        f"{pca_cv_acc * 100:.2f}%"
    ]
}

# Convert the dictionary to a DataFrame
summary_df = pd.DataFrame(data)

# Display the DataFrame
display(summary_df)

Unnamed: 0,SVM Model,Train-test split Accuracy,Cross-validation Accuracy
0,Original features,89.00%,90.20%
1,With hyperparameter tuning,83.00%,84.01%
2,With feature selection and hyperparameter tuning,83.00%,83.80%
3,With PCA and hyperparameter tuning,84.52%,83.62%


In [34]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import SGDClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

# Load the dataset
data = pd.read_csv('all_data.csv')
X = data.iloc[:, :-1]
y = data.iloc[:, -1]

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Function to train and evaluate a model
def evaluate_model(model, X_train, X_test, y_train, y_test):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    train_test_acc = accuracy_score(y_test, y_pred)
    cv_acc = cross_val_score(model, X, y, cv=10).mean()
    return train_test_acc, cv_acc

# Train with SGDClassifier
sgd_model = SGDClassifier(random_state=42)
sgd_train_test_acc, sgd_cv_acc = evaluate_model(sgd_model, X_train, X_test, y_train, y_test)

# Train with RandomForestClassifier
rf_model = RandomForestClassifier(random_state=42)
rf_train_test_acc, rf_cv_acc = evaluate_model(rf_model, X_train, X_test, y_train, y_test)

# Train with MLPClassifier
mlp_model = MLPClassifier(random_state=42)
mlp_train_test_acc, mlp_cv_acc = evaluate_model(mlp_model, X_train, X_test, y_train, y_test)

# Assuming you have the SVM accuracy values from previous steps
svm_train_test_acc = 0.89  # Replace with actual value
svm_cv_acc = 0.90  # Replace with actual value

# Create a summary table
data = {
    "Model": ["SVM", "SGD", "RandomForest", "MLP"],
    "Train-test split Accuracy": [
        f"{svm_train_test_acc * 100:.2f}%",
        f"{sgd_train_test_acc * 100:.2f}%",
        f"{rf_train_test_acc * 100:.2f}%",
        f"{mlp_train_test_acc * 100:.2f}%"
    ],
    "Cross-validation Accuracy": [
        f"{svm_cv_acc * 100:.2f}%",
        f"{sgd_cv_acc * 100:.2f}%",
        f"{rf_cv_acc * 100:.2f}%",
        f"{mlp_cv_acc * 100:.2f}%"
    ]
}

summary_df = pd.DataFrame(data)
display(summary_df)

Unnamed: 0,Model,Train-test split Accuracy,Cross-validation Accuracy
0,SVM,89.00%,90.00%
1,SGD,87.42%,88.24%
2,RandomForest,93.41%,93.32%
3,MLP,90.77%,87.63%
