In [1]:
import pandas as pd
import torch
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import accuracy_score, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt




In [2]:
# Read the cleaned results table from disk, then discard every row that
# contains at least one missing value.
df = pd.read_parquet(r"All_Relative_Results_Cleaned.parquet")
df_clean = df.dropna()

# Target: the 'Exercise' column of the cleaned table.
Y = df_clean['Exercise']

# Keep only the columns positioned to the right of 'time(s)'.
index = df_clean.columns.get_loc('time(s)')
df_right = df_clean.iloc[:, (index + 1):]

# Features: every column of df_right except its first one, which is
# assumed not to be a feature.
X = df_right.iloc[:, 1:]



In [3]:
# Turn the exercise labels into integer class ids. The fitted encoder is kept
# around because its classes_ attribute is used later to label plot axes.
label_encoder = LabelEncoder()
Y_encoded = label_encoder.fit_transform(y=Y)


In [4]:

# Build PyTorch tensors: a float32 feature matrix and int64 class ids.
X_tensor = torch.tensor(X.to_numpy(), dtype=torch.float32)
y_tensor = torch.tensor(Y_encoded, dtype=torch.long)

# Two-stage split, both stages seeded with 42:
#   stage 1 keeps 1% of the rows for training and sets the other 99% aside;
#   stage 2 halves the set-aside rows into validation and test sets.
# NOTE(review): a 1% training share is unusually small - confirm it is a
# deliberate subsample (e.g. to keep model fitting fast).
X_train, X_temp, y_train, y_temp = train_test_split(
    X_tensor,
    y_tensor,
    test_size=0.99,
    random_state=42,
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp,
    y_temp,
    test_size=0.5,
    random_state=42,
)



In [5]:
# Wrap the training and test splits as PyTorch datasets and batch them (32 per batch).
train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)

# Shuffle only the training batches. The test loader keeps the dataset order so
# evaluation is reproducible and batch outputs line up with y_test.
trainLoader = DataLoader(train_dataset, batch_size=32, shuffle=True)
testLoader = DataLoader(test_dataset, batch_size=32, shuffle=False)



In [6]:
# Gradient Boosting classifier with 100 estimators and a fixed random_state
# so repeated runs use the same seed.
gb_classifier = GradientBoostingClassifier(
    n_estimators=100,
    random_state=10,
)



In [7]:
# 5-fold cross-validated predictions for the training split.
y_pred_cv = cross_val_predict(
    estimator=gb_classifier,
    X=X_train,
    y=y_train,
    cv=5,
)



In [None]:
# Train the Gradient Boosting model on the entire training set.
# (The classifier is named gb_classifier; the previous name
# gb_classifier_classifier was never defined and raised a NameError.)
gb_classifier.fit(X_train, y_train)



In [None]:
# Make predictions on the test set with gb_classifier
# (the previously referenced gb_classifier_classifier does not exist).
y_pred_test = gb_classifier.predict(X_test)



In [None]:
# Score the cross-validated predictions against the true training labels.
accuracy_train = accuracy_score(y_true=y_train, y_pred=y_pred_cv)
print('Accuracy on the train set: {:.5f}'.format(accuracy_train))


In [None]:
# Fraction of test samples predicted correctly, reported as a percentage.
accuracy_test = accuracy_score(y_true=y_test, y_pred=y_pred_test)
print('Accuracy on the test set: {:.2f}%'.format(accuracy_test * 100))



In [None]:
# Confusion matrix of the cross-validated predictions on the training split.
# Every encoded class id is passed as `labels` so the matrix always has one
# row/column per entry of label_encoder.classes_, even when a class is absent
# from this split; otherwise the tick labels could be misaligned.
class_ids = list(range(len(label_encoder.classes_)))
conf_matrix = confusion_matrix(y_train, y_pred_cv, labels=class_ids)
plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues',
            xticklabels=label_encoder.classes_,
            yticklabels=label_encoder.classes_)
# The matrix covers the training split only, not the entire dataset.
plt.title('Confusion Matrix on the training set (cross-validated)')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()

In [None]:

# Visualize the confusion matrix of the test-set predictions.
# Tick labels come from the fitted label encoder instead of a hard-coded
# range(1, 8), so the axes show the actual class names and work for any number
# of classes. Passing every class id as `labels` keeps the matrix aligned with
# those names even if a class is missing from the test split.
class_ids = list(range(len(label_encoder.classes_)))
conf_matrix = confusion_matrix(y_test, y_pred_test, labels=class_ids)
plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues',
            xticklabels=label_encoder.classes_,
            yticklabels=label_encoder.classes_)
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Confusion Matrix')
plt.show()