In [1]:
import pandas as pd

# Read the separately stored JMeter train and test CSVs; both paths are
# relative to the notebook's working directory.
train_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in ('../dataset/jmeter_train.csv', '../dataset/jmeter_test.csv')
)

In [2]:
# Columns used as model inputs; each name must be present in both frames,
# otherwise the selections below raise KeyError.
features = [
    'F72', 'F25', 'F65', 'F68',
    'F101', 'F104', 'F105',
    'F15-NA', 'F15-private', 'F15-protected', 'F15-public',
    'F22', 'F123', 'F77', 'F41', 'F126',
]

# Split each frame into its feature matrix and its 'label' target column.
X_train, y_train = train_df[features], train_df['label']
X_test, y_test = test_df[features], test_df['label']


In [3]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training features only, then apply
# that same fitted scaler to both splits so the test set never influences
# the fit.
scaler = StandardScaler().fit(X_train)

X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [4]:
from sklearn.decomposition import PCA

# A fractional n_components asks PCA to keep as many components as are
# needed to explain that share of the variance (95% here).
pca = PCA(n_components=0.95)

# The projection is learned from the scaled training data only (left-hand
# call runs first) and then reused unchanged on the scaled test data.
X_train_pca, X_test_pca = (
    pca.fit_transform(X_train_scaled),
    pca.transform(X_test_scaled),
)

# Report how many components were kept (column count of the projected data).
print(f"Number of principal components: {X_train_pca.shape[1]}")

Number of principal components: 12


In [5]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score, roc_auc_score, f1_score, precision_score, recall_score

# Train a random forest on the PCA-projected training data; the fixed
# random_state keeps the fitted forest reproducible across runs.
model = RandomForestClassifier(random_state=42).fit(X_train_pca, y_train)

# Hard class predictions drive accuracy / F1 / precision / recall, while the
# AUC needs a continuous score: column 1 of predict_proba, the probability
# of the second class (label 1 in the report below).
y_pred = model.predict(X_test_pca)
positive_proba = model.predict_proba(X_test_pca)[:, 1]

# Headline metrics on the held-out test split
print(f"Accuracy: {accuracy_score(y_test, y_pred)}")
print(f"F1 Score: {f1_score(y_test, y_pred)}")
print(f"Precision: {precision_score(y_test, y_pred)}")
print(f"Recall: {recall_score(y_test, y_pred)}")
print(f"AUC Score: {roc_auc_score(y_test, positive_proba)}")

# Per-class breakdown of precision, recall, F1 and support
print(classification_report(y_test, y_pred))

Accuracy: 0.866231647634584
F1 Score: 0.646551724137931
Precision: 0.8620689655172413
Recall: 0.5172413793103449
AUC Score: 0.9159961685823754
              precision    recall  f1-score   support

           0       0.87      0.97      0.92       468
           1       0.86      0.52      0.65       145

    accuracy                           0.87       613
   macro avg       0.86      0.75      0.78       613
weighted avg       0.87      0.87      0.85       613

