In [2]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, matthews_corrcoef
from sklearn.model_selection import LeaveOneOut
import numpy as np

from feature_extraction import FeatureExtraction

# --- Data loading -----------------------------------------------------------
# Read the dataset spreadsheet. The columns used below are 'folding_type'
# (class label) and 'sequence' (input handed to the feature extractor).
data = pd.read_excel('../data/Final_2Sm_modified_with_sequences.xlsx')

# --- Label encoding ---------------------------------------------------------
# Overwrite the folding_type column with integer codes. The fitted encoder is
# kept so the codes can be mapped back to the original class names later.
label_encoder = LabelEncoder()
data['folding_type'] = label_encoder.fit_transform(data['folding_type'])
labels = data['folding_type'].to_numpy()

# --- Feature extraction -----------------------------------------------------
# Build the feature matrix by calling calculate_dde once per sequence.
# NOTE(review): this step was previously labelled "AAC with length", which
# does not match the calculate_dde call -- confirm the intended descriptor.
feature_extraction = FeatureExtraction()
features = np.array(list(map(feature_extraction.calculate_dde, data['sequence'])))

# --- Leave-One-Out cross-validation with a linear SVM -----------------------
# Each iteration holds out exactly one sample, fits a fresh classifier on the
# remaining ones, and records the held-out truth/prediction pair. The pooled
# lists are what the evaluation cell consumes.
loo = LeaveOneOut()
y_true = []
y_pred = []
for train_index, test_index in loo.split(features):
    X_train, y_train = features[train_index], labels[train_index]
    X_test, y_test = features[test_index], labels[test_index]

    # fit() returns the estimator itself, so construction and training chain.
    clf = SVC(kernel='linear').fit(X_train, y_train)

    # The test fold holds a single sample; take its scalar label/prediction.
    y_true.append(y_test[0])
    y_pred.append(clf.predict(X_test)[0])

In [3]:
# Compute every evaluation metric from the pooled LOOCV predictions first ...
# Confusion matrix: rows are true classes, columns are predicted classes.
conf_matrix = confusion_matrix(y_true, y_pred)
accuracy = accuracy_score(y_true, y_pred)
mcc = matthews_corrcoef(y_true, y_pred)
# zero_division=0 makes any metric with a zero denominator report 0 silently
# instead of emitting an undefined-metric warning.
report = classification_report(y_true, y_pred, zero_division=0)

# ... then emit them in a fixed order: matrix, accuracy, MCC, per-class report.
print("Confusion Matrix:")
print(conf_matrix)

print(f"\nAccuracy (ACC): {accuracy:.2f}")
print(f"Matthews Correlation Coefficient (MCC): {mcc:.2f}")

print("\nClassification Report:")
print(report)

Confusion Matrix:
[[59 30]
 [25 27]]

Accuracy (ACC): 0.61
Matthews Correlation Coefficient (MCC): 0.18

Classification Report:
              precision    recall  f1-score   support

           0       0.70      0.66      0.68        89
           1       0.47      0.52      0.50        52

    accuracy                           0.61       141
   macro avg       0.59      0.59      0.59       141
weighted avg       0.62      0.61      0.61       141
