### Import Libraries

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score

### Load the Fold-Change Results

In [2]:
# Load fold-change results
fold_change_data = pd.read_csv('../results/fold_change_results.csv')

# Ensure the file is loaded properly
print(fold_change_data.head())


       Unnamed: 0  log2_fold_change  absolute_fold_change   regulation
0    hsa-mir-518f         15.479993             15.479993  upregulated
1         subtype         13.808021             13.808021  upregulated
2    hsa-mir-520e         13.290780             13.290780  upregulated
3    hsa-mir-520d         13.166629             13.166629  upregulated
4  hsa-mir-1283-2         12.642423             12.642423  upregulated


### Prepare the Data

In [4]:
# Drop columns not needed for SVM classification
# We're keeping 'log2_fold_change' as the feature and using 'regulation' as the label
X = fold_change_data[['log2_fold_change']]  # Features: Only log2_fold_change
y = fold_change_data['regulation']  # Labels: Regulation status (upregulated/downregulated)

# Validate dimensions
print(f"Features shape: {X.shape}")
print(f"Labels shape: {y.shape}")

# Preview the data
print("\nFeature sample:")
print(X.head())

print("\nLabel sample:")
print(y.head())


Features shape: (1882, 1)
Labels shape: (1882,)

Feature sample:
   log2_fold_change
0         15.479993
1         13.808021
2         13.290780
3         13.166629
4         12.642423

Label sample:
0    upregulated
1    upregulated
2    upregulated
3    upregulated
4    upregulated
Name: regulation, dtype: object


### Train-Test Split

In [5]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print(f"Training set: {X_train.shape}")
print(f"Testing set: {X_test.shape}")

Training set: (1505, 1)
Testing set: (377, 1)


### Train the SVM Classifier

In [6]:
# Initialize and train SVM
svm_classifier = SVC(kernel='linear', random_state=42)
svm_classifier.fit(X_train, y_train)

# Predictions
y_pred = svm_classifier.predict(X_test)


### Evaluate the Model

In [7]:
# Evaluate model performance
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Detailed classification report
print("\nClassification Report:")
print(classification_report(y_test, y_pred))


Accuracy: 0.95

Classification Report:
               precision    recall  f1-score   support

downregulated       0.89      1.00      0.94       146
  upregulated       1.00      0.92      0.96       231

     accuracy                           0.95       377
    macro avg       0.95      0.96      0.95       377
 weighted avg       0.96      0.95      0.95       377



### Save Results

In [8]:
# Save predictions
results = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
results.to_csv('../results/svm_baseline_results.csv', index=False)

print("Results saved to '../results/svm_baseline_results.csv'")


Results saved to '../results/svm_baseline_results.csv'
