In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

In [2]:
# Read the raw CSV into a DataFrame.
# The path is relative, so it resolves against the kernel's working directory.
dataset_file = '../../data/raw/dataset.csv'
dataset = pd.read_csv(filepath_or_buffer=dataset_file)

In [3]:
# Separate the frame into the feature matrix X (every column but 'Disease')
# and the target series y (the 'Disease' column itself).
X = dataset.drop('Disease', axis=1)
y = dataset.loc[:, 'Disease']

In [4]:
# Collect the names of the feature columns stored with the generic 'object'
# dtype; these are the ones label-encoded in the next step.
categorical_cols = [
    name
    for name, col_dtype in X.dtypes.items()
    if col_dtype == 'object'
]

In [5]:
# Label-encode every categorical feature column, in place on X.
# Each column gets its own fitted encoder, stored in `feature_encoders`, so the
# string -> integer mapping of every feature stays available afterwards (for
# decoding, or for encoding new samples the same way). Refitting one shared
# encoder would keep only the mapping of the last column.
# NOTE(review): scikit-learn documents LabelEncoder as a target encoder;
# OrdinalEncoder is its feature-side counterpart - consider switching.
feature_encoders = {}
# `label_encoder` stays bound for any later cell that uses it: as before, it
# ends up as the encoder fitted on the last categorical column, or as an
# unfitted encoder when there are no categorical columns.
label_encoder = LabelEncoder()
for col in categorical_cols:
    label_encoder = LabelEncoder()
    X[col] = label_encoder.fit_transform(X[col])
    feature_encoders[col] = label_encoder

# Label Encoding for target variable (Disease column).
# `label_encoder_y.classes_` maps the integer labels back to the original values.
label_encoder_y = LabelEncoder()
y_encoded = label_encoder_y.fit_transform(y)

In [6]:
# Hold out 20% of the rows as a test set; the fixed seed makes the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    random_state=42,
    test_size=0.2,
)

In [7]:
# Build a support-vector classifier with probability estimates enabled and a
# fixed seed, then fit it on the training split.
svm_model = SVC(random_state=42, probability=True)
svm_model.fit(X=X_train, y=y_train)


In [8]:
# Run the fitted SVM on the held-out features to get predicted class labels.
y_pred_svm = svm_model.predict(X=X_test)

In [9]:
# Score the predictions against the true test labels: overall accuracy plus a
# per-class precision/recall/F1 report returned as a nested dict.
accuracy_svm = accuracy_score(y_true=y_test, y_pred=y_pred_svm)
report_svm = classification_report(
    y_true=y_test,
    y_pred=y_pred_svm,
    output_dict=True,
)

In [11]:
# Show the accuracy (4 decimal places), then the report heading and the raw
# report dict on the following line.
print(f"SVM Model Accuracy (Initial): {accuracy_svm:.4f}")
print("\nClassification Report (Initial):", report_svm, sep="\n")


SVM Model Accuracy (Initial): 0.9695

Classification Report (Initial):
{'0': {'precision': 0.9, 'recall': 1.0, 'f1-score': 0.9473684210526315, 'support': 18.0}, '1': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 30.0}, '2': {'precision': 0.8695652173913043, 'recall': 0.8333333333333334, 'f1-score': 0.851063829787234, 'support': 24.0}, '3': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 25.0}, '4': {'precision': 0.92, 'recall': 0.9583333333333334, 'f1-score': 0.9387755102040817, 'support': 24.0}, '5': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 23.0}, '6': {'precision': 1.0, 'recall': 0.9696969696969697, 'f1-score': 0.9846153846153847, 'support': 33.0}, '7': {'precision': 1.0, 'recall': 0.8695652173913043, 'f1-score': 0.9302325581395349, 'support': 23.0}, '8': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 21.0}, '9': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 15.0}, '10': {'precision': 1.0, 'recall': 1.0