In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix

In [2]:
df = pd.read_csv('Medicaldataset.csv')
df

Unnamed: 0,age,gender,impulse,pressurehight,pressurelow,glucose,kcm,troponin,class
0,64,1,66,160,83,160.0,1.80,0.012,negative
1,21,1,94,98,46,296.0,6.75,1.060,positive
2,55,1,64,160,77,270.0,1.99,0.003,negative
3,64,1,70,120,55,270.0,13.87,0.122,positive
4,55,1,64,112,65,300.0,1.08,0.003,negative
...,...,...,...,...,...,...,...,...,...
1314,44,1,94,122,67,204.0,1.63,0.006,negative
1315,66,1,84,125,55,149.0,1.33,0.172,positive
1316,45,1,85,168,104,96.0,1.24,4.250,positive
1317,54,1,58,117,68,443.0,5.80,0.359,positive


In [3]:
# Convert glucose column to integer
df['glucose'] = df['glucose'].astype(np.int64)
# Convert class labels to 0,1
df['class'].replace({'negative': 0, 'positive': 1, 'positive            ': 1}, inplace=True)
# Convert class column to integer
df['class'] = pd.to_numeric(df['class'])

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['class'].replace({'negative': 0, 'positive': 1, 'positive            ': 1}, inplace=True)
  df['class'].replace({'negative': 0, 'positive': 1, 'positive            ': 1}, inplace=True)


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1319 entries, 0 to 1318
Data columns (total 9 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   age            1319 non-null   int64  
 1   gender         1319 non-null   int64  
 2   impulse        1319 non-null   int64  
 3   pressurehight  1319 non-null   int64  
 4   pressurelow    1319 non-null   int64  
 5   glucose        1319 non-null   int64  
 6   kcm            1319 non-null   float64
 7   troponin       1319 non-null   float64
 8   class          1319 non-null   int64  
dtypes: float64(2), int64(7)
memory usage: 92.9 KB


In [5]:
# Feature selection and target variable
X = df.drop('class', axis=1)  # Replace 'target' with the actual target column name
y = df['class']

In [6]:
# Splitting data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [7]:
# Feature scaling
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [8]:
# Training the SVM classifier
svm = SVC(kernel='linear',degree=2,C=100, gamma='scale')  # You can also use 'rbf', 'poly', etc. and tune other parameters
svm.fit(X_train, y_train)

# Making predictions
y_pred = svm.predict(X_test)

In [9]:
# Evaluating the model
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))


[[ 93   8]
 [ 13 150]]
              precision    recall  f1-score   support

           0       0.88      0.92      0.90       101
           1       0.95      0.92      0.93       163

    accuracy                           0.92       264
   macro avg       0.91      0.92      0.92       264
weighted avg       0.92      0.92      0.92       264



In [10]:
from sklearn import metrics

# Calculate Classification metrics: Accuracy, Precision, Recall
accuracy = metrics.accuracy_score(y_test, y_pred)
Precision = metrics.precision_score(y_test, y_pred)
Recall = metrics.recall_score(y_test, y_pred)
print("Accuracy :",accuracy)
print("Precision:",Precision)
print("Recall   :",Recall)

Accuracy : 0.9204545454545454
Precision: 0.9493670886075949
Recall   : 0.9202453987730062


In [11]:
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, classification_report

In [12]:
# Define the parameter grid
param_grid = {
    'C': [0.1, 1, 10, 100],
    'gamma': ['scale', 'auto', 0.001, 0.01, 0.1, 1],
    'kernel': ['linear', 'rbf', 'poly', 'sigmoid'],
    'degree': [2, 3, 4]  # Only relevant for 'poly' kernel
}

# Set up GridSearchCV
grid_search = GridSearchCV(estimator=svm, param_grid=param_grid, cv=5, scoring='accuracy', n_jobs=-1)

# Fit the model
grid_search.fit(X_train, y_train)

In [13]:
# Print the best parameters and the best score
print("Best parameters found: ", grid_search.best_params_)
print("Best cross-validation accuracy: {:.2f}".format(grid_search.best_score_))

# Use the best estimator to make predictions
best_svm = grid_search.best_estimator_
y_pred = best_svm.predict(X_test)

# Evaluate the model
print("Test set accuracy: {:.2f}".format(accuracy_score(y_test, y_pred)))
print("Classification report:\n", classification_report(y_test, y_pred))

Best parameters found:  {'C': 100, 'degree': 2, 'gamma': 'scale', 'kernel': 'linear'}
Best cross-validation accuracy: 0.92
Test set accuracy: 0.92
Classification report:
               precision    recall  f1-score   support

           0       0.88      0.92      0.90       101
           1       0.95      0.92      0.93       163

    accuracy                           0.92       264
   macro avg       0.91      0.92      0.92       264
weighted avg       0.92      0.92      0.92       264



In [None]:
# Print best parameters
print("Best parameters:", grid_search.best_params_)

In [None]:
from sklearn import metrics

# Calculate Classification metrics: Accuracy, Precision, Recall
accuracy = metrics.accuracy_score(y_test, y_pred)
Precision = metrics.precision_score(y_test, y_pred)
Recall = metrics.recall_score(y_test, y_pred)
print("Accuracy :",accuracy)
print("Precision:",Precision)
print("Recall   :",Recall)