In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix

In [2]:
# Example data loading
df = pd.read_csv('Medicaldataset.csv')
df.head()

Unnamed: 0,age,gender,impulse,pressurehight,pressurelow,glucose,kcm,troponin,class
0,64,1,66,160,83,160.0,1.8,0.012,negative
1,21,1,94,98,46,296.0,6.75,1.06,positive
2,55,1,64,160,77,270.0,1.99,0.003,negative
3,64,1,70,120,55,270.0,13.87,0.122,positive
4,55,1,64,112,65,300.0,1.08,0.003,negative


In [3]:
# Convert glucose column to integer
df['glucose'] = df['glucose'].astype(np.int64)
# Convert class labels to 0,1
df['class'].replace({'negative': 0, 'positive': 1, 'positive            ': 1}, inplace=True)
# Convert class column to integer
df['class'] = pd.to_numeric(df['class'])

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['class'].replace({'negative': 0, 'positive': 1, 'positive            ': 1}, inplace=True)
  df['class'].replace({'negative': 0, 'positive': 1, 'positive            ': 1}, inplace=True)


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1319 entries, 0 to 1318
Data columns (total 9 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   age            1319 non-null   int64  
 1   gender         1319 non-null   int64  
 2   impulse        1319 non-null   int64  
 3   pressurehight  1319 non-null   int64  
 4   pressurelow    1319 non-null   int64  
 5   glucose        1319 non-null   int64  
 6   kcm            1319 non-null   float64
 7   troponin       1319 non-null   float64
 8   class          1319 non-null   int64  
dtypes: float64(2), int64(7)
memory usage: 92.9 KB


In [4]:
# Feature selection and target variable
X = df.drop('class', axis=1)  # Replace 'target' with the actual target column name
y = df['class']
y

0       0
1       1
2       0
3       1
4       0
       ..
1314    0
1315    1
1316    1
1317    1
1318    1
Name: class, Length: 1319, dtype: int64

In [5]:
# Splitting data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
# Feature scaling
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [7]:
# Training the KNN classifier
knn = KNeighborsClassifier(n_neighbors=11)  # You can tune the number of neighbors
knn.fit(X_train, y_train)

In [8]:
# Making predictions
y_pred = knn.predict(X_test)


In [None]:
y_pred

In [9]:
# Evaluating the model
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

[[ 50  51]
 [ 40 123]]
              precision    recall  f1-score   support

           0       0.56      0.50      0.52       101
           1       0.71      0.75      0.73       163

    accuracy                           0.66       264
   macro avg       0.63      0.62      0.63       264
weighted avg       0.65      0.66      0.65       264



In [10]:
from sklearn import metrics

# Calculate Classification metrics: Accuracy, Precision, Recall
accuracy = metrics.accuracy_score(y_test, y_pred)
Precision = metrics.precision_score(y_test, y_pred)
Recall = metrics.recall_score(y_test, y_pred)
print("Accuracy :",accuracy)
print("Precision:",Precision)
print("Recall   :",Recall)

Accuracy : 0.6553030303030303
Precision: 0.7068965517241379
Recall   : 0.754601226993865


In [11]:
from sklearn.model_selection import GridSearchCV
# Define the parameter grid
param_grid = {
    'n_neighbors': [3, 5, 7, 9, 11],
    'weights': ['uniform', 'distance'],
    'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute']
}

In [12]:
# Set up GridSearchCV
grid_search = GridSearchCV(estimator=knn, param_grid=param_grid, cv=5, scoring='accuracy', n_jobs=-1)

# Fit the model
grid_search.fit(X_train, y_train)

In [13]:
from sklearn.metrics import accuracy_score, classification_report
# Print the best parameters and the best score
print("Best parameters found: ", grid_search.best_params_)
print("Best cross-validation accuracy: {:.2f}".format(grid_search.best_score_))

# Use the best estimator to make predictions
best_knn = grid_search.best_estimator_
y_pred = best_knn.predict(X_test)

# Evaluate the model
print("Test set accuracy: {:.2f}".format(accuracy_score(y_test, y_pred)))
print("Classification report:\n", classification_report(y_test, y_pred))

Best parameters found:  {'algorithm': 'auto', 'n_neighbors': 11, 'weights': 'distance'}
Best cross-validation accuracy: 0.66
Test set accuracy: 0.64
Classification report:
               precision    recall  f1-score   support

           0       0.54      0.50      0.52       101
           1       0.70      0.74      0.72       163

    accuracy                           0.64       264
   macro avg       0.62      0.62      0.62       264
weighted avg       0.64      0.64      0.64       264



In [None]:
grid_search.best_score_

In [None]:
# Print best parameters
print("Best parameters:", grid_search.best_params_)