In [1]:
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import MinMaxScaler

In [2]:
# Read the data in and remove any missing values.
filename='./MuhammadOwais_Imran_Dataset_BP.csv'
nan_values=[]  # no extra NA markers on top of the ones pandas recognises by default
att_type={'BP_Status':'category'}  # read the target label as a categorical column

df=pd.read_csv(filename,dtype=att_type,na_values=nan_values)

# Report the state of the raw data: rows with at least one missing value,
# the overall shape, and the per-column count of missing values.
print(df[df.isna().any(axis=1)])

print(df.shape)
print(df.isnull().sum())
print()

# Remove any row with a missing value in any of the columns.
# (Previously the cell only reported missing values and never dropped them.)
df = df.dropna()

# Same report after cleaning, so the effect of the drop is visible.
print(f"shape : {df.shape}")
print(df.isnull().sum())

# Discard the Gender column; reassigning instead of using inplace=True keeps
# the lineage of `df` explicit.
df = df.drop(columns='Gender')
print(df.head())

Empty DataFrame
Columns: [Somker, Gender, Age, Height, Weight, BP_Status]
Index: []
(4403, 6)
Somker       0
Gender       0
Age          0
Height       0
Weight       0
BP_Status    0
dtype: int64

shape : (4403, 6)
Somker       0
Gender       0
Age          0
Height       0
Weight       0
BP_Status    0
dtype: int64
          Somker  Age  Height  Weight BP_Status
0  1- Non-smoker   29   62.50     140    Normal
1  1- Non-smoker   41   59.75     194      High
2    3- Moderate   57   62.25     132      High
3  1- Non-smoker   39   65.75     158    Normal
4  1- Non-smoker   58   61.75     131      High


In [3]:
# Data Transformation

# Replace each verbose smoker label with its leading ordinal code.
# The codes are kept as strings, exactly as before.
map_dict_smoker = {
    "1- Non-smoker": "1",
    "2- Light": "2",
    "3- Moderate": "3",
    "4- Heavy": "4",
    "5- Very Heavy": "5"
}
# Series.map(dict) turns every value that is not a key of the dict into NaN,
# so running this cell a second time would blank the already-coded column.
# Passing values that are not keys through untouched keeps the cell safe to re-run.
df['Somker'] = df['Somker'].map(lambda label: map_dict_smoker.get(label, label))

# Fail fast, with a readable message, on any label the mapping does not cover
# instead of letting it reach the model unnoticed.
unexpected_labels = set(df['Somker'].unique()) - set(map_dict_smoker.values())
if unexpected_labels:
    raise ValueError(
        f"Unmapped smoker categories: {sorted(unexpected_labels, key=str)}"
    )
print(df.head())

# Rescale the continuous columns to the [0, 1] range.
# NOTE(review): the scaler is fitted on the full dataset, i.e. before the
# train/test split, so held-out rows influence the min/max used for scaling --
# consider fitting on the training split only.
# NOTE(review): 'Somker' (codes 1-5) is left unscaled -- confirm that is intended.
scaler = MinMaxScaler()
colToScale =  ["Age", "Weight", "Height"]
df[colToScale] = scaler.fit_transform(df[colToScale])

df.head()

  Somker  Age  Height  Weight BP_Status
0      1   29   62.50     140    Normal
1      1   41   59.75     194      High
2      3   57   62.25     132      High
3      1   39   65.75     158    Normal
4      1   58   61.75     131      High


Unnamed: 0,Somker,Age,Height,Weight,BP_Status
0,1,0.0,0.44,0.313305,Normal
1,1,0.363636,0.33,0.545064,High
2,3,0.848485,0.43,0.27897,High
3,1,0.30303,0.57,0.390558,Normal
4,1,0.878788,0.41,0.274678,High


In [4]:
# Show the dtype of every column after the transformation step.
column_types = df.dtypes
print(column_types)

Somker         object
Age           float64
Height        float64
Weight        float64
BP_Status    category
dtype: object


In [5]:
# Splitting the dataset
attr = df.drop(columns=['BP_Status'])  # features
target = df['BP_Status']  # target variable

# Hold out 30% of the rows for evaluation; the fixed random_state makes the
# shuffled split reproducible from run to run.
attr_train, attr_test, target_train, target_test = train_test_split(
    attr,
    target,
    test_size=0.3,
    random_state=44,
    shuffle=True,
)

# Preview the held-out labels. Only the last expression of a cell is rendered,
# so this is the single preview the cell shows.
target_test.head()


1182    High
2084    High
4386    High
2163    High
3323    High
Name: BP_Status, dtype: category
Categories (2, object): ['High', 'Normal']

In [6]:
# Compare distance-weighted KNN classifiers over a handful of odd neighbourhood sizes.
k_values = [1, 3, 5, 7, 9]

for k in k_values:
    # Fit on the training split, then predict the held-out split.
    knn = KNeighborsClassifier(n_neighbors=k, weights="distance")
    knn.fit(attr_train, target_train)
    target_pred = knn.predict(attr_test)

    # Collect the three evaluation artefacts before reporting them.
    acc = accuracy_score(target_test, target_pred)
    cm = confusion_matrix(target_test, target_pred)
    report = classification_report(target_test, target_pred)

    # One print call, newline-separated, emits the same text as four separate prints.
    print(
        f"====== K-Value: {k} ======",
        f"Model Accuracy: {acc}",
        f"Confusion Matrix:\n {cm}\n",
        f"Classification Report:\n {report}\n",
        sep="\n",
    )

Model Accuracy: 0.5367146101438305
Confusion Matrix:
 [[360 320]
 [292 349]]

Classification Report:
               precision    recall  f1-score   support

        High       0.55      0.53      0.54       680
      Normal       0.52      0.54      0.53       641

    accuracy                           0.54      1321
   macro avg       0.54      0.54      0.54      1321
weighted avg       0.54      0.54      0.54      1321


Model Accuracy: 0.5579106737320212
Confusion Matrix:
 [[381 299]
 [285 356]]

Classification Report:
               precision    recall  f1-score   support

        High       0.57      0.56      0.57       680
      Normal       0.54      0.56      0.55       641

    accuracy                           0.56      1321
   macro avg       0.56      0.56      0.56      1321
weighted avg       0.56      0.56      0.56      1321


Model Accuracy: 0.5760787282361847
Confusion Matrix:
 [[405 275]
 [285 356]]

Classification Report:
               precision    recall  f1-