In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from imblearn.over_sampling import SMOTE
import warnings

warnings.filterwarnings('ignore')

# Load dataset
data = pd.read_csv(r"D:\Data Mining\Data sets for lab report\Heart Attack\heart_attack_risk_dataset.csv")

# Data Preprocessing
data.columns = data.columns.str.strip()  # Remove unwanted spaces
data.fillna(data.median(), inplace=True)  # Fill missing values

# Convert categorical columns
categorical_cols = data.select_dtypes(include=['object']).columns
label_encoder = LabelEncoder()
for col in categorical_cols:
    data[col] = label_encoder.fit_transform(data[col])

# Normalize numerical columns
numerical_columns = ['Age', 'BMI', 'Cholesterol_Level', 'Resting_BP', 'Heart_Rate', 'Fasting_Blood_Sugar', 'Max_Heart_Rate_Achieved']
scaler = MinMaxScaler()
data[numerical_columns] = scaler.fit_transform(data[numerical_columns])

# Define Features and Target
X = data.drop(columns=['Heart_Attack_Risk'])
y = data['Heart_Attack_Risk']

# Apply SMOTE
smote = SMOTE(sampling_strategy='auto', random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

# Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

# Train KNN Model
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)

# Predictions
y_pred = knn.predict(X_test)

# Accuracy Calculation
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Classification Report
print("Classification Report:")
print(classification_report(y_test, y_pred))

# Confusion Matrix
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

TypeError: Cannot convert [['Female' 'Male' 'Male' ... 'Male' 'Male' 'Male']
 ['Moderate' 'Moderate' 'Moderate' ... 'High' 'Low' 'High']
 ['Moderate' 'Low' 'Low' ... 'Low' 'Low' 'High']
 ...
 ['Reversible defect' 'Normal' 'Reversible defect' ... 'Fixed defect'
  'Reversible defect' 'Normal']
 ['Normal' 'ST-T abnormality' 'ST-T abnormality' ...
  'Left Ventricular Hypertrophy' 'Normal' 'ST-T abnormality']
 ['Low' 'Moderate' 'Low' ... 'Low' 'Moderate' 'Moderate']] to numeric