In [2]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.decomposition import PCA

# Load Heart Disease Dataset in Pandas DataFrame
heart_data = pd.read_csv("heart.csv")

# Convert Text Columns to Numbers using Label Encoding and One-Hot Encoding
label_encoder = LabelEncoder()
heart_data['ChestPainType'] = label_encoder.fit_transform(heart_data['ChestPainType'])

one_hot_encoder = OneHotEncoder(sparse=False)
sex_encoded = one_hot_encoder.fit_transform(heart_data[['Sex']])
sex_encoded_df = pd.DataFrame(sex_encoded, columns=['Sex_0', 'Sex_1'])

heart_data = pd.concat([heart_data, sex_encoded_df], axis=1)
heart_data.drop('Sex', axis=1, inplace=True)

# Apply Scaling
scaler = StandardScaler()
cols_to_scale = ['Age', 'RestingBP', 'Cholesterol', 'MaxHR', 'Oldpeak']
heart_data[cols_to_scale] = scaler.fit_transform(heart_data[cols_to_scale])

# Split Data into Training and Testing Sets
X = heart_data.drop('HeartDisease', axis=1)
y = heart_data['HeartDisease']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Build Classification Models and Check Accuracy
models = {
    'SVM': SVC(),
    'Logistic Regression': LogisticRegression(),
    'Random Forest': RandomForestClassifier()
}

print("Before PCA:")
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"{name} Accuracy: {accuracy}")

# Apply PCA and Retrain the Models
pca = PCA(n_components=5)
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)

print("\nAfter PCA:")
for name, model in models.items():
    model.fit(X_train_pca, y_train)
    y_pred = model.predict(X_test_pca)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"{name} Accuracy after PCA: {accuracy}")


Before PCA:




ValueError: could not convert string to float: 'Normal'