In [148]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
from sklearn.metrics import accuracy_score


In [149]:
# Load the dataset (e.g., Cleveland Heart Disease dataset)
df = pd.read_csv('/content/heart.csv')

In [151]:
print(df.head())

   age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  slope  \
0   63    1   3       145   233    1        0      150      0      2.3      0   
1   37    1   2       130   250    0        1      187      0      3.5      0   
2   41    0   1       130   204    0        0      172      0      1.4      2   
3   56    1   1       120   236    0        1      178      0      0.8      2   
4   57    0   0       120   354    0        1      163      1      0.6      2   

   ca  thal  target  
0   0     1       1  
1   0     2       1  
2   0     2       1  
3   0     2       1  
4   0     2       1  


In [152]:
# # Handle missing values
imputer = SimpleImputer(strategy='median')
df_imputed_median = pd.DataFrame(imputer.fit_transform(df), columns=df.columns)
print("\nDataFrame after Median Imputation:")
print(df_imputed_median)


DataFrame after Median Imputation:
      age  sex   cp  trestbps   chol  fbs  restecg  thalach  exang  oldpeak  \
0    63.0  1.0  3.0     145.0  233.0  1.0      0.0    150.0    0.0      2.3   
1    37.0  1.0  2.0     130.0  250.0  0.0      1.0    187.0    0.0      3.5   
2    41.0  0.0  1.0     130.0  204.0  0.0      0.0    172.0    0.0      1.4   
3    56.0  1.0  1.0     120.0  236.0  0.0      1.0    178.0    0.0      0.8   
4    57.0  0.0  0.0     120.0  354.0  0.0      1.0    163.0    1.0      0.6   
..    ...  ...  ...       ...    ...  ...      ...      ...    ...      ...   
298  57.0  0.0  0.0     140.0  241.0  0.0      1.0    123.0    1.0      0.2   
299  45.0  1.0  3.0     110.0  264.0  0.0      1.0    132.0    0.0      1.2   
300  68.0  1.0  0.0     144.0  193.0  1.0      1.0    141.0    0.0      3.4   
301  57.0  1.0  0.0     130.0  131.0  0.0      1.0    115.0    1.0      1.2   
302  57.0  0.0  1.0     130.0  236.0  0.0      0.0    174.0    0.0      0.0   

     slope   ca

In [153]:
# Convert categorical variables (e.g., Gender, CP Level) into numeric values
label_encoder = LabelEncoder()
df['sex'] = label_encoder.fit_transform(df['sex'])
df['cp'] = label_encoder.fit_transform(df['cp'])

print(df)

     age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  \
0     63    1   3       145   233    1        0      150      0      2.3   
1     37    1   2       130   250    0        1      187      0      3.5   
2     41    0   1       130   204    0        0      172      0      1.4   
3     56    1   1       120   236    0        1      178      0      0.8   
4     57    0   0       120   354    0        1      163      1      0.6   
..   ...  ...  ..       ...   ...  ...      ...      ...    ...      ...   
298   57    0   0       140   241    0        1      123      1      0.2   
299   45    1   3       110   264    0        1      132      0      1.2   
300   68    1   0       144   193    1        1      141      0      3.4   
301   57    1   0       130   131    0        1      115      1      1.2   
302   57    0   1       130   236    0        0      174      0      0.0   

     slope  ca  thal  target  
0        0   0     1       1  
1        0   0     2     

In [154]:
df.columns

Index(['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach',
       'exang', 'oldpeak', 'slope', 'ca', 'thal', 'target'],
      dtype='object')

In [155]:
X = df[['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach',
       'exang', 'oldpeak', 'slope', 'ca', 'thal']] # Replace 'target' with your target column name
y = df['target']  # Replace 'target' with the actual target column (e.g., disease presence)


In [156]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [157]:

# Standardize numerical features (e.g., Age, Cholesterol, etc.)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [158]:
# Train the model (RandomForestClassifier as an example)
model = RandomForestClassifier(random_state=42)
model.fit(X_train_scaled, y_train)


In [159]:
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')
report = classification_report(y_test, y_pred)
print('Classification Report:')
print(report)

Accuracy: 0.6721311475409836
Classification Report:
              precision    recall  f1-score   support

           0       0.62      0.79      0.70        29
           1       0.75      0.56      0.64        32

    accuracy                           0.67        61
   macro avg       0.69      0.68      0.67        61
weighted avg       0.69      0.67      0.67        61





In [161]:
model = LogisticRegression(random_state=42)
model.fit(X_train_scaled, y_train)

In [162]:
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')
report = classification_report(y_test, y_pred)
print('Classification Report:')
print(report)

Accuracy: 0.5245901639344263
Classification Report:
              precision    recall  f1-score   support

           0       0.50      1.00      0.67        29
           1       1.00      0.09      0.17        32

    accuracy                           0.52        61
   macro avg       0.75      0.55      0.42        61
weighted avg       0.76      0.52      0.41        61





In [163]:
model = XGBClassifier(random_state=42)
model.fit(X_train_scaled, y_train)

In [164]:
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')
report = classification_report(y_test, y_pred)
print('Classification Report:')
print(report)

Accuracy: 0.6885245901639344
Classification Report:
              precision    recall  f1-score   support

           0       0.62      0.86      0.72        29
           1       0.81      0.53      0.64        32

    accuracy                           0.69        61
   macro avg       0.72      0.70      0.68        61
weighted avg       0.72      0.69      0.68        61



In [165]:
model = KNeighborsClassifier()
model.fit(X_train_scaled, y_train)

In [166]:
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')
report = classification_report(y_test, y_pred)
print('Classification Report:')
print(report)

Accuracy: 0.5245901639344263
Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        29
           1       0.52      1.00      0.69        32

    accuracy                           0.52        61
   macro avg       0.26      0.50      0.34        61
weighted avg       0.28      0.52      0.36        61



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [167]:
model = GaussianNB()
model.fit(X_train_scaled, y_train)

In [168]:
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')
report = classification_report(y_test, y_pred)
print('Classification Report:')
print(report)

Accuracy: 0.4098360655737705
Classification Report:
              precision    recall  f1-score   support

           0       0.32      0.21      0.25        29
           1       0.45      0.59      0.51        32

    accuracy                           0.41        61
   macro avg       0.38      0.40      0.38        61
weighted avg       0.39      0.41      0.39        61





In [169]:
model = MLPClassifier()
model.fit(X_train_scaled, y_train)



In [170]:
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')
report = classification_report(y_test, y_pred)
print('Classification Report:')
print(report)

Accuracy: 0.5573770491803278
Classification Report:
              precision    recall  f1-score   support

           0       0.75      0.10      0.18        29
           1       0.54      0.97      0.70        32

    accuracy                           0.56        61
   macro avg       0.65      0.54      0.44        61
weighted avg       0.64      0.56      0.45        61





In [171]:
# Predict on the test set
model = SVC(random_state=42)
model.fit(X_train_scaled, y_train)

In [172]:
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')
report = classification_report(y_test, y_pred)
print('Classification Report:')
print(report)

Accuracy: 0.5245901639344263
Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        29
           1       0.52      1.00      0.69        32

    accuracy                           0.52        61
   macro avg       0.26      0.50      0.34        61
weighted avg       0.28      0.52      0.36        61



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
