# Heart Disease Prediction

In this project, we predict whether a person has heart disease based on the attributes provided in the dataset.

<b> Importing Libraries </b>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

In [None]:
# Load the heart-disease dataset from the Kaggle input directory.
hd = pd.read_csv(filepath_or_buffer='/kaggle/input/heart-disease-uci/heart.csv')

In [None]:
# Preview the first rows to sanity-check the load.
hd.head()

<font size="4"> <b> Exploratory Data Analysis </b> </font>

In [None]:
# Dimensions of the dataset as (rows, columns).
hd.shape

In [None]:
# Column names, dtypes and non-null counts.
hd.info()

In [None]:
# Summary statistics for the numeric columns.
hd.describe()

In [None]:
# Checking for null values: reduce the element-wise mask to one True/False
# answer instead of displaying the whole boolean frame, which cannot be read
# at a glance.
hd.isnull().values.any()

In [None]:
# Number of missing values in each column.
hd.isna().sum()

No missing values!

In [None]:
# Class balance: number of rows for each value of the `target` column
# (patients with heart disease vs. without).
hd['target'].value_counts()

In [None]:
# Bar chart of the class balance in the target column.
sns.set_style(style="darkgrid")
sns.countplot(data=hd, x='target')

In [None]:
# Number of records at each age, with one bar per target value.
plt.figure(figsize=(14, 10))
sns.countplot(
    data=hd,
    x='age',
    hue='target',
    palette='colorblind',
    edgecolor=sns.color_palette('dark', n_colors=1),
)

In [None]:
# Pairwise correlation matrix of the columns (pandas' default method).
hd.corr()

In [None]:
# Annotated heatmap of the pairwise correlation matrix.
plt.figure(figsize=(14, 10))
sns.heatmap(data=hd.corr(), annot=True)

<font size="4"> <b> Splitting and Scaling the Data </b> </font>

In [None]:
# Every column except the last is a feature; the last column is the label.
X, Y = hd.iloc[:, :-1].values, hd.iloc[:, -1].values

In [None]:
# Hold out 30% of the rows for testing; the remaining 70% are used for training.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, random_state=101, test_size=0.30
)

In [None]:
# Scaling the data
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
# Learn the per-feature mean and standard deviation from the training set only.
X_train = scaler.fit_transform(X_train)
# Reuse the training statistics for the test set. Refitting here would scale
# the test data differently from the data the models were trained on and would
# leak information about the test set into preprocessing.
X_test = scaler.transform(X_test)

<font size="4"> <b> Models and Accuracy </b> </font>

In [None]:
def models(X_train,y_train):
    """Fit three baseline classifiers and print their training accuracy.

    Parameters
    ----------
    X_train : array-like
        Training feature matrix handed to each estimator's ``fit``.
    y_train : array-like
        Training labels aligned with ``X_train``.

    Returns
    -------
    tuple
        ``(log, decision_tree, random_forest)`` -- the fitted logistic
        regression, decision tree and random forest, in that order.
    """
    from sklearn.linear_model import LogisticRegression
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.ensemble import RandomForestClassifier

    # (report label, estimator) pairs; the order here fixes both the printed
    # order and the positions in the returned tuple.
    candidates = (
        ('[0]Logistic Regression Training Acc:',
         LogisticRegression(random_state=0)),
        ('[1]Decision Tree Training Acc:',
         DecisionTreeClassifier(random_state=0, criterion='entropy')),
        ('[2]Random Forest Training Acc:',
         RandomForestClassifier(random_state=0, criterion='entropy', n_estimators=10)),
    )

    # Fit every estimator first, then report, so the scores print together.
    for _, estimator in candidates:
        estimator.fit(X_train, y_train)

    # Model accuracy on the training data
    for label, estimator in candidates:
        print(label, estimator.score(X_train, y_train))

    return tuple(estimator for _, estimator in candidates)

In [None]:
# Train all three classifiers; `model` is the tuple returned by models().
model = models(X_train=X_train, y_train=y_train)

In [None]:
# Accuracy on the held-out test data
from sklearn.metrics import confusion_matrix

for i in range(len(model)):
    print('Model ', i)
    cm = confusion_matrix(y_test, model[i].predict(X_test))

    # scikit-learn indexes the matrix as cm[true label][predicted label] with
    # labels in sorted order. Treating the larger label as the positive class,
    # row 0 holds the actual negatives and row 1 the actual positives.
    tn = cm[0][0]
    fp = cm[0][1]
    fn = cm[1][0]
    tp = cm[1][1]

    print(cm)
    # Correct predictions (the diagonal) over all predictions.
    print('Testing Acc = ', (tp + tn)/(tp +tn +fn + fp))
    print()

<font size="4"> <b> Predictions </b> </font>

In [None]:
# Logistic Regression: predicted labels, a blank line, then the true labels
pred = model[0].predict(X_test)
print(pred, y_test, sep='\n\n')

In [None]:
# Decision Tree: predictions first, then the ground truth for comparison
pred = model[1].predict(X_test)
print(pred, end='\n\n')
print(y_test)

In [None]:
# Random Forest
# models() returns (logistic regression, decision tree, random forest), so the
# random forest is at index 2; index 1 would repeat the decision tree output.
pred = model[2].predict(X_test)
print(pred)
print()
print(y_test)