In [0]:
from google.colab import files

# Open Colab's file-upload dialog so the dataset can be added to the runtime.
# The result is bound to a name on purpose: as a bare last expression the
# returned mapping of uploaded file names to their contents would be echoed
# as the cell output.
uploaded = files.upload()

In [0]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split 
from sklearn import metrics
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier

In [0]:
# Load the dataset from heart.csv in the working directory.
data = pd.read_csv("heart.csv")
# read_csv already returns a DataFrame, so no pd.DataFrame(...) re-wrap is
# needed; `disease_data` is the name the rest of the notebook works with.
disease_data = data

In [0]:
# Dimensions of the loaded frame as a (rows, columns) tuple.
disease_data.shape

In [0]:
# Summary statistics for the columns of disease_data.
disease_data.describe()

In [0]:
# Number of missing values in each column.
disease_data.isnull().sum()

In [0]:
# Number of missing values in each column. pandas documents isnull() as an
# alias of isna(), so this repeats the isnull() check.
disease_data.isna().sum()

In [0]:
# Preview the first rows rather than rendering the entire frame.
disease_data.head()

In [0]:
# Bar chart of how many rows fall under each value of `target`.
target_counts = disease_data['target'].value_counts()
ax = target_counts.plot.bar()
ax.set(xlabel="Heart Disease", ylabel="Count")
plt.show()

In [0]:
# Count rows for every (target, sex) pair, then pivot `target` into columns so
# the bar chart has one group per `sex` value and one bar per `target` value.
pair_counts = disease_data.groupby(['target', 'sex'])['sex'].count()
sex_df = pair_counts.unstack('target')
sex_plot = sex_df.plot.bar()
sex_plot.set_title('Frequency in of Heart Disease as per Sex')
sex_plot.set_xlabel('Sex')
sex_plot.set_ylabel('Count')
plt.show()

In [0]:
# Row counts per `cp` value, with one bar per `target` value in each group.
sns.countplot(
    data=disease_data,
    x="cp",
    hue="target",
    palette="Set2",
)

In [0]:
# One count panel per `target` value: `cp` on the x-axis, bars split by `sex`.
sns.catplot(
    data=disease_data,
    kind="count",
    x="cp",
    hue="sex",
    col="target",
    palette="Set1",
)

In [0]:
# Distribution of `thalach` for each `target` value.
sns.boxplot(
    data=disease_data,
    x="target",
    y="thalach",
)

In [0]:
# Correlation heatmap: pairwise column correlations, annotated with their values.
plt.figure(figsize=(12, 10))
corr = disease_data.corr()
sns.heatmap(data=corr, annot=True)

In [0]:
# Split data into Train and Test Sets
regg_data = disease_data
x1 = regg_data.drop(['target'], axis=1)

# Min-max scale each feature column to [0, 1].
# Use the DataFrame's own column-wise min()/max(): np.min(x1) / np.max(x1)
# hand axis=None to pandas, and pandas 2.0+ then reduces over the whole frame
# to a single scalar, so the features would no longer be scaled per column.
col_min = x1.min()
col_range = x1.max() - col_min
# A constant column has zero range; divide by 1 there so it scales to 0
# instead of producing NaN.
col_range = col_range.replace(0, 1)
X = (x1 - col_min) / col_range
y = disease_data['target']

# NOTE(review): the scaling statistics above are computed on the full dataset
# before the split, so test rows influence the scaling. Consider fitting the
# min/max on X_train only.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)

In [0]:
# Logistic Regression: fit on the training split, then predict and score on
# the test split.
logisticRegr = LogisticRegression()
# fit() returns the fitted estimator, so `clf` and `logisticRegr` are the same model.
clf = logisticRegr.fit(X_train, y_train)
predictions = clf.predict(X_test)
LR_score = clf.score(X_test, y_test)
print(LR_score)

In [0]:
# Confusion Matrix
def print_ConfusionMatrix(actual, pred, score):
    """Draw the confusion matrix of `pred` against `actual` as a heatmap.

    Parameters
    ----------
    actual : array-like
        True labels.
    pred : array-like
        Predicted labels, in the same order as `actual`.
    score : float
        Value shown in the figure title after "Accuracy Score: ".

    Returns
    -------
    None
        The heatmap is drawn on a newly created matplotlib figure.
    """
    cm = metrics.confusion_matrix(actual, pred)
    plt.figure(figsize=(9, 9))
    # The cells are integer sample counts, so annotate them as integers
    # ("25") rather than with three decimals ("25.000").
    sns.heatmap(cm, annot=True, fmt="d", linewidths=.5, square=True, cmap='Blues_r')
    plt.ylabel('Actual label')
    plt.xlabel('Predicted label')
    plt.title('Accuracy Score: {0}'.format(score))

# Confusion matrix for logistic regression. The model's test-set predictions
# are stored in `predictions`; `LR_pred` is never defined and raised NameError.
print_ConfusionMatrix(y_test, predictions, LR_score)

In [0]:
# Naive Bayes: fit a Gaussian NB model and report how many test rows it
# labels incorrectly.
gnb = GaussianNB()
gnb.fit(X_train, y_train)
y_pred = gnb.predict(X_test)
total_points = X_test.shape[0]
mislabelled_points = (y_test != y_pred).sum()
print("Number of mislabeled points out of a total %d points : %d"% (total_points, mislabelled_points))

In [0]:
# K-Nearest Neighbors
neigh = KNeighborsClassifier(n_neighbors=3)
neigh.fit(X_train, y_train)
y_pred = neigh.predict(X_test)
# Score the KNN model against the true test labels. The earlier
# logisticRegr.score(X_test, y_pred) measured how often logistic regression
# agrees with KNN's predictions, which is not KNN's accuracy.
KNN_score = neigh.score(X_test, y_test)
print(KNN_score)

In [0]:
# Confusion matrix for whatever `y_pred` currently holds (in top-to-bottom
# execution order, the K-Nearest Neighbors predictions), titled with KNN_score.
print_ConfusionMatrix(y_test, y_pred,KNN_score)