In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

import os
# Print the full path of every file found under the Kaggle input directory.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))


In [None]:
# Load the heart-disease CSV from the Kaggle input directory into a DataFrame.
data = pd.read_csv('../input/heart-disease-uci/heart.csv')

In [None]:
# Preview the first rows of the dataset.
data.head()

In [None]:
# Preview the last rows of the dataset.
data.tail()

In [None]:
# Data type of each column in the dataset.
data.dtypes

In [None]:
# Print a summary of the frame (columns, non-null counts, dtypes).
data.info()

In [None]:
# Shape of the dataset as a (rows, columns) tuple.
data.shape

In [None]:
# Count the missing (null) values in each column.
data.isnull().sum()

# ***Analyzing the dataset***

In [None]:
# Count the rows for each `sex` value, with bars split by `target`.
sns.countplot(data=data, x='sex', hue='target')

In [None]:
# Pairwise correlation matrix of the dataset's columns, computed once and
# reused for the annotated heatmap below.
corr = data.corr()
plt.figure(figsize=(10, 10))
sns.heatmap(corr, annot=True, cmap='PiYG')
plt.show()

In [None]:
# Pie chart of two hard-coded slice sizes, annotated with their percentages.
# NOTE(review): 56 and 66 are not derived from `data` in this cell and the
# slices carry no labels -- confirm what they represent, or compute them
# from the dataset.
sections = [56, 66]
colors = ['c', 'y']

plt.pie(sections,
        colors=colors,
        startangle=90,
        explode=(0, 0),
        autopct='%1.2f%%')

plt.axis('equal')  # equal axis scaling, so the pie is drawn as a circle
plt.show()

In [None]:
# Grid of pairwise plots for the columns of the dataset.
sns.pairplot(data=data)
plt.show()

In [None]:
# Box plot of `age` for each `target` value, split by `sex`.
sns.boxplot(data=data, x='target', y='age', hue='sex')

In [None]:
# Count the rows for each `cp` value, with bars split by `target`.
# Variables are passed by keyword: a positional data vector is deprecated
# in seaborn 0.11 and, from 0.12 on, the first positional parameter is
# `data`, so the old call no longer plots `cp` on the x axis.
sns.countplot(x='cp', hue='target', data=data)
plt.show()

In [None]:
# Distribution of `trestbps` for each `target` value (histogram + KDE).
# histplot replaces the deprecated distplot. stat='density' together with
# common_norm=False normalises each target group on its own, as the two
# separate distplot calls did; the hue legend replaces the manual legend.
sns.histplot(data=data, x='trestbps', hue='target', kde=True,
             stat='density', common_norm=False)
plt.show()

In [None]:
# Count the rows for each `restecg` value, with bars split by `target`.
# Variables are passed by keyword: a positional data vector is deprecated
# in seaborn 0.11 and, from 0.12 on, the first positional parameter is
# `data`, so the old call no longer plots `restecg` on the x axis.
sns.countplot(x='restecg', hue='target', data=data)
plt.show()

In [None]:
# Count the rows for each `exang` value, with bars split by `target`.
# Variables are passed by keyword: a positional data vector is deprecated
# in seaborn 0.11 and, from 0.12 on, the first positional parameter is
# `data`, so the old call no longer plots `exang` on the x axis.
sns.countplot(x='exang', hue='target', data=data)
def random_plots():
  """Return demo data: x positions 0..19 and 20 random integers in [0, 10).

  Returns:
    A tuple ``(xs, ys)`` of two lists with 20 elements each.
  """
  xs = list(range(20))
  # One np.random.randint call per point, so the sequence of draws from
  # NumPy's global random state is the same as an explicit append loop.
  ys = [np.random.randint(10) for _ in xs]
  return xs, ys

# Lay out four axes on a 5x2 grid: a full-width one-row panel on top, a
# full-width three-row panel in the middle and two single-cell panels on
# the bottom row. Each panel gets its own freshly generated random series.
fig = plt.figure()
grid_shape = (5, 2)
ax1 = plt.subplot2grid(grid_shape, (0, 0), rowspan=1, colspan=2)
ax2 = plt.subplot2grid(grid_shape, (1, 0), rowspan=3, colspan=2)
ax3 = plt.subplot2grid(grid_shape, (4, 0), rowspan=1, colspan=1)
ax4 = plt.subplot2grid(grid_shape, (4, 1), rowspan=1, colspan=1)

for panel in (ax1, ax2, ax3, ax4):
  x, y = random_plots()
  panel.plot(x, y)

plt.tight_layout()
plt.show()

In [None]:
# Swarm plot of the `ca` values, with `target` as the hue variable.
# Variables are passed by keyword: a positional data vector is deprecated
# in seaborn 0.11 and, from 0.12 on, the first positional parameter is
# `data`, so the old call no longer plots `ca` on the x axis.
sns.swarmplot(x='ca', hue='target', data=data)
plt.show()

In [None]:
# Kernel density estimate of `thal`, one curve per `target` value.
# Variables are passed by keyword: from seaborn 0.12 on the first
# positional parameter of kdeplot is `data`, not the x vector.
sns.kdeplot(x='thal', hue='target', data=data)
plt.show()

In [None]:
# Bar plot of `chol` for each `cp` value, with separate bars per `sex`.
sns.catplot(data=data, kind="bar", x="cp", y="chol", hue="sex")
plt.show()

In [None]:
# Two side-by-side scatter plots of `chol` against `age`:
# coloured by `sex` on the left and by `target` on the right.
scatter_args = dict(x='age', y='chol', data=data)
plt.subplot(1, 2, 1)
sns.scatterplot(hue='sex', **scatter_args)
plt.subplot(1, 2, 2)
sns.scatterplot(hue='target', **scatter_args)

In [None]:
# Scatter of `chol` against `trestbps` with a separate linear regression
# fit drawn for each `cp` value.
sns.lmplot(data=data, x="trestbps", y="chol", hue="cp")
plt.show()

In [None]:
# Single-column heatmap of every column's correlation with `target`,
# sorted in ascending order; the target's own row is left out.
plt.figure(figsize=(5, 5))
target_corr = data.corr()[['target']].sort_values(by='target').drop(index='target')
sns.heatmap(target_corr, annot=True, cmap='CMRmap')

In [None]:
# Count plots of several columns, each split by `target`, on a 3x3 grid.
# The column list is no longer named `list`: that shadowed the builtin and
# would break any later `list(...)` call in the same kernel.
countplot_columns = ['sex', 'cp', 'fbs', 'restecg', 'exang', 'slope', 'ca', 'thal']
plt.figure(figsize=(15, 10))
# Subplot positions are 1-based, hence start=1.
for position, column in enumerate(countplot_columns, start=1):
    plt.subplot(3, 3, position)
    sns.countplot(x=column, data=data, hue='target')

In [None]:
# Side-by-side 20-bin histograms (drawn as polygons) of `thalach` and
# `chol`, each split by `target`.
plt.figure(figsize=(15, 5))
hist_args = dict(data=data, hue='target', bins=20, element='poly')
plt.subplot(1, 2, 1)
sns.histplot(x='thalach', **hist_args)
plt.subplot(1, 2, 2)
sns.histplot(x='chol', **hist_args)

# ***Feature Selection***

In [None]:
# Feature matrix: every column except `target`. Label vector: `target`.
X = data.drop(columns='target')
Y = data.target

In [None]:
from sklearn.feature_selection import SelectKBest, chi2

# Score every feature against the label with the chi-squared statistic.
# k='all' keeps all features; only the fitted scores are used below.
fs = SelectKBest(score_func=chi2, k='all')
fs.fit(X, Y)

# Each feature's share of the total chi-squared score, in percent.
# Vectorised so the total is computed once instead of once per feature.
per = np.round(fs.scores_ / fs.scores_.sum() * 100, 3)

features_data = pd.DataFrame(
    {'Feature': X.columns, 'Scores': fs.scores_, 'Importance (%)': per}
).sort_values(by=['Scores'], ascending=False)

plt.figure(figsize=(9, 4))
# x and y are passed by keyword: seaborn 0.12+ accepts only `data`
# positionally, so the two positional column names raised a TypeError.
sns.barplot(x='Importance (%)', y='Feature', orient='h', data=features_data, palette='CMRmap')
# Features whose share of the total score is below 0.005 %.
insignificant = features_data.loc[features_data['Importance (%)'] < 0.005, 'Feature'].unique()
features_data = features_data.set_index('Feature')
features_data

# ***LogisticRegression***

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report,confusion_matrix
from sklearn.model_selection import train_test_split

In [None]:
target = data['target']
detail = data.drop('target',axis=1)

In [None]:
target.head()

In [None]:
# Hold out 30 % of the rows for testing. random_state pins the shuffle so
# the split, and every result derived from it, is the same on each run.
X_train, X_test, y_train, y_test = train_test_split(detail, target, test_size=0.3, random_state=42)

In [None]:
np.array(y_test)

In [None]:
np.array(X_test)

In [None]:
# Model input (`ip`): every column except `target`.
# Model output (`op`): the `target` column.
ip = data.drop(columns=['target'])
op = data.target

In [None]:
from sklearn.model_selection import train_test_split

# 70/30 train/test split of the inputs (ip) and outputs (op).
# random_state pins the shuffle so the reported metrics are reproducible.
xtr, xts, ytr, yts = train_test_split(ip, op, test_size=0.3, random_state=42)

In [None]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only, then apply that same
# transformation to both the training and the test split.
sc = StandardScaler().fit(xtr)
xtr, xts = sc.transform(xtr), sc.transform(xts)

In [None]:
from sklearn.linear_model import LogisticRegression
alg=LogisticRegression()

In [None]:
#train the algorithm with the training data
alg.fit(xtr,ytr)
yp=alg.predict(xts)

# ***Checking the Accuracy of the Model***

In [None]:
from sklearn import metrics
cm=metrics.confusion_matrix(yts,yp)
print(cm)

In [None]:
accuracy=metrics.accuracy_score(yts,yp)
print(accuracy)

In [None]:
precission=metrics.precision_score(yts,yp)
print(precission)

In [None]:
recall=metrics.recall_score(yts,yp)
print(recall)

# ***Naive Bayes classifier***

In [None]:
from sklearn.model_selection import train_test_split

# Fresh 80/20 train/test split of the unscaled inputs (ip) and outputs (op).
# This rebinds xtr/xts/ytr/yts; random_state pins the shuffle so the
# metrics computed from this split are reproducible.
xtr, xts, ytr, yts = train_test_split(ip, op, test_size=0.2, random_state=42)

In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling from the training split, then standardise both splits
# with it.
sc = StandardScaler().fit(xtr)
xtr = sc.transform(xtr)
xts = sc.transform(xts)

In [None]:
from sklearn.naive_bayes import GaussianNB

# Fit a Gaussian naive Bayes classifier on the training split and predict
# labels for the test split.
GNB = GaussianNB().fit(xtr, ytr)
yp = GNB.predict(xts)

In [None]:
from sklearn import metrics
cm=metrics.confusion_matrix(yts,yp)
print(cm)

In [None]:
accuracy=metrics.accuracy_score(yts,yp)
print(accuracy)

In [None]:
recall=metrics.recall_score(yts,yp)
print(recall)

# ***K-Nearest Neighbour***

In [None]:
# k-NN: fit one classifier for each k from 1 to 8 and record its accuracy
# on the training and test splits.
# NOTE(review): xtr/xts/ytr/yts are whatever the most recently run split
# and scaling cells left in the namespace -- confirm that is the intended
# split for this model.
from sklearn.neighbors import KNeighborsClassifier

neighbors = np.arange(1, 9)
train_accuracy = np.empty(len(neighbors))
test_accuracy = np.empty(len(neighbors))

for i, k in enumerate(neighbors):
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(xtr, ytr)
    # score() returns the mean accuracy on the given data.
    train_accuracy[i] = knn.score(xtr, ytr)
    test_accuracy[i] = knn.score(xts, yts)

# Axis label fixed: it previously read 'neighbors of number'.
plt.xlabel('Number of neighbors')
plt.ylabel('accuracy')
plt.title('k-NN Varying number of neighbors')
plt.plot(neighbors, test_accuracy, label='Testing Accuracy')
plt.plot(neighbors, train_accuracy, label='Training accuracy')
plt.legend()
plt.show()

# **SVC**

In [None]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

In [None]:
X_train, X_test, y_train, y_test = train_test_split(detail, target, test_size=0.3)

In [None]:
grid={'C':[1,10,100,1000,10000,100000,1000000],'gamma':[0.0000001,0.000001,0.00001,0.0001,0.001,.01,0.1,1]}

In [None]:
g=GridSearchCV(SVC(),grid,verbose=2)

In [None]:
g.fit(X_train,y_train)

In [None]:
g.best_estimator_

In [None]:
Prediction4=g.predict(X_test)

In [None]:
np.array(y_test)

In [None]:
Prediction4

In [None]:
print(classification_report(y_test,Prediction4))

In [None]:
print(confusion_matrix(y_test,Prediction4))

----------------------------------------------------------------