In [None]:
"""
Import the required libraries
"""

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt #For visualisation
import seaborn as sns #For visualisation library

In [None]:
# Load the heart-disease CSV into the DataFrame `df`
df = pd.read_csv(filepath_or_buffer='../input/heart-disease-uci/heart.csv')

In [None]:
# Preview the first five rows of the dataset
df.head(n=5)

In [None]:
"""
Exploratory Data Analysis
"""

In [None]:
# Summary of df: column dtypes, non-null counts and memory usage
df.info()

In [None]:
# Descriptive statistics for the columns of df
df.describe()

In [None]:
# Heatmap of the boolean missing-value mask of df (one cell per entry)
plt.figure(figsize=(10, 10))
missing_mask = df.isnull()
sns.heatmap(missing_mask, yticklabels=False, cbar=False, cmap='viridis')

In [None]:
# Show the column names of df
df.columns

In [None]:
# Bar plot of trestbps grouped by sex
sns.barplot(data=df, x='sex', y='trestbps')

In [None]:
# Pairwise relationships between all columns of df
sns.pairplot(data=df)

In [None]:
# Heatmap of the pairwise correlation matrix of df
corr_matrix = df.corr()
sns.heatmap(corr_matrix, cmap='coolwarm')

In [None]:
# Column names again, for reference while choosing the plot variables below
df.columns

In [None]:
# Joint distribution of age and chol
sns.jointplot(data=df, x='age', y='chol')

In [None]:
# Joint distribution of oldpeak and target
sns.jointplot(data=df, x='oldpeak', y='target')

In [None]:
# Relationship between chol and thalach.
# chol is a numeric measurement rather than a category, so a bar plot would draw
# one bar per distinct chol value; a scatter plot shows the relationship instead.
sns.scatterplot(x='chol', y='thalach', data=df)

In [None]:
"""
Machine Learning

"""

In [None]:
"""
Import the machine-learning modules
"""
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression,LinearRegression
from sklearn.metrics import classification_report,confusion_matrix,accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Split the data into a training set and a test set

In [None]:
# Feature matrix X (every predictor column) and label vector y (the target column)
feature_columns = [
    'age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg',
    'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal',
]
X = df[feature_columns]
y = df['target']

In [None]:
# Hold out 33% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.33,
)

In [None]:
# Instantiate the estimators that the following cells fit and compare.
# NOTE(review): `lr` (LinearRegression) is not used anywhere in the visible
# notebook; it is kept only so the name stays defined.
lr=LinearRegression()
# max_iter is raised above the library default so the solver has room to converge
# on these unscaled features instead of stopping early with a ConvergenceWarning.
lg=LogisticRegression(max_iter=1000)
model = SVC()
# Fixed seed so the fitted tree (and its reported scores) is the same on every run.
dtree=DecisionTreeClassifier(random_state=42)

In [None]:
# Logistic regression: fit on the training split
lg.fit(X=X_train, y=y_train)

In [None]:
# Logistic-regression predictions for the test split
pred = lg.predict(X=X_test)

In [None]:
# Test accuracy of the logistic regression, as a percentage
lg_accuracy = accuracy_score(y_test, pred)
print(lg_accuracy * 100)

In [None]:
# Per-class precision/recall/F1 plus overall accuracy for the logistic regression
divider = '________________________________'
print('Classification Report')
print(classification_report(y_test, pred))
print(divider, divider, divider, sep='\n')
lg_accuracy_pct = accuracy_score(y_test, pred) * 100
print('Accuracy Score ==', lg_accuracy_pct)

In [None]:
#Support vector machine

In [None]:
# Fit the support vector classifier on the training split
model.fit(X=X_train, y=y_train)

In [None]:
# SVC predictions for the test split
pred_sv = model.predict(X=X_test)

In [None]:
# Per-class precision/recall/F1 plus overall accuracy for the SVC
divider = '____________________________________________'
print('Classification report')
print(classification_report(y_test, pred_sv))
print(divider, divider, divider, sep='\n')
svc_accuracy_pct = accuracy_score(y_test, pred_sv) * 100
print("Accuracy Score == ", svc_accuracy_pct)

In [None]:
#Grid search cv

In [None]:
# Hyper-parameter grid searched for the RBF-kernel SVC
param_grid = {
    'C': [0.1, 1, 10, 100, 1000],
    'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
    'kernel': ['rbf'],
}

In [None]:
# Grid search over param_grid; refit=True retrains the best setting on all training data
grid = GridSearchCV(
    estimator=SVC(),
    param_grid=param_grid,
    verbose=3,
    refit=True,
)

In [None]:
# Run the cross-validated grid search on the training split
grid.fit(X=X_train, y=y_train)

In [None]:
# Estimator refitted with the best parameter combination found by the search
grid.best_estimator_

In [None]:
# Parameter combination that achieved the best cross-validated score
grid.best_params_

In [None]:
# Mean cross-validated score of the best parameter combination
grid.best_score_

In [None]:
# Test-split predictions from the refitted best estimator
grid_predictions = grid.predict(X=X_test)

In [None]:
# Per-class precision/recall/F1 plus overall accuracy for the tuned SVC
divider = '________________________________________________________'
print('Classification Report')
print(classification_report(y_test, grid_predictions))
print(divider, divider, divider, sep='\n')

grid_accuracy_pct = accuracy_score(y_test, grid_predictions) * 100
print("Accuracy Score == ", grid_accuracy_pct)

In [None]:
# Test accuracy of the tuned SVC, as a percentage
grid_accuracy = accuracy_score(y_test, grid_predictions)
print(grid_accuracy * 100)

In [None]:
#KNearestNeighbor Classifier

In [None]:
# Baseline k-nearest-neighbours classifier using a single neighbour
knn = KNeighborsClassifier(n_neighbors=1)
knn.fit(X=X_train, y=y_train)

In [None]:
# Test-split predictions from the 1-neighbour model
pred_kn = knn.predict(X=X_test)

In [None]:
# Per-class precision/recall/F1 plus overall accuracy for KNN with one neighbour
divider = '________________________________________________________'
print('Classification Report')
print(classification_report(y_test, pred_kn))
print(divider, divider, divider, sep='\n')

knn1_accuracy_pct = accuracy_score(y_test, pred_kn) * 100
print("Accuracy Score == ", knn1_accuracy_pct)

In [None]:
# This shows that n_neighbors=1 works properly.
# Now try other values of n_neighbors that may give us better results.

In [None]:
# Estimate the KNN error rate for K = 1..39 using 5-fold cross-validation on the
# training split only. Scoring each K against X_test would tune the
# hyper-parameter on the held-out data and make the later test accuracy
# optimistic, so the test split is deliberately not touched here.
error_rate = []

for i in range(1,40):
    fold_accuracy = cross_val_score(
        KNeighborsClassifier(n_neighbors=i),
        X_train,
        y_train,
        cv=5,
        scoring='accuracy',
    )
    # error rate for this K = 1 - mean accuracy across the folds
    error_rate.append(1 - fold_accuracy.mean())

In [None]:
# Line plot of the recorded error rate for each candidate K (1..39)
plt.figure(figsize=(10, 6))
k_axis = range(1, 40)
plt.plot(
    k_axis,
    error_rate,
    marker='o',
    markersize=10,
    markerfacecolor='red',
    linestyle='dashed',
    color='blue',
)
plt.xlabel('K')
plt.title('Error Rate vs. K Value')
plt.ylabel('Error Rate')

In [None]:
# Refit KNN with K=22 (the value actually passed to n_neighbors below)
knn = KNeighborsClassifier(n_neighbors=22)

knn.fit(X_train,y_train)
pred_knn = knn.predict(X_test)

In [None]:
# Report for the refitted KNN model. The heading reads K from the estimator
# itself so the label cannot drift from the n_neighbors value actually used.
print('WITH K={}'.format(knn.n_neighbors))
print('Classification Report')
print(classification_report(y_test,pred_knn))
print('________________________________________________________')
print('________________________________________________________')
print('________________________________________________________')


print("Accuracy Score == ",accuracy_score(y_test,pred_knn)*100)

In [None]:
# KNN does not give great results either (compare with the logistic regression scores above).
# Now try a decision tree classifier and a random forest classifier.

In [None]:
#Decision tree classifier

In [None]:
# Fit the decision tree on the training split
dtree.fit(X=X_train, y=y_train)

In [None]:
# Decision-tree predictions for the test split
pred_dtree = dtree.predict(X=X_test)

In [None]:
# Per-class precision/recall/F1 plus overall accuracy for the decision tree
divider = '________________________________________________________'
print(classification_report(y_test, pred_dtree))
print(divider, divider, divider, sep='\n')

dtree_accuracy_pct = accuracy_score(y_test, pred_dtree) * 100
print("Accuracy Score == ", dtree_accuracy_pct)

In [None]:
#Random forest classifier

In [None]:
# Random forest with 100 trees. The fixed seed makes the bootstrap samples and
# feature draws repeatable, so the scores reported below do not change between runs.
rfc = RandomForestClassifier(n_estimators=100, random_state=42)

In [None]:
# Fit the random forest on the training split
rfc.fit(X=X_train, y=y_train)

In [None]:
# Random-forest predictions for the test split
pred_rfc = rfc.predict(X=X_test)

In [None]:
# Per-class precision/recall/F1 plus overall accuracy for the random forest
divider = '________________________________________________________'
print(classification_report(y_test, pred_rfc))
print(divider, divider, divider, sep='\n')

rfc_accuracy_pct = accuracy_score(y_test, pred_rfc) * 100
print("Accuracy Score == ", rfc_accuracy_pct)

In [None]:
# The random forest classifier performs very well, so we choose it as our model.

In [None]:
#END

In [None]:
#Thank you