In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn import tree
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV


print('setup complete')

In [None]:
# Load the heart-attack CSV from the Kaggle input directory into a DataFrame.
HEART_CSV = "../input/health-care-data-set-on-heart-attack-possibility/heart.csv"
df = pd.read_csv(HEART_CSV)
# A bare name as the last line of the cell renders the frame as rich output.
df

In [None]:
# Print a concise summary of the frame: column names, dtypes, non-null counts and memory usage.
df.info()

In [None]:
import seaborn as sns

# Pairwise correlation between all columns (pandas' default method).
correlation = df.corr()

# Large canvas so the per-cell annotations stay readable.
plt.figure(figsize=(20, 17))
sns.heatmap(correlation, annot=True)

In [None]:
# Draw one 50-bin histogram per column on a shared 20x17 figure to inspect the distributions.
df.hist(bins=50, figsize=(20,17))

In [None]:
# Separate the feature matrix from the label column.
data = df.drop(columns='target')
target = df['target']

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation.
# random_state pins the shuffle so that "Restart & Run All" always yields the
# same partition; without it every run reports different accuracies and
# confusion matrices for the models trained below.
x_train, x_test, y_train, y_test = train_test_split(
    data, target, test_size=0.2, random_state=42
)

In [None]:
#          LOGISTIC REGRESSION
# Fit a liblinear logistic regression on the training split, then report
# mean accuracy on the held-out split as the cell output.
from sklearn.linear_model import LogisticRegression

lr = LogisticRegression(C=1, tol=0.1, solver='liblinear')
lr.fit(X=x_train, y=y_train)
lr.score(X=x_test, y=y_test)

In [None]:
from sklearn.metrics import confusion_matrix

# Predict the held-out rows with the logistic regression and tabulate
# true labels against predicted labels.
lr_predict = lr.predict(x_test)
cmlr = confusion_matrix(y_true=y_test, y_pred=lr_predict)

# Render the matrix as an annotated heatmap.
fig, ax = plt.subplots(figsize=(12, 7))
sns.heatmap(cmlr, annot=True, ax=ax)
ax.set_xlabel('predicted')
ax.set_ylabel('Truth')

In [None]:
#     DECISION TREE
# Import the estimator class directly. The fitted model below is bound to the
# name `tree`, which shadows the `sklearn.tree` module; looking the class up
# through that name only works while the module has just been re-imported.
from sklearn.tree import DecisionTreeClassifier

# random_state makes the fitted tree (and therefore the score) reproducible.
# The variable keeps the name `tree` because the following cell predicts with it.
tree = DecisionTreeClassifier(random_state=42)
tree.fit(x_train, y_train)
tree.score(x_test, y_test)

In [None]:
from sklearn.metrics import confusion_matrix

# Predict the held-out rows with the decision tree and tabulate
# true labels against predicted labels.
tree_pred = tree.predict(x_test)
cmtree = confusion_matrix(y_true=y_test, y_pred=tree_pred)

# Render the matrix as an annotated heatmap.
fig, ax = plt.subplots(figsize=(12, 7))
sns.heatmap(cmtree, annot=True, ax=ax)
ax.set_xlabel('predicted')
ax.set_ylabel('Truth')

In [None]:
#   RANDOM FOREST
from sklearn.ensemble import RandomForestClassifier

# 100 trees split on entropy. random_state pins the forest's internal
# randomness so the reported accuracy is the same on every run.
rf = RandomForestClassifier(n_estimators=100, criterion='entropy', random_state=42)
rf.fit(x_train, y_train)
rf.score(x_test, y_test)

In [None]:
from sklearn.metrics import confusion_matrix

# Predict the held-out rows with the random forest and tabulate
# true labels against predicted labels.
rf_pred = rf.predict(x_test)
cmrf = confusion_matrix(y_true=y_test, y_pred=rf_pred)

# Render the matrix as an annotated heatmap.
fig, ax = plt.subplots(figsize=(12, 7))
sns.heatmap(cmrf, annot=True, ax=ax)
ax.set_xlabel('predicted')
ax.set_ylabel('Truth')

In [None]:
#            NAIVE BAYES
# Fit a Gaussian naive Bayes classifier on the training split, then report
# mean accuracy on the held-out split as the cell output.
from sklearn.naive_bayes import GaussianNB

nb = GaussianNB()
nb.fit(X=x_train, y=y_train)
nb.score(X=x_test, y=y_test)

In [None]:
from sklearn.metrics import confusion_matrix

# Predict with the naive Bayes model `nb`. This cell previously called
# rf.predict, so the matrix shown under "naive Bayes" was actually the
# random forest's.
nb_pred = nb.predict(x_test)

# Tabulate true labels against predicted labels.
cmnb = confusion_matrix(y_test, nb_pred)

# Render the matrix as an annotated heatmap.
plt.figure(figsize=(12,7))
sns.heatmap(cmnb, annot=True)
plt.xlabel('predicted')
plt.ylabel('Truth')

In [None]:
#   SUPPORT VECTOR MACHINE
# Fit a support vector classifier with library-default settings, then report
# mean accuracy on the held-out split as the cell output.
from sklearn.svm import SVC

svm = SVC()
svm.fit(X=x_train, y=y_train)
svm.score(X=x_test, y=y_test)

In [None]:
from sklearn.metrics import confusion_matrix

# Predict the held-out rows with the SVM and tabulate
# true labels against predicted labels.
svm_pred = svm.predict(x_test)
cmsvm = confusion_matrix(y_true=y_test, y_pred=svm_pred)

# Render the matrix as an annotated heatmap.
fig, ax = plt.subplots(figsize=(12, 7))
sns.heatmap(cmsvm, annot=True, ax=ax)
ax.set_xlabel('predicted')
ax.set_ylabel('Truth')

In [None]:
# Candidate estimators and the hyper-parameter grid to search for each one.
# Each entry maps a display name to {'model': estimator, 'params': grid}.
#
# The two classes below are imported here so this cell does not depend on the
# SVM cell having been run, and because the name `tree` is rebound to a fitted
# classifier earlier in the notebook and no longer refers to the module.
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

model_params = {
    'svm': {
        'model': SVC(gamma='auto'),
        'params': {
            'C': [1, 10, 20],
            'kernel': ['rbf', 'linear'],
        },
    },
    'random_forest': {
        # random_state keeps the cross-validated scores reproducible.
        'model': RandomForestClassifier(random_state=42),
        'params': {
            'n_estimators': [1, 5, 10],
        },
    },
    'logistic_regression': {
        # multi_class='auto' is omitted: it matches the default behaviour and
        # the parameter is deprecated in recent scikit-learn releases.
        'model': LogisticRegression(solver='liblinear'),
        'params': {
            'C': [1, 5, 10],
        },
    },
    'decision_tree': {
        # This entry previously instantiated RandomForestClassifier, so the
        # "decision_tree" row of the results was really a second random forest.
        'model': DecisionTreeClassifier(random_state=42),
        'params': {
            'criterion': ['gini', 'entropy'],
        },
    },
}

In [None]:
# Run a 3-fold grid search for every candidate on the full dataset and record
# the best cross-validated score together with the parameters that produced it.
scores = []
for model_name, mp in model_params.items():
    clf = GridSearchCV(
        estimator=mp['model'],
        param_grid=mp['params'],
        cv=3,
        return_train_score=False,
    )
    clf.fit(data, target)
    scores.append(
        dict(
            model=model_name,
            best_score=clf.best_score_,
            best_params=clf.best_params_,
        )
    )

In [None]:
# Summarise the grid-search results in a table.
# The table gets its own name instead of reusing `df`: overwriting `df` would
# discard the heart dataset, so re-running any earlier cell that reads `df`
# would silently operate on this three-column summary instead.
scores_df = pd.DataFrame(scores, columns=['model', 'best_score', 'best_params'])
scores_df