In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for dirname, _, filenames in os.walk('/kaggle/input'):
    file_paths = (os.path.join(dirname, entry) for entry in filenames)
    for file_path in file_paths:
        print(file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import seaborn as sns
from sklearn.metrics import mean_squared_error,classification_report,f1_score,confusion_matrix

import xgboost as xgb
import lightgbm as lgb

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.model_selection import StratifiedKFold, cross_validate, learning_curve,train_test_split,GridSearchCV, cross_val_score, RandomizedSearchCV, RepeatedStratifiedKFold
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.neighbors import LocalOutlierFactor, KNeighborsClassifier

from warnings import filterwarnings
# Silence all Python warnings for the rest of the session.
# NOTE(review): this is a blanket filter, so library warnings raised while
# fitting the models below (e.g. convergence or deprecation warnings) will not
# be shown either - consider narrowing it to specific categories.
filterwarnings("ignore")

In [None]:
# Load the heart-disease CSV from the read-only input directory and preview
# the first rows. `df` is the frame every later cell works on.
df = pd.read_csv("../input/heart-disease-uci/heart.csv")
df.head()

In [None]:
# (number of rows, number of columns) of the loaded frame.
df.shape

In [None]:
# Number of missing values in each column.
df.isnull().sum()

In [None]:
# Summary statistics from describe(), transposed so each column of df is shown as a row.
df.describe().T

In [None]:
# Print the column dtypes and non-null counts of the frame.
df.info()

In [None]:
def drop_sigma_outliers(frame, n_sigma=3, verbose=True):
    """Iteratively drop rows lying more than ``n_sigma`` standard deviations from a column mean.

    On every pass the mean and standard deviation of each numeric column are
    recomputed on the rows that are still present, and every row with at least
    one value outside ``mean +/- n_sigma * std`` is removed. Passes repeat until
    no row is flagged.

    Parameters
    ----------
    frame : pandas.DataFrame
        Input data. It is not modified; a cleaned copy is returned.
    n_sigma : float, default 3
        Half-width of the accepted band, in standard deviations.
    verbose : bool, default True
        When true, print the index labels removed on each pass (an empty list
        is printed on the final pass).

    Returns
    -------
    pandas.DataFrame
        The rows of ``frame`` that survive every pass, with their original index labels.
    """
    cleaned = frame.copy()
    # Only numeric columns have a meaningful mean/std; other dtypes are skipped.
    # NOTE(review): the label column `target` is numeric and is therefore
    # scanned too - confirm whether it should be excluded from the check.
    numeric_cols = cleaned.select_dtypes(include="number").columns
    while True:
        values = cleaned[numeric_cols]
        center = values.mean()
        spread = values.std()
        # Column-wise comparison: each column is checked against its own band.
        too_high = values.gt(center + n_sigma * spread)
        too_low = values.lt(center - n_sigma * spread)
        flagged = cleaned.index[(too_high | too_low).any(axis=1)].tolist()
        if verbose:
            print(flagged)
        if not flagged:
            return cleaned
        cleaned = cleaned.drop(index=flagged)


# Rebind `df` to the cleaned frame instead of mutating the loaded frame in place.
df = drop_sigma_outliers(df)

# Model

In [None]:
# Baseline estimators compared in this notebook. Seeds are fixed where the
# original configuration fixed them so the comparison is repeatable.
log_model = LogisticRegression()

gbc_model = GradientBoostingClassifier(random_state=14)

rfc_model = RandomForestClassifier(criterion='gini', n_estimators=999,max_depth=4, random_state=14)

# `n_estimators` is used rather than its LightGBM alias `num_iterations`:
# passing the alias as an extra keyword leaves two conflicting boosting-round
# settings once the grid search below sets `n_estimators`, which can make the
# searched values be ignored. The baseline still trains 550 rounds.
lgb_model = lgb.LGBMClassifier(n_estimators=550, learning_rate=0.01055,max_depth=3, random_state=14)

# NOTE(review): "binary:hinge" makes the booster output hard 0/1 predictions
# instead of probabilities - confirm this objective is intended.
xgb_model = xgb.XGBClassifier(objective="binary:hinge")

gnb_model = GaussianNB()

mlpc_model = MLPClassifier(random_state=14)

svc_model = SVC(probability=True)

knn_model = KNeighborsClassifier(n_neighbors=19,leaf_size=20)

In [None]:
# Preview the first three rows of df as it stands after the outlier-removal cell.
df.head(3)

In [None]:
# Pair every baseline estimator with the label used when reporting its scores,
# then expose the two parallel lists the evaluation cells iterate over.
named_models = [
    ("LogisticRegression", log_model),
    ("GradientBoostingClassifier", gbc_model),
    ("RandomForestClassifier", rfc_model),
    ("LGBMClassifier", lgb_model),
    ("XGBClassifier", xgb_model),
    ("GaussianNB", gnb_model),
    ("MLPClassifier", mlpc_model),
    ("SVC", svc_model),
    ("KNeighborsClassifier", knn_model),
]
models = [estimator for _label, estimator in named_models]
model_names = [_label for _label, estimator in named_models]

# Features are every column except the label column 'target'.
y = df['target']
x = df.drop(columns='target')

# Hold out 25% of the rows for testing, keeping the class proportions of y in both parts.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.25,
    stratify=y,
    random_state=42,
)

In [None]:
# Fit every baseline model on the training split and report its accuracy on
# the held-out split.
print("TEST SCORE\n\n")
print("-----------------------------------------")
for name, model in zip(model_names, models):
    model.fit(x_train, y_train)
    # score() returns a 0-1 fraction; scale it so the value matches the "%"
    # in the label and the F1 report, which is already printed in percent.
    print(name, "Score = %", model.score(x_test, y_test) * 100)

In [None]:
# Report the F1 score, in percent, of every baseline model on the held-out
# split. Each model is fitted again here, so the cell does not rely on the
# accuracy cell having been run first.
print("F1 SCORE\n\n")
print("-----------------------------------------")
for label, estimator in zip(model_names, models):
    fitted = estimator.fit(x_train, y_train)
    predictions = fitted.predict(x_test)
    f1_percent = f1_score(y_test, predictions) * 100
    print(label, "F1 Score = %", f1_percent)

# Model Tuning

## LGBMClassifier

In [None]:
# Hyper-parameter grid for the LightGBM search.
light_params = {"n_estimators":[100,500,1000,2000],
         "subsample":[0.6,0.8,1.0],
         # LightGBM only applies row subsampling when the bagging frequency is
         # non-zero; without this entry the three `subsample` values would
         # train identical models and triple the search for nothing.
         "subsample_freq":[1],
         "max_depth":[3,4,5,6,7],
         "learning_rate":[0.1,0.01,0.02,0.05],
         "min_child_samples":[2,5,10,20]}

In [None]:
# Exhaustive 10-fold cross-validated search over light_params on the training
# split, using all available cores; the last line displays the winning combination.
light_cv = GridSearchCV(
    estimator=lgb_model,
    param_grid=light_params,
    cv=10,
    n_jobs=-1,
    verbose=2,
)
light_cv.fit(x_train, y_train)
light_cv.best_params_

In [None]:
# LightGBM model with fixed, hand-entered hyper-parameters.
# NOTE(review): these values are presumably copied from an earlier run of the
# grid search - verify them against light_cv.best_params_.
light_tuned = lgb.LGBMClassifier(n_estimators = 100,
                         subsample= 0.6, 
                         max_depth=3 , 
                         learning_rate=0.02, 
                         min_child_samples= 20).fit(x_train,y_train)
# score() returns a 0-1 fraction; scale it so it matches the "%" label and the
# F1 line below, which is already in percent.
print("LGBMClassifier","Score = %",light_tuned.score(x_test,y_test)*100)
print("LGBMClassifier","F1 Score = %",f1_score(y_test,light_tuned.predict(x_test))*100)

## KNeighborsClassifier

In [None]:
# Hyper-parameter grid for the k-nearest-neighbours search.
# `leaf_size` only tunes the speed and memory use of the neighbour-search tree,
# not which neighbours are found, so a single value is searched: trying several
# multiplied the number of fits without changing any score. 20 is kept because
# it was the first candidate, i.e. the one a tie between equal scores selects.
knn_params = {"n_neighbors":np.arange(1,50),
         "leaf_size":[20]}

In [None]:
# Exhaustive 10-fold cross-validated search over knn_params on the training
# split, using all available cores; the last line displays the winning combination.
knn_cv = GridSearchCV(
    estimator=knn_model,
    param_grid=knn_params,
    cv=10,
    n_jobs=-1,
    verbose=2,
)
knn_cv.fit(x_train, y_train)
knn_cv.best_params_

In [None]:
# k-nearest-neighbours model with fixed, hand-entered hyper-parameters.
# NOTE(review): these values are presumably copied from an earlier run of the
# grid search - verify them against knn_cv.best_params_.
knn_tuned = KNeighborsClassifier(n_neighbors = 40, 
                                 leaf_size= 20).fit(x_train,y_train)
# score() returns a 0-1 fraction; scale it so it matches the "%" label and the
# F1 line below, which is already in percent.
print("KNeighborsClassifier","Score = %",knn_tuned.score(x_test,y_test)*100)
print("KNeighborsClassifier","F1 Score = %",f1_score(y_test,knn_tuned.predict(x_test))*100)

## MLPClassifier

In [None]:
# Hyper-parameter grid for the multi-layer perceptron search: regularisation
# strength, layer layout, optimiser and activation function.
mlpc_params = dict(
    alpha=[0.1, 0.2, 0.02, 0.01, 0.005, 0.0001, 0.00001],
    hidden_layer_sizes=[
        (10, 20, 30),
        (10, 10, 10),
        (100, 100, 100),
        (100, 100),
        (3, 5),
        (5, 3),
        (10, 10),
    ],
    solver=["lbfgs", "sgd", "adam"],
    activation=["relu", "logistic"],
)

In [None]:
# Exhaustive 10-fold cross-validated search over mlpc_params on the training
# split, using all available cores; the last line displays the winning combination.
mlpc_cv = GridSearchCV(
    estimator=mlpc_model,
    param_grid=mlpc_params,
    cv=10,
    n_jobs=-1,
    verbose=2,
)
mlpc_cv.fit(x_train, y_train)
mlpc_cv.best_params_

In [None]:
# Multi-layer perceptron with fixed, hand-entered hyper-parameters.
# NOTE(review): these values are presumably copied from an earlier run of the
# grid search - verify them against mlpc_cv.best_params_.
# random_state is fixed (same seed as the baseline MLP) so the reported scores
# are the same on every run; without it the network weights are initialised
# differently each time.
mlpc_tuned = MLPClassifier(alpha = 0.01, 
                          hidden_layer_sizes= (100,100),
                          solver="adam" ,
                          activation= "logistic",
                          random_state=14).fit(x_train,y_train)
# score() returns a 0-1 fraction; scale it so it matches the "%" label and the
# F1 line below, which is already in percent.
print("MLPClassifier","Score = %",mlpc_tuned.score(x_test,y_test)*100)
print("MLPClassifier","F1 Score = %",f1_score(y_test,mlpc_tuned.predict(x_test))*100)

## RandomForestClassifier

In [None]:
# Hyper-parameter grid for the random-forest search: tree depth, features
# considered per split, forest size and minimum samples needed to split a node.
r_forest_params = dict(
    max_depth=[2, 3, 5, 8, 10, 20],
    max_features=[2, 5, 8],
    n_estimators=[10, 100, 500, 1000, 2000],
    min_samples_split=[2, 5, 10, 20],
)

In [None]:
# Exhaustive 10-fold cross-validated search over r_forest_params on the training
# split, using all available cores; the last line displays the winning combination.
rfc_cv = GridSearchCV(
    estimator=rfc_model,
    param_grid=r_forest_params,
    cv=10,
    n_jobs=-1,
    verbose=2,
)
rfc_cv.fit(x_train, y_train)
rfc_cv.best_params_

In [None]:
# Random forest with fixed, hand-entered hyper-parameters.
# NOTE(review): these values are presumably copied from an earlier run of the
# grid search - verify them against rfc_cv.best_params_.
# random_state is fixed (same seed as the baseline forest) so the reported
# scores are the same on every run; without it each fit samples different
# bootstrap rows and features.
rfc_tuned = RandomForestClassifier(n_estimators = 100, 
                          max_depth= 3,
                          max_features= 2,
                          min_samples_split= 2,
                          random_state=14).fit(x_train,y_train)
# score() returns a 0-1 fraction; scale it so it matches the "%" label and the
# F1 line below, which is already in percent.
print("RandomForestClassifier","Score = %",rfc_tuned.score(x_test,y_test)*100)
print("RandomForestClassifier","F1 Score = %",f1_score(y_test,rfc_tuned.predict(x_test))*100)

In [None]:
# Side-by-side accuracy of the four tuned models on the held-out split.
print("Tuned Scores\n\n")
print("-----------------------------------------")
tuned_models = (
    ("LGBMClassifier", light_tuned),
    ("KNeighborsClassifier", knn_tuned),
    ("MLPClassifier", mlpc_tuned),
    ("RandomForestClassifier", rfc_tuned),
)
for label, tuned in tuned_models:
    # score() returns a 0-1 fraction; scale it so the value matches the "%"
    # in the label and the F1 summary, which is printed in percent.
    print(label, "Score = %", tuned.score(x_test, y_test) * 100)

In [None]:
# Side-by-side F1 score, in percent, of the four tuned models on the held-out split.
print("Tuned F1 Scores\n\n")
print("-----------------------------------------")
for label, tuned in (
    ("LGBMClassifier", light_tuned),
    ("KNeighborsClassifier", knn_tuned),
    ("MLPClassifier", mlpc_tuned),
    ("RandomForestClassifier", rfc_tuned),
):
    tuned_predictions = tuned.predict(x_test)
    print(label, "F1 Score = %", f1_score(y_test, tuned_predictions) * 100)