<a href="https://colab.research.google.com/github/sudeepnn/data_sceince/blob/main/day7.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import seaborn as sns
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

In [None]:
# Load the heart-disease dataset from its hosted CSV into a DataFrame.
df = pd.read_csv(
    'https://talentcocomedia.s3.amazonaws.com/ml-assets/heart-disease.csv'
)

In [None]:
# Labels: the 'target' column is what the classifiers below are trained to predict.
y = df['target']

In [None]:
# Features: every column of the frame except the 'target' label.
x = df.drop(columns='target')

In [None]:
# Hold out 30% of the rows as a test set. The fixed seed keeps the split
# identical on every Restart & Run All, so the reported scores are reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=1024
)

# **Logistic Regression - GridSearchCV**

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score

# L2-regularised logistic regression. The 'liblinear', 'sag' and 'saga' solvers
# shuffle the data internally, so a fixed random_state is needed for the grid
# search to pick the same winner on every run.
lr_model = LogisticRegression(penalty='l2', random_state=1024)

# Inverse regularisation strengths to try: C = 1, 2, ..., 9.
val = list(np.arange(1, 10, 1))

# Search every solver / C combination (4 solvers x 9 values).
parameters = {'solver':('newton-cg', 'liblinear', 'sag', 'saga'), 'C':val}

# GridSearchCV cross-validates each candidate on the training split and then
# refits the best one on the whole training split.
clf_lr = GridSearchCV(lr_model, parameters)
clf_lr.fit(x_train, y_train)



In [None]:
# Report the solver / C combination selected by the grid search.
print(clf_lr.best_params_)

{'C': 2, 'solver': 'liblinear'}


In [None]:
# Predict the held-out rows with the best logistic-regression model found by the search.
test_pred = clf_lr.predict(X=x_test)

In [None]:
# accuracy_score is documented as (y_true, y_pred): pass the ground truth first.
acc_lr_gs = accuracy_score(y_test, test_pred)
acc_lr_gs

0.8571428571428571

# **KNeighborsClassifier - GridSearchCV**

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Candidate neighbourhood sizes: every k from 1 through 29.
val = list(np.arange(1, 30))
parameters = {'n_neighbors': val}

# Cross-validated search over k for a default k-nearest-neighbours classifier.
neigh = KNeighborsClassifier()
clf_knn = GridSearchCV(estimator=neigh, param_grid=parameters)
clf_knn.fit(x_train, y_train)

In [None]:
# Report the n_neighbors value selected by the grid search.
print(clf_knn.best_params_)

{'n_neighbors': 28}


In [None]:
# Predict the held-out rows with the best k-nearest-neighbours model found by the search.
test_pred_knn = clf_knn.predict(X=x_test)

In [None]:
# accuracy_score is documented as (y_true, y_pred): pass the ground truth first.
accuracy_knn_gs = accuracy_score(y_test, test_pred_knn)
accuracy_knn_gs

0.6263736263736264

# **DecisionTreeClassifier - GridSearchCV**

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics

In [None]:
# Seeded tree so that the random feature sub-sampling is repeatable.
clf = DecisionTreeClassifier(random_state=1024)

# Hyper-parameter grid for the tree.
# NOTE: 'auto' was removed from max_features in scikit-learn 1.3 (deprecated in
# 1.1) and now fails parameter validation. For DecisionTreeClassifier it was an
# alias of 'sqrt', so dropping it leaves the set of distinct models unchanged.
param_grid = {
              'max_features': ['sqrt', 'log2'],
              'ccp_alpha': [0.1, .01, .001],
              'max_depth' : np.arange(5,10,1),
              'criterion' :['gini', 'entropy']
             }

# 5-fold cross-validated search; verbose=True prints the number of fits.
clf_dtc = GridSearchCV(clf, param_grid=param_grid, cv=5, verbose=True)

clf_dtc.fit(x_train, y_train)

Fitting 5 folds for each of 90 candidates, totalling 450 fits




In [None]:
# Report the hyper-parameter combination selected by the grid search.
print(clf_dtc.best_params_)

{'ccp_alpha': 0.01, 'criterion': 'gini', 'max_depth': 6, 'max_features': 'auto'}


In [None]:
# Predict the held-out rows with the best decision tree found by the search.
test_pred_dtc = clf_dtc.predict(X=x_test)

In [None]:
# Fraction of held-out rows the tuned decision tree labels correctly.
accuracy_dtc_gs = metrics.accuracy_score(
    y_true=y_test,
    y_pred=test_pred_dtc,
)
accuracy_dtc_gs

0.8021978021978022

# **CONFUSION MATRIX**

In [None]:
from sklearn.metrics import confusion_matrix
# Confusion matrix for the tuned logistic regression.
# scikit-learn expects (y_true, y_pred): rows are the actual classes and columns
# the predicted ones. Passing them the other way round transposes the matrix and
# swaps the false-positive and false-negative counts.
confusion_matrix(y_test, test_pred)

array([[29,  3],
       [10, 49]])

In [None]:
from sklearn.metrics import classification_report

# Per-class precision, recall and F1 for the logistic-regression predictions.
print(classification_report(y_true=y_test, y_pred=test_pred))

              precision    recall  f1-score   support

           0       0.91      0.74      0.82        39
           1       0.83      0.94      0.88        52

    accuracy                           0.86        91
   macro avg       0.87      0.84      0.85        91
weighted avg       0.86      0.86      0.85        91



# **KNeighborsClassifier**

In [None]:
# Confusion matrix for the tuned k-nearest-neighbours model.
# scikit-learn expects (y_true, y_pred): rows are the actual classes and columns
# the predicted ones. Passing them the other way round transposes the matrix.
confusion_matrix(y_test, test_pred_knn)

array([[27, 22],
       [12, 30]])

In [None]:
# Per-class precision, recall and F1 for the k-nearest-neighbours predictions.
print(classification_report(y_true=y_test, y_pred=test_pred_knn))

              precision    recall  f1-score   support

           0       0.55      0.69      0.61        39
           1       0.71      0.58      0.64        52

    accuracy                           0.63        91
   macro avg       0.63      0.63      0.63        91
weighted avg       0.64      0.63      0.63        91



# **DecisionTreeClassifier**

In [None]:
# Confusion matrix for the tuned decision tree.
# scikit-learn expects (y_true, y_pred): rows are the actual classes and columns
# the predicted ones. Passing them the other way round transposes the matrix.
confusion_matrix(y_test, test_pred_dtc)

array([[29,  8],
       [10, 44]])

In [None]:
# Per-class precision, recall and F1 for the decision-tree predictions.
print(classification_report(y_true=y_test, y_pred=test_pred_dtc))

              precision    recall  f1-score   support

           0       0.78      0.74      0.76        39
           1       0.81      0.85      0.83        52

    accuracy                           0.80        91
   macro avg       0.80      0.79      0.80        91
weighted avg       0.80      0.80      0.80        91



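Comparing the test-set metrics of the three models, LogisticRegression performs best (accuracy 0.86, recall 0.94 for class 1), DecisionTreeClassifier comes second (accuracy 0.80, recall 0.85 for class 1), and KNeighborsClassifier performs worst (accuracy 0.63, recall 0.58 for class 1).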