In [20]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Module alias used by the evaluation cells so metric functions stay reachable
# even if a bare name such as `confusion_matrix` gets rebound in the session.
from sklearn import metrics
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from sklearn.metrics import recall_score,precision_score, f1_score
# NOTE: plot_confusion_matrix was removed in scikit-learn 1.2; on newer versions
# this import raises ImportError (ConfusionMatrixDisplay is the replacement).
from sklearn.metrics import plot_confusion_matrix
from sklearn.metrics import roc_curve,roc_auc_score, auc

import warnings
warnings.filterwarnings("ignore")

In [2]:
## Read heart.csv from the working directory and preview the resulting frame.
df = pd.read_csv(filepath_or_buffer="heart.csv")
df

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
298,57,0,0,140,241,0,1,123,1,0.2,1,0,3,0
299,45,1,3,110,264,0,1,132,0,1.2,1,0,3,0
300,68,1,0,144,193,1,1,141,0,3.4,1,2,3,0
301,57,1,0,130,131,0,1,115,1,1.2,1,1,3,0


In [3]:
## Print the column dtypes and non-null counts to check for missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 303 entries, 0 to 302
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       303 non-null    int64  
 1   sex       303 non-null    int64  
 2   cp        303 non-null    int64  
 3   trestbps  303 non-null    int64  
 4   chol      303 non-null    int64  
 5   fbs       303 non-null    int64  
 6   restecg   303 non-null    int64  
 7   thalach   303 non-null    int64  
 8   exang     303 non-null    int64  
 9   oldpeak   303 non-null    float64
 10  slope     303 non-null    int64  
 11  ca        303 non-null    int64  
 12  thal      303 non-null    int64  
 13  target    303 non-null    int64  
dtypes: float64(1), int64(13)
memory usage: 33.3 KB


In [4]:
## Class balance check: how many rows fall into each target label.
df.loc[:, "target"].value_counts()

1    165
0    138
Name: target, dtype: int64

In [5]:
## Separate the label column from the feature columns.
y = df["target"]

x = df.drop(columns="target")

In [6]:
## Hold out 20% of the rows for testing; stratify on y so both splits keep the
## same class proportions, and fix the seed so the split is reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    stratify=y,
    random_state=1,
)

## Create and fit the model

In [7]:
## Logistic regression with default hyperparameters, fitted on the training split.
## NOTE(review): warnings are globally suppressed at the top of this notebook,
## so any warning emitted during fitting would not be visible here.
model = LogisticRegression()
model.fit(X=x_train, y=y_train)

LogisticRegression()

## Model evaluation

In [15]:
## testing data
##
## Results are stored under test_* names. Assigning them to `confusion_matrix`,
## `classification_report` or `accuracy_score` would overwrite the imported
## sklearn functions, and any later call to those names would then fail with
## "object is not callable" until the import cell was re-run.
## The functions are called through the `metrics` module so this cell keeps
## working even if those bare names have been rebound elsewhere in the session.

y_pred = model.predict(x_test)

test_confusion_matrix = metrics.confusion_matrix(y_test, y_pred)

test_classification_report = metrics.classification_report(y_test, y_pred)

test_accuracy_score = metrics.accuracy_score(y_test, y_pred)

print("confusion_matrix:\n", test_confusion_matrix)
print("*"*80)

print("classification_report>>", test_classification_report)
print("*"*80)

print("accuracy_score>>",test_accuracy_score)
print("*"*80)

confusion_matrix:
 [[22  6]
 [ 4 29]]
********************************************************************************
classification_report>>               precision    recall  f1-score   support

           0       0.85      0.79      0.81        28
           1       0.83      0.88      0.85        33

    accuracy                           0.84        61
   macro avg       0.84      0.83      0.83        61
weighted avg       0.84      0.84      0.84        61

********************************************************************************
accuracy_score>> 0.8360655737704918
********************************************************************************


In [21]:
## training data
##
## Same evaluation as for the test split, run on the rows the model was fitted
## on, so the two sets of scores can be compared for signs of overfitting.
## Results are stored under train_* names instead of rebinding the imported
## sklearn function names, which keeps this cell re-runnable. The functions are
## called through the `metrics` module so the cell also works if those bare
## names have been rebound elsewhere in the session.

y_pred_train = model.predict(x_train)

train_confusion_matrix = metrics.confusion_matrix(y_train, y_pred_train)

train_classification_report = metrics.classification_report(y_train, y_pred_train)

train_accuracy_score = metrics.accuracy_score(y_train, y_pred_train)

print("confusion_matrix:\n", train_confusion_matrix)
print("*"*80)

print("classification_report>>", train_classification_report)
print("*"*80)

print("accuracy_score>>",train_accuracy_score)
print("*"*80)

confusion_matrix:
 [[ 87  23]
 [ 11 121]]
********************************************************************************
classification_report>>               precision    recall  f1-score   support

           0       0.89      0.79      0.84       110
           1       0.84      0.92      0.88       132

    accuracy                           0.86       242
   macro avg       0.86      0.85      0.86       242
weighted avg       0.86      0.86      0.86       242

********************************************************************************
accuracy_score>> 0.859504132231405
********************************************************************************


## Testing on a single row

In [28]:
## Keep the feature column labels of `x` (target excluded) for later reuse.
column_names =x.columns
column_names

Index(['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach',
       'exang', 'oldpeak', 'slope', 'ca', 'thal'],
      dtype='object')

In [23]:
## Show the first feature row as a column so every value is visible at once.
x.iloc[:1].transpose()

Unnamed: 0,0
age,63.0
sex,1.0
cp,3.0
trestbps,145.0
chol,233.0
fbs,1.0
restecg,0.0
thalach,150.0
exang,0.0
oldpeak,2.3


In [25]:
## Feature values for one sample, matching the first row of the dataset.
## Note: `trestbp` holds the value for the `trestbps` column.
age, sex, cp = 63.0, 1.0, 3.0
trestbp, chol, fbs = 145.0, 233.0, 1.0
restecg, thalach, exang = 0.0, 150.0, 0.0
oldpeak, slope, ca, thal = 2.3, 0.0, 0.0, 1.0

In [26]:
## Assemble the single-sample feature vector; the order mirrors the feature
## columns of `x` (age ... thal).
array = np.array(
    [
        age, sex, cp, trestbp, chol, fbs, restecg,
        thalach, exang, oldpeak, slope, ca, thal,
    ]
)
array

array([ 63. ,   1. ,   3. , 145. , 233. ,   1. ,   0. , 150. ,   0. ,
         2.3,   0. ,   0. ,   1. ])

In [27]:
## Predict the class for the single sample.
## The model was fitted on a DataFrame, so the row is wrapped in a one-row
## DataFrame labelled with the training columns instead of a bare list. This
## keeps feature alignment explicit (a wrong number of values raises when the
## frame is built) and avoids scikit-learn's "X does not have valid feature
## names" mismatch, which the global warning filter would otherwise hide.
single_row = pd.DataFrame([array], columns=x.columns)
heart_prediction= model.predict(single_row)[0]
heart_prediction

1

In [31]:
## Metadata to ship with the model: the feature names in training order.
project_data = dict(column_names=column_names.tolist())
project_data

{'column_names': ['age',
  'sex',
  'cp',
  'trestbps',
  'chol',
  'fbs',
  'restecg',
  'thalach',
  'exang',
  'oldpeak',
  'slope',
  'ca',
  'thal']}

In [34]:
import json

## Write the project metadata to heart.json in the working directory.
with open("heart.json", mode="w") as json_file:
    json_file.write(json.dumps(project_data))

In [35]:
import pickle

## Serialise the fitted model to Heart.pkl in the working directory.
## Only load this file back from a trusted source: unpickling runs arbitrary code.
with open("Heart.pkl", mode="wb") as model_file:
    pickle.dump(model, file=model_file)