In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn import linear_model
from scipy.special import expit
from sklearn.model_selection import train_test_split

In [3]:
# Read in the file, “csv.csv”, and create a dataframe.
df = pd.read_csv('heart.csv')

In [4]:
df.columns

Index(['age', 'sex', 'cp', 'trtbps', 'chol', 'fbs', 'restecg', 'thalachh',
       'exng', 'oldpeak', 'slp', 'caa', 'thall', 'output'],
      dtype='object')

In [5]:
y = df['output']

In [6]:
X = df[['age', 'sex', 'cp', 'trtbps', 'chol', 'fbs', 'restecg', 'thalachh',
       'exng', 'oldpeak', 'slp', 'caa', 'thall']]

In [7]:
# Follow the Column Key table for guidance on features and target.
# Need to split the data into training set and test set. Hint: use train_test_split with test_size of .3 and random_state=25
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=25)

In [8]:
len(df)

303

In [9]:
# How many samples are you using for training and for testing?
X_train.size

2756

In [84]:
# Create the model object using LogisticRegression.
lrm = linear_model.LogisticRegression(max_iter=5000)
lrm

LogisticRegression(max_iter=5000)

In [85]:
# Fit the classifier. If the learning doesn’t converge, add max_iter = 500 as an argument when creating the model in step 5.
lrm.fit(X_train, y_train)

LogisticRegression(max_iter=5000)

In [86]:
# What are the model’s coefficients and intercept?
lrm.coef_

array([[ 0.00555492, -0.9550283 ,  0.94277611, -0.00973355, -0.00270063,
        -0.05813984,  0.16980979,  0.02199382, -0.60722896, -0.55263209,
         0.42905335, -0.80762212, -1.32383434]])

In [87]:
lrm.intercept_

array([1.90020824])

In [88]:
# Make predictions using the test dataset.
predictions = lrm.predict(X_test)
predictions

array([1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0,
       1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1,
       0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0,
       1, 1, 1], dtype=int64)

In [89]:
# Create a dataframe with the test data, predicted results, and the actual values for heart disease.
results = X_test
results

Unnamed: 0,age,sex,cp,trtbps,chol,fbs,restecg,thalachh,exng,oldpeak,slp,caa,thall
281,52,1,0,128,204,1,1,156,1,1.0,1,0,0
81,45,1,1,128,308,0,0,170,0,0.0,2,0,2
40,51,0,2,140,308,0,0,142,0,1.5,2,1,2
175,40,1,0,110,167,0,0,114,1,2.0,1,0,3
58,34,1,3,118,182,0,0,174,0,0.0,2,0,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...
114,55,1,1,130,262,0,1,155,0,0.0,2,0,2
180,55,1,0,132,353,0,1,132,1,1.2,1,1,3
200,44,1,0,110,197,0,0,177,0,0.0,2,1,2
30,41,0,1,105,198,0,1,168,0,0.0,2,1,2


In [90]:
results = results.reset_index(drop=True)
results

Unnamed: 0,age,sex,cp,trtbps,chol,fbs,restecg,thalachh,exng,oldpeak,slp,caa,thall
0,52,1,0,128,204,1,1,156,1,1.0,1,0,0
1,45,1,1,128,308,0,0,170,0,0.0,2,0,2
2,51,0,2,140,308,0,0,142,0,1.5,2,1,2
3,40,1,0,110,167,0,0,114,1,2.0,1,0,3
4,34,1,3,118,182,0,0,174,0,0.0,2,0,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...
86,55,1,1,130,262,0,1,155,0,0.0,2,0,2
87,55,1,0,132,353,0,1,132,1,1.2,1,1,3
88,44,1,0,110,197,0,0,177,0,0.0,2,1,2
89,41,0,1,105,198,0,1,168,0,0.0,2,1,2


In [91]:
results['prediction'] = predictions
results

Unnamed: 0,age,sex,cp,trtbps,chol,fbs,restecg,thalachh,exng,oldpeak,slp,caa,thall,prediction
0,52,1,0,128,204,1,1,156,1,1.0,1,0,0,1
1,45,1,1,128,308,0,0,170,0,0.0,2,0,2,1
2,51,0,2,140,308,0,0,142,0,1.5,2,1,2,1
3,40,1,0,110,167,0,0,114,1,2.0,1,0,3,0
4,34,1,3,118,182,0,0,174,0,0.0,2,0,2,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
86,55,1,1,130,262,0,1,155,0,0.0,2,0,2,1
87,55,1,0,132,353,0,1,132,1,1.2,1,1,3,0
88,44,1,0,110,197,0,0,177,0,0.0,2,1,2,1
89,41,0,1,105,198,0,1,168,0,0.0,2,1,2,1


In [92]:
results['actual'] = df['output']
results

Unnamed: 0,age,sex,cp,trtbps,chol,fbs,restecg,thalachh,exng,oldpeak,slp,caa,thall,prediction,actual
0,52,1,0,128,204,1,1,156,1,1.0,1,0,0,1,1
1,45,1,1,128,308,0,0,170,0,0.0,2,0,2,1,1
2,51,0,2,140,308,0,0,142,0,1.5,2,1,2,1,1
3,40,1,0,110,167,0,0,114,1,2.0,1,0,3,0,1
4,34,1,3,118,182,0,0,174,0,0.0,2,0,2,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
86,55,1,1,130,262,0,1,155,0,0.0,2,0,2,1,1
87,55,1,0,132,353,0,1,132,1,1.2,1,1,3,0,1
88,44,1,0,110,197,0,0,177,0,0.0,2,1,2,1,1
89,41,0,1,105,198,0,1,168,0,0.0,2,1,2,1,1


In [93]:
# Using score method to get the accuracy of the model. Hint: should be ~.83 or 83%.
score = lrm.score(X_test, y_test)
score

0.8131868131868132