<a href="https://colab.research.google.com/github/njneubauer/code_hearted/blob/main/simple_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd


In [2]:
# Load the heart-failure clinical records from CSV and preview the first five rows
heart_data = pd.read_csv(
    filepath_or_buffer='/content/heart_failure_clinical_records_dataset.csv',
)
heart_data.head(n=5)

Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT
0,75.0,0,582,0,20,1,265000.0,1.9,130,1,0,4,1
1,55.0,0,7861,0,38,0,263358.03,1.1,136,1,0,6,1
2,65.0,0,146,0,20,0,162000.0,1.3,129,1,1,7,1
3,50.0,1,111,0,20,0,210000.0,1.9,137,1,0,7,1
4,65.0,1,160,1,20,0,327000.0,2.7,116,0,0,8,1


In [3]:
# Narrow the dataset to the basic patient attributes plus the DEATH_EVENT outcome column
simple_heart = heart_data[
    [
        'age',
        'anaemia',
        'diabetes',
        'high_blood_pressure',
        'sex',
        'smoking',
        'DEATH_EVENT',
    ]
]

In [4]:
# Display the reduced DataFrame to check that only the selected columns remain
simple_heart

Unnamed: 0,age,anaemia,diabetes,high_blood_pressure,sex,smoking,DEATH_EVENT
0,75.0,0,0,1,1,0,1
1,55.0,0,0,0,1,0,1
2,65.0,0,0,0,1,1,1
3,50.0,1,0,0,1,0,1
4,65.0,1,1,0,0,0,1
...,...,...,...,...,...,...,...
294,62.0,0,1,1,1,1,0
295,55.0,0,0,0,0,0,0
296,45.0,0,1,0,0,0,0
297,45.0,0,0,0,1,1,0


In [5]:
# Separate the target (y: the DEATH_EVENT column) from the features (X: every other column)
y = simple_heart["DEATH_EVENT"]
X = simple_heart.drop(columns="DEATH_EVENT")
# Sanity check: X and y should report the same number of rows
print(X.shape, y.shape)

(299, 6) (299,)


In [6]:
# Split the data into training and testing sets
# (random_state is fixed so the same split is produced on every run)
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=1,
)

In [7]:
# Inspect the held-out test features (rows keep their index labels from the full dataset)
X_test

Unnamed: 0,age,anaemia,diabetes,high_blood_pressure,sex,smoking
173,50.0,1,0,0,1,0
287,45.0,0,1,0,0,0
51,53.0,1,0,1,0,0
146,52.0,0,0,0,1,1
214,65.0,1,0,1,1,0
...,...,...,...,...,...,...
91,60.0,0,1,0,1,0
291,60.0,0,0,0,1,0
42,70.0,0,1,1,1,1
187,60.0,0,1,0,0,0


In [8]:
# Creating a logistic regression model
# No arguments are passed, so the estimator uses scikit-learn's default settings
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression()
# Bare expression: shows the estimator's repr as the cell output
classifier

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [9]:
# Fit (train) our model using the training data
# fit(...) is the cell's last expression, so the value it returns is shown as the cell output
classifier.fit(X_train, y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [10]:
# Validate the model using the test data
# Score the fitted classifier on both splits; comparing the two numbers shows
# how well performance on the training data carries over to unseen data
print(f"Training Data Score: {classifier.score(X_train, y_train)}")
print(f"Testing Data Score: {classifier.score(X_test, y_test)}")

Training Data Score: 0.6919642857142857
Testing Data Score: 0.7466666666666667


In [11]:
# Predict class probabilities (one row per test sample, one column per class)
# and show the first ten next to the true labels for a quick eyeball comparison
predictions = classifier.predict_proba(X_test)
print(f"First 10 Predictions:   {predictions[:10]}")
print(f"First 10 Actual labels: {y_test.iloc[:10].tolist()}")

First 10 Predictions:   [[0.76097203 0.23902797]
 [0.80837018 0.19162982]
 [0.72377757 0.27622243]
 [0.78812738 0.21187262]
 [0.59932402 0.40067598]
 [0.68389017 0.31610983]
 [0.64153002 0.35846998]
 [0.76596968 0.23403032]
 [0.56090975 0.43909025]
 [0.78164913 0.21835087]]
First 10 Actual labels: [0, 0, 1, 0, 0, 0, 0, 0, 1, 0]


In [12]:
# Collect the per-class probabilities next to the true labels for inspection.
# `predictions` is indexed [row, class], so each class column is taken with a
# single slice instead of looping over the rows in Python.
# y_test is a Series, so the resulting frame takes its row labels from y_test.
predict_df = pd.DataFrame(
    {
        "Prediction_0": predictions[:, 0],
        'Prediction_1': predictions[:, 1],
        "Actual": y_test,
    }
)
predict_df

Unnamed: 0,Prediction_0,Prediction_1,Actual
173,0.760972,0.239028,0
287,0.808370,0.191630,0
51,0.723778,0.276222,1
146,0.788127,0.211873,0
214,0.599324,0.400676,0
...,...,...,...
91,0.679363,0.320637,0
291,0.718354,0.281646,0
42,0.560910,0.439090,1
187,0.689876,0.310124,1


In [13]:
# Practicing inputting patient stats
# Values are positional and must follow the training column order:
# age, anaemia, diabetes, high_blood_pressure, sex, smoking
test = [[90, 1, 0, 1, 0, 1]]
# Attach the training column names so the input is tied to the features by
# name instead of by position alone (newer scikit-learn releases also warn when
# a model fitted on a DataFrame is given input without feature names)
test_df = pd.DataFrame(test, columns=X_train.columns)
predict = classifier.predict_proba(test_df)
print(f"Prediction:   {predict[:10]}")

Prediction:   [[0.35913965 0.64086035]]


In [14]:
# Persist the fitted classifier to disk so it can be reloaded without retraining
import joblib

filename = 'simple_model.sav'
joblib.dump(value=classifier, filename=filename)

['simple_model.sav']

In [15]:
# Load the saved model from disk and score it on the test data
import joblib
# Update the file name below to point at the model file to load
# NOTE(review): joblib files are pickle-based — only load files from a trusted source
filename = 'simple_model.sav'
loaded_model = joblib.load(filename)
# Scoring the reloaded model on the same test split checks that it survived the save/load round trip
result = loaded_model.score(X_test, y_test)
print(result)

0.7466666666666667
