In [1]:
import datetime
import sqlalchemy
import pandas as pd
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, func
from sklearn.metrics import balanced_accuracy_score, confusion_matrix, classification_report

In [2]:
# Database Setup
engine = create_engine(f"postgresql+psycopg2://postgres:postgres@localhost:5432/final_project")

# Reflect existing database into a new model
Base = automap_base()

# Reflect the tables
Base.prepare(autoload_with=engine)

# Create session (link) from Python to PG Admin
session = Session(engine)

# Create list and load to dataframe
heart_data_list = []
heart_data = engine.execute("SELECT * FROM heart_failure")
for results in heart_data:
    heart_failure = {}
    heart_failure["age"] = results[0]
    heart_failure["sex"] = results[1]
    heart_failure["chest_pain_type"] = results[2]
    heart_failure["resting_bp"] = results[3]
    heart_failure["cholesterol"] = results[4]
    heart_failure["fasting_bs"] = results[5]
    heart_failure["resting_ecg"] = results[6]
    heart_failure["max_hr"] = results[7]
    heart_failure["exercise_aniga"] = results[8]
    heart_failure["old_peak"] = results[9]
    heart_failure["st_slope"] = results[10]
    heart_failure["heart_disease"] = results[11]
    heart_data_list.append(heart_failure)

session.close()
heart_df = pd.DataFrame(heart_data_list)
heart_df

Unnamed: 0,age,sex,chest_pain_type,resting_bp,cholesterol,fasting_bs,resting_ecg,max_hr,exercise_aniga,old_peak,st_slope,heart_disease
0,40,M,ATA,140,289,0,Normal,172,N,0.0,Up,0
1,49,F,NAP,160,180,0,Normal,156,N,1.0,Flat,1
2,37,M,ATA,130,283,0,ST,98,N,0.0,Up,0
3,48,F,ASY,138,214,0,Normal,108,Y,1.5,Flat,1
4,54,M,NAP,150,195,0,Normal,122,N,0.0,Up,0
...,...,...,...,...,...,...,...,...,...,...,...,...
913,45,M,TA,110,264,0,Normal,132,N,1.2,Flat,1
914,68,M,ASY,144,193,1,Normal,141,N,3.4,Flat,1
915,57,M,ASY,130,131,0,Normal,115,Y,1.2,Flat,1
916,57,F,ATA,130,236,0,LVH,174,N,0.0,Flat,1


In [3]:
y = heart_df['heart_disease']
x = heart_df.drop(columns = 'heart_disease')

In [4]:
X = pd.get_dummies(x, columns=['sex', 'chest_pain_type', 'resting_ecg', 'exercise_aniga', 'st_slope'])

In [5]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, 
    y, 
    random_state = 1
)

In [6]:
# Import the LogisticRegression module from SKLearn
from sklearn.linear_model import LogisticRegression

log_model = LogisticRegression(random_state = 1)

log_model.fit(X_train, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


LogisticRegression(random_state=1)

In [7]:
predictions = log_model.predict(X_test)

In [8]:
balanced_accuracy_score(y_test, predictions)

0.8870826360666189

In [9]:
conf_matrix = confusion_matrix(y_test, predictions)
cm_df = pd.DataFrame(conf_matrix, 
                                index = ['Actual heart disease', 'Actual no heart disease'], 
                                columns = ['Predicted heart disease', 'Predicted no heart disease']
                              )
cm_df

Unnamed: 0,Predicted heart disease,Predicted no heart disease
Actual heart disease,79,10
Actual no heart disease,16,125


In [10]:
print(classification_report(y_test, predictions))

              precision    recall  f1-score   support

           0       0.83      0.89      0.86        89
           1       0.93      0.89      0.91       141

    accuracy                           0.89       230
   macro avg       0.88      0.89      0.88       230
weighted avg       0.89      0.89      0.89       230



In [11]:
import pickle 

file_name = "pickle_model.pkl"
with open(file_name, 'wb') as f:
    pickle.dump(log_model, f)

# Load the model from the file
with open(file_name, 'rb') as f:
    p_model = pickle.load(f)

In [12]:
preds = p_model.predict(X_test)

In [13]:
print(classification_report(y_test,preds))

              precision    recall  f1-score   support

           0       0.83      0.89      0.86        89
           1       0.93      0.89      0.91       141

    accuracy                           0.89       230
   macro avg       0.88      0.89      0.88       230
weighted avg       0.89      0.89      0.89       230

