In [1]:
import datetime
import os

import joblib
import pandas as pd
import sqlalchemy
import tensorflow as tf
from sklearn.metrics import balanced_accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sqlalchemy import create_engine, func
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session

In [2]:
# Database setup.
# The password is taken from the PGPASSWORD environment variable so that real
# credentials never need to be saved in the notebook. The original placeholder
# is kept as the fallback, so an unconfigured environment behaves as before.
# URL.create() also escapes special characters in the password, which a
# hand-built connection string does not.
db_url = sqlalchemy.engine.URL.create(
    "postgresql+psycopg2",
    username="postgres",
    password=os.environ.get("PGPASSWORD", "yourpasswordhere"),
    host="localhost",
    port=5432,
    database="final_project",
)
engine = create_engine(db_url)

# Column labels are assigned by position (first selected column -> "age", ...),
# exactly as the original row-by-row mapping did. The spelling of
# "exercise_aniga" is kept as-is because the later cells and the saved
# random-forest CSV use the derived "exercise_aniga_Y" column name.
HEART_COLUMNS = [
    "age",
    "sex",
    "chest_pain_type",
    "resting_bp",
    "cholesterol",
    "fasting_bs",
    "resting_ecg",
    "max_hr",
    "exercise_aniga",
    "old_peak",
    "st_slope",
    "heart_disease",
]

# Engine.execute() no longer exists in SQLAlchemy 2.0; run the query on an
# explicit connection that the `with` block closes even if the query fails.
# The automap reflection and ORM session from the earlier version were never
# used to read the data, so they are not created here.
with engine.connect() as connection:
    heart_rows = connection.execute(
        sqlalchemy.text("SELECT * FROM heart_failure")
    ).fetchall()

# Build the frame in one step; only the first len(HEART_COLUMNS) values of each
# row are used, matching the positions the original loop read.
heart_df = pd.DataFrame(
    [tuple(row)[: len(HEART_COLUMNS)] for row in heart_rows],
    columns=HEART_COLUMNS,
)
heart_df

Unnamed: 0,age,sex,chest_pain_type,resting_bp,cholesterol,fasting_bs,resting_ecg,max_hr,exercise_aniga,old_peak,st_slope,heart_disease
0,40,M,ATA,140,289,0,Normal,172,N,0.0,Up,0
1,49,F,NAP,160,180,0,Normal,156,N,1.0,Flat,1
2,37,M,ATA,130,283,0,ST,98,N,0.0,Up,0
3,48,F,ASY,138,214,0,Normal,108,Y,1.5,Flat,1
4,54,M,NAP,150,195,0,Normal,122,N,0.0,Up,0
...,...,...,...,...,...,...,...,...,...,...,...,...
913,45,M,TA,110,264,0,Normal,132,N,1.2,Flat,1
914,68,M,ASY,144,193,1,Normal,141,N,3.4,Flat,1
915,57,M,ASY,130,131,0,Normal,115,Y,1.2,Flat,1
916,57,F,ATA,130,236,0,LVH,174,N,0.0,Flat,1


In [3]:
# Restore the three persisted classifiers from their Resources folders.
# NOTE: joblib.load unpickles the file, which can run arbitrary code, so only
# load artifacts that come from a trusted source.
random_forest_model = joblib.load('../Random_Forest/Resources/random_forest.pickle')
logistic_model = joblib.load('../Logistic_Regression/Resources/pickle_model.pkl')
logistic_resampled_model = joblib.load('../Logistic_Regression/Resources/model_resampled.pkl')

In [4]:
# Load the two saved Keras networks (the "auto" and "manual" .h5 files).
# `tf` comes from the import cell at the top of the notebook, so that all
# dependencies are declared in one place and a fresh-kernel run fails early
# if TensorFlow is missing.
auto_neural_network_model = tf.keras.models.load_model('../Neural_Network/Resources/auto_model.h5')
neural_network_model = tf.keras.models.load_model('../Neural_Network/Resources/manual_model.h5')

In [5]:
# One-hot encode the string-valued columns (sex, chest_pain_type, ...);
# numeric columns are carried over unchanged.
dummies = pd.get_dummies(data=heart_df)

# Summarise the resulting columns and dtypes.
dummies.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 918 entries, 0 to 917
Data columns (total 21 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   age                  918 non-null    int64  
 1   resting_bp           918 non-null    int64  
 2   cholesterol          918 non-null    int64  
 3   fasting_bs           918 non-null    int64  
 4   max_hr               918 non-null    int64  
 5   old_peak             918 non-null    float64
 6   heart_disease        918 non-null    int64  
 7   sex_F                918 non-null    uint8  
 8   sex_M                918 non-null    uint8  
 9   chest_pain_type_ASY  918 non-null    uint8  
 10  chest_pain_type_ATA  918 non-null    uint8  
 11  chest_pain_type_NAP  918 non-null    uint8  
 12  chest_pain_type_TA   918 non-null    uint8  
 13  resting_ecg_LVH      918 non-null    uint8  
 14  resting_ecg_Normal   918 non-null    uint8  
 15  resting_ecg_ST       918 non-null    uin

In [6]:
# Separate the encoded frame into the feature matrix and the target column.
X = dummies.drop("heart_disease", axis=1)
y = dummies["heart_disease"]

# Fixed random_state keeps the train/test partition reproducible.
# NOTE(review): presumably this matches the split used when the saved models
# were trained — confirm, otherwise test rows may have been seen in training.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

In [7]:
# Fit the standardizer on the training features only; fit() returns the
# scaler itself, so both names refer to the same fitted object.
Scaler = X_Scaler = StandardScaler().fit(X_train)

# Apply the training-set statistics to both partitions.
X_train_scaled, X_test_scaled = (
    X_Scaler.transform(features) for features in (X_train, X_test)
)

In [14]:
# Evaluate both Keras networks on the scaled test features. Each evaluate()
# result is unpacked into a loss and an accuracy value.
auto_scores = auto_neural_network_model.evaluate(X_test_scaled, y_test, verbose=2)
model_loss, auto_model_accuracy = auto_scores

# `model_loss` is rebound here, so afterwards it holds the manual network's loss.
manual_scores = neural_network_model.evaluate(X_test_scaled, y_test, verbose=2)
model_loss, manual_model_accuracy = manual_scores

8/8 - 0s - loss: 0.4645 - accuracy: 0.8522 - 27ms/epoch - 3ms/step
8/8 - 0s - loss: 0.4737 - accuracy: 0.8261 - 25ms/epoch - 3ms/step


In [9]:
# Accuracy of the resampled logistic model on the scaled test features.
lrm_predictions = logistic_resampled_model.predict(X_test_scaled)
lrm_classification = classification_report(
    y_true=y_test,
    y_pred=lrm_predictions,
    output_dict=True,
)

# With output_dict=True the report is a dict; keep only its accuracy entry.
lrm_accuracy = lrm_classification['accuracy']
print(lrm_accuracy)

0.8260869565217391


In [11]:
# Accuracy of the plain logistic model.
# NOTE(review): this model is scored on the unscaled X_test, unlike the other
# models — presumably it was trained on unscaled features; confirm.
lm_predictions = logistic_model.predict(X_test)
lm_classification = classification_report(
    y_true=y_test,
    y_pred=lm_predictions,
    output_dict=True,
)

# With output_dict=True the report is a dict; keep only its accuracy entry.
lm_accuracy = lm_classification['accuracy']
print(lm_accuracy)

0.8347826086956521


In [16]:
# Read the feature table stored alongside the random-forest model.
random_forest = pd.read_csv('../Random_Forest/Resources/random_forest_df.csv')

# Preview the first five rows (head() defaults to 5).
random_forest.head()

Unnamed: 0,age,resting_bp,cholesterol,fasting_bs,max_hr,old_peak,sex_F,chest_pain_type_ASY,chest_pain_type_ATA,chest_pain_type_NAP,exercise_aniga_Y,st_slope_Flat,st_slope_Up
0,40,140,289,0,172,0.0,0,0,1,0,0,0,1
1,49,160,180,0,156,1.0,1,0,0,1,0,1,0
2,37,130,283,0,98,0.0,0,0,1,0,0,0,1
3,48,138,214,0,108,1.5,1,1,0,0,1,1,0
4,54,150,195,0,122,0.0,0,0,0,1,0,0,1


In [17]:
# Use the random-forest feature table as X, with the target still taken from
# the encoded heart-failure frame.
# NOTE(review): assumes the CSV rows are in the same order as `dummies` — confirm.
X = random_forest
y = dummies["heart_disease"]

# This rebinds X, y and the four split variables from the earlier split, so
# the cells that score the other models must not be re-run after this point.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

In [18]:
# Refit a fresh standardizer on the current training features; fit() returns
# the scaler itself, so both names refer to the same fitted object.
Scaler = X_Scaler = StandardScaler().fit(X_train)

# Apply the training-set statistics to both partitions (replaces the earlier
# scaled arrays of the same names).
X_train_scaled, X_test_scaled = (
    X_Scaler.transform(features) for features in (X_train, X_test)
)

In [33]:
# Accuracy of the random-forest model on the scaled test features.
rfm_predictions = random_forest_model.predict(X_test_scaled)
accuracy_classification = classification_report(
    y_true=y_test,
    y_pred=rfm_predictions,
    output_dict=True,
)

# With output_dict=True the report is a dict; keep only its accuracy entry.
rfm_accuracy = accuracy_classification['accuracy']
print(rfm_accuracy)

0.8434782608695652


In [34]:
# Collect the accuracy of every model into one comparison table.
# The two lists are parallel: names[i] labels accuracy[i].
names = [
    'Random Forest',
    'Logistic',
    'Logistic Resampled',
    'Auto Neural Network',
    'Manual Neural Network',
]
accuracy = [
    rfm_accuracy,
    lm_accuracy,
    lrm_accuracy,
    auto_model_accuracy,
    manual_model_accuracy,
]
data = {
    'Model Name': names,
    'Accuracy': accuracy,
}
models_accuracy = pd.DataFrame(data=data)

# Show the table (head() defaults to 5 rows, i.e. all five models).
models_accuracy.head()

Unnamed: 0,Model Name,Accuracy
0,Random Forest,0.843478
1,Logistic,0.834783
2,Logistic Resampled,0.826087
3,Auto Neural Network,0.852174
4,Manual Neural Network,0.826087


In [39]:
# Write the comparison table to the working directory, leaving out the row index.
models_accuracy.to_csv(
    'models_accuracy.csv',
    index=False,
)