In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from imblearn.over_sampling import SMOTE
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

# Load the BRFSS 2015 health-indicator table from the notebook's content directory.
df = pd.read_csv('/content/diabetes_binary_health_indicators_BRFSS2015.csv')

# One label column per prediction task.
y_heart = df['HeartDiseaseorAttack']
y_diabetes = df['Diabetes_binary']

# Shared feature matrix: every column except the two labels and 'Age'.
X = df.drop(['Diabetes_binary', 'HeartDiseaseorAttack', 'Age'], axis=1)

# preprocessing and smote
def _reorder_rows(data, order):
    """Return ``data`` with its rows rearranged into the positions listed in ``order``."""
    if hasattr(data, "iloc"):
        # pandas object: select positionally, then restore a clean 0..n-1 index.
        return data.iloc[order].reset_index(drop=True)
    return data[order]


def preprocess_data(X, y):
    """Split into train/test sets and balance the training set with SMOTE.

    The split holds out 20% of the rows for testing (``random_state=42``).
    SMOTE is fitted on the training portion only, so the test set keeps its
    original class distribution and contains no synthetic samples.

    Args:
        X: Feature table with one row per sample.
        y: Labels aligned with the rows of ``X``.

    Returns:
        ``(X_train_resampled, X_test, y_train_resampled, y_test)`` where the
        training pair has been oversampled by SMOTE and then shuffled.
    """
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    smote = SMOTE(random_state=42)
    X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

    # SMOTE keeps the original rows first and appends the synthetic minority
    # rows at the end. Keras' ``validation_split`` holds out the *trailing*
    # rows before any shuffling, so without this step the validation set would
    # be made up of synthetic minority samples only and ``val_loss`` (and any
    # early stopping driven by it) would not reflect both classes.
    order = np.random.RandomState(42).permutation(len(y_train_resampled))
    X_train_resampled = _reorder_rows(X_train_resampled, order)
    y_train_resampled = _reorder_rows(y_train_resampled, order)

    return X_train_resampled, X_test, y_train_resampled, y_test

# Train/test partitions for the diabetes task (training part is SMOTE-balanced).
(
    X_train_diabetes,
    X_test_diabetes,
    y_train_diabetes,
    y_test_diabetes,
) = preprocess_data(X, y=y_diabetes)

# ANN model
def create_ann(input_dim):
    """Build an uncompiled feed-forward network with a single sigmoid output.

    Layer stack: Dense(128, relu) -> Dropout(0.5) -> Dense(64, relu)
    -> Dropout(0.5) -> Dense(1, sigmoid).

    Args:
        input_dim: Number of input features per sample.

    Returns:
        The ``Sequential`` model; compiling it is left to the caller.
    """
    layer_stack = (
        Dense(128, input_dim=input_dim, activation='relu'),
        Dropout(0.5),
        Dense(64, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    )

    network = Sequential()
    for layer in layer_stack:
        network.add(layer)
    return network

# Build and compile the diabetes classifier.
input_dim = X_train_diabetes.shape[1]
model_diabetes = create_ann(input_dim)
model_diabetes.compile(
    loss='binary_crossentropy',
    optimizer=Adam(learning_rate=0.001),
    metrics=['accuracy'],
)

# Stop once val_loss has not improved for 5 epochs and roll back to the best weights.
# NOTE(review): Keras takes the validation_split fraction from the end of the
# arrays before shuffling, so the row order of the training data decides which
# samples val_loss is measured on.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
model_diabetes.fit(
    x=X_train_diabetes,
    y=y_train_diabetes,
    batch_size=32,
    epochs=50,
    validation_split=0.2,
    callbacks=[early_stopping],
)

# Score the held-out test set: probabilities first, then hard labels at a 0.5 cut-off.
y_pred_diabetes_proba = model_diabetes.predict(X_test_diabetes).ravel()
y_pred_diabetes = (y_pred_diabetes_proba >= 0.5).astype(int)

# Threshold-based metrics use the hard labels; AUC-ROC uses the raw probabilities.
accuracy_diabetes = accuracy_score(y_true=y_test_diabetes, y_pred=y_pred_diabetes)
precision_diabetes = precision_score(y_true=y_test_diabetes, y_pred=y_pred_diabetes)
recall_diabetes = recall_score(y_true=y_test_diabetes, y_pred=y_pred_diabetes)
f1_diabetes = f1_score(y_true=y_test_diabetes, y_pred=y_pred_diabetes)
auc_roc_diabetes = roc_auc_score(y_true=y_test_diabetes, y_score=y_pred_diabetes_proba)

print(
    "\nDiabetes Prediction Metrics on Test Set:",
    f"Accuracy: {accuracy_diabetes:.3f}",
    f"Precision: {precision_diabetes:.3f}",
    f"Recall: {recall_diabetes:.3f}",
    f"F1 Score: {f1_diabetes:.3f}",
    f"AUC-ROC: {auc_roc_diabetes:.3f}",
    sep="\n",
)

# Train/test partitions for the heart-disease task (same features, different label).
(
    X_train_heart,
    X_test_heart,
    y_train_heart,
    y_test_heart,
) = preprocess_data(X, y=y_heart)

# Transfer learning: start from an identical architecture and copy over every
# weight learned by the diabetes model before fine-tuning on the new label.
model_heart = create_ann(input_dim)
diabetes_weights = model_diabetes.get_weights()
model_heart.set_weights(diabetes_weights)

# The new model needs its own compile step (optimizer, loss, metrics).
model_heart.compile(
    loss='binary_crossentropy',
    optimizer=Adam(learning_rate=0.001),
    metrics=['accuracy'],
)

# Fine-tune on the heart-disease data, reusing the early_stopping callback
# instance that was created for the diabetes run.
model_heart.fit(
    x=X_train_heart,
    y=y_train_heart,
    batch_size=32,
    epochs=50,
    validation_split=0.2,
    callbacks=[early_stopping],
)

# Score the held-out test set: probabilities first, then hard labels at a 0.5 cut-off.
y_pred_heart_proba = model_heart.predict(X_test_heart).ravel()
y_pred_heart = (y_pred_heart_proba >= 0.5).astype(int)

# Threshold-based metrics use the hard labels; AUC-ROC uses the raw probabilities.
accuracy_heart = accuracy_score(y_true=y_test_heart, y_pred=y_pred_heart)
precision_heart = precision_score(y_true=y_test_heart, y_pred=y_pred_heart)
recall_heart = recall_score(y_true=y_test_heart, y_pred=y_pred_heart)
f1_heart = f1_score(y_true=y_test_heart, y_pred=y_pred_heart)
auc_roc_heart = roc_auc_score(y_true=y_test_heart, y_score=y_pred_heart_proba)

print(
    "\nHeart Disease Prediction Metrics on Test Set:",
    f"Accuracy: {accuracy_heart:.3f}",
    f"Precision: {precision_heart:.3f}",
    f"Recall: {recall_heart:.3f}",
    f"F1 Score: {f1_heart:.3f}",
    f"AUC-ROC: {auc_roc_heart:.3f}",
    sep="\n",
)


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50

Diabetes Prediction Metrics on Test Set:
Accuracy: 0.783
Precision: 0.344
Recall: 0.628
F1 Score: 0.444
AUC-ROC: 0.817
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50

Heart Disease Prediction Metrics on Test Set:
Accuracy: 0.828
Precision: 0.285
Recall: 0.552
F1 Score: 0.376
AUC-ROC: 0.816
