In [1]:
import pandas as pd
path=r"heart_2022_no_nans.xlsx"
df=pd.read_excel(path,engine="openpyxl")

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
# Identify categorical and numerical columns
#categorical_cols = ["State", "Sex", "GeneralHealth", "LastCheckupTime", "PhysicalActivities", "RemovedTeeth"]
#numerical_cols = ["PhysicalHealthDays", "MentalHealthDays", "SleepHours"]

# Automatically identify categorical and numerical columns
categorical_cols = df.select_dtypes(include=['object']).columns.tolist()  # Select string-based categorical columns
numerical_cols = df.select_dtypes(include=['int64', 'float64']).columns.tolist()  # Select numerical columns

# Print results
print("Categorical Columns:", categorical_cols)
print("Numerical Columns:", numerical_cols)


Categorical Columns: ['State', 'Sex', 'GeneralHealth', 'LastCheckupTime', 'PhysicalActivities', 'RemovedTeeth', 'HadHeartAttack', 'HadAngina', 'HadStroke', 'HadAsthma', 'HadSkinCancer', 'HadCOPD', 'HadDepressiveDisorder', 'HadKidneyDisease', 'HadArthritis', 'HadDiabetes', 'DeafOrHardOfHearing', 'BlindOrVisionDifficulty', 'DifficultyConcentrating', 'DifficultyWalking', 'DifficultyDressingBathing', 'DifficultyErrands', 'SmokerStatus', 'ECigaretteUsage', 'ChestScan', 'RaceEthnicityCategory', 'AgeCategory', 'AlcoholDrinkers', 'HIVTesting', 'FluVaxLast12', 'PneumoVaxEver', 'TetanusLast10Tdap', 'HighRiskLastYear', 'CovidPos']
Numerical Columns: ['PhysicalHealthDays', 'MentalHealthDays', 'SleepHours', 'HeightInMeters', 'WeightInKilograms', 'BMI']


In [3]:
# Encode categorical columns
label_encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le  # Save encoders for later use

In [4]:
print(df.head())

   State  Sex  GeneralHealth  PhysicalHealthDays  MentalHealthDays  \
0      0    0              4                   4                 0   
1      0    1              4                   0                 0   
2      0    1              4                   0                 0   
3      0    0              1                   5                 0   
4      0    0              2                   3                15   

   LastCheckupTime  PhysicalActivities  SleepHours  RemovedTeeth  \
0                3                   1           9             3   
1                3                   1           6             3   
2                3                   0           8             1   
3                3                   1           9             3   
4                3                   1           5             0   

   HadHeartAttack  ...  HeightInMeters  WeightInKilograms    BMI  \
0               0  ...            1.60              71.67  27.99   
1               0  ...            

In [5]:

# Normalize numerical columns
scaler = StandardScaler()
df[numerical_cols] = scaler.fit_transform(df[numerical_cols])

# Define target variable (Modify this based on what you want to predict)
target_col = "HadHeartAttack"  # Example target column
df[target_col] = LabelEncoder().fit_transform(df[target_col])  # Convert "Yes/No" to 1/0

# Split into features and target
X = df.drop(columns=[target_col])
y = df[target_col]

In [6]:
print(df.head())

   State  Sex  GeneralHealth  PhysicalHealthDays  MentalHealthDays  \
0      0    0              4           -0.014160         -0.514292   
1      0    1              4           -0.490020         -0.514292   
2      0    1              4           -0.490020         -0.514292   
3      0    0              1            0.104805         -0.514292   
4      0    0              2           -0.133125          1.336949   

   LastCheckupTime  PhysicalActivities  SleepHours  RemovedTeeth  \
0                3                   1    1.373428             3   
1                3                   1   -0.708924             3   
2                3                   0    0.679311             1   
3                3                   1    1.373428             3   
4                3                   1   -1.403041             0   

   HadHeartAttack  ...  HeightInMeters  WeightInKilograms       BMI  \
0               0  ...       -0.985904          -0.560199 -0.104105   
1               0  ...      

In [8]:
import pickle

# Save label encoders
with open("label_encoders.pkl", "wb") as f:
    pickle.dump(label_encoders, f)

# Save the scaler
with open("scaler.pkl", "wb") as f:
    pickle.dump(scaler, f)

In [9]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build the neural network
model = Sequential([
    Dense(32, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(16, activation='relu'),
    Dense(1, activation='sigmoid')  # Sigmoid for binary classification
])

# Compile the model
model.compile(optimizer='sgd', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=3, batch_size=16, validation_data=(X_test, y_test))
# Save model weights
model.save("health_model.weights.h5")


Epoch 1/3


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m12302/12302[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 2ms/step - accuracy: 0.9437 - loss: 0.2004 - val_accuracy: 0.9467 - val_loss: 0.1660
Epoch 2/3
[1m12302/12302[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 2ms/step - accuracy: 0.9463 - loss: 0.1609 - val_accuracy: 0.9475 - val_loss: 0.1507
Epoch 3/3
[1m12302/12302[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 2ms/step - accuracy: 0.9460 - loss: 0.1571 - val_accuracy: 0.9478 - val_loss: 0.1501




In [10]:
model.save("health_model.h5")



In [11]:
# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy:.4f}")

[1m1538/1538[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.9463 - loss: 0.1540
Test Accuracy: 0.9478


In [12]:
from tensorflow.keras.models import load_model
import os
import pickle
# Load the saved model
model = load_model("health_model.h5")
# Load label encoders
with open("label_encoders.pkl", "rb") as f:
    label_encoders = pickle.load(f)
# Load scaler
with open("scaler.pkl", "rb") as f:
    scaler = pickle.load(f)



In [13]:
import pandas as pd
import numpy as np
import pickle
from tensorflow.keras.models import load_model
#selecting sample from excel file and making predictions

# Load the original dataset
file_path = r"heart_2022_no_nans.xlsx"
df_original = pd.read_excel(file_path, engine="openpyxl")
# Select a random row from the dataset
random_index = 456
random_sample = df_original.iloc[random_index:random_index+1].copy()  # Keep it as a DataFrame
# Define target column
target_col = "HadHeartAttack"
print("True label",random_sample[target_col])
# Drop target column before prediction
if target_col in random_sample.columns:
    random_sample = random_sample.drop(columns=[target_col])
# Identify categorical and numerical columns
categorical_cols = random_sample.select_dtypes(include=['object']).columns.tolist()  
numerical_cols = random_sample.select_dtypes(include=['int64', 'float64']).columns.tolist()  
# Encode categorical columns using saved encoders
for col in categorical_cols:
    random_sample[col] = label_encoders[col].transform(random_sample[col])
# Normalize numerical columns using saved scaler
random_sample[numerical_cols] = scaler.transform(random_sample[numerical_cols])
# Make prediction
prediction = model.predict(random_sample)
# Convert prediction to "Yes" or "No"
predicted_class = "Yes" if prediction[0][0] > 0.1 else "No"
# Print results
print(f"\nSelected Row Index: {random_index}")
print(f"Prediction for HadHeartAttack: {predicted_class}")


True label 456    No
Name: HadHeartAttack, dtype: object
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step

Selected Row Index: 456
Prediction for HadHeartAttack: No


In [17]:
import pandas as pd
import numpy as np
import pickle
from tensorflow.keras.models import load_model


# Load the original dataset
file_path = "heart_2022_no_nans.xlsx"
df_original = pd.read_excel(file_path, engine="openpyxl")
# Select a random row from the dataset
random_index = 456
random_sample = df_original.iloc[random_index:random_index+1].copy()  # Keep it as a DataFrame
# Define target column
target_col = "HadHeartAttack"
print("True label",random_sample[target_col])
# Drop target column before prediction
if target_col in random_sample.columns:
    random_sample = random_sample.drop(columns=[target_col])
# Identify categorical and numerical columns
categorical_cols = random_sample.select_dtypes(include=['object']).columns.tolist()  
numerical_cols = random_sample.select_dtypes(include=['int64', 'float64']).columns.tolist()  
# Encode categorical columns using saved encoders
for col in categorical_cols:
    random_sample[col] = label_encoders[col].transform(random_sample[col])
# Normalize numerical columns using saved scaler
random_sample[numerical_cols] = scaler.transform(random_sample[numerical_cols])
# Make prediction
prediction = model.predict(random_sample)
# Convert prediction to "Yes" or "No"
predicted_class = "Yes" if prediction[0][0] > 0.2 else "No"

# Print results
print(f"\nSelected Row Index: {random_index}")
print(f"Prediction for HadHeartAttack: {predicted_class}")

True label 456    No
Name: HadHeartAttack, dtype: object
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step

Selected Row Index: 456
Prediction for HadHeartAttack: No
