In [1]:
import os
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

warnings.filterwarnings('ignore')

In [2]:
# Location of the semicolon-separated cardio dataset. The path defaults to the
# original author's download location, but can be redirected without editing
# the notebook by setting the CARDIO_DATA_PATH environment variable.
data_path = os.environ.get('CARDIO_DATA_PATH', r'/Users/briangor/Downloads/cardio_train.csv')
df = pd.read_csv(data_path, sep=';')
df

Unnamed: 0,id,age,gender,height,weight,ap_hi,ap_lo,cholesterol,gluc,smoke,alco,active,cardio
0,0,18393,2,168,62.0,110,80,1,1,0,0,1,0
1,1,20228,1,156,85.0,140,90,3,1,0,0,1,1
2,2,18857,1,165,64.0,130,70,3,1,0,0,0,1
3,3,17623,2,169,82.0,150,100,1,1,0,0,1,1
4,4,17474,1,156,56.0,100,60,1,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
69995,99993,19240,2,168,76.0,120,80,1,1,1,0,1,0
69996,99995,22601,1,158,126.0,140,90,2,2,0,0,1,1
69997,99996,19066,2,183,105.0,180,90,3,1,0,1,0,1
69998,99998,22431,1,163,72.0,135,80,1,2,0,0,0,1


<h2> Data Preparation: </h2>
Prepare your data by splitting it into training, validation, and test sets. Preprocess the data if needed (scaling, normalization, etc.).

In [3]:
# Rescale height by 0.01 (presumably centimetres -> metres, as the BMI formula needs).
# NOTE: this overwrites the column, so re-running the cell on an already
# converted frame shrinks the heights again; reload the data first.
df['height'] = df['height'] * 0.01

# Body-mass index = weight / height^2. It is kept as a float on purpose:
# truncating to an integer before categorising pushes every BMI in
# [18.5, 19) down to 18 and mislabels it as 'Underweight'.
df['bmi'] = df['weight'] / (df['height'] ** 2)

# Age appears to be recorded in days; convert to whole years.
df['age'] = (df['age'] / 365).round().astype('int')

# Define BMI categories
def categorize_bmi(bmi):
    """Return the weight-class label for a body-mass index value.

    The label is the first band whose exclusive upper bound is greater than
    ``bmi``: below 18.5 'Underweight', below 25 'Normal', below 30
    'Overweight'. Anything that matches no band is 'Obese'.
    """
    bands = (
        (18.5, 'Underweight'),
        (25, 'Normal'),
        (30, 'Overweight'),
    )
    for upper_bound, label in bands:
        if bmi < upper_bound:
            return label
    return 'Obese'

# Label every row with its BMI band
df['bmi_category'] = [categorize_bmi(bmi_value) for bmi_value in df['bmi']]

# Define age groups
def categorize_age(age):
    """Return the age-group label for an age in years.

    Under 40 is 'Young', 40 up to (but not including) 60 is 'Middle-aged',
    and everything else is 'Senior'.
    """
    if age < 40:
        return 'Young'
    if age < 60:
        return 'Middle-aged'
    return 'Senior'

# Label every row with its age group
df['age_group'] = [categorize_age(age_years) for age_years in df['age']]

# Define blood pressure categories
def categorize_bp(ap_hi, ap_lo):
    """Return a blood-pressure label from the two pressure readings.

    'Normal' requires ap_hi below 120 AND ap_lo below 80. Otherwise the
    reading is 'Hypertension' when ap_hi is at least 140 OR ap_lo is at
    least 90, and 'High-Normal' in every remaining case.
    """
    is_normal = ap_hi < 120 and ap_lo < 80
    if is_normal:
        return 'Normal'

    is_hypertensive = ap_hi >= 140 or ap_lo >= 90
    return 'Hypertension' if is_hypertensive else 'High-Normal'

# Label every row with its blood-pressure category. Walking the two columns in
# lock-step avoids DataFrame.apply(axis=1), which materialises a full Series
# for each row just to read two values from it.
df['bp_category'] = [
    categorize_bp(systolic, diastolic)
    for systolic, diastolic in zip(df['ap_hi'], df['ap_lo'])
]

In [4]:
# The 'id' column is removed before the frame is turned into model features
df = df.drop('id', axis=1)

In [5]:
# One-hot encode the string-valued category columns.
# The dummy dtype is pinned to uint8 (the default before pandas 2.0). Newer
# pandas emits bool dummies instead, and a frame mixing bool with numeric
# columns converts to an object array, which Keras may fail to turn into a tensor.
df = pd.get_dummies(df, dtype=np.uint8)

In [6]:
from sklearn.model_selection import train_test_split

# Target: the 'cardio' column. Features: every remaining column.
y = df['cardio']
X = df.drop('cardio', axis=1)

In [16]:
from sklearn.model_selection import train_test_split

# Three-way split: 60% train / 20% validation / 20% test.
# The evaluation cells further down use X_test / y_test, so a held-out test
# set has to exist; previously only train and validation were created, which
# makes those cells fail with a NameError on a fresh kernel.

# Step 1: hold out 20% of all rows as the final test set.
X_train_full, X_test, y_train_full, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Step 2: carve the validation set out of the remainder.
# 0.25 of the remaining 80% equals 20% of the full dataset.
X_train, X_val, y_train, y_val = train_test_split(
    X_train_full, y_train_full, test_size=0.25, random_state=42
)

<h2> Building a Neural Network: </h2>
Use TensorFlow's Keras API to create a neural network model:
`input_shape` is the shape of a single input sample, i.e. the number of features. Before you define the model, make sure you know how many features your dataset has. For example, if you have a dataset with 10 features, use `input_shape=(10,)`. This ensures the input layer of your neural network matches the number of features you're providing to the model.

In [17]:
# (rows, columns) of the encoded frame. The target column 'cardio' is still
# part of df here, so the feature matrix X has one column fewer than shown.
df.shape

(70000, 23)

In [18]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

In [19]:
# Width of the input layer: one unit per feature column of the training matrix
input_shape = X_train.shape[1]

# Assemble the feed-forward network one layer at a time
model = Sequential()
# Hidden layer 1: 64 ReLU units, sized to accept one row of features
model.add(Dense(64, activation='relu', input_shape=(input_shape,)))
# Randomly zero half of the activations during training to curb overfitting
model.add(Dropout(0.5))
# Hidden layer 2: 32 ReLU units
model.add(Dense(32, activation='relu'))
# Single sigmoid unit: outputs a probability for the binary target
model.add(Dense(1, activation='sigmoid'))

# Binary cross-entropy pairs with the sigmoid output; accuracy is tracked as a metric
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)


<h2> Model Training: </h2>
Train the neural network model using the fit method, providing the training data, validation data, number of epochs, and batch size.

In [20]:
# Fit for 10 epochs in mini-batches of 32; the validation split is scored
# after every epoch and the per-epoch metrics are kept in `history`
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=10,
    validation_data=(X_val, y_val),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<h2> Model Evaluation: </h2>
Evaluate the trained model on the test set:

In [21]:
# Score the trained network on the test split; with a single 'accuracy'
# metric compiled in, evaluate() yields the loss followed by the accuracy
test_loss, test_accuracy = model.evaluate(x=X_test, y=y_test)
print(f"Test Accuracy: {test_accuracy}")

Test Accuracy: 0.7252857089042664


In [22]:
# Sigmoid outputs (probabilities of the positive class) for every test row
predictions = model.predict(x=X_test)



<h2> Other Evaluation Metrics </h2>

from sklearn.metrics import classification_report, confusion_matrix

# Uses `predictions` (model.predict output) and `y_test` from the cells above.
true_labels = y_test

# Turn the sigmoid probabilities into hard 0/1 labels at a 0.5 threshold
# (adjust if needed) and flatten the (n, 1) column the single-unit output
# layer presumably produces into a 1-D vector.
predicted_labels = (predictions > 0.5).astype(int).ravel()

# Confusion matrix: rows are true classes, columns are predicted classes
cm = confusion_matrix(true_labels, predicted_labels)

# Per-class precision, recall, F1 and support
classification_rep = classification_report(true_labels, predicted_labels)

print("Confusion Matrix:")
print(cm)
print("\nClassification Report:")
print(classification_rep)