# Columns Description


- HighBP - Binary - 0 = no high BP 1 = high BP
- HighChol - Binary - 0 = no high cholesterol 1 = high cholesterol
- CholCheck - Binary - 0 = no cholesterol check in 5 years 1 = yes cholesterol check in 5 years
- BMI - Integer - Body Mass Index
- Smoker - Binary - Have you smoked at least 100 cigarettes in your entire life? 0 = no 1 = yes
- Stroke - Binary - (Ever told) you had a stroke. 0 = no 1 = yes
- HeartDiseaseorAttack - Binary - coronary heart disease (CHD) or myocardial infarction (MI) 0 = no 1 = yes
- PhysActivity - Binary - physical activity in past 30 days - not including job 0 = no 1 = yes
- Fruits - Binary - Consume Fruit 1 or more times per day 0 = no 1 = yes
- Veggies - Binary - Consume Vegetables 1 or more times per day 0 = no 1 = yes
- HvyAlcoholConsump - Binary - Heavy drinkers (adult men having more than 14 drinks per week and adult women having more than 7 drinks per week) 0 = no 1 = yes
- AnyHealthcare - Binary - Have any kind of health care coverage, including health insurance, prepaid plans such as HMO, etc. 0 = no 1 = yes
- NoDocbcCost - Binary - Was there a time in the past 12 months when you needed to see a doctor but could not because of cost? 0 = no 1 = yes
- GenHlth - Integer - Would you say that in general your health is: scale 1-5 1 = excellent 2 = very good 3 = good 4 = fair 5 = poor
- MentHlth - Integer - Now thinking about your mental health, which includes stress, depression, and problems with emotions, for how many days during the past 30 days was your mental health not good? scale 1-30 days
- PhysHlth - Integer - Now thinking about your physical health, which includes physical illness and injury, for how many days during the past 30 days was your physical health not good? scale 1-30 days
- DiffWalk - Binary - Do you have serious difficulty walking or climbing stairs? 0 = no 1 = yes
- Gender - Binary - Female, Male


# Imports


In [9]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow

from keras.callbacks import EarlyStopping
from keras.models import Sequential
from keras.layers import Dense, Input, BatchNormalization, Dropout
from keras.optimizers import Adam, AdamW
from keras.regularizers import L1, L2

from sklearn.model_selection import train_test_split
import mlflow

# MLFlow


# Model tracking


In [None]:
import absl.logging

# Where MLflow stores runs, and the experiment they are grouped under.
_MLFLOW_TRACKING_URI = "file:./mlruns"
_MLFLOW_EXPERIMENT = "AML Competition 3"

# Enable automatic logging for TensorFlow/Keras training; silent=True keeps
# MLflow's own autologging messages out of the notebook output.
mlflow.tensorflow.autolog(silent=True)  # type: ignore

# Point MLflow at the tracking location and select the experiment.
mlflow.set_tracking_uri(_MLFLOW_TRACKING_URI)
mlflow.set_experiment(_MLFLOW_EXPERIMENT)

# Only let absl messages of severity ERROR and above through.
absl.logging.set_verbosity(absl.logging.ERROR)

In [11]:
# Train a dense binary classifier on the training CSVs and track the run in
# MLflow.

# --- Load data ---------------------------------------------------------------
X_train_raw = pd.read_csv("data/X_train.csv")
y_train = pd.read_csv("data/y_train.csv")

# "ID" and "Gender" are excluded from the feature matrix.
X_train = X_train_raw.drop(columns=["ID", "Gender"])

# Extract the correct target variable and encode it to numeric (No=0, Yes=1)
y = y_train["Diabetes"].map({"No": 0, "Yes": 1})

# Series.map yields NaN for any label missing from the mapping; fail fast
# instead of silently feeding NaN targets into the loss.
if y.isna().any():
    unexpected = sorted(y_train.loc[y.isna(), "Diabetes"].astype(str).unique())
    raise ValueError(
        f"Unexpected values in 'Diabetes' column (expected 'No'/'Yes'): {unexpected}"
    )

# --- Train/validation split --------------------------------------------------
# Fraction of the rows held out for validation.
test_size = 0.01
X_train_split, X_val_split, y_train_split, y_val_split = train_test_split(
    X_train, y, test_size=test_size, random_state=42
)

# --- Model -------------------------------------------------------------------
# Fully connected network that halves its width at every layer (1024 -> 64),
# with L2 weight regularisation on each hidden layer and a single sigmoid
# output unit for the binary target.
model = Sequential(
    [
        Input(shape=(X_train_split.shape[1],)),
        Dense(1024, activation="relu", kernel_regularizer=L2(0.001)),
        Dense(512, activation="relu", kernel_regularizer=L2(0.001)),
        Dense(256, activation="relu", kernel_regularizer=L2(0.001)),
        Dense(128, activation="relu", kernel_regularizer=L2(0.001)),
        Dense(64, activation="relu", kernel_regularizer=L2(0.001)),
        Dense(1, activation="sigmoid"),
    ]
)
model.compile(
    optimizer=Adam(),  # type: ignore
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

# --- Training, tracked as one MLflow run -------------------------------------
with mlflow.start_run() as run:
    history = model.fit(
        X_train_split,
        y_train_split,
        epochs=10,
        batch_size=256,
        validation_data=(X_val_split, y_val_split),
    )

    # NOTE: these figures are computed on the validation split (the same data
    # passed as validation_data above), not on a separate held-out test set.
    test_loss, test_acc = model.evaluate(X_val_split, y_val_split)
    print(f"Test accuracy: {test_acc:.3f}")
    print(f"Test loss: {test_loss:.3f}")

    # The split fraction is a run configuration value, so record it as a
    # parameter. The metric of the same name is kept so runs remain comparable
    # with earlier ones that only logged it as a metric.
    mlflow.log_param("test_size", test_size)
    mlflow.log_metric("test_size", test_size)

Epoch 1/10
[1m206/206[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.7142 - loss: 1.0236 - val_accuracy: 0.7495 - val_loss: 0.7472
Epoch 2/10
[1m206/206[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.7377 - loss: 0.7005 - val_accuracy: 0.7495 - val_loss: 0.6595
Epoch 3/10
[1m206/206[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.7384 - loss: 0.6306 - val_accuracy: 0.6648 - val_loss: 0.6621
Epoch 4/10
[1m206/206[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.7403 - loss: 0.5953 - val_accuracy: 0.7495 - val_loss: 0.5766
Epoch 5/10
[1m206/206[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.7438 - loss: 0.5693 - val_accuracy: 0.7495 - val_loss: 0.5623
Epoch 6/10
[1m206/206[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.7452 - loss: 0.5563 - val_accuracy: 0.7250 - val_loss: 0.5716
Epoch 7/10
[1m206/206[0m 