In [6]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

# Example dataset
data = pd.read_csv("heart.csv")
print(data.head())

columns = ["age", "sex", "cp", "trestbps", "chol", "fbs", "restecg", "thalach", "exang", "oldpeak", "slope", "ca", "thal", "num"]
data.columns = columns
data.dropna(inplace=True)



y = data["num"].values
X = data.drop("num", axis=1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

categorical_cols = X.select_dtypes(include=["object", "category"]).columns.tolist()
numerical_cols = X.select_dtypes(include=["int64", "float64"]).columns.tolist()

# ------------------------------
# Preprocessing inputs
# ------------------------------

preprocessed_inputs = []
all_inputs = []

# Numeric features -> Normalization
for col in numerical_cols:
    inp = tf.keras.Input(shape=(1,), name=col)
    norm = tf.keras.layers.Normalization(axis=None)
    norm.adapt(np.array(X_train[col]).reshape(-1, 1))
    encoded = norm(inp)
    all_inputs.append(inp)
    preprocessed_inputs.append(encoded)

# Categorical features -> Embeddings
for col in categorical_cols:
    inp = tf.keras.Input(shape=(1,), dtype=tf.string, name=col)
    lookup = tf.keras.layers.StringLookup(output_mode="int")
    lookup.adapt(X_train[col])
    vocab_size = lookup.vocabulary_size()
    embed_dim = min(50, vocab_size // 2)

    x = lookup(inp)
    x = tf.keras.layers.Embedding(vocab_size, embed_dim)(x)
    x = tf.keras.layers.Flatten()(x)

    all_inputs.append(inp)
    preprocessed_inputs.append(x)

# Concatenate all preprocessed features
concatenated = tf.keras.layers.Concatenate()(preprocessed_inputs)

# ------------------------------
# Now wrap into Sequential body
# ------------------------------
seq_body = tf.keras.models.Sequential([
    tf.keras.Input(shape=(concatenated.shape[1],)),  # e.g., 13 features
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dropout(0.1),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

output = seq_body(concatenated)

# Full Model = Preprocessing + Sequential
model = tf.keras.Model(inputs=all_inputs, outputs=output)

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

# ------------------------------
# Training
# ------------------------------
train_dict = {col: X_train[col].values for col in X_train.columns}
test_dict = {col: X_test[col].values for col in X_test.columns}

model.fit(train_dict, y_train, epochs=5, batch_size=32, validation_split=0.2)

print("Test Accuracy:", model.evaluate(test_dict, y_test))


   70  1  4  130  322  0  2  109  0.1  2.4  2.1  3  3.1  -1
0  67  0  3  115  564  0  2  160    0  1.6    2  0    7   1
1  57  1  2  124  261  0  0  141    0  0.3    1  0    7  -1
2  64  1  4  128  263  0  0  105    1  0.2    2  1    7   1
3  74  0  2  120  269  0  2  121    1  0.2    1  1    3   1
4  65  1  4  120  177  0  0  140    0  0.4    1  0    7   1
Epoch 1/5
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 120ms/step - accuracy: 0.2209 - loss: 0.7922 - val_accuracy: 0.2791 - val_loss: 0.6884
Epoch 2/5
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - accuracy: 0.2558 - loss: 0.7763 - val_accuracy: 0.2558 - val_loss: 0.6303
Epoch 3/5
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step - accuracy: 0.2151 - loss: 0.7022 - val_accuracy: 0.2558 - val_loss: 0.5770
Epoch 4/5
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step - accuracy: 0.2326 - loss: 0.6878 - val_accuracy: 0.2791 - val_loss: 0.5255
Epoch 5