In [None]:

import tensorflow as tf
import numpy as np
from sklearn.datasets import fetch_openml
from tensorflow.keras.layers import Dense, Input, Embedding, Flatten, Concatenate, Normalization, IntegerLookup, StringLookup, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Load dataset
credit_data = fetch_openml(name='credit-g', version=1, as_frame=True)
X = credit_data.data
y = credit_data.target.map({'good': 1, 'bad': 0}).values

# Define feature columns
discrete_features = ['installment_commitment', 'residence_since', 'num_dependents', 'existing_credits']
categorical_features = X.select_dtypes(exclude='number').columns.tolist()
continous_features = ['duration', 'credit_amount']

# Create TensorFlow datasets
def create_tf_datasets(X, y, test_size=0.1, val_size=0.1, batch_size=128):
    dataset = tf.data.Dataset.from_tensor_slices((dict(X), y))
    dataset = dataset.shuffle(buffer_size=len(X), seed=seed)
    test_size = int(len(X) * test_size)
    val_size = int(len(X) * val_size)
    test_dataset = dataset.take(test_size)
    val_dataset = dataset.skip(test_size).take(val_size)
    train_dataset = dataset.skip(test_size + val_size)
    return (
        train_dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE),
        val_dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE),
        test_dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)
    )

batch_size = 128
seed=2025
train_dataset_raw, val_dataset_raw, test_dataset_raw = create_tf_datasets(X, y)

# Precompute normalization statistics
train_features = {col: X[col].values for col in X.columns}
continuous_train_data = {col: train_features[col].astype(float) for col in continous_features}

# Compute mean and variance for each continuous feature to avoid
#unmatched batch size for the last bactch during the normalization process.
# Normalization layers require all batches must have the same size
normalization_stats = {}
for col in continous_features:
    data = continuous_train_data[col]
    mean = np.mean(data)
    var = np.var(data)
    normalization_stats[col] = (mean, var)

# Adapt preprocessing layers on training data only
def create_preprocessing_layers():
    normalizers = {}
    for col in continous_features:
        mean, var = normalization_stats[col]
        normalizer = Normalization()
        normalizer.mean = [mean]
        normalizer.variance = [var]
        normalizers[col] = normalizer

    ordinal_encoders = {col: IntegerLookup(output_mode='int', num_oov_indices=1) for col in discrete_features}
    categorical_encoders = {col: StringLookup(output_mode='int', num_oov_indices=1) for col in categorical_features}

    # Adapt encoders
    for batch in train_dataset_raw:
        features, _ = batch
        for col in discrete_features:
            ordinal_encoders[col].adapt(features[col])
        for col in categorical_features:
            categorical_encoders[col].adapt(features[col])

    return normalizers, ordinal_encoders, categorical_encoders

normalizers, ordinal_encoders, categorical_encoders = create_preprocessing_layers()

# Preprocessing integrated into the model
def build_preprocessing_model():
    # Inputs
    continuous_inputs = {col: Input(shape=(1,), dtype=tf.float32, name=f"{col}_input") for col in continous_features}
    discrete_inputs = {col: Input(shape=(1,), dtype=tf.int32, name=f"{col}_input") for col in discrete_features}
    categorical_inputs = {col: Input(shape=(1,), dtype=tf.string, name=f"{col}_input") for col in categorical_features}

    def log1p_cast(x):
      x = tf.cast(x, tf.float32)
      return tf.math.log1p(x)


   # Preprocess continuous features (log1p and normalize)
    processed_continuous = [
        normalizers[col](tf.keras.layers.Lambda(log1p_cast, name=f'log1p_lambda_{col}')(continuous_inputs[col]))
        for col in continous_features
    ]

    # Preprocess discrete numeric features
    def cast_to_float(x):
      return tf.cast(x, tf.float32)

    processed_discrete = [
        tf.keras.layers.Lambda(cast_to_float, name=f'cast_lambda{col}')(discrete_inputs[col]) for col in discrete_features
    ]

#(ordinal_encoders[col](discrete_inputs[col]), tf.float32
    # Preprocess categorical features with embeddings
    embedding_size = 8
    embedded_features = [
        Embedding(input_dim=categorical_encoders[col].vocabulary_size(), output_dim=embedding_size)(
            categorical_encoders[col](categorical_inputs[col])
        ) for col in categorical_features
    ]
    embedded_features = [Flatten()(embed) for embed in embedded_features]

    # Concatenate all features
    all_features = Concatenate()(processed_continuous + processed_discrete + embedded_features)

    return continuous_inputs, discrete_inputs, categorical_inputs, all_features

# Build the full model
continuous_inputs, discrete_inputs, categorical_inputs, processed_features = build_preprocessing_model()
x = Dense(128, activation='relu', kernel_initializer='he_normal')(processed_features)
x = Dropout(0.1)(x)
x = Dense(64, activation='relu',  kernel_initializer='he_normal')(x)
x = Dropout(0.1)(x)
output = Dense(1, activation='sigmoid')(x)

# Combine inputs and outputs
model_inputs = list(continuous_inputs.values()) + list(discrete_inputs.values()) + list(categorical_inputs.values())
model = Model(inputs=model_inputs, outputs=output)

# Compile model
model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Preprocess datasets for model input
def preprocess_batch(features, labels):
    inputs = {
        **{f"{col}_input": tf.cast(features[col], tf.float32) for col in continous_features},
        **{f"{col}_input": tf.cast(features[col], tf.int32) for col in discrete_features},
        **{f"{col}_input": features[col] for col in categorical_features}
    }
    return inputs, labels

train_dataset = train_dataset_raw.map(preprocess_batch).cache()
val_dataset = val_dataset_raw.map(preprocess_batch).cache()
test_dataset = test_dataset_raw.map(preprocess_batch).cache()

# Callbacks
callbacks = [EarlyStopping(patience=15,
                           restore_best_weights=True,
                           monitor='val_loss'),
            ReduceLROnPlateau(monitor='val_loss',
                              fact=0.5,
                              patience=15,
                              min_lr=1e-6),
            ModelCheckpoint('best_logistic_credit_model_tf.keras',
                            monitor='val_loss',
                            save_best_only=True)
            ]


# Train the model
model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=50,
    callbacks=callbacks
)

# Evaluate
test_loss, test_acc = model.evaluate(test_dataset)
print(f"Test accuracy: {test_acc}")

# Save model
model.save('credit_model_tf_large.keras')


Epoch 1/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 112ms/step - accuracy: 0.3306 - loss: 1.1813 - val_accuracy: 0.7100 - val_loss: 0.6029 - learning_rate: 0.0010
Epoch 2/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.7120 - loss: 0.6583 - val_accuracy: 0.7100 - val_loss: 0.6904 - learning_rate: 0.0010
Epoch 3/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - accuracy: 0.7176 - loss: 0.6797 - val_accuracy: 0.7100 - val_loss: 0.5997 - learning_rate: 0.0010
Epoch 4/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7100 - loss: 0.6124 - val_accuracy: 0.7100 - val_loss: 0.6081 - learning_rate: 0.0010
Epoch 5/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - accuracy: 0.6955 - loss: 0.6170 - val_accuracy: 0.7100 - val_loss: 0.5913 - learning_rate: 0.0010
Epoch 6/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - a

In [None]:
import tensorflow as tf
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Dense, Input, Embedding, Flatten, Concatenate, IntegerLookup, StringLookup, Lambda
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint

# Load dataset
credit_data = fetch_openml(name='credit-g', version=1, as_frame=True)
X = credit_data.data
y = credit_data.target.map({'good': 1, 'bad': 0}).values

# Define feature columns
discrete_features = ['installment_commitment', 'residence_since', 'num_dependents', 'existing_credits']
categorical_features = X.select_dtypes(exclude='number').columns.tolist()
continuous_features = ['duration', 'credit_amount']

# Preprocess continuous features
scaler = StandardScaler()
X[continuous_features] = np.log1p(X[continuous_features])  # Apply log1p
X[continuous_features] = scaler.fit_transform(X[continuous_features])  # Standardize

# Create TensorFlow datasets with 80/10/10 split
def create_tf_datasets(X, y, train_size=0.8, val_size=0.1, batch_size=128, seed=None):
    dataset = tf.data.Dataset.from_tensor_slices((dict(X), y))
    dataset = dataset.shuffle(buffer_size=len(X), seed=seed)
    n = len(X)
    train_size = int(n * train_size)
    val_size = int(n * val_size)
    train_dataset = dataset.take(train_size)
    val_dataset = dataset.skip(train_size).take(val_size)
    test_dataset = dataset.skip(train_size + val_size)
    return (
        train_dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE),
        val_dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE),
        test_dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)
    )

seed = 2025  # Change to None for randomness, or test 0, 1, 42, etc.
train_dataset_raw, val_dataset_raw, test_dataset_raw = create_tf_datasets(X, y, seed=seed)

# Adapt lookup layers
def adapt_preprocessing_layers(dataset):
    ordinal_encoders = {col: IntegerLookup(output_mode='int', num_oov_indices=1) for col in discrete_features}
    categorical_encoders = {col: StringLookup(output_mode='int', num_oov_indices=1) for col in categorical_features}

    for batch in dataset:
        features, _ = batch
        for col in discrete_features:
            ordinal_encoders[col].adapt(features[col])
        for col in categorical_features:
            categorical_encoders[col].adapt(features[col])

    return ordinal_encoders, categorical_encoders

ordinal_encoders, categorical_encoders = adapt_preprocessing_layers(train_dataset_raw)

# Build model with preprocessing
def build_preprocessing_model():
    continuous_inputs = {col: Input(shape=(1,), dtype=tf.float32, name=f"{col}_input") for col in continuous_features}
    discrete_inputs = {col: Input(shape=(1,), dtype=tf.int32, name=f"{col}_input") for col in discrete_features}
    categorical_inputs = {col: Input(shape=(1,), dtype=tf.string, name=f"{col}_input") for col in categorical_features}

    processed_continuous = [continuous_inputs[col] for col in continuous_features]  # Already preprocessed

    processed_discrete = [
        Lambda(lambda x: tf.cast(ordinal_encoders[col](x), tf.float32), name=f'cast_lambda_{col}')(discrete_inputs[col])
        for col in discrete_features
    ]

    embedding_size = 8
    embedded_features = [
        Flatten()(Embedding(input_dim=categorical_encoders[col].vocabulary_size(), output_dim=embedding_size)(
            categorical_encoders[col](categorical_inputs[col])
        )) for col in categorical_features
    ]

    all_features = Concatenate()(processed_continuous + processed_discrete + embedded_features)
    return continuous_inputs, discrete_inputs, categorical_inputs, all_features

# Build full model
continuous_inputs, discrete_inputs, categorical_inputs, processed_features = build_preprocessing_model()
x = Dense(128, activation='relu', kernel_initializer='he_normal')(processed_features)
x = tf.keras.layers.Dropout(0.1)(x)
x = Dense(64, activation='relu', kernel_initializer='he_normal')(x)
x = tf.keras.layers.Dropout(0.1)(x)
output = Dense(1, activation='sigmoid')(x)

model_inputs = list(continuous_inputs.values()) + list(discrete_inputs.values()) + list(categorical_inputs.values())
model = Model(inputs=model_inputs, outputs=output)

# Compile model
model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Preprocess datasets
def preprocess_batch(features, labels):
    inputs = {
        **{f"{col}_input": features[col] for col in continuous_features},
        **{f"{col}_input": tf.cast(features[col], tf.int32) for col in discrete_features},
        **{f"{col}_input": features[col] for col in categorical_features}
    }
    return inputs, labels

train_dataset = train_dataset_raw.map(preprocess_batch).cache()
val_dataset = val_dataset_raw.map(preprocess_batch).cache()
test_dataset = test_dataset_raw.map(preprocess_batch).cache()

# Callbacks
callbacks = [
    EarlyStopping(patience=15, restore_best_weights=True, monitor='val_loss'),
    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=15, min_lr=1e-6),
    ModelCheckpoint('best_logistic_credit_model_tf.keras', monitor='val_loss', save_best_only=True)
]

# Train
model.fit(train_dataset, validation_data=val_dataset, epochs=50, callbacks=callbacks)

# Evaluate
test_loss, test_acc = model.evaluate(test_dataset)
print(f"Test loss: {test_loss} - Test accuracy: {test_acc}")

# Save model
model.save("logistic_credit_model_tf.keras")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[continuous_features] = np.log1p(X[continuous_features])  # Apply log1p
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[continuous_features] = scaler.fit_transform(X[continuous_features])  # Standardize


Epoch 1/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 204ms/step - accuracy: 0.6589 - loss: 0.6385 - val_accuracy: 0.6600 - val_loss: 0.6473 - learning_rate: 0.0010
Epoch 2/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step - accuracy: 0.7052 - loss: 0.5961 - val_accuracy: 0.6700 - val_loss: 0.6471 - learning_rate: 0.0010
Epoch 3/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 53ms/step - accuracy: 0.7057 - loss: 0.5861 - val_accuracy: 0.6700 - val_loss: 0.6311 - learning_rate: 0.0010
Epoch 4/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step - accuracy: 0.7109 - loss: 0.5708 - val_accuracy: 0.6700 - val_loss: 0.6220 - learning_rate: 0.0010
Epoch 5/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - accuracy: 0.7336 - loss: 0.5626 - val_accuracy: 0.6600 - val_loss: 0.6123 - learning_rate: 0.0010
Epoch 6/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step -

In [None]:
import tensorflow as tf
!pip install silence_tensorflow
from silence_tensorflow import silence_tensorflow
silence_tensorflow()
import numpy as np
from sklearn.datasets import fetch_openml
from tensorflow.keras.layers import Dense, Input, Embedding, Flatten, Concatenate, BatchNormalization, IntegerLookup, StringLookup
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint

# Load dataset
credit_data = fetch_openml(name='credit-g', version=1, as_frame=True)
X = credit_data.data
y = credit_data.target.map({'good': 1, 'bad': 0}).values

# Define feature columns
discrete_features = ['installment_commitment', 'residence_since', 'num_dependents', 'existing_credits']
categorical_features = X.select_dtypes(exclude='number').columns.tolist()
continuous_features = ['duration', 'credit_amount']

# Create TensorFlow datasets with 80/10/10 split
def create_tf_datasets(X, y, train_size=0.8, val_size=0.1, batch_size=128, seed=None):
    dataset = tf.data.Dataset.from_tensor_slices((dict(X), y))
    dataset = dataset.shuffle(buffer_size=len(X), seed=seed)  # Configurable seed
    n = len(X)
    train_size = int(n * train_size)
    val_size = int(n * val_size)
    train_dataset = dataset.take(train_size)
    val_dataset = dataset.skip(train_size).take(val_size)
    test_dataset = dataset.skip(train_size + val_size)
    return (
        train_dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE),
        val_dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE),
        test_dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)
    )

# Try different seeds (None for unseeded, or specific values)
seed = 2025  # Change to None for randomness, or test 0, 1, 42, etc.
train_dataset_raw, val_dataset_raw, test_dataset_raw = create_tf_datasets(X, y, seed=seed)

# Adapt lookup layers
def adapt_preprocessing_layers(dataset):
    ordinal_encoders = {col: IntegerLookup(output_mode='int', num_oov_indices=1) for col in discrete_features}
    categorical_encoders = {col: StringLookup(output_mode='int', num_oov_indices=1) for col in categorical_features}

    for batch in dataset:
        features, _ = batch
        for col in discrete_features:
            ordinal_encoders[col].adapt(features[col])
        for col in categorical_features:
            categorical_encoders[col].adapt(features[col])

    return ordinal_encoders, categorical_encoders

ordinal_encoders, categorical_encoders = adapt_preprocessing_layers(train_dataset_raw)

def log1p_with_shape(x):
    return tf.math.log1p(x)

def cast_to_float_with_shape(x):
    return tf.cast(x, tf.float32)


# Build model with preprocessing
def build_preprocessing_model():
    continuous_inputs = {col: Input(shape=(1,), dtype=tf.float32, name=f"{col}_input") for col in continuous_features}
    discrete_inputs = {col: Input(shape=(1,), dtype=tf.int32, name=f"{col}_input") for col in discrete_features}
    categorical_inputs = {col: Input(shape=(1,), dtype=tf.string, name=f"{col}_input") for col in categorical_features}


    processed_continuous = [
        tf.keras.layers.Lambda(
            lambda x: (x - tf.reduce_mean(x)) / tf.math.reduce_std(x),
            output_shape=(1,),
            name=f'standardize_lambda_{col}'
        )(
            tf.keras.layers.Lambda(
                lambda x: log1p_with_shape(x),
                output_shape=(1,),
                name=f'log1p_lambda_{col}'
            )
        (continuous_inputs[col])) for col in continuous_features
    ]


    #processed_continuous = [
    #    BatchNormalization(momentum=0.1, epsilon=1e-5)(
    #        tf.keras.layers.Lambda(
    #            log1p_with_shape,
    #            output_shape=(1,),
    #            name=f'log1p_lambda_{col}'
    #        )(continuous_inputs[col])
    #    ) for col in continuous_features
    #]


    processed_discrete = [
        tf.keras.layers.Lambda(
            lambda x: cast_to_float_with_shape(ordinal_encoders[col](x)),
            output_shape=(1,),
            name=f'cast_lambda_{col}'
        )(discrete_inputs[col])
        for col in discrete_features
    ]

    embedding_size = 8
    embedded_features = [
        Flatten()(Embedding(input_dim=categorical_encoders[col].vocabulary_size(), output_dim=embedding_size)(
            categorical_encoders[col](categorical_inputs[col])
        )) for col in categorical_features
    ]

    all_features = Concatenate()(processed_continuous + processed_discrete + embedded_features)
    return continuous_inputs, discrete_inputs, categorical_inputs, all_features

# Build full model
continuous_inputs, discrete_inputs, categorical_inputs, processed_features = build_preprocessing_model()
x = Dense(128, activation='relu', kernel_initializer='he_normal')(processed_features)
x = tf.keras.layers.Dropout(0.1)(x)
x = Dense(64, activation='relu', kernel_initializer='he_normal')(x)
x = tf.keras.layers.Dropout(0.1)(x)
output = Dense(1, activation='sigmoid')(x)

model_inputs = list(continuous_inputs.values()) + list(discrete_inputs.values()) + list(categorical_inputs.values())
model = Model(inputs=model_inputs, outputs=output)

# Compile model
model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])
#model.summary()

# Preprocess datasets
def preprocess_batch(features, labels):
    inputs = {
        **{f"{col}_input": features[col] for col in continuous_features},
        **{f"{col}_input": tf.cast(features[col], tf.int32) for col in discrete_features},
        **{f"{col}_input": features[col] for col in categorical_features}
    }
    return inputs, labels

train_dataset = train_dataset_raw.map(preprocess_batch).cache()
val_dataset = val_dataset_raw.map(preprocess_batch).cache()
test_dataset = test_dataset_raw.map(preprocess_batch).cache()

# Callbacks
callbacks = [EarlyStopping(patience=15,
                           restore_best_weights=True,
                           monitor='val_loss'),
            ReduceLROnPlateau(monitor='val_loss',
                              factor=0.5,
                              patience=15,
                              min_lr=1e-6),
            ModelCheckpoint('best_logistic_credit_model_tf.keras',
                            monitor='val_loss',
                            save_best_only=True)
            ]

# Train
model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=50,
    callbacks=callbacks
)

# Evaluate
test_loss, test_acc = model.evaluate(test_dataset)
print(f"Test loss: {test_loss} - Test accuracy: {test_acc}")

# Save model
custom_objects = {
    'log1p_with_shape': log1p_with_shape,
    'cast_to_float_with_shape': cast_to_float_with_shape
}

model.save("logistic_credit_model_tf.keras")

Epoch 1/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 84ms/step - accuracy: 0.6770 - loss: 0.6262 - val_accuracy: 0.6600 - val_loss: 0.6637 - learning_rate: 0.0010
Epoch 2/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - accuracy: 0.7074 - loss: 0.6008 - val_accuracy: 0.6600 - val_loss: 0.6368 - learning_rate: 0.0010
Epoch 3/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - accuracy: 0.7094 - loss: 0.5812 - val_accuracy: 0.6600 - val_loss: 0.6222 - learning_rate: 0.0010
Epoch 4/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - accuracy: 0.7162 - loss: 0.5735 - val_accuracy: 0.6700 - val_loss: 0.6110 - learning_rate: 0.0010
Epoch 5/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - accuracy: 0.7318 - loss: 0.5537 - val_accuracy: 0.6800 - val_loss: 0.5961 - learning_rate: 0.0010
Epoch 6/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - 

In [None]:
### Augment class based code 
import tensorflow as tf
from silence_tensorflow import silence_tensorflow
silence_tensorflow()
from sklearn.datasets import fetch_openml
from tensorflow.keras.layers import Dense, Input, Embedding, Flatten, Concatenate, IntegerLookup, StringLookup
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint


@tf.keras.utils.register_keras_serializable(package="Custom", name="LogTransform")
class LogTransform(tf.keras.layers.Layer):
    def call(self, inputs):
        return tf.math.log1p(inputs)

    def get_config(self):  # Required for serialization
        return super().get_config()

@tf.keras.utils.register_keras_serializable(package="Custom", name="LogTransform")
class Standardize(tf.keras.layers.Layer):
    def call(self, inputs):
        return (inputs - tf.reduce_mean(inputs)) / tf.math.reduce_std(inputs)

    def get_config(self):  # Required for serialization
        return super().get_config()


class CreditDataPreprocessor:
    def __init__(self, discrete_features, categorical_features, continuous_features):
        self.discrete_features = discrete_features
        self.categorical_features = categorical_features
        self.continuous_features = continuous_features
        
        # Initialize encoders
        self.ordinal_encoders = {
            col: IntegerLookup(output_mode='int', num_oov_indices=1) 
            for col in discrete_features
        }
        self.categorical_encoders = {
            col: StringLookup(output_mode='int', num_oov_indices=1) 
            for col in categorical_features
        }
        
    def adapt(self, dataset):
        """Adapt all encoders to the data"""
        for batch in dataset:
            features, _ = batch
            for col in self.discrete_features:
                self.ordinal_encoders[col].adapt(features[col])
            for col in self.categorical_features:
                self.categorical_encoders[col].adapt(features[col])
    
    def preprocess_batch(self, features, labels):
        """Transform a batch of data"""
        inputs = {
            **{f"{col}_input": features[col] for col in self.continuous_features},
            **{f"{col}_input": tf.cast(features[col], tf.int32) for col in self.discrete_features},
            **{f"{col}_input": features[col] for col in self.categorical_features}
        }
        return inputs, labels
    
    def prepare_dataset(self, dataset):
        """Prepare a dataset for training"""
        return dataset.map(self.preprocess_batch).cache()

@tf.keras.utils.register_keras_serializable(package="Custom", name="DiscreteFeatureEncoder")
class DiscreteFeatureEncoder(tf.keras.layers.Layer):
    def __init__(self, encoder, **kwargs):
        super().__init__(**kwargs)
        self.encoder = encoder
    
    def build(self, input_shape):
        # Mark the layer as built
        super().build(input_shape)    
    
    def call(self, inputs):
        return tf.cast(self.encoder(inputs), tf.float32)

    def get_config(self):
        # Serialize the encoder along with other configurations
        config = super().get_config()
        config.update({
            "encoder": self.encoder.get_config()  # Serialize the encoder
        })
        return config

    @classmethod
    def from_config(cls, config):
        # Deserialize the encoder
        encoder_config = config.pop("encoder")
        encoder = IntegerLookup.from_config(encoder_config)  # Reconstruct the encoder
        return cls(encoder=encoder, **config)


class CreditRiskModel(tf.keras.Model):  # Inherit from tf.keras.Model
    def __init__(self, preprocessor, embedding_size=8, **kwargs):
        super().__init__(**kwargs)  # Ensure proper initialization
        self.preprocessor = preprocessor
        self.embedding_size = embedding_size
        self.model = self.build_model()  # Store Keras model

    def build_model(self):
        """Builds and returns a Keras model"""
        continuous_inputs = {
            col: Input(shape=(1,), dtype=tf.float32, name=f"{col}_input") 
            for col in self.preprocessor.continuous_features
        }
        discrete_inputs = {
            col: Input(shape=(1,), dtype=tf.int32, name=f"{col}_input") 
            for col in self.preprocessor.discrete_features
        }
        categorical_inputs = {
            col: Input(shape=(1,), dtype=tf.string, name=f"{col}_input") 
            for col in self.preprocessor.categorical_features
        }
        
        processed_features = self._process_features(
            continuous_inputs, discrete_inputs, categorical_inputs)
        
        x = Dense(128, activation='relu', kernel_initializer='he_normal')(processed_features)
        x = tf.keras.layers.Dropout(0.1)(x)
        x = Dense(64, activation='relu', kernel_initializer='he_normal')(x)
        x = tf.keras.layers.Dropout(0.1)(x)
        output = Dense(1, activation='sigmoid')(x)

        model_inputs = list(continuous_inputs.values()) + list(discrete_inputs.values()) + list(categorical_inputs.values())
        return Model(inputs=model_inputs, outputs=output)

    def call(self, inputs):
        """Forward pass for Keras"""
        return self.model(inputs)

    def get_config(self):
        """Required for serialization"""
        return {
            "embedding_size": self.embedding_size,
        }

    @classmethod
    def from_config(cls, config):
        """Load model from config"""
        return cls(**config)

    def save_model(self, path="logistic_credit_model_tf.keras"):
        """Save the model properly"""
        self.model.save(path)  # Save only the inner Keras model




    
    def _process_features(self, continuous_inputs, discrete_inputs, categorical_inputs):
        log_transform = LogTransform()
        standardize = Standardize()
    
        # Process continuous features
        processed_continuous = [
            standardize(log_transform(continuous_inputs[col]))
            for col in self.preprocessor.continuous_features
        ]
        
        processed_discrete = [
            DiscreteFeatureEncoder(self.preprocessor.ordinal_encoders[col])(discrete_inputs[col])
            for col in self.preprocessor.discrete_features
        ]

    
        # Process categorical features
        embedded_features = [
            Flatten()(Embedding(
                input_dim=self.preprocessor.categorical_encoders[col].vocabulary_size(),
                output_dim=self.embedding_size
            )(self.preprocessor.categorical_encoders[col](categorical_inputs[col])))
            for col in self.preprocessor.categorical_features
        ]
    
        return Concatenate()(processed_continuous + processed_discrete + embedded_features)

    



def create_tf_datasets(X, y, train_size=0.8, val_size=0.1, batch_size=128, seed=None):
    dataset = tf.data.Dataset.from_tensor_slices((dict(X), y))
    dataset = dataset.shuffle(buffer_size=len(X), seed=seed)
    n = len(X)
    train_size_n = int(n * train_size)
    val_size_n = int(n * val_size)
    
    train_dataset = dataset.take(train_size_n)
    val_dataset = dataset.skip(train_size_n).take(val_size_n)
    test_dataset = dataset.skip(train_size_n + val_size_n)
    
    return (
        train_dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE),
        val_dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE),
        test_dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)
    )

# Load and prepare data
credit_data = fetch_openml(name='credit-g', version=1, as_frame=True)
X = credit_data.data
y = credit_data.target.map({'good': 1, 'bad': 0}).values

# Define feature columns
discrete_features = ['installment_commitment', 'residence_since', 'num_dependents', 'existing_credits']
categorical_features = X.select_dtypes(exclude='number').columns.tolist()
continuous_features = ['duration', 'credit_amount']

# Create datasets
seed = 2025
train_dataset_raw, val_dataset_raw, test_dataset_raw = create_tf_datasets(X, y, seed=seed)

# Initialize and adapt preprocessor
preprocessor = CreditDataPreprocessor(
    discrete_features=discrete_features,
    categorical_features=categorical_features,
    continuous_features=continuous_features
)
preprocessor.adapt(train_dataset_raw)

# Prepare datasets
train_dataset = preprocessor.prepare_dataset(train_dataset_raw)
val_dataset = preprocessor.prepare_dataset(val_dataset_raw)
test_dataset = preprocessor.prepare_dataset(test_dataset_raw)

# Create and compile model
credit_model = CreditRiskModel(preprocessor)  
credit_model.model.compile(optimizer=Adam(learning_rate=0.001),
                           loss='binary_crossentropy',
                           metrics=['accuracy'])


callbacks = [
    EarlyStopping(patience=15, restore_best_weights=True, monitor='val_loss'),
    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=15, min_lr=1e-6),
    ModelCheckpoint('best_logistic_credit_model_tf.keras', monitor='val_loss', save_best_only=True)
]

credit_model.model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=200,
    callbacks=callbacks
)

# Evaluate
test_loss, test_acc = credit_model.model.evaluate(test_dataset)
print(f"Test loss: {test_loss} - Test accuracy: {test_acc}")
credit_model.save_model()  # Now works correctly






Epoch 1/200
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 288ms/step - accuracy: 0.6645 - loss: 0.6458 - val_accuracy: 0.6600 - val_loss: 0.6495 - learning_rate: 0.0010
Epoch 2/200
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step - accuracy: 0.7079 - loss: 0.6004 - val_accuracy: 0.6500 - val_loss: 0.6213 - learning_rate: 0.0010
Epoch 3/200
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step - accuracy: 0.7117 - loss: 0.5798 - val_accuracy: 0.6400 - val_loss: 0.6199 - learning_rate: 0.0010
Epoch 4/200
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step - accuracy: 0.7109 - loss: 0.5714 - val_accuracy: 0.6600 - val_loss: 0.6177 - learning_rate: 0.0010
Epoch 5/200
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step - accuracy: 0.7163 - loss: 0.5706 - val_accuracy: 0.6700 - val_loss: 0.5981 - learning_rate: 0.0010
Epoch 6/200
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/

In [8]:
from tensorflow.keras.models import load_model

# Define custom objects for deserialization
custom_objects = {
    "LogTransform": LogTransform,
    "Standardize": Standardize,
    "DiscreteFeatureEncoder": DiscreteFeatureEncoder
}

# Load the model
model = load_model("logistic_credit_model_tf.keras", custom_objects=custom_objects)

In [7]:
import tensorflow as tf
from tensorflow.keras.layers import Dense, Input, Embedding, Flatten, Concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.utils import get_custom_objects
from tensorflow.data import AUTOTUNE

class LogisticModel(tf.keras.Model):
    def __init__(self, continuous_features, discrete_features, categorical_features, 
                 ordinal_encoders, categorical_encoders, embedding_size=8):
        super().__init__()
        self.continuous_features = continuous_features
        self.discrete_features = discrete_features
        self.categorical_features = categorical_features
        self.ordinal_encoders = ordinal_encoders
        self.categorical_encoders = categorical_encoders
        self.embedding_size = embedding_size
        
        # Build model architecture
        self._build_model()
    
    def _build_model(self):
        # Inputs
        self.continuous_inputs = {col: Input(shape=(1,), dtype=tf.float32, name=f"{col}_input") for col in self.continuous_features}
        self.discrete_inputs = {col: Input(shape=(1,), dtype=tf.int32, name=f"{col}_input") for col in self.discrete_features}
        self.categorical_inputs = {col: Input(shape=(1,), dtype=tf.string, name=f"{col}_input") for col in self.categorical_features}
        
        # Continuous processing
        self.processed_continuous = [tf.keras.layers.Lambda(lambda x: tf.math.log1p(x))(self.continuous_inputs[col]) for col in self.continuous_features]
        
        # Discrete processing
        self.processed_discrete = [tf.keras.layers.Lambda(lambda x: tf.cast(self.ordinal_encoders[col](x), tf.float32))(self.discrete_inputs[col]) for col in self.discrete_features]
        
        # Categorical embeddings
        self.embedded_features = [
            Flatten()(Embedding(input_dim=self.categorical_encoders[col].vocabulary_size(), 
                                 output_dim=self.embedding_size)(self.categorical_encoders[col](self.categorical_inputs[col])))
            for col in self.categorical_features
        ]
        
        # Concatenate features
        self.all_features = Concatenate()(self.processed_continuous + self.processed_discrete + self.embedded_features)
        
        # Dense layers
        x = Dense(128, activation='relu', kernel_initializer='he_normal')(self.all_features)
        x = tf.keras.layers.Dropout(0.1)(x)
        x = Dense(64, activation='relu', kernel_initializer='he_normal')(x)
        x = tf.keras.layers.Dropout(0.1)(x)
        output = Dense(1, activation='sigmoid')(x)
        
        # Create model
        self.model = Model(inputs=list(self.continuous_inputs.values()) + 
                                 list(self.discrete_inputs.values()) + 
                                 list(self.categorical_inputs.values()), 
                           outputs=output)
        
    def call(self, inputs):
        return self.model(inputs)
    
    def get_config(self):
        return {"continuous_features": self.continuous_features,
                "discrete_features": self.discrete_features,
                "categorical_features": self.categorical_features}
    
    def compile_model(self, learning_rate=0.001):
        self.model.compile(optimizer=Adam(learning_rate=learning_rate), 
                           loss='binary_crossentropy', 
                           metrics=['accuracy'])
    
    def fit_model(self, train_dataset, val_dataset, epochs=50):
        callbacks = [
            EarlyStopping(patience=15, restore_best_weights=True, monitor='val_loss'),
            ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=15, min_lr=1e-6),
            ModelCheckpoint('best_logistic_credit_model_tf.keras', monitor='val_loss', save_best_only=True)
        ]
        return self.model.fit(train_dataset, validation_data=val_dataset, epochs=epochs, callbacks=callbacks)
    
    def evaluate_model(self, test_dataset):
        return self.model.evaluate(test_dataset)
    
    def save_model(self, path="logistic_credit_model_tf.keras"):
        self.model.save(path)


from sklearn.datasets import fetch_openml
import tensorflow as tf

# Load dataset
credit_data = fetch_openml(name='credit-g', version=1, as_frame=True)
X = credit_data.data
y = credit_data.target.map({'good': 1, 'bad': 0}).values

# Define feature columns
discrete_features = ['installment_commitment', 'residence_since', 'num_dependents', 'existing_credits']
categorical_features = X.select_dtypes(exclude='number').columns.tolist()
continuous_features = ['duration', 'credit_amount']

# Create TensorFlow datasets
def create_tf_datasets(X, y, train_size=0.8, val_size=0.1, batch_size=128, seed=None):
    dataset = tf.data.Dataset.from_tensor_slices((dict(X), y))
    dataset = dataset.shuffle(buffer_size=len(X), seed=seed)
    n = len(X)
    train_size = int(n * train_size)
    val_size = int(n * val_size)
    train_dataset = dataset.take(train_size)
    val_dataset = dataset.skip(train_size).take(val_size)
    test_dataset = dataset.skip(train_size + val_size)
    return (
        train_dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE),
        val_dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE),
        test_dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)
    )

seed = 2025
train_dataset_raw, val_dataset_raw, test_dataset_raw = create_tf_datasets(X, y, seed=seed)

# Adapt preprocessing layers
def adapt_preprocessing_layers(dataset):
    ordinal_encoders = {col: tf.keras.layers.IntegerLookup(output_mode='int', num_oov_indices=1) for col in discrete_features}
    categorical_encoders = {col: tf.keras.layers.StringLookup(output_mode='int', num_oov_indices=1) for col in categorical_features}

    for batch in dataset:
        features, _ = batch
        for col in discrete_features:
            ordinal_encoders[col].adapt(features[col])
        for col in categorical_features:
            categorical_encoders[col].adapt(features[col])

    return ordinal_encoders, categorical_encoders

ordinal_encoders, categorical_encoders = adapt_preprocessing_layers(train_dataset_raw)

# Preprocess dataset
def preprocess_batch(features, labels):
    inputs = {
        **{f"{col}_input": features[col] for col in continuous_features},
        **{f"{col}_input": tf.cast(features[col], tf.int32) for col in discrete_features},
        **{f"{col}_input": features[col] for col in categorical_features}
    }
    return inputs, labels

train_dataset = train_dataset_raw.map(preprocess_batch).cache()
val_dataset = val_dataset_raw.map(preprocess_batch).cache()
test_dataset = test_dataset_raw.map(preprocess_batch).cache()

# Initialize and train the model
model = LogisticModel(continuous_features, discrete_features, categorical_features, ordinal_encoders, categorical_encoders)
model.compile_model(learning_rate=0.001)
model.fit_model(train_dataset, val_dataset, epochs=200)

# Evaluate the model
test_loss, test_acc = model.evaluate_model(test_dataset)
print(f"Test loss: {test_loss} - Test accuracy: {test_acc}")

# Save the model
model.save_model("logistic_credit_model_tf.keras")



Epoch 1/200
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 191ms/step - accuracy: 0.6482 - loss: 0.6599 - val_accuracy: 0.6600 - val_loss: 0.6688 - learning_rate: 0.0010
Epoch 2/200
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step - accuracy: 0.6974 - loss: 0.6297 - val_accuracy: 0.6600 - val_loss: 0.6488 - learning_rate: 0.0010
Epoch 3/200
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step - accuracy: 0.6981 - loss: 0.6207 - val_accuracy: 0.6600 - val_loss: 0.6431 - learning_rate: 0.0010
Epoch 4/200
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step - accuracy: 0.6983 - loss: 0.6130 - val_accuracy: 0.6600 - val_loss: 0.6393 - learning_rate: 0.0010
Epoch 5/200
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step - accuracy: 0.6925 - loss: 0.6087 - val_accuracy: 0.6600 - val_loss: 0.6343 - learning_rate: 0.0010
Epoch 6/200
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/