In [1]:
import numpy as np
import pandas as pd

import tensorflow as tf

from tensorflow import feature_column
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from tensorflow.keras import regularizers
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ReduceLROnPlateau

from imblearn.over_sampling import SMOTE


Tutorial link: https://www.tensorflow.org/tutorials/structured_data/feature_columns

In [3]:
dataframe = pd.read_csv("social.csv")

In [4]:
dataframe.dropna(inplace=True)

In [5]:
dataframe.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19451 entries, 0 to 19450
Data columns (total 59 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   q14     19451 non-null  float64
 1   q15     19451 non-null  float64
 2   q16     19451 non-null  float64
 3   q18     19451 non-null  float64
 4   q21     19451 non-null  float64
 5   q22     19451 non-null  float64
 6   q23     19451 non-null  float64
 7   q24     19451 non-null  float64
 8   q25     19451 non-null  float64
 9   q45     19451 non-null  float64
 10  q86     19451 non-null  float64
 11  q88     19451 non-null  float64
 12  q89     19451 non-null  float64
 13  q90     19451 non-null  float64
 14  q91     19451 non-null  float64
 15  q99     19451 non-null  float64
 16  q100    19451 non-null  float64
 17  q102    19451 non-null  float64
 18  q103    19451 non-null  float64
 19  q105    19451 non-null  float64
 20  q8      19451 non-null  float64
 21  q9      19451 non-null  float64
 22

In [6]:
dataframe['target'] = dataframe['MB']


In [7]:
dataframe.head()

Unnamed: 0,q14,q15,q16,q18,q21,q22,q23,q24,q25,q45,...,q79,q80,q81,q82,q84,q85,q87,q92,MB,target
0,0.5,0.0,0.0,1.0,0.2,0.2,0.0,1.0,1.0,0.0,...,0.0,0.714286,0.0,0.0,0.0,0.666667,0.200328,0.2,1.0,1.0
1,0.0,0.0,0.142857,1.0,0.2,0.2,0.5,0.0,1.0,0.0,...,0.0,0.428571,0.0,0.0,0.5,0.333333,0.5,0.0,1.0,1.0
2,0.0,0.0,0.0,1.0,0.2,0.2,0.0,1.0,1.0,0.0,...,0.0,1.0,0.0,0.0,0.25,1.0,0.333333,0.0,1.0,1.0
3,0.5,0.0,0.0,1.0,0.2,0.2,0.0,1.0,1.0,1.0,...,0.0,1.0,0.0,0.0,0.5,0.5,0.333333,0.0,1.0,1.0
4,0.0,0.0,0.285714,1.0,0.2,0.0,0.0,1.0,1.0,0.142857,...,0.25,0.714286,0.0,0.0,0.5,0.666667,0.5,0.0,1.0,1.0


In [8]:
dataframe = dataframe.drop(['MB'], axis=1)

In [9]:
train, test = train_test_split(dataframe, test_size=0.2)
train, val = train_test_split(train, test_size=0.125)
print(len(train), 'train examples')
print(len(val), 'validation examples')
print(len(test), 'test examples')

13615 train examples
1945 validation examples
3891 test examples


In [10]:
def df_to_dataset(dataframe, shuffle=True, batch_size=32):
  dataframe = dataframe.copy()
  labels = dataframe.pop('target')
  ds = tf.data.Dataset.from_tensor_slices((dict(dataframe), labels))
  if shuffle:
    ds = ds.shuffle(buffer_size=len(dataframe))
  ds = ds.batch(batch_size)
  return ds

In [11]:
X_train = train.drop('target', axis=1)  
y_train = train['target']  

In [12]:
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

In [13]:
train_resampled = X_train_resampled.copy()
train_resampled['target'] = y_train_resampled

In [14]:
feature_columns = []

for header in ["q14", "q15", "q16", "q18", "q21", "q22", "q23", "q24", "q25", "q45", "q86", "q88", "q89", "q90", "q91", "q99", "q100", "q102", "q103", "q105", "q8", "q9", "q10", "q11", "q12", "q17",	"q19", "q26", "q27", "q28",	"q29", "q30", "q31", "q35", "q38", "q39", "q46", "q49", "q50", "q51", "q52", "q53", "q54", "q55",	"q58", "q59", "q60", "q61",	"q74", "q75", "q79", "q80", "q81", "q82", "q84", "q85", "q87", "q92"]:
    feature_columns.append(feature_column.numeric_column(header))


Instructions for updating:
Use Keras preprocessing layers instead, either directly or via the `tf.keras.utils.FeatureSpace` utility. Each of `tf.feature_column.*` has a functional equivalent in `tf.keras.layers` for feature preprocessing when training a Keras model.


In [15]:
feature_layer = tf.keras.layers.DenseFeatures(feature_columns)

In [16]:
batch_size = 32
train_ds = df_to_dataset(train_resampled, batch_size=batch_size)
val_ds = df_to_dataset(val, shuffle=False, batch_size=batch_size)
test_ds = df_to_dataset(test, shuffle=False, batch_size=batch_size)

In [17]:
model = tf.keras.Sequential([
    feature_layer,
    layers.Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.001)),
    layers.Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.001)),
    layers.Dropout(.3),
    layers.Dense(1)
])

In [18]:
from sklearn.utils.class_weight import compute_class_weight

classes = np.unique(train['target'])
class_weights = compute_class_weight(
    class_weight='balanced', 
    classes=classes, 
    y=train['target']
)
class_weight_dict = {i: class_weights[i] for i in range(len(classes))}
print(f"Class weights: {class_weight_dict}")

Class weights: {0: 83.01829268292683, 1: 0.5030296312717062}


In [19]:
def specificity(y_true, y_pred):
    y_pred = tf.round(tf.nn.sigmoid(y_pred))  
    true_negatives = tf.reduce_sum(tf.cast((y_true == 0) & (y_pred == 0), 'float'))
    false_positives = tf.reduce_sum(tf.cast((y_true == 0) & (y_pred == 1), 'float'))
    
    specificity = true_negatives / (true_negatives + false_positives + tf.keras.backend.epsilon())
    return specificity

In [20]:
def f1_score(y_true, y_pred):
    y_pred = tf.round(tf.nn.sigmoid(y_pred))  
    tp = tf.reduce_sum(tf.cast(y_true * y_pred, 'float'))
    fp = tf.reduce_sum(tf.cast((1 - y_true) * y_pred, 'float'))
    fn = tf.reduce_sum(tf.cast(y_true * (1 - y_pred), 'float'))

    precision = tp / (tp + fp + tf.keras.backend.epsilon())
    recall = tp / (tp + fn + tf.keras.backend.epsilon())
    f1 = 2 * (precision * recall) / (precision + recall + tf.keras.backend.epsilon())
    return f1

In [21]:
model.compile(optimizer='adam',
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=[
                  'accuracy',
                  tf.keras.metrics.AUC(name='auc'),
                  specificity,
                  tf.keras.metrics.Recall(name='sensitivity'),
                  f1_score
                      ]
             )

In [22]:
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

In [23]:
lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, min_lr=1e-6)


In [24]:
model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=50,
    callbacks=[early_stopping, lr_scheduler]
)

Epoch 1/50


2025-01-11 18:03:23.564548: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 7245 of 27066
2025-01-11 18:03:43.573833: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 8298 of 27066
2025-01-11 18:04:03.543927: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 8926 of 27066
2025-01-11 18:04:13.552328: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 9198 of 27066
2025-01-11 18:04:33.600758: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 9702 of 27066
2025-01-11 18:04:53.542395: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may tak

  3/846 [..............................] - ETA: 22s - loss: 0.8964 - accuracy: 0.4583 - auc: 0.5588 - specificity: 0.4296 - sensitivity: 0.0000e+00 - f1_score: 0.5664      

2025-01-11 18:13:19.602547: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:452] Shuffle buffer filled.


Epoch 2/50


2025-01-11 18:13:31.365818: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 8728 of 27066
2025-01-11 18:13:51.345724: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 9285 of 27066
2025-01-11 18:14:01.393846: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 9528 of 27066
2025-01-11 18:14:21.364184: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 10150 of 27066
2025-01-11 18:14:31.390042: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 10427 of 27066
2025-01-11 18:14:51.340308: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may t

 61/846 [=>............................] - ETA: 1s - loss: 0.0944 - accuracy: 0.9887 - auc: 0.9922 - specificity: 0.9979 - sensitivity: 0.9771 - f1_score: 0.9909     

2025-01-11 18:24:20.007071: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:452] Shuffle buffer filled.


Epoch 3/50


2025-01-11 18:24:31.170174: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 7464 of 27066
2025-01-11 18:24:41.176510: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 8492 of 27066
2025-01-11 18:24:51.188847: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 9876 of 27066
2025-01-11 18:25:11.202055: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 10450 of 27066
2025-01-11 18:25:31.192857: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 11034 of 27066
2025-01-11 18:25:51.185389: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may t

 21/846 [..............................] - ETA: 6s - loss: 0.0559 - accuracy: 0.9911 - auc: 0.9955 - specificity: 1.0000 - sensitivity: 0.9820 - f1_score: 0.9957   

2025-01-11 18:35:09.030697: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:452] Shuffle buffer filled.


Epoch 4/50


2025-01-11 18:35:20.273496: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 7045 of 27066
2025-01-11 18:35:40.339946: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 8836 of 27066
2025-01-11 18:36:00.340493: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 10372 of 27066
2025-01-11 18:36:20.278053: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 12143 of 27066
2025-01-11 18:36:30.303677: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 12426 of 27066
2025-01-11 18:36:50.275507: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may 

  1/846 [..............................] - ETA: 134:00:32 - loss: 0.0341 - accuracy: 1.0000 - auc: 1.0000 - specificity: 1.0000 - sensitivity: 1.0000 - f1_score: 1.0000

2025-01-11 18:44:41.030991: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:452] Shuffle buffer filled.


Epoch 5/50


2025-01-11 18:44:52.433598: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 7559 of 27066
2025-01-11 18:45:02.497352: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 8476 of 27066
2025-01-11 18:45:22.438781: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 9375 of 27066
2025-01-11 18:45:32.453634: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 9679 of 27066
2025-01-11 18:45:42.474736: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 10036 of 27066
2025-01-11 18:46:02.461418: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may ta

  4/846 [..............................] - ETA: 38s - loss: 0.0944 - accuracy: 0.9766 - auc: 0.9754 - specificity: 1.0000 - sensitivity: 0.9508 - f1_score: 0.9559      

2025-01-11 18:55:56.033101: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:452] Shuffle buffer filled.


Epoch 6/50


2025-01-11 18:56:07.472224: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 7013 of 27066
2025-01-11 18:56:27.414614: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 8487 of 27066
2025-01-11 18:56:37.447884: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 8934 of 27066
2025-01-11 18:56:57.417555: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 9599 of 27066
2025-01-11 18:57:17.408868: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 10149 of 27066
2025-01-11 18:57:27.441317: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may ta

  3/846 [..............................] - ETA: 1:00 - loss: 0.0223 - accuracy: 1.0000 - auc: 1.0000 - specificity: 1.0000 - sensitivity: 1.0000 - f1_score: 1.0000    

2025-01-11 19:06:11.505181: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:452] Shuffle buffer filled.


Epoch 7/50


2025-01-11 19:06:22.926453: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 7261 of 27066
2025-01-11 19:06:42.866948: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 9107 of 27066
2025-01-11 19:06:52.876087: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 9476 of 27066
2025-01-11 19:07:02.908989: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 9829 of 27066
2025-01-11 19:07:22.862292: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 10530 of 27066
2025-01-11 19:07:32.919345: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may ta

  2/846 [..............................] - ETA: 45s - loss: 0.0202 - accuracy: 1.0000 - auc: 1.0000 - specificity: 1.0000 - sensitivity: 1.0000 - f1_score: 1.0000      

2025-01-11 19:17:07.887091: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:452] Shuffle buffer filled.


Epoch 8/50


2025-01-11 19:17:19.337924: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 7711 of 27066
2025-01-11 19:17:29.362298: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 7970 of 27066
2025-01-11 19:17:49.365910: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 9058 of 27066
2025-01-11 19:18:09.338701: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 9620 of 27066
2025-01-11 19:18:19.378315: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 9869 of 27066
2025-01-11 19:18:39.356193: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may tak

111/846 [==>...........................] - ETA: 1s - loss: 0.0337 - accuracy: 0.9944 - auc: 0.9957 - specificity: 1.0000 - sensitivity: 0.9886 - f1_score: 0.9959     

2025-01-11 19:28:12.858829: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:452] Shuffle buffer filled.


Epoch 9/50


2025-01-11 19:28:23.955035: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 9015 of 27066
2025-01-11 19:28:43.957349: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 9959 of 27066
2025-01-11 19:29:03.951168: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 10648 of 27066
2025-01-11 19:29:13.974248: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 11026 of 27066
2025-01-11 19:29:23.999399: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 11376 of 27066
2025-01-11 19:29:43.970773: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may 

  1/846 [..............................] - ETA: 123:24:44 - loss: 0.0174 - accuracy: 1.0000 - auc: 1.0000 - specificity: 1.0000 - sensitivity: 1.0000 - f1_score: 1.0000

2025-01-11 19:36:59.588388: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:452] Shuffle buffer filled.


Epoch 10/50


2025-01-11 19:37:10.996665: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 8165 of 27066
2025-01-11 19:37:30.960681: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 9260 of 27066
2025-01-11 19:37:50.996645: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 10112 of 27066
2025-01-11 19:38:10.962408: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 10699 of 27066
2025-01-11 19:38:20.996961: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 10995 of 27066
2025-01-11 19:38:41.005103: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may 

  3/846 [..............................] - ETA: 29s - loss: 0.0169 - accuracy: 1.0000 - auc: 1.0000 - specificity: 1.0000 - sensitivity: 1.0000 - f1_score: 1.0000      

2025-01-11 19:47:35.999458: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:452] Shuffle buffer filled.


Epoch 11/50


2025-01-11 19:47:47.276044: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 9691 of 27066
2025-01-11 19:47:57.295064: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 10307 of 27066
2025-01-11 19:48:17.302023: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 11362 of 27066
2025-01-11 19:48:37.299583: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 12280 of 27066
2025-01-11 19:48:57.261518: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 12922 of 27066
2025-01-11 19:49:07.276097: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may

  4/846 [..............................] - ETA: 25s - loss: 0.0191 - accuracy: 1.0000 - auc: 1.0000 - specificity: 1.0000 - sensitivity: 1.0000 - f1_score: 1.0000      

2025-01-11 19:57:19.457567: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:452] Shuffle buffer filled.


Epoch 12/50


2025-01-11 19:57:30.855130: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 939 of 27066
2025-01-11 19:57:50.811014: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 9034 of 27066
2025-01-11 19:58:00.847254: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 9389 of 27066
2025-01-11 19:58:20.811179: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 10835 of 27066
2025-01-11 19:58:30.847998: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may take a while): 11250 of 27066
2025-01-11 19:58:50.814008: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:1: Filling up shuffle buffer (this may ta

 82/846 [=>............................] - ETA: 1s - loss: 0.0190 - accuracy: 0.9977 - auc: 0.9989 - specificity: 1.0000 - sensitivity: 0.9955 - f1_score: 0.9988    

2025-01-11 20:04:54.853351: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:452] Shuffle buffer filled.




<keras.src.callbacks.History at 0x176bed210>

In [31]:
model.fit(
    test_ds,
    epochs=12,
)

Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12


<keras.src.callbacks.History at 0x28be2da10>

In [33]:
weights = model.get_weights()


In [35]:
first_layer_weights = weights[0]  

weight_df = pd.DataFrame(
    first_layer_weights[:19],  
    columns=[f'Hidden_Node_{i}' for i in range(first_layer_weights.shape[1])], 
    index=X_train.columns[:19]  
)

In [37]:
feature_importance = weight_df.abs().sum(axis=1).sort_values(ascending=False)

print(feature_importance)

q102    5.379918
q15     4.993237
q100    4.742386
q21     4.442135
q16     4.369733
q88     4.270064
q18     4.070151
q103    3.605219
q14     3.320156
q99     2.959666
q91     2.915334
q45     2.801151
q22     2.359767
q89     1.677788
q24     1.463227
q23     1.182893
q86     1.172212
q90     0.793644
q25     0.407419
dtype: float32


In [40]:
weight_mb = pd.DataFrame(first_layer_weights, columns=[f'Hidden_Node_{i}' for i in range(first_layer_weights.shape[1])], index=X_train.columns)

mb_importance = weight_mb.abs().sum(axis=1).sort_values(ascending=False)
print(mb_importance)

q80     6.828224
q55     5.566679
q102    5.379918
q15     4.993237
q58     4.750099
q100    4.742386
q92     4.455136
q21     4.442135
q53     4.414062
q16     4.369733
q88     4.270064
q18     4.070151
q74     3.762798
q54     3.727466
q103    3.605219
q61     3.596669
q12     3.409683
q81     3.399585
q14     3.320156
q99     2.959666
q91     2.915334
q45     2.801151
q85     2.700571
q59     2.571386
q11     2.417346
q22     2.359767
q26     2.350342
q105    2.331945
q27     2.291080
q50     2.276624
q51     2.242931
q60     2.128657
q82     2.120507
q79     2.119487
q52     2.035435
q28     1.712253
q89     1.677788
q24     1.463227
q75     1.396846
q8      1.372790
q84     1.345930
q30     1.186552
q23     1.182893
q86     1.172212
q19     1.092819
q90     0.793644
q46     0.725277
q31     0.696386
q49     0.528589
q29     0.415034
q25     0.407419
q10     0.340193
q9      0.217782
q87     0.118342
q38     0.081938
q39     0.032714
q35     0.009105
q17     0.009091
dtype: float32