In [None]:
import tensorflow as tf
import pandas as pd
import numpy as np
print("TensorFlow version:", tf.__version__)

from tensorflow.keras.layers import Dense, Flatten, Conv2D
from tensorflow.keras import Model


In [26]:
# download and import dataset from Smartphone Rabbit Hole paper
csv_file = tf.keras.utils.get_file('rabbithole.pickle', 'https://raw.githubusercontent.com/mimuc/mobilehci23-mobile-phone-rabbit-hole/main/data/smartphone_sessions_with_features.pickle')
df_full = pd.read_pickle(csv_file)
df_full.head()
df_full.dtypes

session_id                             object
count                                   int64
studyID                                object
session_length                timedelta64[ns]
timestamp_1                    datetime64[ns]
                                   ...       
f_clicks_other                        float64
f_app_count_other                     float64
f_app_time_other                      float64
f_app_category_count_other            float64
f_app_category_time_other             float64
Length: 179, dtype: object

In [44]:
# filter for rows where target label exists
df = df_full[df_full['f_esm_regret'] != '']


# feature selection
numeric_feature_names = ['f_esm_regret']
for c in df.columns:
    if ("f_app_category" in c):
        numeric_feature_names.append(c)
        
# make target feature binary
df['f_esm_regret'] = df['f_esm_regret'].astype(float).astype(int).apply(lambda x: 1 if x>4 else 0)


df = df[numeric_feature_names]
df.head()


# dataset split
TRAIN_SPLIT=0.8
VAL_SPLIT=0.1
TEST_SPLIT=0.1
assert (TRAIN_SPLIT + VAL_SPLIT + TEST_SPLIT) == 1
ds_size = len(df)


df_sample = df.sample(frac=1, random_state=12)
indices_or_sections = [int(TRAIN_SPLIT * len(df)), int((1 - VAL_SPLIT) * len(df))]
    
df_train, df_val, df_test = np.split(df_sample, indices_or_sections)
    
# take out target feature    
y_train = df_train.pop('f_esm_regret')
y_val = df_val.pop('f_esm_regret')
y_test = df_test.pop('f_esm_regret')
x_train = df_train
x_val = df_val
x_test = df_test


tf.convert_to_tensor(x_train)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['f_esm_regret'] = df['f_esm_regret'].astype(float).astype(int).apply(lambda x: 1 if x>4 else 0)
  return bound(*args, **kwds)


<tf.Tensor: shape=(2132, 77), dtype=float64, numpy=
array([[0.000e+00, 0.000e+00, 0.000e+00, ..., 0.000e+00, 0.000e+00,
        0.000e+00],
       [0.000e+00, 0.000e+00, 0.000e+00, ..., 0.000e+00, 0.000e+00,
        0.000e+00],
       [1.000e+00, 7.593e+04, 1.000e+00, ..., 0.000e+00, 0.000e+00,
        0.000e+00],
       ...,
       [0.000e+00, 0.000e+00, 0.000e+00, ..., 0.000e+00, 0.000e+00,
        0.000e+00],
       [0.000e+00, 0.000e+00, 6.000e+00, ..., 0.000e+00, 0.000e+00,
        0.000e+00],
       [1.000e+00, 1.535e+04, 1.000e+00, ..., 0.000e+00, 0.000e+00,
        0.000e+00]])>

In [45]:
# normalize data
normalizer = tf.keras.layers.Normalization(axis=-1)
normalizer.adapt(x_train)
normalizer.adapt(x_val)
normalizer.adapt(x_test)
normalizer(x_train.iloc[:3])


<tf.Tensor: shape=(3, 77), dtype=float32, numpy=
array([[-3.36975843e-01, -2.05045283e-01, -5.00157416e-01,
        -3.23191494e-01, -1.56712189e-01, -1.06720641e-01,
        -2.72429466e-01, -1.26599446e-01, -4.20319885e-01,
        -7.28201047e-02, -3.07369351e-01, -3.11384618e-01,
        -2.21139610e-01, -1.53691798e-01, -1.88300446e-01,
        -1.44714653e-01, -1.16436601e-01, -7.55944923e-02,
        -2.88741440e-01, -1.08906791e-01, -1.79489538e-01,
        -1.17039204e-01, -1.30930752e-01, -1.04411893e-01,
        -1.91545412e-01, -7.50595257e-02, -1.76503405e-01,
        -1.35102376e-01, -1.42766118e-01, -1.12518221e-01,
        -1.11355655e-01, -1.04014173e-01, -6.13139346e-02,
        -6.13139346e-02,  0.00000000e+00,  0.00000000e+00,
        -2.21139610e-01, -7.76621923e-02, -1.43673941e-01,
        -9.88566503e-02, -1.79489523e-01, -1.54224232e-01,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00, -1.59747228e-01, -1.28955692e-01,
       

In [49]:
def get_basic_model():
  model = tf.keras.Sequential([
    normalizer,
    tf.keras.layers.Dense(10, activation='relu'),
    tf.keras.layers.Dense(10, activation='relu'),
    tf.keras.layers.Dense(1)
  ])

  model.compile(optimizer='adam',
                loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                metrics=['accuracy'])
  return model


model = get_basic_model()
model.fit(x_train, y_train, epochs=15, batch_size=2, validation_data=(x_val, y_val))


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.src.callbacks.History at 0x12074787340>

In [50]:
# test actual performance
model.evaluate(x_test,  y_test, verbose=2)


9/9 - 0s - loss: 0.2236 - accuracy: 0.9476 - 40ms/epoch - 4ms/step


[0.22364449501037598, 0.9475655555725098]