In [2]:
!pip install -r requirements.txt

Successfully installed Keras-2.4.3 absl-py-0.11.0 astunparse-1.6.3 flatbuffers-1.12 gast-0.3.3 google-auth-oauthlib-0.4.2 google-pasta-0.2.0 grpcio-1.32.0 h5py-2.10.0 keras-preprocessing-1.1.2 numpy-1.19.5 opt-einsum-3.3.0 protobuf-3.14.0 six-1.15.0 sklearn-0.0 tensorboard-2.4.0 tensorboard-plugin-wit-1.7.0 tensorflow-estimator-2.4.0 tensorflow-gpu-2.4.0 wheel-0.36.2 wrapt-1.12.1


In [3]:
# Shell escape: print the NVIDIA driver's report for this machine
# (driver and CUDA version, per-GPU memory usage and utilisation, running processes).
!nvidia-smi

Sat Jan  9 17:49:38 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 418.87.00    Driver Version: 418.87.00    CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla M60           On   | 00000000:00:1E.0 Off |                    0 |
| N/A   36C    P8    22W / 150W |      0MiB /  7618MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|  No ru

In [4]:
from sklearn.model_selection import train_test_split
from data_generator import TripletGenerator
from model import create_model
import tensorflow as tf
import pandas as pd
import random
# Show every device TensorFlow can use from this kernel.
print(tf.config.list_physical_devices())
# Warn (without aborting) when no GPU is visible, because training would
# otherwise fall back to the CPU without any notice.
if not tf.config.list_physical_devices('GPU'):
    print('WARNING: TensorFlow sees no GPU; training will run on the CPU. '
          'Check that the installed CUDA/cuDNN versions match this TensorFlow build.')

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')]


In [5]:
# Training configs

# --- Data selection ---
# Rows whose mouse track has fewer entries than this are filtered out.
MIN_MOUSE_TRACK_LEN = 100
# Number of users (cookies) randomly sampled for the experiment.
N_USERS_TO_TRAIN = 100

# --- Triplet generation (forwarded to TripletGenerator) ---
# Also used as the minimum number of rows a cookie needs to be eligible.
POSITIVES_PER_ANCHOR = 10
NEGATIVES_PER_ANCHOR = 10
# Passed as `pad_size` and used as the first dimension of the model input shape.
PAD_SIZE = 200
# When True the model input has one feature fewer (3 - DROP_TIME_LINE).
DROP_TIME_LINE = False

# --- Model / optimisation ---
# Passed to create_model as `embedding_size`.
EMBEDDING_SIZE = 128
# Number of epochs passed to model.fit.
TRAIN_EPOCHS = 5

In [4]:
# Load data
# One seed drives both the user sampling and the train/test split.
RANDOM_SEED = 420

# NOTE(security): unpickling can execute arbitrary code -- only load this file
# when it comes from a trusted source.
df = pd.read_pickle('./sw_139_data.pickle')
# Keep only rows whose mouse track has at least MIN_MOUSE_TRACK_LEN entries.
df = df[df.mouse_track.apply(len) >= MIN_MOUSE_TRACK_LEN]

# Filter users to train on
# A cookie is eligible when it has at least POSITIVES_PER_ANCHOR remaining rows.
cookie_counts = df.cookie.value_counts()
eligible_cookies = list(cookie_counts[cookie_counts >= POSITIVES_PER_ANCHOR].keys())
if len(eligible_cookies) < N_USERS_TO_TRAIN:
    # random.sample would raise ValueError here as well, but with a message
    # that does not say which configuration value is responsible.
    raise ValueError(
        f'Only {len(eligible_cookies)} users have at least {POSITIVES_PER_ANCHOR} rows; '
        f'cannot sample N_USERS_TO_TRAIN={N_USERS_TO_TRAIN} of them.'
    )
# Seed the global `random` module so the sampled users are the same on every run.
random.seed(RANDOM_SEED)
cookies = random.sample(eligible_cookies, k=N_USERS_TO_TRAIN)

# Dataset split
# Row-level 80/20 split over the selected users' rows.
df = df[df.cookie.isin(cookies)]
train_df, test_df = train_test_split(df, test_size=0.2, random_state=RANDOM_SEED)
# Displayed as the cell output: (number of training rows, number of test rows).
train_df.shape[0], test_df.shape[0]

(6447, 3)

In [6]:
# Data generators
# Batch size handed to both the training and the validation generator.
BATCH_SIZE = 32

# One generator factory, configured from the training constants, serves both splits.
TG = TripletGenerator(pad_size=PAD_SIZE,
                      positives_per_anchor=POSITIVES_PER_ANCHOR,
                      negatives_per_anchor=NEGATIVES_PER_ANCHOR,
                      drop_time_line=DROP_TIME_LINE)
# Each call yields a pair that is used further down as (generator, number of
# batches): the second element feeds steps_per_epoch / validation_steps in model.fit.
train_triplet_generator, train_n_batches = TG.create_data_generator(train_df, batch_size=BATCH_SIZE)
test_triplet_generator, test_n_batches = TG.create_data_generator(test_df, batch_size=BATCH_SIZE)

In [7]:
# Model training
# Input is (PAD_SIZE steps, features per step): 3 features normally,
# 2 when DROP_TIME_LINE is set.
model = create_model(
    embedding_size=EMBEDDING_SIZE,
    input_shape=(PAD_SIZE, 2 if DROP_TIME_LINE else 3),
)
# Print the summary of the layer at index 3 -- per the captured model summary
# this is the nested "model" layer that follows the three input layers.
model.layers[3].summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_anchor (InputLayer)       [(None, 200, 3)]     0                                            
__________________________________________________________________________________________________
input_positive (InputLayer)     [(None, 200, 3)]     0                                            
__________________________________________________________________________________________________
input_negative (InputLayer)     [(None, 200, 3)]     0                                            
__________________________________________________________________________________________________
model (Functional)              (None, 128)          25728       input_anchor[0][0]               
                                                                 input_positive[0][0]       

In [8]:
# Callbacks invoked by Keras during training.
my_callbacks = [
    # Uncomment to also save a model checkpoint after every epoch:
    # tf.keras.callbacks.ModelCheckpoint(filepath='model.{epoch:02d}-{val_loss:.2f}.h5'),
    tf.keras.callbacks.TensorBoard(log_dir='./logs')  # writes TensorBoard logs under ./logs
]
# Keep the History object returned by fit() so the per-epoch training and
# validation metrics remain available to later cells instead of being discarded.
# steps_per_epoch / validation_steps are the batch counts returned alongside
# the two generators.
history = model.fit(x=train_triplet_generator, steps_per_epoch=train_n_batches,
                    validation_data=test_triplet_generator, validation_steps=test_n_batches,
                    epochs=TRAIN_EPOCHS, callbacks=my_callbacks)

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 200, 3)]          0         
_________________________________________________________________
lstm (LSTM)                  (None, 64)                17408     
_________________________________________________________________
dense (Dense)                (None, 128)               8320      
Total params: 25,728
Trainable params: 25,728
Non-trainable params: 0
_________________________________________________________________
