In [1]:
!pip install -r requirements.txt



In [2]:
from sklearn.model_selection import train_test_split
from data_generator import TripletGenerator
from model import create_model
import tensorflow as tf
import pandas as pd
import random

MIN_MOUS_TRACK_LEN = 20
N_USERS_TO_TRAIN = 500
EMBEDDING_SIZE = 128
PAD_SIZE = 200
POSITIVES_PER_ANCHOR = 10
NEGATIVES_PER_ANCHOR = 10

random.seed(420)

In [3]:
tf.config.list_physical_devices()

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')]

In [4]:
df = pd.read_pickle('../sw-analysis/out/sw_139_data.pickle')
df = df[df.mouse_track.apply(len) >= MIN_MOUS_TRACK_LEN]
cookies = df.cookie.sample(n=N_USERS_TO_TRAIN, random_state=420)
df = df[df.cookie.isin(cookies)]
df.shape

(6447, 3)

In [5]:
train_df, test_df = train_test_split(df, test_size=0.2, random_state=420)
train_df.shape[0], test_df.shape[0]

(5157, 1290)

In [6]:
TG = TripletGenerator(pad_size=PAD_SIZE, 
                      positives_per_anchor=POSITIVES_PER_ANCHOR, 
                      negatives_per_anchor=NEGATIVES_PER_ANCHOR)

train_triplet_generator, train_n_batches = TG.create_data_generator(train_df, batch_size=32)
test_triplet_generator, test_n_batches = TG.create_data_generator(test_df, batch_size=32)

In [7]:
model = create_model(input_shape=(PAD_SIZE, 3), embedding_size=EMBEDDING_SIZE)
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_anchor (InputLayer)       [(None, 200, 3)]     0                                            
__________________________________________________________________________________________________
input_positive (InputLayer)     [(None, 200, 3)]     0                                            
__________________________________________________________________________________________________
input_negative (InputLayer)     [(None, 200, 3)]     0                                            
__________________________________________________________________________________________________
model (Functional)              (None, 128)          25728       input_anchor[0][0]               
                                                                 input_positive[0][0]       

In [8]:
model.layers[3].summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 200, 3)]          0         
_________________________________________________________________
lstm (LSTM)                  (None, 64)                17408     
_________________________________________________________________
dense (Dense)                (None, 128)               8320      
Total params: 25,728
Trainable params: 25,728
Non-trainable params: 0
_________________________________________________________________


In [None]:
model.fit(x=train_triplet_generator, steps_per_epoch=train_n_batches,
          validation_data=test_triplet_generator, validation_steps=test_n_batches,
          epochs=3)