In [17]:
from sklearn.model_selection import train_test_split
from data_generator import TripletGenerator
from model import create_model
import pandas as pd
import random

MIN_MOUS_TRACK_LEN = 20
N_USERS_TO_TRAIN = 1000
EMBEDDING_SIZE = 128
PAD_SIZE = 200
POSITIVES_PER_ANCHOR = 10
NEGATIVES_PER_ANCHOR = 10

random.seed(420)

In [None]:
import tensorflow as tf
tf.config.list_physical_devices('GPU')

In [28]:
df = pd.read_pickle('../sw-analysis/out/sw_139_data.pickle')
df = df[df.mouse_track.apply(len) >= MIN_MOUS_TRACK_LEN]
cookies = df.cookie.sample(n=N_USERS_TO_TRAIN, random_state=420)
df = df[df.cookie.isin(cookies)]
df.shape

(10941, 3)

In [29]:
train_df, test_df = train_test_split(df, test_size=0.2, random_state=420)
train_df.shape[0], test_df.shape[0]

(8752, 2189)

In [30]:
TG = TripletGenerator(pad_size=PAD_SIZE, 
                      positives_per_anchor=POSITIVES_PER_ANCHOR, 
                      negatives_per_anchor=NEGATIVES_PER_ANCHOR)

train_triplet_generator, train_n_batches = TG.create_data_generator(train_df, batch_size=32)
test_triplet_generator, test_n_batches = TG.create_data_generator(test_df, batch_size=32)

In [None]:
model = create_model(input_shape=(PAD_SIZE, 3), embedding_size=EMBEDDING_SIZE)
model.fit(x=train_triplet_generator, steps_per_epoch=train_n_batches,
          validation_data=test_triplet_generator, validation_steps=test_n_batches,
          epochs=1)