In [10]:
import pandas as pd
from tqdm import tqdm

import random
import datetime as dt

import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F

In [2]:
df = pd.read_csv('actions.csv', index_col=0)
print(df.shape)
df.head()

  mask |= (ar1 == a)


(6188914, 6)


Unnamed: 0,user_id,created_at,event_time,action,theta,label
0,14,2020-09-21 20:57:06.025613,2020-09-14 18:56:54.933907,session,,0.0
1,14,2020-09-21 20:57:06.025613,2020-09-14 18:56:59.276285,paid_feed_view,22000.0,0.0
2,14,2020-09-21 20:57:06.025613,2020-09-14 18:56:59.276285,paid_feed_view,19000.0,0.0
3,14,2020-09-21 20:57:06.025613,2020-09-14 18:56:59.276285,paid_feed_view,0.0,0.0
4,14,2020-09-21 20:57:06.025613,2020-09-14 18:57:00.782144,non_paid_feed_view,,0.0


In [3]:
df = df[['user_id', 'event_time', 'action']]

In [4]:
idxs_drop = []
user_list = []
for idx, row in tqdm(df[df.action == 'referral_made'].iterrows()):
    user_id = row.user_id
    if user_id not in user_list:
        user_list.append(user_id)
    else:
        idxs_drop.append(idx)

12564it [00:01, 8774.35it/s]


In [5]:
df = df[~df.index.isin(idxs_drop)]
del idxs_drop
df.reset_index(drop=True, inplace=True)
df.shape

(6180271, 3)

In [6]:
df.sort_values(by=['user_id', 'event_time'], inplace=True);
df.reset_index(drop=True, inplace=True)

In [7]:
SOS_token = 0
EOS_token = 1


class Actions:
    def __init__(self):
        self.action2index = {}
        self.action2count = {}
        self.index2action = {0: "SOS", 1: "EOS"}
        self.n_actions = 2  # Count SOS and EOS

    def addSequence(self, sentence):
        for action in sentence:
            self.addAction(action)

    def addAction(self, action):
        if action not in self.action2index:
            self.action2index[action] = self.n_actions
            self.action2count[action] = 1
            self.index2action[self.n_actions] = action
            self.n_actions += 1
        else:
            self.action2count[action] += 1

In [8]:
def get_sequences():
    df_dict = df.to_dict()

    user_idxs = {}
    for k, v in df_dict['user_id'].items():
        user_idxs[v] = user_idxs.get(v, []) + [k]

    sequences = dict()

    for user_id, idxs in tqdm(user_idxs.items()):
        actions = [df_dict['action'][idx] for idx in idxs]
        taus = [df_dict['event_time'][idx] for idx in idxs]
        taus = [dt.datetime.timestamp(dt.datetime.strptime(t, '%Y-%m-%d %H:%M:%S.%f')) for t in taus]
        taus = [t / max(taus) for t in taus]
        sequences[user_id] = list(zip(actions, taus))
        
    return sequences

In [11]:
sequences = get_sequences()

100%|██████████| 108646/108646 [02:21<00:00, 766.40it/s]


In [200]:
from collections import Counter
Counter([len(seq) for _, seq in sequences.items()]).most_common()

[(1, 23569),
 (2, 6988),
 (3, 3283),
 (4, 2165),
 (5, 2110),
 (6, 1733),
 (7, 1584),
 (8, 1534),
 (9, 1387),
 (11, 1338),
 (10, 1315),
 (12, 1246),
 (13, 1185),
 (14, 1136),
 (15, 1132),
 (17, 1055),
 (18, 1016),
 (19, 1009),
 (16, 1001),
 (20, 949),
 (22, 890),
 (23, 869),
 (24, 865),
 (21, 860),
 (25, 824),
 (27, 823),
 (26, 809),
 (32, 764),
 (29, 759),
 (28, 759),
 (31, 728),
 (30, 710),
 (33, 700),
 (34, 662),
 (38, 651),
 (37, 643),
 (36, 613),
 (35, 603),
 (39, 580),
 (41, 578),
 (43, 550),
 (40, 537),
 (42, 531),
 (46, 522),
 (44, 511),
 (47, 500),
 (45, 485),
 (51, 470),
 (48, 461),
 (52, 458),
 (49, 445),
 (54, 435),
 (53, 435),
 (55, 435),
 (50, 420),
 (57, 416),
 (56, 396),
 (59, 382),
 (62, 382),
 (63, 362),
 (58, 359),
 (65, 358),
 (61, 355),
 (64, 348),
 (60, 345),
 (66, 340),
 (67, 328),
 (72, 320),
 (69, 320),
 (76, 316),
 (70, 309),
 (73, 299),
 (68, 298),
 (74, 297),
 (71, 288),
 (75, 278),
 (78, 263),
 (79, 259),
 (77, 257),
 (84, 253),
 (85, 248),
 (82, 248),
 (92,

In [35]:
actions_data = Actions()

for _, seq in tqdm(sequences.items()):
    actions_data.addSequence([s[0] for s in seq])
    
actions_data.n_actions

100%|██████████| 108646/108646 [00:03<00:00, 33581.04it/s]


27

In [42]:
device = 'cpu'

def indexesFromSentence(sequence):
    return [(actions_data.action2index[action], tau) for action, tau in sequence]


def tensorFromSentence(sequence):
    results = indexesFromSentence(sequence)
    results.append((EOS_token, 1))
    indexes = [i[0] for i in results]
    taus = [i[1] for i in results]
    return torch.tensor(indexes, dtype=torch.long, device=device).view(-1, 1), taus


def tensorsFromPair(action):
    input_tensor, tau = tensorFromSentence(action)
    #target_tensor = tensorFromSentence(action)
    target_tensor = input_tensor[:]
    return (input_tensor, target_tensor, tau)

tensorsFromPair([('session', 0), ('paid_story_view', 1)])

(tensor([[2],
         [6],
         [1]]),
 tensor([[2],
         [6],
         [1]]),
 [0, 1, 1])

In [27]:
F.sigmoid(torch.ones(2))

tensor([0.7311, 0.7311])

In [94]:
class RNNTasa(nn.Module):
    def calc_proba(self, tau, inputs):
        #print(self.theta)
        #print(self.mu)
        #print()
        #print(inputs[0].item())
        #assert 1 == 2
        #ix = actions_data.action2index(inputs)
        if len(inputs.shape) == 1:
            ix = inputs[0].item()
        else:
            ix = inputs.item()
        #print(self.mu[ix], tau)
        #print(F.sigmoid(self.theta[ix] + self.mu[ix] * tau))
        return F.sigmoid(self.theta[ix] + self.mu[ix] * tau)


class EncoderTASA(RNNTasa):
    def __init__(self, actions_count, hidden_size):
        super(EncoderTASA, self).__init__()
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(actions_count, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)
        
        self.theta = nn.Parameter(torch.zeros(actions_count))
        self.mu = nn.Parameter(torch.zeros(actions_count))
    
    def forward(self, inputs, hidden, tau):
        embedded = self.embedding(inputs).view(1, 1, -1)
        output = embedded
        prob = self.calc_proba(tau, inputs)
        output *= prob
        output, hidden = self.gru(output, hidden)
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)


class DecoderTASA(RNNTasa):
    def __init__(self, hidden_size, actions_count):
        super(DecoderTASA, self).__init__()
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(actions_count, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)
        self.lin = nn.Linear(hidden_size, actions_count)
        self.softmax = nn.LogSoftmax(dim=1)
        
        self.theta = nn.Parameter(torch.zeros(actions_count))
        self.mu = nn.Parameter(torch.zeros(actions_count))

    def forward(self, inputs, hidden, tau):
        output = self.embedding(inputs).view(1, 1, -1)
        output = F.relu(output)
        prob = self.calc_proba(tau, inputs)
        output *= prob
        output, hidden = self.gru(output, hidden)
        output = self.softmax(self.lin(output[0]))
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)


In [194]:
teacher_forcing_ratio = 0.5
MAX_LENGTH = max(len(seq) for _, seq in sequences.items()) + 2


def train(input_tensor, target_tensor, taus, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=MAX_LENGTH):
    encoder_hidden = encoder.initHidden()

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)

    encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

    loss = 0
    accuracy = 0

    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(
            input_tensor[ei], encoder_hidden, taus[ei])
        encoder_outputs[ei] = encoder_output[0, 0]

    decoder_input = torch.tensor([[SOS_token]], device=device)

    decoder_hidden = encoder_hidden

    use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False

    if use_teacher_forcing:
        # Teacher forcing: Feed the target as the next input
        for di in range(target_length):
            decoder_output, decoder_hidden = decoder(
                decoder_input, decoder_hidden, taus[di])
            loss += criterion(decoder_output, target_tensor[di])
            #print(decoder_output[0].argmax().numpy())
            #print(target_tensor[di][0].argmax().numpy())
            #assert 1 == 2
            accuracy += decoder_output[0].argmax().numpy() == target_tensor[di][0].numpy()
            #print(decoder_output[0].argmax().numpy(), target_tensor[di][0].numpy())
            decoder_input = target_tensor[di]  # Teacher forcing

    else:
        # Without teacher forcing: use its own predictions as the next input
        for di in range(target_length):
            decoder_output, decoder_hidden = decoder(
                decoder_input, decoder_hidden, taus[di])
            topv, topi = decoder_output.topk(1)
            decoder_input = topi.squeeze().detach()  # detach from history as input

            loss += criterion(decoder_output, target_tensor[di])
            accuracy += decoder_output[0].argmax().numpy() == target_tensor[di][0].numpy()
            decoder_input = target_tensor[di]  # Teacher forcing

            if decoder_input.item() == EOS_token:
                break

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()
    
    if pd.isna(loss.item() / target_length):
        print(loss.item(), target_length)
        print(input_tensor, target_tensor, taus)

    return loss.item() / target_length, accuracy / target_length

In [195]:
def trainIters(encoder, decoder, n_iters, learning_rate=0.01):
    random.seed(0)

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    #training_pairs = [tensorsFromPair(random.choice(pairs))
    #                  for i in range(n_iters)]
    training_pairs = [tensorsFromPair(seq) for _, seq in sequences.items()]
    random.shuffle(training_pairs)
    criterion = nn.NLLLoss()
    epochs = 10
    
    for epoch in range(epochs):
        running_loss = 0
        running_acc = 0

        pbar = tqdm(range(len(training_pairs)))

        for idx in pbar:
            training_pair = training_pairs[idx]
            input_tensor = training_pair[0]
            target_tensor = training_pair[1]
            taus = training_pair[2]

            loss, acc = train(input_tensor, target_tensor, taus, encoder,
                         decoder, encoder_optimizer, decoder_optimizer, criterion)

            if idx % 5000 == 0:
                print(loss)
            running_loss += loss
            running_acc += acc

            scale_value = 1 / max(idx, 1)
            pbar.set_description(
                "Epoch: {}/{}, Loss: {:.4f}, Acc: {:.4f}".format(
                    epoch,
                    epochs,
                    running_loss * scale_value,
                    running_acc * scale_value
                )
            )
            
    return encoder, decoder

In [196]:
hidden_size = 100
encoder = EncoderTASA(actions_data.n_actions, hidden_size).to(device)
decoder = DecoderTASA(hidden_size, actions_data.n_actions).to(device)

encoder, decoder = trainIters(encoder, decoder, 110000)


Epoch: 0/10, Loss: 3.8477, Acc: 0.1411:   0%|          | 5/108646 [00:00<1:30:38, 19.98it/s]

3.329814910888672


Epoch: 0/10, Loss: 38.3292, Acc: 0.3663:   5%|▍         | 5000/108646 [08:16<2:43:35, 10.56it/s] 

8.086479187011719


Epoch: 0/10, Loss: 36.2314, Acc: 0.3878:   9%|▉         | 10004/108646 [16:21<2:35:35, 10.57it/s]

16.30416488647461


Epoch: 0/10, Loss: 39.5948, Acc: 0.4017:  14%|█▍        | 15007/108646 [24:31<2:27:57, 10.55it/s] 

27.64348856608073


Epoch: 0/10, Loss: 38.7426, Acc: 0.4182:  18%|█▊        | 20004/108646 [32:44<1:56:13, 12.71it/s] 

25.59064178466797


Epoch: 0/10, Loss: 40.4145, Acc: 0.4232:  23%|██▎       | 25004/108646 [40:55<3:33:24,  6.53it/s] 

55.995003836495535


Epoch: 0/10, Loss: 40.9928, Acc: 0.4268:  28%|██▊       | 30000/108646 [49:16<2:01:02, 10.83it/s] 

1.1920927533992653e-07


Epoch: 0/10, Loss: 42.8623, Acc: 0.4279:  32%|███▏      | 35001/108646 [57:41<3:44:29,  5.47it/s] 

162.8965637703252


Epoch: 0/10, Loss: 43.7199, Acc: 0.4291:  37%|███▋      | 40002/108646 [1:06:16<2:00:14,  9.52it/s] 

2.9802313861182483e-07


Epoch: 0/10, Loss: 42.7647, Acc: 0.4308:  41%|████▏     | 45003/108646 [1:14:13<1:42:38, 10.33it/s]

19.502217955508474


Epoch: 0/10, Loss: 42.7956, Acc: 0.4318:  46%|████▌     | 50005/108646 [1:22:41<1:09:04, 14.15it/s]

33.88120727539062


Epoch: 0/10, Loss: 42.1225, Acc: 0.4353:  51%|█████     | 55000/108646 [1:30:52<1:10:31, 12.68it/s] 

51.14049784342448


Epoch: 0/10, Loss: 41.6557, Acc: 0.4369:  55%|█████▌    | 60003/108646 [1:39:17<55:15, 14.67it/s]  

72.13900232796718


Epoch: 0/10, Loss: 42.0969, Acc: 0.4360:  60%|█████▉    | 65002/108646 [1:47:45<1:06:00, 11.02it/s]

35.153201293945315


Epoch: 0/10, Loss: 41.5223, Acc: 0.4373:  64%|██████▍   | 70001/108646 [1:56:18<54:23, 11.84it/s]  

97.16193590666118


Epoch: 0/10, Loss: 41.0034, Acc: 0.4388:  69%|██████▉   | 75004/108646 [2:04:39<50:24, 11.12it/s]  

17.450724283854168


Epoch: 0/10, Loss: 40.6049, Acc: 0.4401:  74%|███████▎  | 80001/108646 [2:13:02<1:03:09,  7.56it/s]

13.850738245412844


Epoch: 0/10, Loss: 39.9744, Acc: 0.4424:  78%|███████▊  | 85006/108646 [2:21:37<19:04, 20.66it/s]  

0.0


Epoch: 0/10, Loss: 40.6071, Acc: 0.4440:  83%|████████▎ | 90002/108646 [2:29:41<34:15,  9.07it/s]  

32.27512003580729


Epoch: 0/10, Loss: 40.3611, Acc: 0.4458:  87%|████████▋ | 95000/108646 [2:38:13<20:56, 10.86it/s]  

21.689158313679247


Epoch: 0/10, Loss: 40.0075, Acc: 0.4480:  92%|█████████▏| 100000/108646 [2:46:45<08:16, 17.40it/s] 

4.597311019897461


Epoch: 0/10, Loss: 40.0332, Acc: 0.4493:  97%|█████████▋| 105002/108646 [2:54:47<04:18, 14.11it/s]  

23.763690766834078


Epoch: 0/10, Loss: 39.9923, Acc: 0.4502: 100%|██████████| 108646/108646 [3:00:47<00:00, 10.02it/s]
Epoch: 1/10, Loss: 56.6248, Acc: 0.4697:   0%|          | 3/108646 [00:00<1:30:55, 19.91it/s] 

199.62242713341345


Epoch: 1/10, Loss: 42.3161, Acc: 0.4830:   5%|▍         | 5000/108646 [08:44<3:00:26,  9.57it/s] 

0.0


Epoch: 1/10, Loss: 36.3249, Acc: 0.4909:   9%|▉         | 10004/108646 [16:37<2:17:45, 11.93it/s]

43.801849365234375


Epoch: 1/10, Loss: 39.2452, Acc: 0.4856:  14%|█▍        | 15002/108646 [24:32<2:50:11,  9.17it/s] 

29.482566833496094


Epoch: 1/10, Loss: 38.9151, Acc: 0.4836:  18%|█▊        | 20005/108646 [32:49<2:13:38, 11.05it/s] 

18.348182678222656


Epoch: 1/10, Loss: 40.6623, Acc: 0.4807:  23%|██▎       | 25003/108646 [44:03<5:00:14,  4.64it/s] 

100.78363037109375


Epoch: 1/10, Loss: 40.2312, Acc: 0.4807:  28%|██▊       | 30000/108646 [58:04<3:06:39,  7.02it/s] 

12.061574935913086


Epoch: 1/10, Loss: 42.9771, Acc: 0.4709:  32%|███▏      | 35001/108646 [1:10:39<4:31:35,  4.52it/s] 

132.91962017276424


Epoch: 1/10, Loss: 44.0513, Acc: 0.4678:  37%|███▋      | 40002/108646 [1:21:47<2:09:55,  8.81it/s] 

22.148048400878906


Epoch: 1/10, Loss: 43.4100, Acc: 0.4659:  41%|████▏     | 45002/108646 [1:32:15<2:34:39,  6.86it/s] 

21.18081096067267


Epoch: 1/10, Loss: 43.2485, Acc: 0.4646:  46%|████▌     | 50003/108646 [1:41:02<55:25, 17.63it/s]   

61.524017333984375


Epoch: 1/10, Loss: 42.4693, Acc: 0.4627:  51%|█████     | 55000/108646 [1:48:54<1:06:13, 13.50it/s]

43.80401611328125


Epoch: 1/10, Loss: 42.1906, Acc: 0.4617:  55%|█████▌    | 60004/108646 [1:57:30<48:03, 16.87it/s]   

98.86615175189394


Epoch: 1/10, Loss: 42.5186, Acc: 0.4607:  60%|█████▉    | 65001/108646 [2:06:13<1:27:24,  8.32it/s]

148.0573486328125


Epoch: 1/10, Loss: 42.4210, Acc: 0.4607:  64%|██████▍   | 70001/108646 [2:14:42<53:27, 12.05it/s]  

72.0745913856908


Epoch: 1/10, Loss: 41.9748, Acc: 0.4623:  69%|██████▉   | 75003/108646 [2:23:49<54:30, 10.29it/s]   

0.17801652352015176


Epoch: 1/10, Loss: 41.7110, Acc: 0.4629:  74%|███████▎  | 80000/108646 [2:32:50<26:14, 18.19it/s]  

15.873248459002294


Epoch: 1/10, Loss: 41.2439, Acc: 0.4648:  78%|███████▊  | 84970/108646 [2:41:15<44:56,  8.78it/s]  


KeyboardInterrupt: 

In [162]:
max(actions_data.action2count.items(), key=lambda x: x[1])[1] / sum(x[1] for x in actions_data.action2count.items())

0.3539202407143635

In [183]:
encoder(torch.tensor([1]), hidden=torch.tensor([[[0.]*100]]), tau=1)

(tensor([[[-1.9480e-02, -1.0658e-01, -1.2088e-01,  5.3807e-02,  1.5843e-01,
           -9.8912e-02, -3.1942e-02, -1.4115e-01,  5.2331e-02, -2.4757e-01,
           -1.2114e-01,  2.6260e-01, -5.8325e-02, -1.3696e-01,  1.3468e-01,
           -1.3414e-01, -3.6619e-01, -2.8182e-01, -1.2632e-01,  5.4148e-02,
            1.2105e-01, -3.5259e-04,  8.2975e-02,  3.9645e-03, -5.4616e-02,
           -1.0495e-01,  3.9120e-01,  2.2627e-01,  1.1876e-01, -7.7026e-02,
           -1.4068e-01,  1.1553e-01,  3.4097e-02,  6.7734e-02, -8.5112e-03,
            1.7252e-01, -6.1545e-02,  1.9534e-01, -6.5053e-02, -1.3920e-01,
           -4.9069e-02, -6.9064e-02,  2.2285e-01,  1.1560e-01,  1.7960e-01,
           -1.7477e-01, -3.5469e-02, -3.2298e-01, -1.3649e-01,  1.4075e-02,
           -1.1299e-01,  6.6542e-02, -2.2588e-01, -3.8220e-03,  3.2117e-02,
           -6.6468e-02, -4.6543e-02, -5.9039e-02,  2.4116e-01, -1.9042e-02,
           -1.1231e-01,  6.0434e-02,  1.3858e-01, -1.9344e-01, -2.6559e-01,
           -

In [189]:
list(p for p in encoder.parameters())

[Parameter containing:
 tensor([ 0.0000e+00,  3.9442e+05,  8.4693e+00, -1.1857e+01,  2.6935e+01,
         -1.1951e+02, -4.4665e+04,  2.9790e+00, -7.5319e+04,  1.8509e+00,
         -3.7516e+02,  3.9770e+00, -5.4403e+00,  6.3962e+02, -8.0507e+01,
         -2.1699e+00,  4.0143e+00,  8.6433e+00,  1.3554e+01, -1.5784e+00,
         -1.3280e+01, -1.5167e-01, -9.3447e-02, -2.8740e-01,  2.5208e-06,
          1.2130e-26, -1.3107e+06], requires_grad=True),
 Parameter containing:
 tensor([ 0.0000e+00,  3.9442e+05,  8.4688e+00, -1.1851e+01,  2.6921e+01,
         -1.1949e+02, -4.4665e+04,  2.9778e+00, -7.5319e+04,  1.8503e+00,
         -3.7515e+02,  3.9766e+00, -5.4400e+00,  6.3961e+02, -8.0505e+01,
         -2.1694e+00,  4.0142e+00,  8.6433e+00,  1.3554e+01, -1.5783e+00,
         -1.3280e+01, -1.5160e-01, -9.3441e-02, -2.8740e-01,  2.5231e-06,
          1.2126e-26, -1.3107e+06], requires_grad=True),
 Parameter containing:
 tensor([[ 5.3291e-02,  9.0535e-02, -6.1772e-01,  ..., -1.6330e+00,
         