### testing

In [1]:
import pandas as pd
import datetime
import numpy as  np
from tqdm import tqdm
from scipy.sparse import csr_matrix
np.random.seed(1337)
with open('./kaggle/rating_train.csv', 'r') as f:
    ls = f.readlines()[1:]
u_map = {}

dates = []
foods = []
users = []
    


with tqdm(total=len(ls)) as pbar:
    for l in ls:
        date_str, user, food = l.strip().split(',')
        date = datetime.datetime.strptime(date_str, '%Y-%m-%d')
        user, food = int(user), int(food)
        if user not in u_map:
            u_map[user] = []
        u_map[user].append( (date, food) )
        
        dates.append(date)
        users.append(user)
        foods.append(food)
        pbar.update(1)
        

user_map = {u:i for i, u in enumerate(set(users))}        
food_map = {f:i for i, f in enumerate(set(foods))}


# for ranking sparse matrix
rows = [user_map[u] for u in users]
cols = [food_map[f] for f in foods]
R = csr_matrix((np.ones([len(rows), ]), (rows, cols)), shape=(len(user_map), len(food_map)))

pos_count = np.array(np.sum(R, axis=0)).flatten()
neg_count = len(ls) - pos_count

class_weight =  1. / pos_count
final_weight = neg_count*class_weight
pos_weight = neg_count / pos_count
print R.shape
print neg_count.shape
print pos_count.shape

100%|██████████| 2681494/2681494 [00:20<00:00, 131350.01it/s]


(2608, 5532)
(5532,)
(5532,)


In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.init import xavier_normal_
# construct neuron network

def scaled_dot_attention(Q, K, V, mask):
    assert Q.size()[-1] == K.size()[-1]
    assert len(Q.size()) == 3 and len(K.size()) == 3 and len(V.size()) == 3
    dk = torch.tensor(K.size()[-1], dtype=torch.float32, requires_grad=False).cuda()
    out = torch.matmul(Q,K.permute(0,2,1)) / torch.sqrt(dk) 
    if mask is not None:
        out.masked_fill_(mask, -float('inf'))
    return torch.matmul(F.softmax(out, dim=-1), V)

def positional_encoding(d_model, pos):
    assert d_model % 2 == 0
    pos = torch.tensor(pos, dtype=torch.float32, requires_grad=False)
    pe = torch.zeros([1,d_model], dtype=torch.float32, requires_grad=False)
    for i in range(D_MODEL//2):
        a = torch.tensor(10000, dtype=torch.float32, requires_grad=False)
        b = torch.tensor(2.*i/float(D_MODEL), dtype=torch.float32, requires_grad=False)
        c = pos / torch.pow(a, b)
        pe[0, 2*i] = torch.sin(c)
        pe[0, 2*i+1] = torch.cos(c)
    return pe
                            
class Transformer_v2(nn.Module):

    def __init__(self, layer_num, dk, dv, dm, h, p_drop, d_ff, use_mask, use_cuda=True, posi_cache_length=200):
        super(Transformer_v2, self).__init__()
#         for construct cache positional encoding matrix.
        self.d_model = dm
        self.use_cuda = use_cuda
        
        self.decoder = Stack_Decoder(layer_num, dk, dv, dm, h, p_drop, d_ff, use_mask)
        self.emb_drop = nn.Dropout(p_drop)
        self.init_pos_mat(posi_cache_length)

    def forward(self, Q):
    
        
#         decoder
        batch, Q_len, d = Q.size()
        
        try:
            Q = Q + self.get_pos_mat(Q_len)
        except RuntimeError, e:
            if e.message == 'TensorIterator expected type torch.cuda.FloatTensor but got torch.FloatTensor':
                if Q.is_cuda != self.get_pos_mat(K_len).is_cuda:
                    print('Make sure cache positional matrix is same type of tensor with input, both cuda tensor or not.\nBy setting argument use_cuda=True to set cache positional encoding matrix as a cuda tensor.')
            raise
        
        Q = self.emb_drop(Q)
        
        de_out = self.decoder(Q)
        return de_out
    
#     To speed up the positional encoding by construct an cache matrix. 
    def init_pos_mat(self, cache_length):
        print('init postional matrix with length : %d ' % cache_length)
        self.positional_matrix = torch.cat([positional_encoding(self.d_model, i) for i in range(0,cache_length)], dim=0)
        self.positional_matrix.requires_grad = False
        if self.use_cuda:
            self.positional_matrix = self.positional_matrix.cuda()
            
        
    def get_pos_mat(self, length):
        if length > self.positional_matrix.shape[0]:
            print('input sequence length reach positional matrix maximum length. %d ' % length)
            ret = torch.cat([positional_encoding(self.d_model, i) for i in range(length)], dim=0)
            ret.requires_grad = False
            print('Increase positional matrix maximum length. %d ' % length)
            self.positional_matrix = ret
            if self.use_cuda:
                self.positional_matrix = self.positional_matrix.cuda()
            return ret
        else:
            return self.positional_matrix[:length]
        

    
    

class Stack_Decoder(nn.Module):
    """
    Stacked Decoder
    """
    def __init__(self, layer_num, dk, dv, dm, h, p_drop, d_ff, use_mask):
        super(Stack_Decoder, self).__init__()
        self.decoders = nn.ModuleList([Decoder(dk, dv, dm, h, p_drop, d_ff, use_mask) for i in range(layer_num)])
        
        
    def forward(self, Q):
        # ModuleList can act as an iterable, or be indexed using ints
        for lay in self.decoders:
            Q = lay(Q)
        return Q           

class Decoder(nn.Module):
    def __init__(self, dk, dv, dm, h, p_drop, d_ff, use_mask):
        super(Decoder, self).__init__()
        self.use_mask = use_mask
        
#         query attention residual block
        self.Q_attention_lay = Multi_Head_attention_layer(dk, dv, dm, h)
        self.Q_attention_norm_lay = nn.LayerNorm([dm, ])
        self.Q_att_drop = nn.Dropout(p_drop)

#         feed forward residual block
        self.fcn = PositionwiseFeedForward(dm, d_ff)
        self.ff_norm_lay = nn.LayerNorm([dm, ])
        self.linear_drop = nn.Dropout(p_drop)
        

    def forward(self, Q):
        if self.use_mask:
            batch, Q_len, d = Q.size()
            mask = self.mask_matrix(batch, Q_len)
        else:
            mask = None
#         query attention
        Q_attention_out = self.Q_attention_lay(Q, Q, Q, mask=mask)
        Q_attention_out = self.Q_att_drop(Q_attention_out)
        Q_att_out = self.Q_attention_norm_lay(Q + Q_attention_out)
        
#         feed forward
        linear_out = self.fcn(Q_att_out)
        out = self.ff_norm_lay(Q_att_out + linear_out)
        return out
    def mask_matrix(self, batch, Q_len):
#         ByteTensor
        mask = torch.zeros([1, Q_len, Q_len], dtype=torch.uint8, requires_grad=False)
        for i in range(Q_len):
            mask[0,i,i+1:] = 1
        return mask.repeat(batch,1, 1).cuda()


class Multi_Head_attention_layer(nn.Module):
    def __init__(self, dk, dv, dm, h):
        super(Multi_Head_attention_layer, self).__init__()
        self.Q_linears = nn.ModuleList([nn.Linear(dm, dk) for i in range(h)])
        self.K_linears = nn.ModuleList([nn.Linear(dm, dk) for i in range(h)])
        self.V_linears = nn.ModuleList([nn.Linear(dm, dv) for i in range(h)])
        self.output_linear = nn.Linear(h*dv, dm)
                            

    def forward(self, Q_input, K_input, V_input, mask):
        buf = []
        for Q_linear, K_linear, V_linear in zip(self.Q_linears, self.K_linears, self.V_linears):
            Q = Q_linear(Q_input)
            K = K_linear(K_input)
            V = V_linear(V_input)
            buf.append(scaled_dot_attention(Q, K, V, mask))
        
        buf = torch.cat(buf,dim=-1)
        out = self.output_linear(buf)
        
        return out      
class PositionwiseFeedForward(nn.Module):
    def __init__(self, d_model, d_ff):
        super(PositionwiseFeedForward, self).__init__()
        self.cnn1 = nn.Conv1d(d_model, d_ff, 1)
        self.cnn2 = nn.Conv1d(d_ff, d_model, 1)
                            

    def forward(self, x):
        bat,seq_len,d = x.size()
        x = x.permute(0,2,1)
        x = self.cnn1(x)
        x = F.relu(x)
        x = self.cnn2(x)
        x = x.permute(0,2,1)
        
        return x      
    

    
    
# Transformer paper baseline hyper-parameters
STACKED_NUM = 2
H = 4
D_MODEL = 128
DK = DV = D_MODEL//H
P_DROP = 0.05
D_FF = D_MODEL*4




    
# bat = 3
# Q = torch.rand([bat, 13, D_MODEL]).cuda()
# model = Transformer_v2(STACKED_NUM, DK, DV, D_MODEL, H, P_DROP, D_FF, use_mask=True, use_cuda=True).cuda()
# o = model(Q)
# print(o.size())

# Q = torch.rand([bat, 47, D_MODEL]).cuda()
# o = model(Q)
# print(o.size())
# # # print o
# def count_parameters(model):
#     return sum(p.numel() for p in model.parameters() if p.requires_grad)
# print(count_parameters(model))

import torch
import torch.nn as nn
import torch.nn.functional as F### Transformer with ALS embedding Training
# import Transformer/

import numpy as np
from constants import FOOD_NUM, USER_NUM
class Net(nn.Module):

    def __init__(self, dm, p_drop):
        super(Net, self).__init__()
        self.drop = nn.Dropout(p_drop)
        self.food_emb = Food_embedding(FOOD_NUM, dm, 1, p_drop)
        self.transformer = Transformer_v2(STACKED_NUM, DK, DV, D_MODEL, H, P_DROP, D_FF, use_mask=True, use_cuda=True).cuda()

        self.output_linear = nn.Linear(dm, FOOD_NUM)

    def forward(self, history):
#         print(K.size(), get_pos_mat(MAX_SEQUENCE_LENGTH).size())
        
        x = self.food_emb(history)
        batch, x_len, d = x.size()
        
        x = self.transformer(x)
        x = self.output_linear(x)
#         x = torch.sigmoid(x)
        return x
        
class Food_embedding(nn.Module):
    def __init__(self, c_in, dm, layer_num, p_drop, activation_fn=F.selu):
        super(Food_embedding, self).__init__()
        self.activation_fn = activation_fn
        self.drop = nn.Dropout(p_drop)
        assert layer_num >= 1
        self.first_linear = nn.Linear(c_in, dm)
        self.linears = nn.ModuleList([nn.Linear(dm, dm) for i in range(layer_num-1)])
        

    def forward(self, x):
#         print(K.size(), get_pos_mat(MAX_SEQUENCE_LENGTH).size())
        x = self.first_linear(x)
        for lay in self.linears:
            x = self.activation_fn(lay(x))
            if lay != self.linears[-1]:
                x = self.drop(x)
        return x
    
# batch = 7
# dm = D_MODEL
# Q = torch.rand([batch, 18, FOOD_NUM]).cuda()
# model = Net(dm, 0.1).cuda()
# o = model(Q)
# # print t
# print(o.size())
# # print o

# def count_parameters(model):
#     return sum(p.numel() for p in model.parameters() if p.requires_grad)
# print(count_parameters(model))




### Find best threshold

In [3]:
import torch
from constants import MAX_SEQ_LEN

model = torch.load('./best.pt')
model = model.cuda()
model.eval()
outputs = []
labels = []
rev_food_map = {v:k for k,v in food_map.items()}
with torch.no_grad():
    with tqdm(total=len(u_map)) as pbar:
        for user in u_map.keys():
            x = np.zeros([MAX_SEQ_LEN, len(food_map)])
            history = u_map[user]
            ds = np.array([d for d,f in history])
            fs = np.array([f for d,f in history])
            sorted_idx = np.argsort(ds)
            ds = ds[sorted_idx]
            fd = fs[sorted_idx]

            date_idx = 0
            now_date = ds[0]
            for food, date in zip(fs,ds):
                if date != now_date:
                    date_idx+=1
                    now_date = date
                x[date_idx, food_map[food]] = 1
            x = torch.FloatTensor(x).unsqueeze(0).cuda()
            out = torch.sigmoid(model(x))
            period = 10
            outputs.append(out[0,date_idx-period:date_idx,:].cpu().numpy())
            labels.append(x[0,date_idx-period+1:date_idx+1,:].cpu().numpy())
#             outputs.append(out[0,date_idx-period:date_idx,:])
#             labels.append(x[0,date_idx-period+1:date_idx+1,:])
            pbar.update(1)
print 'done'
outputs = np.vstack(outputs)
labels = np.vstack(labels)
# 
outputs = torch.FloatTensor(np.vstack(outputs)).cuda()
labels = torch.FloatTensor(np.vstack(labels)).cuda()

# 
best_threshold =[]
accs = []
with tqdm(total=len(food_map)) as pbar:
    for i in range(len(food_map)):
        o = outputs[:,i]
        l = labels[:,i].type(torch.uint8)
        best_acc = 0
        best_t = 0
        for t in np.linspace(0,1, 100):
#         for t in set(o):
            a = o > float(t)
            acc = torch.sum(a == l).item() / float(outputs.shape[0])
            if acc > best_acc:
                best_acc = acc
                best_t = t
        best_threshold.append(best_t)
        accs.append(best_acc)
        pbar.update(1)
best_threshold = torch.FloatTensor(best_threshold)
print best_threshold[:100]
print accs[:100]
    
# for test
import torch
from constants import MAX_SEQ_LEN

tt = best_threshold.cuda()
with torch.no_grad():
    for user in u_map.keys():
        x = np.zeros([MAX_SEQ_LEN, len(food_map)])
        history = u_map[user]
        ds = np.array([d for d,f in history])
        fs = np.array([f for d,f in history])
        sorted_idx = np.argsort(ds)
        ds = ds[sorted_idx]
        fd = fs[sorted_idx]

        date_idx = 0
        now_date = ds[0]
        for food, date in zip(fs,ds):
            if date != now_date:
                date_idx+=1
                now_date = date
            x[date_idx, food_map[food]] = 1
        x = torch.FloatTensor(x).unsqueeze(0).cuda()
        out = torch.sigmoid(model(x))
        
        for i in range(date_idx-10, date_idx+1, 1):
            a = out[0,i,:]
            b = x[0,i+1,:]
            print torch.sum(a>tt), torch.sum(a>0.5), torch.sum(b)
        break

            



100%|██████████| 2608/2608 [00:23<00:00, 109.33it/s]


done


100%|██████████| 5532/5532 [00:30<00:00, 179.22it/s]

tensor([0.7071, 0.5657, 0.5051, 0.5253, 0.5455, 0.5556, 0.2323, 0.3535, 0.5960,
        0.3939, 0.5556, 0.5051, 0.4747, 0.8081, 0.5455, 0.4141, 0.7475, 0.3939,
        0.5051, 0.5152, 0.3131, 0.3131, 0.5657, 0.3333, 0.4141, 0.4949, 0.0606,
        0.7172, 0.5455, 0.3131, 0.2222, 0.7273, 0.4747, 0.5960, 0.5051, 0.3535,
        0.3030, 0.2626, 0.4848, 0.5051, 0.1212, 0.4343, 0.4747, 0.3333, 0.5859,
        0.4747, 0.2424, 0.4949, 0.5152, 0.1212, 0.1212, 0.2525, 0.2626, 0.5758,
        0.6869, 0.2222, 0.5152, 0.1818, 0.3535, 0.3333, 0.7071, 0.5253, 0.5152,
        0.7980, 0.2828, 0.5253, 0.0606, 0.4848, 0.5657, 0.5657, 0.1919, 0.1414,
        0.3131, 0.6566, 0.2626, 0.3939, 0.3535, 0.2323, 0.2424, 0.2525, 0.4848,
        0.4949, 0.6162, 0.4141, 0.7677, 0.1313, 0.3838, 0.4141, 0.4545, 0.4646,
        0.4747, 0.3333, 0.0808, 0.5354, 0.3737, 0.3333, 0.4141, 0.4646, 0.6061,
        0.5354])
[0.9828220858895705, 0.9512653374233129, 0.9477760736196319, 0.9883819018404908, 0.9695935582822086, 0.




In [17]:
import torch
from constants import MAX_SEQ_LEN

tt = best_threshold.cuda()
with torch.no_grad():
    for user in u_map.keys():
        x = np.zeros([MAX_SEQ_LEN, len(food_map)])
        history = u_map[user]
        ds = np.array([d for d,f in history])
        fs = np.array([f for d,f in history])
        sorted_idx = np.argsort(ds)
        ds = ds[sorted_idx]
        fd = fs[sorted_idx]

        date_idx = 0
        now_date = ds[0]
        for food, date in zip(fs,ds):
            if date != now_date:
                date_idx+=1
                now_date = date
            x[date_idx, food_map[food]] = 1
        x = torch.FloatTensor(x).unsqueeze(0).cuda()
        out = torch.sigmoid(model(x))
        print date_idx, out.shape, type(date_idx), out[0, date_idx,:].shape
        arr = out[0, date_idx, :].flatten()
        k20 = [ i.item() for i in reversed(torch.argsort(arr)[-20:])]
        
        c = arr > 0.5
        idxs =[ i.item() for i in torch.argsort(c)[-int(torch.sum(c)):]]
        print idxs
        print k20
        for i in idxs:
            assert i in k20
        for i in k20:
            print i, R[user_map[user], i]
        
#         for i in range(date_idx-10, date_idx+1, 1):
#             a = out[0,i,:]
#             b = x[0,i+1,:]
#             print torch.sum(a>tt), torch.sum(a>0.5),torch.sum(b)
        break

            


100 torch.Size([1, 165, 5532]) <type 'int'> torch.Size([5532])
[19, 34, 110]
[110, 34, 19, 139, 107, 39, 80, 228, 25, 2, 105, 530, 217, 18, 224, 384, 173, 102, 166, 441]
110 89.0
34 80.0
19 15.0
139 22.0
107 35.0
39 93.0
80 31.0
228 2.0
25 65.0
2 35.0
105 6.0
530 27.0
217 21.0
18 42.0
224 0.0
384 0.0
173 6.0
102 19.0
166 3.0
441 9.0


In [6]:
c = a > tt
print c
idxs = torch.argsort(c)[-int(torch.sum(c)):]
print idxs
d = torch.FloatTensor([19,34])
print d in idxs  

tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0', dtype=torch.uint8)
tensor([ 19,  34, 110], device='cuda:0')


RuntimeError: Expected object of backend CPU but got backend CUDA for argument #2 'other'

In [8]:
import torch
from constants import MAX_SEQ_LEN

model = torch.load('./best-mse.pt')
model = model.cuda()
model.eval()
rev_food_map = {v:k for k,v in food_map.items()}
with torch.no_grad():
    with open('predict.csv', 'w') as f_out:
        f_out.write('userid,foodid\n')

        with tqdm(total=len(u_map)) as pbar:
            for user in u_map.keys():
                x = np.zeros([MAX_SEQ_LEN, len(food_map)])
                history = u_map[user]
                ds = np.array([d for d,f in history])
                fs = np.array([f for d,f in history])
                sorted_idx = np.argsort(ds)
                ds = ds[sorted_idx]
                fd = fs[sorted_idx]

                date_idx = 0
                now_date = ds[0]
                for food, date in zip(fs,ds):
                    if date != now_date:
                        date_idx+=1
                        now_date = date
                    x[date_idx, food_map[food]] = 1
                x = torch.FloatTensor(x).unsqueeze(0).cuda()
#                 out = torch.sigmoid(model(x))
                out = model(x)

                arr = out[0,date_idx,:].flatten()
                k20 = reversed(torch.argsort(arr)[-20:])
                s = ''
                for food_idx in k20:
                    s += ' %d' % rev_food_map[food_idx.item()]
                f_out.write('%d,%s\n' % (user, s) )

                pbar.update(1)
t = 1
buf = out[0,date_idx,:]>t
print 'last time' ,torch.sum(buf).item()

buf = out[0,date_idx-1,:]>t
buf2 = x[0,date_idx-1,:]>t
print 'pre_time',torch.sum(buf2).item(), torch.sum(buf).item()
print 'done'

    
            


100%|██████████| 2608/2608 [00:31<00:00, 82.18it/s]

last time 0
pre_time 0 0
done





In [5]:
import numpy as np
from collections import deque

q = deque(maxlen=10)

for i in range(10):
    q.append(i)
print q,np.mean(q)
for i in range(5):
    q.append(float('nan'))
print q, np.mean(q)
for i in range(10):
    q.append(i)
    print q, np.mean(q)



deque([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], maxlen=10) 4.5
deque([5, 6, 7, 8, 9, nan, nan, nan, nan, nan], maxlen=10) nan
deque([6, 7, 8, 9, nan, nan, nan, nan, nan, 0], maxlen=10) nan
deque([7, 8, 9, nan, nan, nan, nan, nan, 0, 1], maxlen=10) nan
deque([8, 9, nan, nan, nan, nan, nan, 0, 1, 2], maxlen=10) nan
deque([9, nan, nan, nan, nan, nan, 0, 1, 2, 3], maxlen=10) nan
deque([nan, nan, nan, nan, nan, 0, 1, 2, 3, 4], maxlen=10) nan
deque([nan, nan, nan, nan, 0, 1, 2, 3, 4, 5], maxlen=10) nan
deque([nan, nan, nan, 0, 1, 2, 3, 4, 5, 6], maxlen=10) nan
deque([nan, nan, 0, 1, 2, 3, 4, 5, 6, 7], maxlen=10) nan
deque([nan, 0, 1, 2, 3, 4, 5, 6, 7, 8], maxlen=10) nan
deque([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], maxlen=10) 4.5


In [47]:
for i in range(date_idx):
    pred = out[0,i,:] > 0.
    label = x[0,i+1,:] 
    print 'pre_time',i , torch.sum(pred).item(), torch.sum(label).item()
# print torch.sigmoid(out[0,12,10:50])
# print out[0,13,13]
# print date_idx
# torch.sum(x[0, date_idx+1, :])

pre_time 0 13 9
pre_time 1 13 4
pre_time 2 9 3
pre_time 3 7 11
pre_time 4 11 9
pre_time 5 13 6
pre_time 6 7 11
pre_time 7 10 1
pre_time 8 7 5
pre_time 9 10 6
pre_time 10 9 9
pre_time 11 13 2
pre_time 12 10 5
pre_time 13 11 1
pre_time 14 11 10
pre_time 15 12 11
pre_time 16 11 11
pre_time 17 9 6
pre_time 18 10 7
pre_time 19 7 3
pre_time 20 3 1
pre_time 21 2 2
pre_time 22 5 5
pre_time 23 1 6
pre_time 24 3 8
pre_time 25 4 6
pre_time 26 6 2
pre_time 27 5 6
pre_time 28 5 6
pre_time 29 7 7
pre_time 30 9 6
pre_time 31 7 6
pre_time 32 9 4
pre_time 33 13 7
pre_time 34 9 7
pre_time 35 11 10
pre_time 36 11 8
pre_time 37 11 3
pre_time 38 5 3
pre_time 39 4 1
pre_time 40 5 10
pre_time 41 9 4
pre_time 42 8 8
pre_time 43 9 3
pre_time 44 7 8
pre_time 45 11 8
pre_time 46 10 5
pre_time 47 6 3
pre_time 48 9 8
pre_time 49 11 7
pre_time 50 8 5
pre_time 51 4 7
pre_time 52 4 6
pre_time 53 5 8
pre_time 54 3 4
pre_time 55 14 10
pre_time 56 19 9
pre_time 57 9 2
pre_time 58 5 4
pre_time 59 5 5
pre_time 60 4 4
pre_

In [49]:
import torch
from constants import MAX_SEQ_LEN

model = torch.load('./best-light.pt')
model = model.cuda()
model.eval()
rev_food_map = {v:k for k,v in food_map.items()}
with torch.no_grad():

    for user in u_map.keys():
        x = np.zeros([MAX_SEQ_LEN, len(food_map)])
        history = u_map[user]
        ds = np.array([d for d,f in history])
        fs = np.array([f for d,f in history])
        sorted_idx = np.argsort(ds)
        ds = ds[sorted_idx]
        fd = fs[sorted_idx]

        date_idx = 0
        now_date = ds[0]
        for food, date in zip(fs,ds):
            if date != now_date:
                date_idx+=1
                now_date = date
            x[date_idx, food_map[food]] = 1
        x = torch.FloatTensor(x).unsqueeze(0).cuda()
        out = model(x)

        hit_idx = date_idx-1
#         hit_idx = 3
        arr = out.cpu().numpy()[0,hit_idx,:]
        pred = out[0,hit_idx,:] > 0.5
        
        ans = x[0,hit_idx+1,:]
        break
                

print 'done'
print pred.shape
print torch.sum(pred), torch.sum(ans)
buf = ans.type(torch.uint8) == pred
print float(torch.sum(buf)) / float(5532)
            


done
torch.Size([5532])
tensor(639, device='cuda:0') tensor(16., device='cuda:0')
0.887020968908


In [17]:
from constants import MAX_SEQ_LEN


idx = np.random.permutation(len(u_map))
val_num = len(u_map)//10
train_idx, val_idx = idx[val_num:], idx[:val_num]
train_u_map = {k:u_map[k] for k in u_map.keys()[val_num:]}
val_u_map = {k:u_map[k] for k in u_map.keys()[:val_num]}
def batch_boostrap_generator(batch_size, u_map, food_map, max_history_len):
    G = boostrap_generator(u_map, food_map, max_history_len)
    while True:
        X = []
        pad_masks = []
        for i in range(batch_size):
            x, x_len = next(G)
            X.append(np.expand_dims(x, axis=0))
            pad_mask = np.zeros_like(x)
            for idx in range(x_len):
                pad_mask[idx, :] = (max_history_len-x_len+idx+1)*0.3
            pad_masks.append(np.expand_dims(pad_mask, axis=0))
        yield np.vstack(X), np.vstack(pad_masks)
def boostrap_generator(u_map, food_map, max_history_len):
    while True:
        keys = u_map.keys()
        for user_idx in np.random.permutation(len(u_map)):
            user = keys[user_idx]
            X = np.zeros([max_history_len, len(food_map)])
            history = u_map[user]
            ds = np.array([d for d,f in history])
            fs = np.array([f for d,f in history])
            sorted_idx = np.argsort(ds)
            ds = ds[sorted_idx]
            fd = fs[sorted_idx]
            
            date_idx = 0
            now_date = ds[0]
            for food, date in zip(fs,ds):
                if date != now_date:
                    date_idx+=1
                    now_date = date
                X[date_idx, food_map[food]] = 1
            x_len = date_idx+1
            yield X, x_len
            
    

G = batch_boostrap_generator(32, train_u_map, food_map, max_history_len=MAX_SEQ_LEN)
val_G = batch_boostrap_generator(32//2, val_u_map, food_map, max_history_len=MAX_SEQ_LEN)

x, pad_mask = next(G)
print x.shape, pad_mask.shape
x, pad_mask = next(val_G)
print x.shape, pad_mask.shape

G2 = boostrap_generator(train_u_map, food_map, max_history_len=MAX_SEQ_LEN)
x, x_len = next(G2)
print x.shape
print pad_mask[3,:,3]

(32, 165, 5532) (32, 165, 5532)
(16, 165, 5532) (16, 165, 5532)
(165, 5532)
[31.8 32.1 32.4 32.7 33.  33.3 33.6 33.9 34.2 34.5 34.8 35.1 35.4 35.7
 36.  36.3 36.6 36.9 37.2 37.5 37.8 38.1 38.4 38.7 39.  39.3 39.6 39.9
 40.2 40.5 40.8 41.1 41.4 41.7 42.  42.3 42.6 42.9 43.2 43.5 43.8 44.1
 44.4 44.7 45.  45.3 45.6 45.9 46.2 46.5 46.8 47.1 47.4 47.7 48.  48.3
 48.6 48.9 49.2 49.5  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0. ]


In [38]:
from collections import deque
from tqdm import tqdm as tqdm

def normal_acc(pred, label, pad_mask):
    label = label.type(torch.uint8)
    buf = pred == label
    mask = torch.ones_like(pad_mask, dtype=torch.uint8)
    buf = buf*mask
    buf = buf.masked_select(label)
    
    acc = torch.sum(buf).item() / float(torch.sum(label).item())
    
    return acc
def rev_mask(m):
    out = torch.ones_like(m, dtype=torch.uint8, requires_grad=False)
    out.masked_fill_(m, 0)
    return out
        
val_acc_q = deque(maxlen=10000)
val_loss_q = deque(maxlen=10000)

batch_size = 16
val_G = batch_boostrap_generator(batch_size, val_u_map, food_map, max_history_len=MAX_SEQ_LEN)
print 'start testing.'
iters = 100000000
with tqdm(total=iters) as pbar:
    for it in range(iters):
        with torch.no_grad():
            model.eval()
            seq, pad_mask = next(val_G)
            seq = torch.FloatTensor(seq).cuda()
            pad_mask = torch.FloatTensor(pad_mask).cuda()
            seq.requires_grad_(False)
            pad_mask.requires_grad_(False)

            x = seq[:,:-1,:]
            y = seq[:,1:,:]
            output = model(x)
            pred = output > 0.5

            label = y

            val_acc = normal_acc(pred, label, pad_mask[:,1:,:])
            val_acc_q.append(val_acc)
        val_acc = np.mean(val_acc_q)

        pbar.set_postfix_str('val_acc : %.3f' % (val_acc), refresh=False)
        pbar.update(batch_size)

# Train model
print("Optimization Finished!")
# print("Total time elapsed: {:.4f}s".format(time.time() - t_total))

  0%|          | 0/100000000 [00:00<?, ?it/s]

start testing.


  0%|          | 144/100000000 [00:04<611:27:12, 45.43it/s, val_acc : 0.820]


KeyboardInterrupt: 

In [39]:
print pred.shape
print label.shape
p = pred[0,:,:]
l = label[0,:,:]
print p.shape
print l.shape
a = p[33,:]
b = l[33,:].type(torch.uint8)
print a
print b
print a==b
print torch.sum(a == b).item() / float(5532)
print a.shape
print torch.sum(a), torch.sum(b)

torch.Size([16, 164, 5532])
torch.Size([16, 164, 5532])
torch.Size([164, 5532])
torch.Size([164, 5532])
tensor([1, 1, 1,  ..., 0, 0, 0], device='cuda:0', dtype=torch.uint8)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0', dtype=torch.uint8)
tensor([0, 0, 0,  ..., 1, 1, 1], device='cuda:0', dtype=torch.uint8)
0.805676066522
torch.Size([5532])
tensor(1096, device='cuda:0') tensor(23, device='cuda:0')
