### testing

In [1]:
import pandas as pd
import datetime
import numpy as  np
from tqdm import tqdm
from scipy.sparse import csr_matrix
# Seed NumPy's global RNG so any later random draws are repeatable.
np.random.seed(1337)

# Slurp the ratings file; the first line is the CSV header and is discarded.
with open('./kaggle/rating_train.csv', 'r') as f:
    ls = f.readlines()[1:]

# u_map: userid -> list of (date, foodid) tuples, in file order.
u_map = {}

# Column-wise copies of the same rows (one entry per rating line).
dates, foods, users = [], [], []

with tqdm(total=len(ls)) as pbar:
    for l in ls:
        # Every row is "YYYY-MM-DD,userid,foodid".
        date_str, user, food = l.strip().split(',')
        date = datetime.datetime.strptime(date_str, '%Y-%m-%d')
        user = int(user)
        food = int(food)

        u_map.setdefault(user, []).append((date, food))

        dates.append(date)
        users.append(user)
        foods.append(food)
        pbar.update(1)

# Dense 0-based indices for the distinct user ids and food ids.
# The index order is whatever iteration order set() yields.
user_map = {u:i for i, u in enumerate(set(users))}
food_map = {f:i for i, f in enumerate(set(foods))}

# User x food matrix; repeated (user, food) pairs are summed by csr_matrix,
# so entries are rating counts rather than 0/1 flags.
rows = [user_map[u] for u in users]
cols = [food_map[f] for f in foods]
ones = np.ones(len(rows))
R = csr_matrix((ones, (rows, cols)), shape=(len(user_map), len(food_map)))

# Per-food totals: how many rating rows mention the food, and how many do not.
pos_count = np.asarray(R.sum(axis=0)).ravel()
neg_count = len(ls) - pos_count

# class_weight is the inverse frequency; final_weight and pos_weight are both
# neg/pos (computed two ways, so they agree up to floating-point rounding).
class_weight = 1. / pos_count
final_weight = neg_count * class_weight
pos_weight = neg_count / pos_count

print(R.shape)
print(neg_count.shape)
print(pos_count.shape)

100%|██████████| 2681494/2681494 [00:20<00:00, 131026.99it/s]


(2608, 5532)
(5532,)
(5532,)


### hybrid

In [2]:
import pandas as pd
import numpy as np
import keras
import os
from constants import MAX_TEXT_SEQ_LEN, MAX_NUM_WORDS, EMBEDDING_DIM
from keras.preprocessing.sequence import pad_sequences



# Load the user profile table. The code below reads the userid, age, gender,
# about_me, reasons and inspirations columns.
csv = pd.read_csv('./kaggle/user.csv')
print csv.columns
# Columns shown in the output saved with this notebook:
# userid,username,age,gender,location,city,state,title,about_me,reasons,inspirations,friends_count
# Parallel per-user lists: position i in each list comes from the i-th CSV row.
texts = []
id_list = []
age_list = []
gender_list = []
print 'Starting read texts.'
for row in csv.iterrows():
    # iterrows() yields (index, Series); only the Series is used.
    r = row[1]
    s = ''
    # Null text fields contribute an empty string.
    # NOTE(review): the three fields are concatenated with no separator, so the
    # last word of one field runs straight into the first word of the next
    # before tokenization. Changing this would alter the token ids a trained
    # checkpoint was fed, so it is only flagged here.
    s += r['about_me'] if not pd.isnull(r['about_me']) else ''
    s += r['reasons'] if not pd.isnull(r['reasons']) else ''
    s += r['inspirations'] if not pd.isnull(r['inspirations']) else ''
    id_list.append(r['userid'])
    age_list.append(r['age'])
    gender_list.append(r['gender'])
    texts.append(s)

# Standardize age in place (z-score) using the mean/std of the non-missing
# ages; missing ages are replaced by 0, i.e. the mean after standardization.
valid_age_list = [age for age in age_list if not np.isnan(age)]
m, std = np.mean(valid_age_list), np.std(valid_age_list)
for i,age in enumerate(age_list):
    if not np.isnan(age):
        age_list[i] = float(age-m) / std
    else:
        age_list[i] = 0
# Character-length statistics of the concatenated texts (max, mean, std).
buf = [len(s) for s in texts]
print np.max(buf), np.mean(buf), np.std(buf)

# Word-level tokenizer (char_level=False) with lower-casing, capped at
# MAX_NUM_WORDS; `filters` lists the characters that are stripped.
tokenizer = keras.preprocessing.text.Tokenizer(num_words=MAX_NUM_WORDS, filters='!"#$%&()*+,-./:;<=>?@[\]^_`{|}~ ',
                                   lower=True, split=' ', char_level=False, oov_token=None)
tokenizer.fit_on_texts(texts)
word_index = tokenizer.word_index
print('Found %s unique tokens.' % len(word_index))
sequences = tokenizer.texts_to_sequences(texts)
# Pad/truncate every sequence at the end ('post') to MAX_TEXT_SEQ_LEN.
# `pad_data` and `data` are two names for the same array.
pad_data = data = pad_sequences(sequences, maxlen=MAX_TEXT_SEQ_LEN, padding='post', truncating='post')


# NOTE(review): despite the message, no embedding matrix is built in this cell;
# what follows is only the userid -> row lookup.
print('Preparing embedding matrix.')
# userid -> position of that user in id_list / age_list / gender_list / pad_data.
userid_map = {user:i for i, user in enumerate(id_list)}
def get_user_feature_fn(userid):
    """Look up the model inputs for one user.

    Reads the module-level tables built above (`userid_map`, `age_list`,
    `gender_list`, `pad_data`).

    Args:
        userid: a key of `userid_map`; an unknown id raises KeyError.

    Returns:
        (x, text_seq) where `x` is a length-2 array [normalized age, gender
        flag] and `text_seq` is the user's row of `pad_data`. The gender flag
        is 1 only for the exact string 'Female'; every other value (including
        a missing one) maps to 0.
    """
    row = userid_map[userid]
    is_female = int(gender_list[row] == 'Female')
    profile = np.array([age_list[row], is_female])
    return profile, pad_data[row, :]
# Smoke check on one hard-coded user id (8526); the dict lookup inside
# get_user_feature_fn raises KeyError if that id is not present in user.csv.
# Note that this binds the module-level names `u` and `u_text`.
u, u_text = get_user_feature_fn(8526)
print u.shape, u_text.shape
print u, u_text


Using TensorFlow backend.


Index([u'userid', u'username', u'age', u'gender', u'location', u'city',
       u'state', u'title', u'about_me', u'reasons', u'inspirations',
       u'friends_count'],
      dtype='object')
Starting read texts.
6990 567.4152607361963 737.8887977118112
Found 13852 unique tokens.
Preparing embedding matrix.
(2,) (2000,)
[-0.53151114  1.        ] [  4 372  19 ...   0   0   0]


In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.init import xavier_normal_
# construct neural network

def scaled_dot_attention(Q, K, V, mask):
    """Scaled dot-product attention: softmax(Q K^T / sqrt(d_k)) V.

    Runs on whatever device the inputs live on (the scale factor is a plain
    Python float, so nothing is forced onto the GPU).

    Args:
        Q: queries, 3-D tensor (batch, q_len, d_k).
        K: keys, 3-D tensor (batch, k_len, d_k); last dim must equal Q's.
        V: values, 3-D tensor (batch, k_len, d_v).
        mask: None, or a tensor broadcastable to (batch, q_len, k_len) whose
            non-zero / True entries mark positions that must NOT be attended
            to. Byte masks are accepted and converted to bool when the
            installed torch has a bool dtype. The caller's mask is not
            modified.

    Returns:
        Tensor of shape (batch, q_len, d_v).

    Note:
        A query row whose keys are all masked yields NaN (softmax over -inf).
    """
    assert Q.size()[-1] == K.size()[-1]
    assert len(Q.size()) == 3 and len(K.size()) == 3 and len(V.size()) == 3
    scale = float(K.size()[-1]) ** 0.5
    scores = torch.matmul(Q, K.permute(0, 2, 1)) / scale
    if mask is not None:
        mask = mask.to(scores.device)
        # Newer torch releases only accept boolean masks; older ones have no
        # bool dtype at all, in which case the byte mask is used as-is.
        bool_dtype = getattr(torch, 'bool', None)
        if bool_dtype is not None and mask.dtype != bool_dtype:
            mask = mask.to(bool_dtype)
        # Out-of-place fill keeps the unmasked scores intact for autograd.
        scores = scores.masked_fill(mask, -float('inf'))
    return torch.matmul(F.softmax(scores, dim=-1), V)

def positional_encoding(d_model, pos):
    """Sinusoidal positional encoding for a single position.

    Even columns hold sin(pos / 10000^(2i/d_model)) and odd columns hold the
    matching cos, for i in [0, d_model/2).

    The width is taken from the `d_model` argument (not from any module-level
    constant), so the function works for any even width.

    Args:
        d_model: encoding width; must be even.
        pos: scalar position index.

    Returns:
        float32 CPU tensor of shape (1, d_model), requires_grad=False.
    """
    assert d_model % 2 == 0
    pos = torch.tensor(pos, dtype=torch.float32, requires_grad=False)
    pe = torch.zeros([1, d_model], dtype=torch.float32, requires_grad=False)
    # Loop-invariant base of the geometric wavelength progression.
    base = torch.tensor(10000, dtype=torch.float32, requires_grad=False)
    for i in range(d_model // 2):
        exponent = torch.tensor(2. * i / float(d_model), dtype=torch.float32, requires_grad=False)
        angle = pos / torch.pow(base, exponent)
        pe[0, 2 * i] = torch.sin(angle)
        pe[0, 2 * i + 1] = torch.cos(angle)
    return pe
                            
class Transformer_v3(nn.Module):
    """Decoder-only stack with cached sinusoidal positional encodings.

    forward() adds the positional encoding for each time step to the input,
    applies dropout, and runs the result through a Stack_Decoder.

    Args:
        layer_num, dk, dv, dm, h, p_drop, d_ff, use_mask: forwarded unchanged
            to Stack_Decoder; `dm` is also the positional-encoding width and
            `p_drop` the input dropout probability.
        use_cuda: when True the cached positional matrix is kept on the GPU.
            It is a plain attribute (not a registered buffer), so
            `.cuda()` / `.to()` on the module does not move it.
        posi_cache_length: number of positions pre-computed at construction.
    """

    def __init__(self, layer_num, dk, dv, dm, h, p_drop, d_ff, use_mask, use_cuda=True, posi_cache_length=200):
        super(Transformer_v3, self).__init__()
        # d_model is needed to build the cached positional encoding matrix.
        self.d_model = dm
        self.use_cuda = use_cuda

        self.decoder = Stack_Decoder(layer_num, dk, dv, dm, h, p_drop, d_ff, use_mask)
        self.emb_drop = nn.Dropout(p_drop)
        self.init_pos_mat(posi_cache_length)

    def forward(self, Q):
        """Run the stack on Q of shape (batch, Q_len, d); returns the decoder output."""
        batch, Q_len, d = Q.size()

        pos_mat = self.get_pos_mat(Q_len)
        try:
            Q = Q + pos_mat
        except RuntimeError:
            # The usual cause is a CPU/GPU mismatch between the input and the
            # cached matrix; print the hint, then let the original error out.
            if Q.is_cuda != pos_mat.is_cuda:
                print('Make sure cache positional matrix is same type of tensor with input, both cuda tensor or not.\nBy setting argument use_cuda=True to set cache positional encoding matrix as a cuda tensor.')
            raise

        Q = self.emb_drop(Q)

        de_out = self.decoder(Q)
        return de_out

    def _build_pos_mat(self, length):
        """Return a (length, d_model) positional matrix on the configured device."""
        mat = torch.cat([positional_encoding(self.d_model, i) for i in range(length)], dim=0)
        mat.requires_grad = False
        if self.use_cuda:
            mat = mat.cuda()
        return mat

    # Positional encodings are pre-computed once and sliced per call.
    def init_pos_mat(self, cache_length):
        """Pre-compute encodings for positions [0, cache_length)."""
        print('init postional matrix with length : %d ' % cache_length)
        self.positional_matrix = self._build_pos_mat(cache_length)

    def get_pos_mat(self, length):
        """Return the first `length` rows of the cache, growing it if needed.

        The returned tensor always comes from the cache, so it is on the same
        device on the growth path as on the normal path.
        """
        if length > self.positional_matrix.shape[0]:
            print('input sequence length reach positional matrix maximum length. %d ' % length)
            print('Increase positional matrix maximum length. %d ' % length)
            self.positional_matrix = self._build_pos_mat(length)
        return self.positional_matrix[:length]
        

    
    

class Stack_Decoder(nn.Module):
    """
    `layer_num` Decoder layers applied one after another.

    All constructor arguments after `layer_num` are handed to every Decoder
    unchanged.
    """
    def __init__(self, layer_num, dk, dv, dm, h, p_drop, d_ff, use_mask):
        super(Stack_Decoder, self).__init__()
        layers = nn.ModuleList()
        for _ in range(layer_num):
            layers.append(Decoder(dk, dv, dm, h, p_drop, d_ff, use_mask))
        self.decoders = layers

    def forward(self, Q):
        """Feed Q through each layer in order and return the final output."""
        out = Q
        for layer in self.decoders:
            out = layer(out)
        return out

class Decoder(nn.Module):
    """One self-attention layer followed by a linear layer.

    forward() computes
        fcn( dropout( Q_attention_lay( attention(Q, Q, Q) ) ) ).

    NOTE(review): no residual connection or normalization is applied, and
    `linear_drop` is created but not used in forward(). `dk`, `dv`, `h` and
    `d_ff` are accepted for signature compatibility but are not used.

    Args:
        dm: model width; both linear layers are dm -> dm.
        p_drop: dropout probability.
        use_mask: when True a causal mask is applied so position i only
            attends to positions <= i.
    """
    def __init__(self, dk, dv, dm, h, p_drop, d_ff, use_mask):
        super(Decoder, self).__init__()
        self.use_mask = use_mask

        # projection applied to the self-attention output
        self.Q_attention_lay = nn.Linear(dm, dm)
        self.Q_att_drop = nn.Dropout(p_drop)

        # final linear layer
        self.fcn = nn.Linear(dm, dm)
        self.linear_drop = nn.Dropout(p_drop)

    def forward(self, Q):
        """Q: (batch, Q_len, d). Returns a tensor of the same shape."""
        if self.use_mask:
            batch, Q_len, d = Q.size()
            # Build the mask where the input lives instead of assuming a GPU.
            mask = self.mask_matrix(batch, Q_len, device=Q.device)
        else:
            mask = None
        # self-attention
        Q_attention_out = self.Q_attention_lay(scaled_dot_attention(Q, Q, Q, mask=mask))
        Q_att_out = self.Q_att_drop(Q_attention_out)

        # final linear layer
        linear_out = self.fcn(Q_att_out)
        return linear_out

    def mask_matrix(self, batch, Q_len, device=None):
        """Causal mask of shape (batch, Q_len, Q_len).

        Entry [b, i, j] is set (blocked) exactly when j > i.

        Args:
            batch: number of copies along dim 0.
            Q_len: sequence length.
            device: target device; defaults to CUDA when available (the
                previous behaviour), otherwise CPU.

        Returns:
            bool tensor, or uint8 on torch builds without a bool dtype.
        """
        if device is None:
            device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        mask_dtype = getattr(torch, 'bool', torch.uint8)
        # Strict upper triangle marks the "future" positions.
        upper = torch.triu(torch.ones(Q_len, Q_len), diagonal=1).to(mask_dtype)
        return upper.unsqueeze(0).repeat(batch, 1, 1).to(device)


    
# Hyper-parameters handed to Transformer_v3 when Net builds its transformer.
# NOTE(review): the original header called these the "Transformer paper
# baseline" values - confirm against the paper before relying on that.
# Only D_MODEL, P_DROP and STACKED_NUM influence the layers defined in this
# file: Decoder accepts dk, dv, h and d_ff but does not use them.
STACKED_NUM = 1  # number of stacked Decoder layers (layer_num)
H = 4  # passed as h; also used just below to derive DK / DV
D_MODEL = 128  # model width
DK = DV = D_MODEL//H  # 32 with the values above
P_DROP = 0.05  # dropout probability used inside the transformer
D_FF = D_MODEL*4  # passed as d_ff




    
# bat = 3
# Q = torch.rand([bat, 13, D_MODEL]).cuda()
# model = Transformer_v2(STACKED_NUM, DK, DV, D_MODEL, H, P_DROP, D_FF, use_mask=True, use_cuda=True).cuda()
# o = model(Q)
# print(o.size())

# Q = torch.rand([bat, 47, D_MODEL]).cuda()
# o = model(Q)
# print(o.size())
# # # print o
# def count_parameters(model):
#     return sum(p.numel() for p in model.parameters() if p.requires_grad)
# print(count_parameters(model))

import torch
import torch.nn as nn
import torch.nn.functional as F### Transformer with ALS embedding Training
# import Transformer/

import numpy as np
from constants import FOOD_NUM, USER_NUM
class Net(nn.Module):
    """Hybrid recommender: food-history transformer plus user-profile embedding.

    Each time step of the history is embedded, run through a causal
    Transformer_v3, concatenated with a per-user vector and projected to one
    score per food.

    Args:
        dm: width shared by the food embedding, the transformer and the user
            embedding. The transformer is built with this same width so the
            layers always agree; this is identical to before whenever
            dm == D_MODEL.
        p_drop: dropout probability for the embedding sub-modules. The
            transformer keeps using the module-level P_DROP.
        emb_mat: pre-trained word-embedding matrix handed to User_embedding.

    Note:
        The transformer is created with use_cuda=True and moved to the GPU in
        the constructor, so building a Net requires CUDA.
    """

    def __init__(self, dm, p_drop, emb_mat):
        super(Net, self).__init__()
        # Not used in forward(); kept so the module's attributes are unchanged.
        self.drop = nn.Dropout(p_drop)
        self.food_emb = Food_embedding(FOOD_NUM, dm, 1, p_drop)
        self.user_emb = User_embedding(dm, 3, emb_mat, p_drop, activation_fn=F.relu)
        # Width follows `dm` (not the global D_MODEL) so it matches food_emb's
        # output and the 2*dm input of output_linear.
        self.transformer = Transformer_v3(STACKED_NUM, DK, DV, dm, H, P_DROP, D_FF, use_mask=True, use_cuda=True).cuda()

        self.output_linear = nn.Linear(2*dm, FOOD_NUM)

    def forward(self, history, u, u_text):
        """Score every food at every time step.

        Args:
            history: (batch, steps, FOOD_NUM) per-step food indicator rows.
            u: per-user numeric features consumed by User_embedding.
            u_text: per-user token-id sequences consumed by User_embedding.

        Returns:
            (batch, steps, FOOD_NUM) tensor of raw scores (no sigmoid applied).
        """
        x = self.food_emb(history)
        batch, x_len, d = x.size()

        x = self.transformer(x)
        # Broadcast the single user vector across all time steps.
        u_out = self.user_emb(u, u_text).unsqueeze(1).repeat(1, x_len, 1)
        x = self.output_linear(torch.cat([x, u_out], dim=-1))

        return x
        
class Food_embedding(nn.Module):
    """Maps a `c_in`-wide input row to a `dm`-wide embedding.

    The first layer is a plain linear map (no activation). It is followed by
    `layer_num - 1` further dm -> dm linear layers, each passed through
    `activation_fn`; dropout is applied after every one of those except the
    last. With layer_num == 1 the module is a single linear layer.
    """
    def __init__(self, c_in, dm, layer_num, p_drop, activation_fn=F.selu):
        super(Food_embedding, self).__init__()
        self.activation_fn = activation_fn
        self.drop = nn.Dropout(p_drop)
        assert layer_num >= 1
        self.first_linear = nn.Linear(c_in, dm)
        extra_layers = [nn.Linear(dm, dm) for _ in range(layer_num - 1)]
        self.linears = nn.ModuleList(extra_layers)

    def forward(self, x):
        """Embed x (last dim c_in) and return a tensor whose last dim is dm."""
        hidden = self.first_linear(x)
        last_idx = len(self.linears) - 1
        for idx, layer in enumerate(self.linears):
            hidden = self.activation_fn(layer(hidden))
            # No dropout on the output of the final layer.
            if idx != last_idx:
                hidden = self.drop(hidden)
        return hidden

class User_embedding(nn.Module):
    """Summarizes a user's free text into one `dm`-wide vector via attention.

    The numeric profile `u` (width 2) is widened by `layer_num` linear layers
    that double the width each time (2 -> 4 -> ... -> 2**(layer_num+1)). That
    vector, concatenated with a learned vector of width
    dm - 2**(layer_num+1), forms a single attention query over the projected
    word embeddings of `u_text`.

    The word-embedding table is initialised from `emb_mat` and frozen.
    `self.drop` is created but not used in forward(). No padding mask is
    passed to the attention (mask=None).
    """
    def __init__(self, dm, layer_num, emb_mat, p_drop, activation_fn=F.selu):
        super(User_embedding, self).__init__()
        self.activation_fn = activation_fn
        self.drop = nn.Dropout(p_drop)
        assert layer_num >= 1

        vocab_size, emb_dim = emb_mat.shape[0], emb_mat.shape[1]
        self.emb = nn.Embedding(vocab_size, emb_dim, padding_idx=0)
        # Replace the random table with the supplied matrix and freeze it.
        self.emb.weight = nn.Parameter(torch.FloatTensor(emb_mat))
        self.emb.weight.requires_grad_(False)
        self.emb_linear = nn.Linear(emb_dim, dm)

        # Learned part of the query; fills the width the profile MLP leaves over.
        profile_dim = 2 ** (layer_num + 1)
        self.att_weight = nn.Parameter(torch.zeros([1, dm - profile_dim], dtype=torch.float))
        torch.nn.init.xavier_normal_(self.att_weight)

        widths = [2 ** (i + 1) for i in range(layer_num + 1)]
        self.linears = nn.ModuleList(
            [nn.Linear(w_in, w_out) for w_in, w_out in zip(widths[:-1], widths[1:])]
        )

    def forward(self, u, u_text):
        """Return one attention-pooled vector per user.

        Args:
            u: (batch, 2) numeric profile features.
            u_text: (batch, seq) integer token ids.

        Returns:
            (batch, dm) tensor.
        """
        profile = u
        for layer in self.linears:
            profile = self.activation_fn(layer(profile))

        # Project the word embeddings to the model width.
        tokens = self.activation_fn(self.emb_linear(self.emb(u_text)))
        batch, seq, d = tokens.size()

        learned = self.att_weight.view(1, 1, -1).repeat(batch, 1, 1)
        query = torch.cat([profile.unsqueeze(1), learned], dim=-1)
        pooled = scaled_dot_attention(query, tokens, tokens, mask=None)
        pooled.squeeze_(1)

        return pooled
    



In [7]:
import torch
from constants import MAX_SEQ_LEN

# Write the 20 highest-scoring foods per user to predict.csv.
#
# NOTE(security): torch.load unpickles the whole model object; only load
# checkpoints from a trusted source.
model = torch.load('./best-hybrid.pt')
model = model.cuda()
model.eval()
# Inverse of food_map: matrix column -> original food id.
rev_food_map = {v:k for k,v in food_map.items()}
with torch.no_grad():
    with open('predict.csv', 'w') as f_out:
        f_out.write('userid,foodid\n')

        with tqdm(total=len(u_map)) as pbar:
            for user in u_map.keys():
                # One row per distinct day, one column per food.
                # More than MAX_SEQ_LEN distinct days raises IndexError below.
                x = np.zeros([MAX_SEQ_LEN, len(food_map)])
                history = u_map[user]
                ds = np.array([d for d,f in history])
                fs = np.array([f for d,f in history])
                # Sort BOTH arrays chronologically so each food stays paired
                # with its own date (previously only the dates were reordered).
                sorted_idx = np.argsort(ds)
                ds = ds[sorted_idx]
                fs = fs[sorted_idx]

                # Walk the sorted history; a new row starts whenever the date changes.
                date_idx = 0
                now_date = ds[0]
                for food, date in zip(fs,ds):
                    if date != now_date:
                        date_idx+=1
                        now_date = date
                    x[date_idx, food_map[food]] = 1
                x = torch.FloatTensor(x).unsqueeze(0).cuda()
                u, u_text = get_user_feature_fn(user)
                u = torch.FloatTensor(u).unsqueeze(0).cuda()
                u_text = torch.LongTensor(u_text).unsqueeze(0).cuda()
                out = model(x, u, u_text)

                # Scores emitted at the user's last observed day, best first.
                arr = out[0,date_idx,:].flatten()
                k20 = reversed(torch.argsort(arr)[-20:])
                # NOTE(review): every id is prefixed with a space, so the food
                # list starts with a leading space after the comma. Kept as-is
                # because the expected submission format is not visible here.
                s = ''
                for food_idx in k20:
                    s += ' %d' % rev_food_map[food_idx.item()]
                f_out.write('%d,%s\n' % (user, s) )

                pbar.update(1)

# Diagnostics on the LAST user processed above.
# NOTE(review): `out` holds raw model outputs here (no sigmoid), so comparing
# against 0.5 is not a probability threshold.
t = 0.5
buf = out[0,date_idx,:]>t
print('last time %s' % torch.sum(buf).item())

buf = out[0,date_idx-1,:]>t
buf2 = x[0,date_idx-1,:]>t
print('pre_time %s %s' % (torch.sum(buf2).item(), torch.sum(buf).item()))
print('done')

    
            


100%|██████████| 2608/2608 [00:19<00:00, 131.09it/s]

last time 0
pre_time 4 0
done





### Find best threshold

In [3]:
import torch
from constants import MAX_SEQ_LEN

# Search, per food, for the sigmoid threshold that maximizes accuracy on the
# last `period` day-to-day transitions of every user.
#
# NOTE(security): torch.load unpickles the whole model object; only load
# checkpoints from a trusted source.
model = torch.load('./best.pt')
model = model.cuda()
model.eval()
outputs = []
labels = []
rev_food_map = {v:k for k,v in food_map.items()}
# Number of most recent transitions collected per user.
period = 10
with torch.no_grad():
    with tqdm(total=len(u_map)) as pbar:
        for user in u_map.keys():
            # One row per distinct day, one column per food.
            x = np.zeros([MAX_SEQ_LEN, len(food_map)])
            history = u_map[user]
            ds = np.array([d for d,f in history])
            fs = np.array([f for d,f in history])
            # Sort BOTH arrays chronologically so each food stays paired with
            # its own date (previously only the dates were reordered).
            sorted_idx = np.argsort(ds)
            ds = ds[sorted_idx]
            fs = fs[sorted_idx]

            date_idx = 0
            now_date = ds[0]
            for food, date in zip(fs,ds):
                if date != now_date:
                    date_idx+=1
                    now_date = date
                x[date_idx, food_map[food]] = 1
            x = torch.FloatTensor(x).unsqueeze(0).cuda()
            out = torch.sigmoid(model(x))
            # The prediction at day d is scored against the foods of day d+1.
            # Clamp the window start at 0: a negative start would wrap around
            # and could give the two slices different lengths for users with
            # fewer than `period` + 1 days.
            start = max(date_idx - period, 0)
            outputs.append(out[0,start:date_idx,:].cpu().numpy())
            labels.append(x[0,start+1:date_idx+1,:].cpu().numpy())
            pbar.update(1)
print('done')

# Stack all users' windows: rows are (user, day) pairs, columns are foods.
outputs = torch.FloatTensor(np.vstack(outputs)).cuda()
labels = torch.FloatTensor(np.vstack(labels)).cuda()

# Grid-search 100 thresholds in [0, 1] per food; ties keep the lowest one.
best_threshold =[]
accs = []
n_rows = float(outputs.shape[0])
thresholds = np.linspace(0,1, 100)
with tqdm(total=len(food_map)) as pbar:
    for i in range(len(food_map)):
        o = outputs[:,i]
        l = labels[:,i].type(torch.uint8)
        best_acc = 0
        best_t = 0
        for t in thresholds:
            a = o > float(t)
            acc = torch.sum(a == l).item() / n_rows
            if acc > best_acc:
                best_acc = acc
                best_t = t
        best_threshold.append(best_t)
        accs.append(best_acc)
        pbar.update(1)
best_threshold = torch.FloatTensor(best_threshold)
print(best_threshold[:100])
print(accs[:100])
    
# for test
import torch
from constants import MAX_SEQ_LEN

# Sanity check on the first user only: for each of the most recent days, print
# how many foods clear the per-food thresholds, how many clear a flat 0.5, and
# how many foods the next day's row of x actually contains.
tt = best_threshold.cuda()
with torch.no_grad():
    for user in u_map.keys():
        x = np.zeros([MAX_SEQ_LEN, len(food_map)])
        history = u_map[user]
        ds = np.array([d for d,f in history])
        fs = np.array([f for d,f in history])
        # Sort BOTH arrays chronologically so each food stays paired with its
        # own date (previously only the dates were reordered).
        sorted_idx = np.argsort(ds)
        ds = ds[sorted_idx]
        fs = fs[sorted_idx]

        date_idx = 0
        now_date = ds[0]
        for food, date in zip(fs,ds):
            if date != now_date:
                date_idx+=1
                now_date = date
            x[date_idx, food_map[food]] = 1
        x = torch.FloatTensor(x).unsqueeze(0).cuda()
        out = torch.sigmoid(model(x))

        # Keep both row indices (i and i + 1) inside the matrix: clamp the
        # start at 0 for short histories and stop before i + 1 runs off the end.
        first_day = max(date_idx - 10, 0)
        stop_day = min(date_idx + 1, x.shape[1] - 1)
        for i in range(first_day, stop_day, 1):
            a = out[0,i,:]
            b = x[0,i+1,:]
            print('%s %s %s' % (torch.sum(a>tt), torch.sum(a>0.5), torch.sum(b)))
        # Only the first user is inspected.
        break

            



100%|██████████| 2608/2608 [00:23<00:00, 109.33it/s]


done


100%|██████████| 5532/5532 [00:30<00:00, 179.22it/s]

tensor([0.7071, 0.5657, 0.5051, 0.5253, 0.5455, 0.5556, 0.2323, 0.3535, 0.5960,
        0.3939, 0.5556, 0.5051, 0.4747, 0.8081, 0.5455, 0.4141, 0.7475, 0.3939,
        0.5051, 0.5152, 0.3131, 0.3131, 0.5657, 0.3333, 0.4141, 0.4949, 0.0606,
        0.7172, 0.5455, 0.3131, 0.2222, 0.7273, 0.4747, 0.5960, 0.5051, 0.3535,
        0.3030, 0.2626, 0.4848, 0.5051, 0.1212, 0.4343, 0.4747, 0.3333, 0.5859,
        0.4747, 0.2424, 0.4949, 0.5152, 0.1212, 0.1212, 0.2525, 0.2626, 0.5758,
        0.6869, 0.2222, 0.5152, 0.1818, 0.3535, 0.3333, 0.7071, 0.5253, 0.5152,
        0.7980, 0.2828, 0.5253, 0.0606, 0.4848, 0.5657, 0.5657, 0.1919, 0.1414,
        0.3131, 0.6566, 0.2626, 0.3939, 0.3535, 0.2323, 0.2424, 0.2525, 0.4848,
        0.4949, 0.6162, 0.4141, 0.7677, 0.1313, 0.3838, 0.4141, 0.4545, 0.4646,
        0.4747, 0.3333, 0.0808, 0.5354, 0.3737, 0.3333, 0.4141, 0.4646, 0.6061,
        0.5354])
[0.9828220858895705, 0.9512653374233129, 0.9477760736196319, 0.9883819018404908, 0.9695935582822086, 0.


