## loading data

In [1]:
import pickle
import numpy as np
from os.path import join
# Directory holding the preprocessed pickles used by this notebook.
dir_path = './sdml_final_1'
# Known input files; the two training sets are loaded below, the test set later.
input_list = [
    'prep_final_test_win_feature.pickle',
    'prep_final_train_bid_data.pickle',
    'prep_final_train_win_data.pickle',
]
# NOTE(review): pickle.load can execute arbitrary code -- only load trusted files.
# Winning-bid training data (last entry of input_list).
with open(join(dir_path, input_list[2]), 'rb') as f:
    win_dic = pickle.load(f)
# Bid training data (second entry of input_list).
with open(join(dir_path, input_list[1]), 'rb') as f:
    bid_dic = pickle.load(f)
    

    


## data preprocessing

In [2]:
def trim_max(dic, threshold=800):
    """Drop every row whose price is not strictly below ``threshold``.

    Args:
        dic: mapping of column name to a 2-D array; all arrays are indexed
            with the same row selection, so they must share their row count
            with ``dic['price']``. The price is read from column 0.
        threshold: exclusive upper bound on the price (defaults to the
            previously hard-coded 800).

    Returns:
        A new dict with the same keys whose arrays contain only the kept rows
        (possibly zero rows).
    """
    price = dic['price']
    # Vectorised mask instead of a per-row Python loop; flatnonzero always
    # yields an integer index array, so an empty selection is still valid.
    valid_idx = np.flatnonzero(price[:, 0] < threshold)
    return {k: v[valid_idx, :] for k, v in dic.items()}

# Remove over-priced rows from both training sets before any splitting.
win_dic = trim_max(win_dic)
bid_dic = trim_max(bid_dic)
# Parenthesised form prints the same text on Python 2 and Python 3.
print('trimming done')

trimming done


## model

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Regression_model(nn.Module):
    """Embeds the categorical bid features and predicts a price and a scale.

    Six embedding tables (app, device, imp, imp_posi, weekdays, hours), each
    ``dm1`` wide, are concatenated with ``width`` and ``height`` and projected
    to ``dm2``. A shared residual trunk feeds two heads: one for the price and
    one for sigma.
    """

    def __init__(self, app_dim, device_dim, imp_dim, imp_posi_dim, dm1, dm2, drop, depth):
        """Build the embedding tables, the input projection and the three stacks.

        Args:
            app_dim, device_dim, imp_dim, imp_posi_dim: vocabulary sizes of
                the corresponding embedding tables.
            dm1: embedding width.
            dm2: hidden width of the trunk and of both heads.
            drop: dropout probability (input dropout and inside each stack).
            depth: number of residual layers in each ``Linear_model`` stack.
        """
        super(Regression_model, self).__init__()
        self.app = nn.Embedding(app_dim, dm1)
        self.device = nn.Embedding(device_dim, dm1)
        self.imp = nn.Embedding(imp_dim, dm1)
        self.imp_posi = nn.Embedding(imp_posi_dim, dm1)
        self.weekdays = nn.Embedding(7, dm1)
        self.hours = nn.Embedding(24, dm1)
        # Six embeddings plus two extra input columns (width and height).
        n_embeddings, n_numeric = 6, 2
        self.linear = nn.Linear(n_embeddings*dm1 + n_numeric, dm2)
        self.emb_linears = Linear_model(dm2, drop, depth, False)
        self.price_linears = Linear_model(dm2, drop, depth, True)
        self.sigma_linears = Linear_model(dm2, drop, depth, True)
        self.drop = nn.Dropout(drop)

    def forward(self, width, height, app, device, imp, imp_posi, weekdays, hours):
        """Return ``(price, sigma, emb)`` for one batch.

        The categorical inputs are looked up and squeezed on dim 1
        (presumably they arrive as (batch, 1) index tensors -- confirm against
        the data generators). ``emb`` is the concatenated feature vector
        before dropout; ``sigma`` is limited to the range [0.1, 50].
        """
        lookups = (
            (self.app, app),
            (self.device, device),
            (self.imp, imp),
            (self.imp_posi, imp_posi),
            (self.weekdays, weekdays),
            (self.hours, hours),
        )
        embedded = [table(index).squeeze(1) for table, index in lookups]
        emb = torch.cat([width, height] + embedded, dim=-1)

        hidden = self.emb_linears(self.linear(self.drop(emb)))
        price = self.price_linears(hidden)
        # ReLU followed by clamping keeps sigma strictly positive and bounded.
        sigma = torch.clamp(F.relu(self.sigma_linears(hidden)), 0.1, 50)
        return price, sigma, emb
    
class Linear_model(nn.Module):
    """Stack of ``depth`` residual linear layers, optionally ending in a scalar head.

    Each layer computes ``dropout(batchnorm(relu(linear(x)) + x))``. When
    ``with_out`` is true a final ``Linear(dm, 1)`` maps the result to one value
    per row; otherwise the ``dm``-wide activations are returned.
    """

    def __init__(self, dm, drop, depth, with_out):
        super(Linear_model, self).__init__()
        self.linears = nn.ModuleList()
        self.norms = nn.ModuleList()
        self.drop = nn.Dropout(drop)
        self.with_out = with_out
        # Create each layer together with its normalisation, in layer order.
        for _ in range(depth):
            self.linears.append(nn.Linear(dm, dm))
            self.norms.append(nn.BatchNorm1d(dm))
        if with_out:
            self.out = nn.Linear(dm, 1)

    def forward(self, x):
        """Run ``x`` through every residual layer and the optional output head."""
        for layer, bn in zip(self.linears, self.norms):
            residual = x
            x = self.drop(bn(F.relu(layer(x)) + residual))
        return self.out(x) if self.with_out else x
        
        


## Data Generator

In [4]:
def split_dic(rate, dic):
    """Randomly split every array in ``dic`` into a training and a validation part.

    Args:
        rate: fraction of the rows that goes to the validation split
            (``int(N * rate)`` rows).
        dic: mapping of column name to a 2-D array; every array must have the
            same number of rows as ``dic['price']``.

    Returns:
        ``(train_dic, val_dic)`` with the same keys as ``dic``. The same random
        row permutation is applied to every key, so rows stay aligned.
    """
    N = len(dic['price'])

    for k in dic:
        assert dic[k].shape[0] == N
    idx = np.random.permutation(N)
    c = int(N*rate)
    # Reports "<validation rows> : <training rows>". The parenthesised form
    # prints the same text on Python 2 and Python 3.
    print('split to %d : %d' % (c, N-c))
    val_idx, train_idx = idx[:c], idx[c:]
    train_dic = {k: v[train_idx, :] for k, v in dic.items()}
    val_dic = {k: v[val_idx, :] for k, v in dic.items()}
    return train_dic, val_dic

def permutation_generator(N):
    """Yield indices in ``range(N)`` forever, one fresh random permutation at a time.

    Every index appears exactly once before a new permutation is drawn.

    Raises:
        ValueError: on first use if ``N`` is not positive; an empty permutation
            would otherwise make the loop spin forever without yielding.
    """
    if N <= 0:
        raise ValueError('permutation_generator needs N > 0, got %r' % (N,))
    while True:
        rand_idx = np.random.permutation(N)
        for i in rand_idx:
            yield i
            
def batch_boostrap_generator(dic, batch):
    """Endlessly yield random mini-batches of ``batch`` rows drawn from ``dic``.

    Row indices come from ``permutation_generator``, so every row is visited
    once per pass before any row repeats.

    Yields:
        Tuple ``(width, height, app, device, imp, imp_posi, weekdays, hours,
        price)``, each entry being the selected rows of the matching array.
    """
    # Column names in the order the training code unpacks them.
    columns = ('width', 'height', 'app_type', 'device_type', 'imp_type',
               'imp_position', 'weekdays', 'hours', 'price')
    sampler = permutation_generator(len(dic['price']))
    while True:
        rows = np.array([next(sampler) for _ in range(batch)])
        yield tuple(dic[name][rows, :] for name in columns)


## Training

In [7]:
import numpy as np
from itertools import count
from tqdm import tqdm
from collections import deque

def win_loss(x, y, sigma, distribution):
    """Per-sample negative log-likelihood of the observed price ``y``.

    Args:
        x: predicted location (price).
        y: observed price.
        sigma: predicted scale.
        distribution: a ``torch.distributions`` class. ``Gumbel`` uses a
            hand-written formula with the exponential term clamped to
            [1e-8, 1e8]; any other class is built as ``distribution(0, sigma)``
            and evaluated at the residual ``y - x``.

    Returns:
        Tensor of element-wise losses (not reduced).
    """
    if distribution is not torch.distributions.gumbel.Gumbel:
        residual_dist = distribution(0, sigma)
        return -residual_dist.log_prob(y-x)

    z = (y-x) / sigma
    # Clamp exp(-z) so very negative z cannot blow the loss up.
    bounded_exp = torch.clamp(torch.exp(-z), 10**-8, 10**8)
    return z + bounded_exp + torch.log(sigma)
def bid_loss(x, y, sigma, distribution):
    """Per-sample censored loss: ``-log(1 - CDF(y - x))`` under ``distribution(0, sigma)``.

    The survival probability is clamped to [1e-8, 1] before the logarithm so
    the loss stays finite.

    Returns:
        Tensor of element-wise losses (not reduced).
    """
    residual_dist = distribution(0, sigma)
    survival = torch.clamp(1-residual_dist.cdf(y-x), 10**-8, 1.)
    return -torch.log(survival)
def cuda_data(data, use_cuda=None):
    """Convert a batch of numpy arrays to torch tensors, optionally on the GPU.

    ``int32`` arrays become ``LongTensor`` and ``float32`` arrays become
    ``FloatTensor``; entries with any other dtype are left untouched.

    Args:
        data: iterable of arrays (one per feature column).
        use_cuda: move every entry to the GPU when true. ``None`` keeps the
            legacy behaviour of reading a module-level ``use_cuda`` flag
            (treated as False when that flag does not exist).

    Returns:
        A list with the converted entries, in the same order as ``data``.
    """
    if use_cuda is None:
        # Backward compatibility for callers that relied on the global flag.
        use_cuda = globals().get('use_cuda', False)
    data = list(data)
    for i, v in enumerate(data):
        if v.dtype == np.int32:
            data[i] = torch.LongTensor(v)
        elif v.dtype == np.float32:
            data[i] = torch.FloatTensor(v)
        if use_cuda:
            data[i] = data[i].cuda()
    return data


def _model_losses(model, data, is_win, distribution, use_cuda):
    """Run ``model`` on one batch and return ``(loss, squared_error)``.

    The batch is always converted to tensors first (previously this only
    happened when ``use_cuda`` was true, so CPU runs fed raw numpy arrays to
    the model). For bid batches the squared error is the constant ``0``.
    """
    data = cuda_data(data, use_cuda)
    width, height, app, device, imp, imp_posi, weekdays, hours, price = data
    out, sigma, emb = model(width, height, app, device, imp, imp_posi, weekdays, hours)
    if is_win:
        return win_loss(out, price, sigma, distribution), torch.pow(out - price, 2)
    return bid_loss(out, price, sigma, distribution), 0


def calc_train_loss(model, data, is_win, distribution, use_cuda, emb_loss_criterion):
    """Compute the per-sample training loss for one batch (gradients enabled).

    Args:
        model: network called as ``model(width, height, app, device, imp,
            imp_posi, weekdays, hours)`` and returning ``(out, sigma, emb)``.
        data: batch tuple ending with the price column.
        is_win: use ``win_loss`` (true) or ``bid_loss`` (false).
        distribution: distribution class forwarded to the loss function.
        use_cuda: whether the batch is moved to the GPU.
        emb_loss_criterion: kept for interface compatibility; the embedding
            regulariser is disabled, so it is no longer evaluated.

    Returns:
        ``(loss, l1_loss)``. Despite its name, ``l1_loss`` holds the squared
        error ``(out - price) ** 2`` for win batches and ``0`` otherwise.
    """
    model.train()
    return _model_losses(model, data, is_win, distribution, use_cuda)


def calc_val_loss(model, data, is_win, distribution, use_cuda):
    """Same as ``calc_train_loss`` but in eval mode and without gradients."""
    model.eval()
    with torch.no_grad():
        return _model_losses(model, data, is_win, distribution, use_cuda)

def train(model, opt, train_g, val_g, distribution, train_price, use_cuda, emb_loss_criterion, alpha):
    """Run one optimisation step followed by one validation batch.

    Args:
        model: network being trained.
        opt: pair ``(price_opt, sigma_opt)``; ``train_price`` selects which
            optimiser performs the step.
        train_g, val_g: pairs ``(win_generator, bid_generator)`` yielding
            batches; the win and bid losses are added element-wise, so both
            generators are expected to use the same batch size.
        distribution: distribution class forwarded to the loss functions.
        use_cuda: whether batches are moved to the GPU.
        emb_loss_criterion: forwarded to ``calc_train_loss``.
        alpha: weight of the bid loss relative to the win loss.

    Returns:
        ``(loss, val_loss, l1, val_l1)`` as scalar tensors. ``l1`` and
        ``val_l1`` are the square root of the mean squared win-price error.
    """
    price_opt, sigma_opt = opt
    active_opt = price_opt if train_price else sigma_opt

    # ---- training step ----
    train_win_g, train_bid_g = train_g
    win_batch, bid_batch = next(train_win_g), next(train_bid_g)
    win_nll, sq_err = calc_train_loss(model, win_batch, True, distribution, use_cuda, emb_loss_criterion)
    bid_nll, _ = calc_train_loss(model, bid_batch, False, distribution, use_cuda, emb_loss_criterion)
    loss = torch.mean(win_nll + alpha*bid_nll)
    l1 = torch.sqrt(torch.mean(sq_err))
    active_opt.zero_grad()
    loss.backward()
    active_opt.step()

    # ---- validation on one batch of each kind ----
    with torch.no_grad():
        model.eval()
        val_win_g, val_bid_g = val_g
        val_win_nll, val_sq_err = calc_val_loss(model, next(val_win_g), True, distribution, use_cuda)
        val_bid_nll, _ = calc_val_loss(model, next(val_bid_g), False, distribution, use_cuda)
        val_loss = torch.mean(val_win_nll + alpha*val_bid_nll)
        val_l1 = torch.sqrt(torch.mean(val_sq_err))
    return loss, val_loss, l1, val_l1
    
# ---- data: 10% of each set is held out for validation ----
batch_size = 4096
train_win_dic, val_win_dic = split_dic(0.1, win_dic)
train_bid_dic, val_bid_dic = split_dic(0.1, bid_dic)

win_g = batch_boostrap_generator(train_win_dic, batch_size)
bid_g = batch_boostrap_generator(train_bid_dic, batch_size)
train_g = (win_g, bid_g)

win_g = batch_boostrap_generator(val_win_dic, batch_size)
bid_g = batch_boostrap_generator(val_bid_dic, batch_size)
val_g = (win_g, bid_g)


# ---- model ----
dm1 = 64
dm2 = 512
drop = 0.15
model = Regression_model(3, 5, 3, 10, dm1, dm2, drop, depth=5)
alpha = 0.7

# Move the model to the GPU *before* building the optimisers, as the
# torch.optim documentation recommends, so they are guaranteed to hold the
# parameters that are actually trained.
use_cuda = torch.cuda.is_available()
if use_cuda:
    model = model.cuda()

# ---- optimisers ----
# Both optimisers share every sub-module except the other one's output head:
# price_opt never touches sigma_linears and sigma_opt never touches
# price_linears.
price_params = []
sigma_params = []
for child in model.children():
    if child is not model.price_linears:
        sigma_params.extend(child.parameters())
    if child is not model.sigma_linears:
        price_params.extend(child.parameters())
price_opt = torch.optim.Adam(price_params)
sigma_opt = torch.optim.Adam(sigma_params)
opt = price_opt, sigma_opt

distribution = torch.distributions.normal.Normal
emb_loss_criterion = nn.MSELoss(reduction='none')

# Rolling windows (last 100 steps) used to smooth the reported metrics.
train_q = deque(maxlen=100)
val_q = deque(maxlen=100)
l1_q = deque(maxlen=100)
val_l1_q = deque(maxlen=100)
# Counter updated by the training loop's (currently disabled) early-stopping check.
c = 0
    
# 


split to 1507740 : 13569660
split to 1240883 : 11167948


## Censored regression

In [None]:
def dump_log(model, n_iter, loss, val_loss, log_file_stream, tmp_model_path):
    """Append one ``<split>``-separated log line; checkpoint every 10th iteration.

    Args:
        model: object saved with ``torch.save`` on checkpoint iterations.
        n_iter: iteration number, written zero-padded to seven digits.
        loss, val_loss: values written with five decimals.
        log_file_stream: writable text stream.
        tmp_model_path: destination of the periodic checkpoint.
    """
    log_file_stream.write('%.7d<split>%.5f<split>%.5f\n' % (n_iter, loss, val_loss))
    if n_iter % 10 != 0:
        return
    # Every tenth iteration: push the log to disk and snapshot the model.
    log_file_stream.flush()
    torch.save(model, tmp_model_path)



import os

# Steps per "epoch": every outer step draws 2*train_num win batches (one
# price phase plus one sigma phase), so this is a ceiling division.
N = len(train_win_dic['price'])
train_num = 1
a = 2*train_num*batch_size
ran = N // a + 1 if N % a != 0 else N // a
pre_loss = 0
pre_train = 0
model.train()
it = 0
# Make sure the per-epoch checkpoint directory exists up front, so a missing
# folder cannot abort the run after a full epoch of training.
if not os.path.isdir('./models'):
    os.makedirs('./models')
with open('log-20.txt', 'w') as log_stream:
    # Runs until interrupted; one checkpoint is written per epoch.
    for epoch in count():
        print('epoch start : %d' % epoch)
        with tqdm(total=ran) as pbar:
            for i in range(ran):
                loss_list = []
                val_loss_list = []
                l1_list = []
                val_l1_list = []
                # price phase (inner loops use "_" so they do not clobber the
                # outer step index "i", which the check further down relies on)
                for _ in range(train_num):
                    loss, val_loss, l1, val_l1 = train(model, opt, train_g, val_g, distribution,
                                           train_price=True, use_cuda=use_cuda,
                                           emb_loss_criterion=emb_loss_criterion, alpha=alpha)
                    loss_list.append(loss.item())
                    val_loss_list.append(val_loss.item())
                    l1_list.append(l1.item())
                    val_l1_list.append(val_l1.item())

                # sigma phase (only the losses are recorded here)
                for _ in range(train_num):
                    loss, val_loss, l1, val_l1 = train(model, opt, train_g, val_g, distribution,
                                           train_price=False, use_cuda=use_cuda,
                                           emb_loss_criterion=emb_loss_criterion, alpha=alpha)
                    loss_list.append(loss.item())
                    val_loss_list.append(val_loss.item())
                loss = np.mean(loss_list)
                val_loss = np.mean(val_loss_list)
                l1 = np.mean(l1_list)
                val_l1 = np.mean(val_l1_list)

                # Smooth the metrics over the rolling windows before reporting.
                train_q.append(loss)
                val_q.append(val_loss)
                l1_q.append(l1)
                val_l1_q.append(val_l1)
                loss = np.mean(train_q)
                val_loss = np.mean(val_q)
                l1 = np.mean(l1_q)
                val_l1 = np.mean(val_l1_q)
                pbar.set_postfix_str('loss : %.5f, val loss : %.5f, l1 : %.3f, val_l1 : %.3f' % (loss, val_loss, l1, val_l1))

                pbar.update(1)
                # Every 10th step: count consecutive checks where validation
                # loss rose while training loss fell. The counter is only
                # tracked; the early-stopping action itself is disabled.
                if i % 10 == 0:
                    if val_loss > pre_loss and loss < pre_train:
                        c += 1
                    else:
                        c = 0
                    pre_loss = val_loss
                    pre_train = loss
                # log
                it += 1
                dump_log(model, it, loss, val_loss, log_stream, 'tmp.tar')

        torch.save(model, './models/%d.tar' % epoch)

  0%|          | 0/1657 [00:00<?, ?it/s]

epoch start : 0


100%|██████████| 1657/1657 [04:56<00:00,  3.24it/s, loss : 21166.41471, val loss : 23678.63989, l1 : 278.860, val_l1 : 295.776]  
  0%|          | 1/1657 [00:00<04:46,  5.78it/s, loss : 21159.30096, val loss : 23663.48043, l1 : 278.888, val_l1 : 295.805]

epoch start : 1


100%|██████████| 1657/1657 [04:56<00:00,  3.70it/s, loss : 365.23864, val loss : 368.16964, l1 : 269.966, val_l1 : 285.322]   
  0%|          | 1/1657 [00:00<04:48,  5.74it/s, loss : 365.02170, val loss : 368.07203, l1 : 269.945, val_l1 : 285.367]

epoch start : 2


100%|██████████| 1657/1657 [04:56<00:00,  3.65it/s, loss : 198.38904, val loss : 210.75456, l1 : 264.078, val_l1 : 279.118]
  0%|          | 1/1657 [00:00<04:49,  5.72it/s, loss : 198.44447, val loss : 210.68101, l1 : 264.131, val_l1 : 279.082]

epoch start : 3


100%|██████████| 1657/1657 [04:56<00:00,  4.14it/s, loss : 116.73694, val loss : 120.25410, l1 : 259.240, val_l1 : 270.162]
  0%|          | 1/1657 [00:00<04:49,  5.71it/s, loss : 116.68350, val loss : 120.25390, l1 : 259.200, val_l1 : 270.166]

epoch start : 4


100%|██████████| 1657/1657 [04:56<00:00,  4.13it/s, loss : 70.23177, val loss : 72.81069, l1 : 251.684, val_l1 : 261.515] 
  0%|          | 1/1657 [00:00<04:48,  5.73it/s, loss : 70.21502, val loss : 72.77745, l1 : 251.673, val_l1 : 261.421]

epoch start : 5


100%|██████████| 1657/1657 [04:56<00:00,  4.42it/s, loss : 42.67036, val loss : 43.93526, l1 : 241.188, val_l1 : 250.007]
  0%|          | 1/1657 [00:00<04:50,  5.71it/s, loss : 42.65421, val loss : 43.92161, l1 : 241.149, val_l1 : 249.972]

epoch start : 6


100%|██████████| 1657/1657 [04:56<00:00,  4.51it/s, loss : 29.08992, val loss : 28.52421, l1 : 225.631, val_l1 : 233.463]
  0%|          | 1/1657 [00:00<04:49,  5.71it/s, loss : 29.08867, val loss : 28.52521, l1 : 225.670, val_l1 : 233.565]

epoch start : 7


100%|██████████| 1657/1657 [04:57<00:00,  4.78it/s, loss : 19.90421, val loss : 19.92388, l1 : 191.576, val_l1 : 199.241]
  0%|          | 1/1657 [00:00<04:48,  5.73it/s, loss : 19.90294, val loss : 19.92016, l1 : 191.607, val_l1 : 199.164]

epoch start : 8


100%|██████████| 1657/1657 [04:57<00:00,  4.74it/s, loss : 14.83131, val loss : 15.13818, l1 : 155.208, val_l1 : 160.083]
  0%|          | 1/1657 [00:00<04:49,  5.73it/s, loss : 14.82745, val loss : 15.13190, l1 : 155.216, val_l1 : 160.038]

epoch start : 9


100%|██████████| 1657/1657 [04:56<00:00,  5.06it/s, loss : 12.76650, val loss : 12.57868, l1 : 153.166, val_l1 : 150.949]
  0%|          | 1/1657 [00:00<04:51,  5.69it/s, loss : 12.76509, val loss : 12.58160, l1 : 153.149, val_l1 : 150.950]

epoch start : 10


100%|██████████| 1657/1657 [04:55<00:00,  5.17it/s, loss : 12.65594, val loss : 12.56150, l1 : 152.988, val_l1 : 149.972]
  0%|          | 1/1657 [00:00<04:49,  5.72it/s, loss : 12.65614, val loss : 12.56229, l1 : 153.027, val_l1 : 149.965]

epoch start : 11


100%|██████████| 1657/1657 [04:56<00:00,  5.15it/s, loss : 12.33277, val loss : 12.64894, l1 : 152.459, val_l1 : 153.491]
  0%|          | 1/1657 [00:00<04:49,  5.72it/s, loss : 12.33363, val loss : 12.64812, l1 : 152.474, val_l1 : 153.473]

epoch start : 12


 56%|█████▌    | 930/1657 [02:46<02:09,  5.61it/s, loss : 12.23636, val loss : 12.64845, l1 : 151.670, val_l1 : 153.892]

In [10]:
# torch.save(model, './gumbel_100_trim.tar')
# torch.save(model, './best_bak.tar')


In [6]:
# Restore the periodic checkpoint that the training loop writes through
# dump_log (it saves the whole model object to 'tmp.tar' with torch.save).
# NOTE(review): torch.load unpickles the file -- only load checkpoints you
# trust; presumably the model classes must already be defined in this session.
model = torch.load('./tmp.tar')

## Testing

In [7]:
import numpy as np
from itertools import count
from tqdm import tqdm
from collections import deque

def test_generator(dic, batch):
    """Yield the rows of ``dic`` in order, ``batch`` rows at a time.

    The number of batches is derived from the ``batch`` argument (it used to
    depend on a module-level ``batch_size``). The last batch may be shorter;
    an empty ``dic`` yields nothing.

    Yields:
        Tuple ``(id, width, height, app, device, imp, imp_posi, weekdays,
        hours, price)`` holding the selected rows of each array.
    """
    # Column names in the order the consumers unpack them.
    columns = ('id', 'width', 'height', 'app_type', 'device_type', 'imp_type',
               'imp_position', 'weekdays', 'hours', 'price')
    N = len(dic['price'])
    for start in range(0, N, batch):
        idx = np.arange(start, min(start + batch, N))
        yield tuple(dic[name][idx, :] for name in columns)
        
def test(model, data, use_cuda):
    """Run ``model`` on one test batch and return ``(id, out)``.

    ``int32`` columns become ``LongTensor`` and ``float32`` columns become
    ``FloatTensor``; everything is moved to the GPU when ``use_cuda`` is true.
    ``out`` is the first of the three values returned by the model.
    """
    tensors = []
    for column in data:
        if column.dtype == np.int32:
            column = torch.LongTensor(column)
        elif column.dtype == np.float32:
            column = torch.FloatTensor(column)
        if use_cuda:
            column = column.cuda()
        tensors.append(column)

    # The price column is part of the batch but is not needed for inference.
    row_id, width, height, app, device, imp, imp_posi, weekdays, hours, price = tensors
    out, _sigma, _emb = model(width, height, app, device, imp, imp_posi, weekdays, hours)
    return row_id, out



dir_path = './sdml_final_1'
# NOTE(review): pickle.load can execute arbitrary code -- only load trusted files.
with open(join(dir_path, 'prep_final_test_win_feature.pickle'), 'rb') as f:
    test_dic = pickle.load(f)
batch_size = 8192
N = len(test_dic['price'])
# Number of batches (ceiling division); only used for the progress bar.
ran = N // batch_size + 1 if N % batch_size != 0 else N // batch_size

test_g = test_generator(test_dic, batch_size)
# Send the data to whichever device the model lives on instead of assuming
# a GPU is present.
use_cuda = next(model.parameters()).is_cuda
model.eval()
with open('./submission.csv', 'w') as f_out:
    f_out.write('id,win_price\n')
    with tqdm(total=ran) as pbar:
        with torch.no_grad():
            for data in test_g:
                row_id, out = test(model, data, use_cuda)
                # One bulk conversion per batch instead of one .item() call
                # (and device round trip) per row.
                ids = row_id[:, 0].tolist()
                prices = out[:, 0].tolist()
                f_out.writelines('%d,%f\n' % row for row in zip(ids, prices))
                pbar.update(1)
# Parenthesised form prints the same text on Python 2 and Python 3.
print('done')

100%|██████████| 1233/1233 [04:54<00:00,  4.19it/s]

done





## Combined Testing

In [None]:
import numpy as np
from itertools import count
from tqdm import tqdm
from collections import deque

def test_generator(dic, batch):
    """Yield the rows of ``dic`` in order, ``batch`` rows at a time.

    The number of batches is derived from the ``batch`` argument (it used to
    depend on a module-level ``batch_size``). The last batch may be shorter;
    an empty ``dic`` yields nothing.

    Yields:
        Tuple ``(id, width, height, app, device, imp, imp_posi, weekdays,
        hours, price)`` holding the selected rows of each array.
    """
    # Column names in the order the consumers unpack them.
    columns = ('id', 'width', 'height', 'app_type', 'device_type', 'imp_type',
               'imp_position', 'weekdays', 'hours', 'price')
    N = len(dic['price'])
    for start in range(0, N, batch):
        idx = np.arange(start, min(start + batch, N))
        yield tuple(dic[name][idx, :] for name in columns)
        
def combined_test(m1, m2, data, use_cuda):
    """Average the price predictions of two models on one test batch.

    ``int32`` columns become ``LongTensor`` and ``float32`` columns become
    ``FloatTensor``; everything is moved to the GPU when ``use_cuda`` is true.

    Args:
        m1, m2: models called with the eight feature tensors. Each must return
            a tuple whose first element is the price prediction; further
            elements (sigma, and the embedding that ``Regression_model`` also
            returns) are ignored, so both 2- and 3-value models work.
        data: batch tuple ``(id, width, height, app, device, imp, imp_posi,
            weekdays, hours, price)``.
        use_cuda: whether the batch is moved to the GPU.

    Returns:
        ``(id, out)`` where ``out`` is the equally weighted mean of the two
        predictions.
    """
    data = list(data)
    for i, v in enumerate(data):
        if v.dtype == np.int32:
            data[i] = torch.LongTensor(v)
        elif v.dtype == np.float32:
            data[i] = torch.FloatTensor(v)
        if use_cuda:
            data[i] = data[i].cuda()

    row_id, width, height, app, device, imp, imp_posi, weekdays, hours, price = data
    features = (width, height, app, device, imp, imp_posi, weekdays, hours)
    # Index instead of tuple-unpacking: unpacking two names from the
    # three-value model output raised "too many values to unpack".
    out1 = m1(*features)[0]
    out2 = m2(*features)[0]
    out = out1*0.5 + out2*0.5
    return row_id, out



dir_path = './sdml_final_1'
# NOTE(review): pickle.load can execute arbitrary code -- only load trusted files.
with open(join(dir_path, 'prep_final_test_win_feature.pickle'), 'rb') as f:
    test_dic = pickle.load(f)
batch_size = 8192
N = len(test_dic['price'])
# Number of batches (ceiling division); only used for the progress bar.
ran = N // batch_size + 1 if N % batch_size != 0 else N // batch_size

test_g = test_generator(test_dic, batch_size)
# Use the GPU only when one is present; on CPU-only machines remap the
# checkpoints' storages to the CPU while loading.
use_cuda = torch.cuda.is_available()
map_location = None if use_cuda else 'cpu'
# NOTE(review): torch.load unpickles the files -- only load trusted checkpoints.
m1 = torch.load('./best.tar', map_location=map_location)
m2 = torch.load('./best_win.tar', map_location=map_location)
if use_cuda:
    # Keep both models on the same device as the data they receive.
    m1 = m1.cuda()
    m2 = m2.cuda()
m1.eval()
m2.eval()
with open('./submission.csv', 'w') as f_out:
    f_out.write('id,win_price\n')
    with tqdm(total=ran) as pbar:
        with torch.no_grad():
            for data in test_g:
                row_id, out = combined_test(m1, m2, data, use_cuda)
                # One bulk conversion per batch instead of one .item() call
                # (and device round trip) per row.
                ids = row_id[:, 0].tolist()
                prices = out[:, 0].tolist()
                f_out.writelines('%d,%f\n' % row for row in zip(ids, prices))
                pbar.update(1)
# Parenthesised form prints the same text on Python 2 and Python 3.
print('done')

In [None]:
# Sanity check of the batching arithmetic: total rows, number of full batches,
# and the row capacity when one extra (partial) batch is added.
# Parenthesised calls print the same values on Python 2 and Python 3.
print(len(test_dic['price']))
N = len(test_dic['price']) // batch_size
print(N)
print((N + 1) * batch_size)
