In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

import time, json, datetime 
from tqdm import tqdm

import numpy as np 
import pandas as pd 
from sklearn.metrics import log_loss, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)

In [2]:
data = pd.read_pickle('data/ml-1m.pkl')

feat_col = ['userId', 'gender', 'age', 'occupation', 'movieId', 'year']
label_col = 'rating'

train, test = train_test_split(data, test_size=0.3, random_state=20220316)
print('train：', train.shape)
print('test:', test.shape)

user_id_col = 'userId'
user_attr_col = ['gender', 'age', 'occupation']
item_id_col = 'movieId'
item_attr_col = ['year']
label_col = 'rating'

train_dataset = TensorDataset(torch.LongTensor(train[user_id_col].values),
                              torch.LongTensor(train[user_attr_col].values),
                              torch.LongTensor(train[item_id_col].values),
                              torch.LongTensor(train[item_attr_col].values),
                              torch.FloatTensor(train[label_col].values),)
test_dataset = TensorDataset(torch.LongTensor(test[user_id_col].values),
                              torch.LongTensor(test[user_attr_col].values),
                              torch.LongTensor(test[item_id_col].values),
                              torch.LongTensor(test[item_attr_col].values),
                              torch.FloatTensor(test[label_col].values),)

train_loader = DataLoader(train_dataset, batch_size=2048, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=4096, shuffle=True)

def print_info(info):
    t0 = datetime.datetime.now().strftime('%H:%M:%S')
    info = '{} : {}'.format(t0, info)
    print(info)

train： (700146, 9)
test: (300063, 9)


In [3]:
data.head()

Unnamed: 0,userId,gender,age,occupation,movieId,rating,title,genres,year
0,0,0,0,10,1104,1,"[1, 176, 1050, 2409, 2410, 0, 0, 0]",[10],76
1,1,1,6,16,1104,1,"[1, 176, 1050, 2409, 2410, 0, 0, 0]",[10],76
2,11,1,2,12,1104,1,"[1, 176, 1050, 2409, 2410, 0, 0, 0]",[10],76
3,14,1,2,7,1104,1,"[1, 176, 1050, 2409, 2410, 0, 0, 0]",[10],76
4,16,1,5,1,1104,1,"[1, 176, 1050, 2409, 2410, 0, 0, 0]",[10],76


In [4]:
def KLD_Gaussian(mu_q, sigma_q, mu_p, sigma_p):
    return torch.log(sigma_p / sigma_q) + (sigma_q**2 + (mu_q - mu_p)**2) / (2 * sigma_p**2)

In [5]:
class VELBase(nn.Module):
    def __init__(self, feature_nuniques_list, emb_size=8):
        super().__init__()
        '''
        feature_nuniques_list: a list of  features' vocabulary size
        for instance, feature_nuniques_list = [2000, [2, 8, 10], 1000, [7, 9]]
        '''
        user_id_size, user_attr_size, item_id_size, item_attr_size = feature_nuniques_list
        self.user_id_emb = nn.Embedding(user_id_size, emb_size) 
        self.user_id_dnn = nn.Sequential(nn.Linear(emb_size, 256), nn.ReLU(),
                                      nn.Linear(256, emb_size*2), nn.Sigmoid())
        
        self.user_attr_emb = nn.ModuleList([nn.Embedding(voc_size, emb_size) 
                                            for voc_size in user_attr_size])
        self.user_attr_dim = emb_size * len(user_attr_size)
        self.user_attr_dnn = nn.Sequential(nn.Linear(self.user_attr_dim, 256), nn.ReLU(),
                                           nn.Linear(256, emb_size*2), nn.Sigmoid())
        
        self.item_id_emb = nn.Embedding(item_id_size, emb_size)
        self.item_id_dnn = nn.Sequential(nn.Linear(emb_size, 256), nn.ReLU(),
                                      nn.Linear(256, emb_size*2), nn.Sigmoid())
        
        self.item_attr_emb = nn.ModuleList([nn.Embedding(voc_size, emb_size) 
                                          for voc_size in item_attr_size])
        self.item_attr_dim = emb_size * len(item_attr_size)
        self.item_attr_dnn = nn.Sequential(nn.Linear(self.item_attr_dim, 256), nn.ReLU(),
                                           nn.Linear(256, emb_size*2), nn.Sigmoid())
        self.feature_dims = emb_size * 2 + self.user_attr_dim + self.item_attr_dim
        
    def forward(self, user_id, user_attr, item_id, item_attr):
        # variational embedding for user id
        user_id_emb_res = self.user_id_emb(user_id)
        user_id_dnn_res = self.user_id_dnn(user_id_emb_res)
        user_id_mu, user_id_sigma = user_id_dnn_res.chunk(2, dim=1)
        user_id_sigma = torch.abs(user_id_sigma)
        # Reparameterize Trick
        user_id_vemb = user_id_mu + user_id_sigma * torch.randn_like(user_id_sigma)
        # embedding for user attribute
        user_attr_emb_res = [emb(user_attr[:, i]) for i, emb in enumerate(self.user_attr_emb)]
        user_attr_emb_concat = torch.cat(user_attr_emb_res, dim=1)
        user_attr_dnn_res = self.user_attr_dnn(user_attr_emb_concat)
        user_attr_mu, user_attr_sigma = user_attr_dnn_res.chunk(2, dim=1)
        user_attr_sigma = torch.abs(user_attr_sigma)
        # variational embedding for item id
        item_id_emb_res = self.item_id_emb(item_id)
        item_id_dnn_res = self.item_id_dnn(item_id_emb_res)
        item_id_mu, item_id_sigma = item_id_dnn_res.chunk(2, dim=1)
        item_id_sigma = torch.abs(item_id_sigma)
        # Reparameterize Trick
        item_id_vemb = item_id_mu + item_id_sigma * torch.randn_like(item_id_sigma)
        # embedding for item attribute
        item_attr_emb_res = [emb(item_attr[:, i]) for i, emb in enumerate(self.item_attr_emb)]
        item_attr_emb_concat = torch.cat(item_attr_emb_res, dim=1)
        item_attr_dnn_res = self.item_attr_dnn(item_attr_emb_concat)
        item_attr_mu, item_attr_sigma = item_attr_dnn_res.chunk(2, dim=1)
        item_attr_sigma = torch.abs(item_attr_sigma)
        # concat all embeddings
        all_embs = torch.cat([user_id_vemb, item_id_vemb, user_attr_emb_concat, item_attr_emb_concat], dim=1)
        # users' KL-divergence
        user_kld = KLD_Gaussian(user_id_mu, user_id_sigma, user_attr_mu, user_attr_sigma)
        # items' KL-divergence
        item_kld = KLD_Gaussian(item_id_mu, item_id_sigma, item_attr_mu, item_attr_sigma)
        # user's prior KL-divergence
        user_prior_kld = KLD_Gaussian(user_attr_mu, user_attr_sigma, 0, 1)
        # items's prior KL-divergence
        item_prior_kld = KLD_Gaussian(item_attr_mu, item_attr_sigma, 0, 1)
        kld = user_kld + item_kld + user_prior_kld + item_prior_kld
        kld = torch.mean(kld)
        return all_embs, kld

In [6]:
class VELDeepFM(nn.Module):
    def __init__(self, feature_nuniques_list, alpha=1, emb_size=8, 
                 hid_dims=[256, 128], num_classes=1, dropout=[0.2, 0.2]):
        super().__init__()
        self.encoder = VELBase(feature_nuniques_list)
        self.all_dims = [self.encoder.feature_dims] + hid_dims
        self.dnn_linear_list = nn.ModuleList()
        for i in range(1, len(self.all_dims)):
            self.dnn_linear_list.append(nn.Sequential(
                nn.Linear(self.all_dims[i-1], self.all_dims[i]),
                nn.BatchNorm1d(self.all_dims[i]),
                nn.ReLU(),
                nn.Dropout(dropout[i-1])
            ))
        self.dnn_linear_list.append(nn.Linear(hid_dims[-1], num_classes))
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, user_id, user_attr, item_id, item_attr):
        all_embs, kld = self.encoder(user_id, user_attr, item_id, item_attr)
        '''FM module'''
        fm_1st_part = torch.sum(all_embs, 1, keepdim=True)
        sum_square_emb = torch.sum(all_embs * all_embs, 1, keepdim=True)
        sum_emb = torch.sum(all_embs, 1, keepdim=True)
        square_sum_emb = sum_emb * sum_emb
        fm_2nd_part = (sum_square_emb - square_sum_emb) * 0.5
        '''DNN module'''
        dnn_out = all_embs
        for linear in self.dnn_linear_list:
            dnn_out = linear(dnn_out)
        out = dnn_out + fm_1st_part + fm_2nd_part
        out = self.sigmoid(out)
        return out, kld

In [7]:
feature_nuniques_list = [data[user_id_col].nunique(),
                          [data[f].nunique() for f in user_attr_col],
                          data[item_id_col].nunique(),
                          [data[f].nunique() for f in item_attr_col]]

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

model = VELDeepFM(feature_nuniques_list, 1)
model.to(device)

loss = nn.BCELoss() # Binary Cross Entropy Loss
loss = loss.to(device)

optimizer = optim.Adam(model.parameters(), lr=0.005, weight_decay=0.0001)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.8)

total = sum([param.nelement() for param in model.parameters()])
trainable = sum([param.nelement() for param in model.parameters() if param.requires_grad])
print({'Total': total, 'Trainable': trainable})

{'Total': 154953, 'Trainable': 154953}


In [8]:
def train_and_test(model, train_loader, test_loader, epochs, device):
    best_auc = 0.0
    for epoch in range(epochs):
        '''training process'''
        model.train()
        print("Current lr : {}".format(optimizer.state_dict()['param_groups'][0]['lr']))
        train_loss_sum = 0.0
        start_time = time.time()
        for idx, x in enumerate(train_loader):
            user_id, user_attr, item_id, item_attr, label = x[0], x[1], x[2], x[3], x[4]
            user_id, user_attr = user_id.to(device), user_attr.to(device)
            item_id, item_attr = item_id.to(device), item_attr.to(device)
            label = label.float().to(device)
            pred, kld = model(user_id, user_attr, item_id, item_attr)
            pred = pred.view(-1)
            l = loss(pred, label) + kld
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            
            train_loss_sum += l.cpu().item()
            if (idx+1) % 50 == 0 or (idx + 1) == len(train_loader):
                info = 'Epoch: {:04d} | Step: {:04d} / {} | Loss: {:.4f} | Time: {:.4f}'.format(
                          epoch+1, idx+1, len(train_loader), train_loss_sum/(idx+1), time.time() - start_time)
                print_info(info)     
            
        scheduler.step()
        
        '''inference process'''
        model.eval()
        with torch.no_grad():
            test_labels, test_preds = [], []
            for idx, x in tqdm(enumerate(test_loader)):
                user_id, user_attr, item_id, item_attr, label = x[0], x[1], x[2], x[3], x[4]
                user_id, user_attr = user_id.to(device), user_attr.to(device)
                item_id, item_attr = item_id.to(device), item_attr.to(device)
                label = label.float().to(device)
                pred, kld = model(user_id, user_attr, item_id, item_attr)
                pred = pred.view(-1).data.cpu().numpy().tolist()
                test_preds.extend(pred)
                test_labels.extend(label.cpu().numpy().tolist())
        cur_auc = roc_auc_score(test_labels, test_preds)
        if cur_auc > best_auc:
            best_auc = cur_auc
            torch.save(model.state_dict(), 'deep_fm_ml_1m.pth')   
        info = 'Current AUC: {:.6f}, Best AUC: {:.6f}\n'.format(cur_auc, best_auc)
        print_info(info)

In [9]:
train_and_test(model, train_loader, test_loader, 30, device)

Current lr : 0.005
10:54:20 : Epoch: 0001 | Step: 0050 / 342 | Loss: 12.9680 | Time: 3.0893
10:54:23 : Epoch: 0001 | Step: 0100 / 342 | Loss: 8.3305 | Time: 6.0299
10:54:26 : Epoch: 0001 | Step: 0150 / 342 | Loss: 6.6617 | Time: 8.9614
10:54:29 : Epoch: 0001 | Step: 0200 / 342 | Loss: 5.8014 | Time: 12.0127
10:54:32 : Epoch: 0001 | Step: 0250 / 342 | Loss: 5.2725 | Time: 14.9076
10:54:35 : Epoch: 0001 | Step: 0300 / 342 | Loss: 4.9123 | Time: 17.8472


1it [00:00,  8.28it/s]

10:54:37 : Epoch: 0001 | Step: 0342 / 342 | Loss: 4.6858 | Time: 20.2866


74it [00:07, 10.26it/s]


10:54:45 : Current AUC: 0.534515, Best AUC: 0.534515

Current lr : 0.004
10:54:48 : Epoch: 0002 | Step: 0050 / 342 | Loss: 3.0327 | Time: 3.1360
10:54:51 : Epoch: 0002 | Step: 0100 / 342 | Loss: 3.0304 | Time: 6.1164
10:54:54 : Epoch: 0002 | Step: 0150 / 342 | Loss: 3.0269 | Time: 9.3441
10:54:57 : Epoch: 0002 | Step: 0200 / 342 | Loss: 3.0205 | Time: 12.3148
10:55:00 : Epoch: 0002 | Step: 0250 / 342 | Loss: 3.0138 | Time: 15.4050
10:55:03 : Epoch: 0002 | Step: 0300 / 342 | Loss: 3.0076 | Time: 18.4608


1it [00:00,  8.97it/s]

10:55:06 : Epoch: 0002 | Step: 0342 / 342 | Loss: 3.0032 | Time: 21.1346


74it [00:07, 10.18it/s]


10:55:13 : Current AUC: 0.543712, Best AUC: 0.543712

Current lr : 0.0032
10:55:16 : Epoch: 0003 | Step: 0050 / 342 | Loss: 2.9714 | Time: 3.0981
10:55:20 : Epoch: 0003 | Step: 0100 / 342 | Loss: 2.9617 | Time: 6.2987
10:55:23 : Epoch: 0003 | Step: 0150 / 342 | Loss: 2.9574 | Time: 9.3196
10:55:26 : Epoch: 0003 | Step: 0200 / 342 | Loss: 2.9575 | Time: 12.4030
10:55:29 : Epoch: 0003 | Step: 0250 / 342 | Loss: 2.9521 | Time: 15.3589
10:55:32 : Epoch: 0003 | Step: 0300 / 342 | Loss: 2.9491 | Time: 18.4415


0it [00:00, ?it/s]

10:55:34 : Epoch: 0003 | Step: 0342 / 342 | Loss: 2.9479 | Time: 20.9938


74it [00:07, 10.34it/s]


10:55:42 : Current AUC: 0.560389, Best AUC: 0.560389

Current lr : 0.00256
10:55:45 : Epoch: 0004 | Step: 0050 / 342 | Loss: 2.9162 | Time: 3.1401
10:55:48 : Epoch: 0004 | Step: 0100 / 342 | Loss: 2.9142 | Time: 6.2829
10:55:51 : Epoch: 0004 | Step: 0150 / 342 | Loss: 2.9148 | Time: 9.1996
10:55:54 : Epoch: 0004 | Step: 0200 / 342 | Loss: 2.9147 | Time: 12.3135
10:55:57 : Epoch: 0004 | Step: 0250 / 342 | Loss: 2.9131 | Time: 15.3716
10:56:00 : Epoch: 0004 | Step: 0300 / 342 | Loss: 2.9130 | Time: 18.5616


1it [00:00,  9.59it/s]

10:56:03 : Epoch: 0004 | Step: 0342 / 342 | Loss: 2.9115 | Time: 21.0819


74it [00:07, 10.32it/s]


10:56:10 : Current AUC: 0.572905, Best AUC: 0.572905

Current lr : 0.0020480000000000003
10:56:13 : Epoch: 0005 | Step: 0050 / 342 | Loss: 2.8995 | Time: 3.1446
10:56:16 : Epoch: 0005 | Step: 0100 / 342 | Loss: 2.8961 | Time: 6.1659
10:56:19 : Epoch: 0005 | Step: 0150 / 342 | Loss: 2.8960 | Time: 9.2615
10:56:22 : Epoch: 0005 | Step: 0200 / 342 | Loss: 2.8946 | Time: 12.3010
10:56:26 : Epoch: 0005 | Step: 0250 / 342 | Loss: 2.8937 | Time: 15.4260
10:56:29 : Epoch: 0005 | Step: 0300 / 342 | Loss: 2.8924 | Time: 18.5004


1it [00:00,  8.33it/s]

10:56:31 : Epoch: 0005 | Step: 0342 / 342 | Loss: 2.8916 | Time: 21.2613


74it [00:06, 11.14it/s]


10:56:38 : Current AUC: 0.577995, Best AUC: 0.577995

Current lr : 0.0016384000000000004
10:56:41 : Epoch: 0006 | Step: 0050 / 342 | Loss: 2.8833 | Time: 3.1265
10:56:45 : Epoch: 0006 | Step: 0100 / 342 | Loss: 2.8823 | Time: 6.2106
10:56:47 : Epoch: 0006 | Step: 0150 / 342 | Loss: 2.8836 | Time: 9.1514
10:56:50 : Epoch: 0006 | Step: 0200 / 342 | Loss: 2.8822 | Time: 12.1637
10:56:53 : Epoch: 0006 | Step: 0250 / 342 | Loss: 2.8809 | Time: 14.8471
10:56:56 : Epoch: 0006 | Step: 0300 / 342 | Loss: 2.8791 | Time: 17.5876


0it [00:00, ?it/s]

10:56:58 : Epoch: 0006 | Step: 0342 / 342 | Loss: 2.8785 | Time: 19.9935


74it [00:06, 10.94it/s]


10:57:05 : Current AUC: 0.578975, Best AUC: 0.578975

Current lr : 0.0013107200000000005
10:57:08 : Epoch: 0007 | Step: 0050 / 342 | Loss: 2.8779 | Time: 3.0742
10:57:11 : Epoch: 0007 | Step: 0100 / 342 | Loss: 2.8796 | Time: 5.8666
10:57:14 : Epoch: 0007 | Step: 0150 / 342 | Loss: 2.8758 | Time: 8.5617
10:57:17 : Epoch: 0007 | Step: 0200 / 342 | Loss: 2.8721 | Time: 11.2777
10:57:19 : Epoch: 0007 | Step: 0250 / 342 | Loss: 2.8718 | Time: 13.9679
10:57:22 : Epoch: 0007 | Step: 0300 / 342 | Loss: 2.8707 | Time: 16.6959


1it [00:00,  7.98it/s]

10:57:24 : Epoch: 0007 | Step: 0342 / 342 | Loss: 2.8699 | Time: 18.9246


74it [00:07,  9.28it/s]


10:57:32 : Current AUC: 0.585616, Best AUC: 0.585616

Current lr : 0.0010485760000000005
10:57:35 : Epoch: 0008 | Step: 0050 / 342 | Loss: 2.8604 | Time: 2.8899
10:57:38 : Epoch: 0008 | Step: 0100 / 342 | Loss: 2.8596 | Time: 5.6722
10:57:41 : Epoch: 0008 | Step: 0150 / 342 | Loss: 2.8582 | Time: 8.5327
10:57:44 : Epoch: 0008 | Step: 0200 / 342 | Loss: 2.8574 | Time: 11.3319
10:57:46 : Epoch: 0008 | Step: 0250 / 342 | Loss: 2.8561 | Time: 14.0716
10:57:49 : Epoch: 0008 | Step: 0300 / 342 | Loss: 2.8561 | Time: 16.9652


1it [00:00,  8.71it/s]

10:57:52 : Epoch: 0008 | Step: 0342 / 342 | Loss: 2.8560 | Time: 19.4958


74it [00:06, 10.86it/s]


10:57:59 : Current AUC: 0.587553, Best AUC: 0.587553

Current lr : 0.0008388608000000005
10:58:02 : Epoch: 0009 | Step: 0050 / 342 | Loss: 2.8523 | Time: 3.1278
10:58:05 : Epoch: 0009 | Step: 0100 / 342 | Loss: 2.8536 | Time: 5.9156
10:58:08 : Epoch: 0009 | Step: 0150 / 342 | Loss: 2.8524 | Time: 8.6423
10:58:11 : Epoch: 0009 | Step: 0200 / 342 | Loss: 2.8520 | Time: 11.7157
10:58:13 : Epoch: 0009 | Step: 0250 / 342 | Loss: 2.8517 | Time: 14.4248
10:58:16 : Epoch: 0009 | Step: 0300 / 342 | Loss: 2.8507 | Time: 17.1962


0it [00:00, ?it/s]

10:58:18 : Epoch: 0009 | Step: 0342 / 342 | Loss: 2.8499 | Time: 19.4889


74it [00:06, 10.73it/s]


10:58:26 : Current AUC: 0.590770, Best AUC: 0.590770

Current lr : 0.0006710886400000004
10:58:29 : Epoch: 0010 | Step: 0050 / 342 | Loss: 2.8510 | Time: 3.0293
10:58:31 : Epoch: 0010 | Step: 0100 / 342 | Loss: 2.8475 | Time: 5.9292
10:58:35 : Epoch: 0010 | Step: 0150 / 342 | Loss: 2.8485 | Time: 9.0585
10:58:37 : Epoch: 0010 | Step: 0200 / 342 | Loss: 2.8473 | Time: 11.8110
10:58:40 : Epoch: 0010 | Step: 0250 / 342 | Loss: 2.8466 | Time: 14.5351
10:58:43 : Epoch: 0010 | Step: 0300 / 342 | Loss: 2.8458 | Time: 17.8238


1it [00:00,  9.52it/s]

10:58:46 : Epoch: 0010 | Step: 0342 / 342 | Loss: 2.8449 | Time: 20.2277


74it [00:06, 11.33it/s]


10:58:52 : Current AUC: 0.590481, Best AUC: 0.590770

Current lr : 0.0005368709120000003
10:58:55 : Epoch: 0011 | Step: 0050 / 342 | Loss: 2.8358 | Time: 2.8568
10:58:58 : Epoch: 0011 | Step: 0100 / 342 | Loss: 2.8399 | Time: 5.5290
10:59:01 : Epoch: 0011 | Step: 0150 / 342 | Loss: 2.8401 | Time: 8.3322
10:59:04 : Epoch: 0011 | Step: 0200 / 342 | Loss: 2.8409 | Time: 11.0753
10:59:07 : Epoch: 0011 | Step: 0250 / 342 | Loss: 2.8408 | Time: 14.1087
10:59:09 : Epoch: 0011 | Step: 0300 / 342 | Loss: 2.8406 | Time: 16.9053


1it [00:00,  8.23it/s]

10:59:12 : Epoch: 0011 | Step: 0342 / 342 | Loss: 2.8406 | Time: 19.4058


74it [00:06, 11.11it/s]


10:59:19 : Current AUC: 0.595966, Best AUC: 0.595966

Current lr : 0.0004294967296000003
10:59:22 : Epoch: 0012 | Step: 0050 / 342 | Loss: 2.8407 | Time: 3.0310
10:59:25 : Epoch: 0012 | Step: 0100 / 342 | Loss: 2.8384 | Time: 5.8364
10:59:27 : Epoch: 0012 | Step: 0150 / 342 | Loss: 2.8372 | Time: 8.5370
10:59:30 : Epoch: 0012 | Step: 0200 / 342 | Loss: 2.8372 | Time: 11.3479
10:59:33 : Epoch: 0012 | Step: 0250 / 342 | Loss: 2.8374 | Time: 14.5130
10:59:36 : Epoch: 0012 | Step: 0300 / 342 | Loss: 2.8380 | Time: 17.6817


1it [00:00,  6.05it/s]

10:59:39 : Epoch: 0012 | Step: 0342 / 342 | Loss: 2.8380 | Time: 20.1785


74it [00:06, 11.12it/s]


10:59:46 : Current AUC: 0.594664, Best AUC: 0.595966

Current lr : 0.00034359738368000027
10:59:49 : Epoch: 0013 | Step: 0050 / 342 | Loss: 2.8335 | Time: 2.8714
10:59:52 : Epoch: 0013 | Step: 0100 / 342 | Loss: 2.8349 | Time: 6.2117
10:59:55 : Epoch: 0013 | Step: 0150 / 342 | Loss: 2.8337 | Time: 9.5067
10:59:58 : Epoch: 0013 | Step: 0200 / 342 | Loss: 2.8322 | Time: 12.2994
11:00:01 : Epoch: 0013 | Step: 0250 / 342 | Loss: 2.8311 | Time: 15.0660
11:00:04 : Epoch: 0013 | Step: 0300 / 342 | Loss: 2.8315 | Time: 17.8610


0it [00:00, ?it/s]

11:00:06 : Epoch: 0013 | Step: 0342 / 342 | Loss: 2.8314 | Time: 20.2683


74it [00:06, 10.93it/s]


11:00:13 : Current AUC: 0.595456, Best AUC: 0.595966

Current lr : 0.00027487790694400024
11:00:16 : Epoch: 0014 | Step: 0050 / 342 | Loss: 2.8341 | Time: 2.8625
11:00:19 : Epoch: 0014 | Step: 0100 / 342 | Loss: 2.8329 | Time: 5.7668
11:00:22 : Epoch: 0014 | Step: 0150 / 342 | Loss: 2.8336 | Time: 8.5747
11:00:24 : Epoch: 0014 | Step: 0200 / 342 | Loss: 2.8327 | Time: 11.2732
11:00:27 : Epoch: 0014 | Step: 0250 / 342 | Loss: 2.8319 | Time: 14.1037
11:00:30 : Epoch: 0014 | Step: 0300 / 342 | Loss: 2.8322 | Time: 17.0805


KeyboardInterrupt: 

In [2]:
import torch
x = torch.tensor([[1, 2, 3], [4, 9, 7]])

In [3]:
x

tensor([[1, 2, 3],
        [4, 9, 7]])

In [7]:
torch.sum(x, 1, keepdim=True)

tensor([[ 6],
        [20]])