In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as Data

import time, json, datetime 
from tqdm import tqdm

import numpy as np 
import pandas as pd 
from sklearn.metrics import log_loss, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Raise pandas' display limits to 500 rows / 500 columns so wide frames
# are shown in full instead of being truncated when inspected.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)

In [3]:
class DeepFM(nn.Module):
    """DeepFM model: first-order FM + second-order FM + DNN, summed and squashed.

    The three branches share the categorical inputs. The second-order FM
    embeddings are also the input of the DNN branch. The final output is
    ``sigmoid(fm_1st + fm_2nd + dnn)``, i.e. probabilities, not logits.
    """

    def __init__(self, cate_fea_nuniqs, nume_fea_size=0, emb_size=8,
                 hid_dims=(256, 128), num_classes=1, dropout=(0.2, 0.2)):
        """Build the FM and DNN sub-networks.

        Args:
            cate_fea_nuniqs: sized sequence with the vocabulary size of each
                categorical feature (one entry per categorical column).
            nume_fea_size: number of numeric features; 0 means the input is
                purely categorical.
            emb_size: dimension ``k`` of the second-order embedding vectors.
            hid_dims: widths of the fully connected hidden layers, e.g.
                ``[256, 128]`` is two layers with 256 and 128 units. May be
                empty, in which case the output layer is applied directly to
                the flattened embeddings.
            num_classes: width of the DNN output layer (1 for binary CTR).
            dropout: dropout probability applied after each hidden layer.
                Either one number (used for every hidden layer) or a sequence
                with at least ``len(hid_dims)`` entries; extra entries are
                ignored.

        Raises:
            ValueError: if ``dropout`` is a sequence shorter than ``hid_dims``.
        """
        super().__init__()

        # Copy the configuration so caller-owned sequences are never aliased.
        hid_dims = list(hid_dims)
        if isinstance(dropout, (int, float)):
            dropout = [float(dropout)] * len(hid_dims)
        else:
            dropout = list(dropout)
        if len(dropout) < len(hid_dims):
            raise ValueError(
                "dropout needs one rate per hidden layer: got {} rate(s) for {} layer(s)".format(
                    len(dropout), len(hid_dims)))

        self.cate_fea_size = len(cate_fea_nuniqs)  # number of categorical columns
        self.nume_fea_size = nume_fea_size

        # ---- FM, first order ----
        if self.nume_fea_size != 0:
            # Linear first-order term over the numeric features.
            self.fm_1st_order_dense = nn.Linear(self.nume_fea_size, 1)
        # One scalar weight per category value, per categorical column.
        self.fm_1st_order_sparse_emb = nn.ModuleList([
            nn.Embedding(voc_size, 1) for voc_size in cate_fea_nuniqs])

        # ---- FM, second order ----
        # One [voc_size, emb_size] latent table per categorical column.
        self.fm_2nd_order_sparse_emb = nn.ModuleList([
            nn.Embedding(voc_size, emb_size) for voc_size in cate_fea_nuniqs])

        # ---- DNN ----
        # Layer widths: flattened embeddings first, then the hidden layers.
        self.all_dims = [self.cate_fea_size * emb_size] + hid_dims
        # Projects the numeric features to the flattened-embedding width so the
        # two can be added. It is created even when nume_fea_size == 0 so the
        # set of parameter names (and thus saved checkpoints) stays the same.
        self.dense_linear = nn.Linear(self.nume_fea_size, self.cate_fea_size * emb_size)
        self.relu = nn.ReLU()
        # Hidden layers are registered under numbered attribute names.
        for i in range(1, len(self.all_dims)):
            setattr(self, 'linear_' + str(i), nn.Linear(self.all_dims[i - 1], self.all_dims[i]))
            setattr(self, 'batchNorm_' + str(i), nn.BatchNorm1d(self.all_dims[i]))
            setattr(self, 'activation_' + str(i), nn.ReLU())
            setattr(self, 'dropout_' + str(i), nn.Dropout(dropout[i - 1]))
        # Output layer; all_dims[-1] equals hid_dims[-1] whenever hid_dims is
        # non-empty, and falls back to the embedding width otherwise.
        self.dnn_linear = nn.Linear(self.all_dims[-1], num_classes)
        self.sigmoid = nn.Sigmoid()

    def forward(self, X_sparse, X_dense=None):
        """Compute click probabilities.

        Args:
            X_sparse: integer category indices, indexed as
                ``[batch, cate_fea_size]``.
            X_dense: optional numeric features ``[batch, nume_fea_size]``.
                ``None`` (or a tensor with zero feature columns when the model
                was built with ``nume_fea_size == 0``) disables the numeric
                branches.

        Returns:
            Tensor of sigmoid outputs with shape ``[batch, num_classes]``.

        Raises:
            ValueError: if numeric features are supplied to a model that was
                built with ``nume_fea_size == 0``.
        """
        use_dense = X_dense is not None
        if use_dense and self.nume_fea_size == 0:
            # The model has no first-order numeric layer. An empty feature
            # matrix simply means "no numeric features"; anything else is a
            # configuration mismatch.
            if X_dense.shape[-1] != 0:
                raise ValueError(
                    "model was built with nume_fea_size=0 but X_dense has {} feature column(s)".format(
                        X_dense.shape[-1]))
            use_dense = False

        # ---- FM, first order ----
        fm_1st_sparse_res = [emb(X_sparse[:, i].unsqueeze(1)).view(-1, 1)
                             for i, emb in enumerate(self.fm_1st_order_sparse_emb)]
        fm_1st_sparse_res = torch.cat(fm_1st_sparse_res, dim=1)            # [bs, cate_fea_size]
        fm_1st_part = torch.sum(fm_1st_sparse_res, 1, keepdim=True)        # [bs, 1]
        if use_dense:
            fm_1st_part = fm_1st_part + self.fm_1st_order_dense(X_dense)   # [bs, 1]

        # ---- FM, second order ----
        fm_2nd_order_res = [emb(X_sparse[:, i].unsqueeze(1))
                            for i, emb in enumerate(self.fm_2nd_order_sparse_emb)]
        fm_2nd_concat_1d = torch.cat(fm_2nd_order_res, dim=1)  # [bs, n, emb_size], n = cate_fea_size

        # Pairwise interactions via 0.5 * ((sum v)^2 - sum(v^2)).
        sum_embed = torch.sum(fm_2nd_concat_1d, 1)                 # [bs, emb_size]
        square_sum_embed = sum_embed * sum_embed                   # square of the sum
        square_embed = fm_2nd_concat_1d * fm_2nd_concat_1d         # [bs, n, emb_size]
        sum_square_embed = torch.sum(square_embed, 1)              # sum of the squares
        sub = (square_sum_embed - sum_square_embed) * 0.5          # [bs, emb_size]
        fm_2nd_part = torch.sum(sub, 1, keepdim=True)              # [bs, 1]

        # ---- DNN ----
        dnn_out = torch.flatten(fm_2nd_concat_1d, 1)               # [bs, n * emb_size]
        if use_dense:
            dense_out = self.relu(self.dense_linear(X_dense))      # [bs, n * emb_size]
            dnn_out = dnn_out + dense_out

        for i in range(1, len(self.all_dims)):
            dnn_out = getattr(self, 'linear_' + str(i))(dnn_out)
            dnn_out = getattr(self, 'batchNorm_' + str(i))(dnn_out)
            dnn_out = getattr(self, 'activation_' + str(i))(dnn_out)
            dnn_out = getattr(self, 'dropout_' + str(i))(dnn_out)

        dnn_out = self.dnn_linear(dnn_out)                         # [bs, num_classes]
        out = fm_1st_part + fm_2nd_part + dnn_out
        return self.sigmoid(out)

## 数据预处理

In [None]:
# Load the Criteo sample used throughout the notebook.
data = pd.read_csv('data/criteo_sample_50w.csv')

# Split the column names by prefix: names starting with "I" are handled as
# numeric features, names starting with "C" as categorical features.
dense_features = [name for name in data.columns.tolist() if name[0] == "I"]
sparse_features = [name for name in data.columns.tolist() if name[0] == "C"]

# Fill missing values: a sentinel string for categorical columns, 0 for numeric ones.
data[sparse_features] = data[sparse_features].fillna('-10086')
data[dense_features] = data[dense_features].fillna(0)
target = ['label']

# Categorical features: replace every distinct value by an integer id.
# A fresh LabelEncoder is fitted per column on the full frame.
for feat in sparse_features:
    lbe = LabelEncoder()
    data[feat] = lbe.fit_transform(data[feat])

# Numeric features: standardise each column with its own mean and standard
# deviation; the small epsilon guards against division by zero for constant
# columns. tqdm only adds a progress bar around the loop.
for feat in tqdm(dense_features):
    mean = data[feat].mean()
    std = data[feat].std()
    data[feat] = (data[feat] - mean) / (std + 1e-12)

100%|██████████| 13/13 [00:00<00:00, 94.81it/s]


In [24]:
# Hold out 20% of the rows for validation. The fixed random_state makes the
# split identical on every run.
train, valid = train_test_split(data, test_size=0.2, random_state=2020)

# Training tensors: categorical ids as int64, numeric features and the
# 'label' column as float32.
train_dataset = Data.TensorDataset(
    torch.LongTensor(train[sparse_features].values),
    torch.FloatTensor(train[dense_features].values),
    torch.FloatTensor(train['label'].values),
)
# Batches are re-shuffled at the start of every epoch.
train_loader = Data.DataLoader(train_dataset, batch_size=2048, shuffle=True)

# The validation split gets the same conversion, but is iterated in order.
valid_dataset = Data.TensorDataset(
    torch.LongTensor(valid[sparse_features].values),
    torch.FloatTensor(valid[dense_features].values),
    torch.FloatTensor(valid['label'].values),
)
valid_loader = Data.DataLoader(valid_dataset, batch_size=4096, shuffle=False)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Number of distinct values in each categorical column, in column order;
# these become the embedding vocabulary sizes.
cate_fea_nuniqs = data[sparse_features].nunique().tolist()

# Build the model with one embedding table per categorical column and
# len(dense_features) numeric inputs, then move its parameters to `device`.
model = DeepFM(cate_fea_nuniqs, nume_fea_size=len(dense_features))
model.to(device)

# Binary cross-entropy on the model's sigmoid outputs.
loss_fcn = nn.BCELoss().to(device)

# Adam with learning rate 0.005; weight_decay is the L2 regularisation factor.
optimizer = optim.Adam(model.parameters(), lr=0.005, weight_decay=0.001)

# Multiply the learning rate by 0.8 each time scheduler.step() is called
# (step_size=1).
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.8)



# 打印模型参数
def get_parameter_number(model):
    """Count the scalar parameters of `model`.

    Returns a dict with two entries: 'Total' is the number of elements over
    all tensors yielded by ``model.parameters()``, and 'Trainable' counts only
    those tensors whose ``requires_grad`` flag is set.
    """
    total_num = 0
    trainable_num = 0
    for param in model.parameters():
        size = param.numel()
        total_num += size
        if param.requires_grad:
            trainable_num += size
    return {'Total': total_num, 'Trainable': trainable_num}
# Show the dict of total / trainable parameter counts for `model`.
print(get_parameter_number(model))

# 定义日志（data文件夹下，同级目录新建一个data文件夹）
def write_log(w):
    """Print a timestamped message and append it to today's log file.

    The line has the form "HH:MM:SS : <w>". It is echoed to stdout and
    appended to "data/<MMDD>_deepfm.log"; the "data" directory must already
    exist.
    """
    day_stamp = datetime.date.today().strftime('%m%d')
    clock = datetime.datetime.now().strftime('%H:%M:%S')
    log_path = f"data/{day_stamp}_deepfm.log"
    info = f"{clock} : {w}"
    print(info)
    with open(log_path, 'a') as log_file:
        print(info, file=log_file)

{'Total': 7037251, 'Trainable': 7037251}


In [6]:
def train_and_eval(model, train_loader, valid_loader, epochs, device):
    """Train `model` for `epochs` epochs and validate by ROC-AUC after each one.

    Besides its arguments, the function uses the notebook-level objects
    `optimizer`, `scheduler`, `loss_fcn` and `write_log`. Whenever the
    validation AUC improves on the best value seen so far, the model's
    state_dict is saved to "data/deepfm_best.pth". Nothing is returned.
    """
    best_auc = 0.0
    steps_per_epoch = len(train_loader)

    for epoch in range(1, epochs + 1):
        # ---- training ----
        # train() enables the training-time behaviour of dropout / batch norm.
        model.train()
        current_lr = optimizer.state_dict()['param_groups'][0]['lr']
        print("Current lr : {}".format(current_lr))
        write_log('Epoch: {}'.format(epoch))
        train_loss_sum = 0.0
        start_time = time.time()

        for step, batch in enumerate(train_loader, start=1):
            cate_fea, nume_fea, label = batch[:3]
            cate_fea = cate_fea.to(device)
            nume_fea = nume_fea.to(device)
            label = label.float().to(device)

            pred = model(cate_fea, nume_fea).view(-1)
            loss = loss_fcn(pred, label)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            train_loss_sum += loss.cpu().item()
            # Log the running mean loss every 50 steps and on the last step.
            if step % 50 == 0 or step == steps_per_epoch:
                write_log("Epoch {:04d} | Step {:04d} / {} | Loss {:.4f} | Time {:.4f}".format(
                          epoch, step, steps_per_epoch, train_loss_sum / step, time.time() - start_time))
        # One scheduler step per epoch.
        scheduler.step()

        # ---- validation ----
        model.eval()
        valid_labels, valid_preds = [], []
        with torch.no_grad():
            for idx, batch in tqdm(enumerate(valid_loader)):
                cate_fea, nume_fea, label = batch[:3]
                scores = model(cate_fea.to(device), nume_fea.to(device))
                valid_preds.extend(scores.reshape(-1).detach().cpu().numpy().tolist())
                valid_labels.extend(label.cpu().numpy().tolist())

        cur_auc = roc_auc_score(valid_labels, valid_preds)
        # Keep a checkpoint of the best-scoring epoch only.
        if cur_auc > best_auc:
            best_auc = cur_auc
            torch.save(model.state_dict(), "data/deepfm_best.pth")
        write_log('Current AUC: %.6f, Best AUC: %.6f\n' % (cur_auc, best_auc))
        

# Run 30 epochs of training with per-epoch validation; progress goes to
# stdout and to the log file written by write_log.
train_and_eval(model, train_loader, valid_loader, 30, device)

Current lr : 0.005
22:00:22 : Epoch: 1
22:00:26 : Epoch 0001 | Step 0050 / 196 | Loss 18.9375 | Time 3.7620
22:00:29 : Epoch 0001 | Step 0100 / 196 | Loss 16.4242 | Time 7.0448
22:00:32 : Epoch 0001 | Step 0150 / 196 | Loss 14.1202 | Time 10.5061


2it [00:00, 18.02it/s]

22:00:35 : Epoch 0001 | Step 0196 / 196 | Loss 12.0842 | Time 13.5648


25it [00:01, 13.57it/s]


22:00:37 : Current AUC: 0.666958, Best AUC: 0.666958

Current lr : 0.004
22:00:37 : Epoch: 2
22:00:41 : Epoch 0002 | Step 0050 / 196 | Loss 2.8049 | Time 3.3196
22:00:44 : Epoch 0002 | Step 0100 / 196 | Loss 2.2271 | Time 6.5872
22:00:47 : Epoch 0002 | Step 0150 / 196 | Loss 1.8805 | Time 9.9019


1it [00:00,  8.40it/s]

22:00:50 : Epoch 0002 | Step 0196 / 196 | Loss 1.6633 | Time 12.7728


25it [00:01, 13.99it/s]


22:00:52 : Current AUC: 0.672575, Best AUC: 0.672575

Current lr : 0.0032
22:00:52 : Epoch: 3
22:00:55 : Epoch 0003 | Step 0050 / 196 | Loss 0.7761 | Time 3.3309
22:00:59 : Epoch 0003 | Step 0100 / 196 | Loss 0.7444 | Time 6.6291
22:01:02 : Epoch 0003 | Step 0150 / 196 | Loss 0.7173 | Time 10.0131


1it [00:00,  7.93it/s]

22:01:05 : Epoch 0003 | Step 0196 / 196 | Loss 0.6945 | Time 12.9664


25it [00:01, 13.42it/s]


22:01:07 : Current AUC: 0.714427, Best AUC: 0.714427

Current lr : 0.00256
22:01:07 : Epoch: 4
22:01:10 : Epoch 0004 | Step 0050 / 196 | Loss 0.5783 | Time 3.2622
22:01:14 : Epoch 0004 | Step 0100 / 196 | Loss 0.5687 | Time 6.5675
22:01:17 : Epoch 0004 | Step 0150 / 196 | Loss 0.5596 | Time 9.9286


2it [00:00, 11.12it/s]

22:01:20 : Epoch 0004 | Step 0196 / 196 | Loss 0.5539 | Time 12.8720


25it [00:01, 14.06it/s]


22:01:22 : Current AUC: 0.735579, Best AUC: 0.735579

Current lr : 0.0020480000000000003
22:01:22 : Epoch: 5
22:01:25 : Epoch 0005 | Step 0050 / 196 | Loss 0.5141 | Time 3.3699
22:01:28 : Epoch 0005 | Step 0100 / 196 | Loss 0.5132 | Time 6.5798
22:01:32 : Epoch 0005 | Step 0150 / 196 | Loss 0.5130 | Time 9.7945


2it [00:00, 17.43it/s]

22:01:35 : Epoch 0005 | Step 0196 / 196 | Loss 0.5110 | Time 12.8358


25it [00:01, 14.07it/s]


22:01:37 : Current AUC: 0.749549, Best AUC: 0.749549

Current lr : 0.0016384000000000004
22:01:37 : Epoch: 6
22:01:40 : Epoch 0006 | Step 0050 / 196 | Loss 0.4903 | Time 3.4016
22:01:43 : Epoch 0006 | Step 0100 / 196 | Loss 0.4893 | Time 6.7074
22:01:47 : Epoch 0006 | Step 0150 / 196 | Loss 0.4912 | Time 10.1295


2it [00:00, 17.77it/s]

22:01:50 : Epoch 0006 | Step 0196 / 196 | Loss 0.4918 | Time 13.3024


25it [00:01, 13.98it/s]


22:01:52 : Current AUC: 0.755648, Best AUC: 0.755648

Current lr : 0.0013107200000000005
22:01:52 : Epoch: 7
22:01:55 : Epoch 0007 | Step 0050 / 196 | Loss 0.4815 | Time 3.3421
22:01:58 : Epoch 0007 | Step 0100 / 196 | Loss 0.4815 | Time 6.6541
22:02:02 : Epoch 0007 | Step 0150 / 196 | Loss 0.4824 | Time 9.9956


2it [00:00, 14.47it/s]

22:02:05 : Epoch 0007 | Step 0196 / 196 | Loss 0.4819 | Time 13.1224


25it [00:01, 13.52it/s]


22:02:07 : Current AUC: 0.762962, Best AUC: 0.762962

Current lr : 0.0010485760000000005
22:02:07 : Epoch: 8
22:02:10 : Epoch 0008 | Step 0050 / 196 | Loss 0.4738 | Time 3.2552
22:02:13 : Epoch 0008 | Step 0100 / 196 | Loss 0.4745 | Time 6.5406
22:02:17 : Epoch 0008 | Step 0150 / 196 | Loss 0.4752 | Time 10.0268


1it [00:00,  7.88it/s]

22:02:20 : Epoch 0008 | Step 0196 / 196 | Loss 0.4758 | Time 12.9783


25it [00:01, 13.39it/s]


22:02:22 : Current AUC: 0.763636, Best AUC: 0.763636

Current lr : 0.0008388608000000005
22:02:22 : Epoch: 9
22:02:25 : Epoch 0009 | Step 0050 / 196 | Loss 0.4689 | Time 3.3143
22:02:28 : Epoch 0009 | Step 0100 / 196 | Loss 0.4692 | Time 6.5835
22:02:32 : Epoch 0009 | Step 0150 / 196 | Loss 0.4697 | Time 9.9682


1it [00:00,  7.95it/s]

22:02:35 : Epoch 0009 | Step 0196 / 196 | Loss 0.4707 | Time 12.9462


25it [00:01, 13.56it/s]


22:02:37 : Current AUC: 0.769275, Best AUC: 0.769275

Current lr : 0.0006710886400000004
22:02:37 : Epoch: 10
22:02:40 : Epoch 0010 | Step 0050 / 196 | Loss 0.4651 | Time 3.3143
22:02:43 : Epoch 0010 | Step 0100 / 196 | Loss 0.4666 | Time 6.5508
22:02:47 : Epoch 0010 | Step 0150 / 196 | Loss 0.4658 | Time 9.8890


2it [00:00, 11.07it/s]

22:02:50 : Epoch 0010 | Step 0196 / 196 | Loss 0.4661 | Time 12.9321


25it [00:01, 13.31it/s]


22:02:52 : Current AUC: 0.770835, Best AUC: 0.770835

Current lr : 0.0005368709120000003
22:02:52 : Epoch: 11
22:02:55 : Epoch 0011 | Step 0050 / 196 | Loss 0.4568 | Time 3.3484
22:02:58 : Epoch 0011 | Step 0100 / 196 | Loss 0.4595 | Time 6.6525
22:03:02 : Epoch 0011 | Step 0150 / 196 | Loss 0.4612 | Time 10.0327


2it [00:00, 10.60it/s]

22:03:05 : Epoch 0011 | Step 0196 / 196 | Loss 0.4620 | Time 13.0543


25it [00:01, 14.02it/s]


22:03:07 : Current AUC: 0.774584, Best AUC: 0.774584

Current lr : 0.0004294967296000003
22:03:07 : Epoch: 12
22:03:10 : Epoch 0012 | Step 0050 / 196 | Loss 0.4523 | Time 3.3728
22:03:14 : Epoch 0012 | Step 0100 / 196 | Loss 0.4555 | Time 6.7032
22:03:17 : Epoch 0012 | Step 0150 / 196 | Loss 0.4567 | Time 10.0225


2it [00:00, 18.16it/s]

22:03:20 : Epoch 0012 | Step 0196 / 196 | Loss 0.4579 | Time 13.0616


25it [00:01, 13.73it/s]


22:03:22 : Current AUC: 0.776030, Best AUC: 0.776030

Current lr : 0.00034359738368000027
22:03:22 : Epoch: 13
22:03:25 : Epoch 0013 | Step 0050 / 196 | Loss 0.4462 | Time 3.3467
22:03:29 : Epoch 0013 | Step 0100 / 196 | Loss 0.4497 | Time 6.6925
22:03:32 : Epoch 0013 | Step 0150 / 196 | Loss 0.4520 | Time 10.0104


2it [00:00, 17.34it/s]

22:03:35 : Epoch 0013 | Step 0196 / 196 | Loss 0.4528 | Time 13.1506


25it [00:01, 14.16it/s]


22:03:37 : Current AUC: 0.777847, Best AUC: 0.777847

Current lr : 0.00027487790694400024
22:03:37 : Epoch: 14
22:03:40 : Epoch 0014 | Step 0050 / 196 | Loss 0.4428 | Time 3.3019
22:03:44 : Epoch 0014 | Step 0100 / 196 | Loss 0.4434 | Time 6.6812
22:03:47 : Epoch 0014 | Step 0150 / 196 | Loss 0.4461 | Time 9.9794


2it [00:00, 16.54it/s]

22:03:50 : Epoch 0014 | Step 0196 / 196 | Loss 0.4473 | Time 12.9896


25it [00:01, 13.40it/s]


22:03:52 : Current AUC: 0.778540, Best AUC: 0.778540

Current lr : 0.0002199023255552002
22:03:52 : Epoch: 15
22:03:55 : Epoch 0015 | Step 0050 / 196 | Loss 0.4338 | Time 3.3255
22:03:59 : Epoch 0015 | Step 0100 / 196 | Loss 0.4347 | Time 6.6028
22:04:02 : Epoch 0015 | Step 0150 / 196 | Loss 0.4376 | Time 10.0091


1it [00:00,  8.15it/s]

22:04:05 : Epoch 0015 | Step 0196 / 196 | Loss 0.4392 | Time 12.9490


25it [00:01, 13.52it/s]


22:04:07 : Current AUC: 0.777531, Best AUC: 0.778540

Current lr : 0.00017592186044416018
22:04:07 : Epoch: 16
22:04:10 : Epoch 0016 | Step 0050 / 196 | Loss 0.4253 | Time 3.2910
22:04:13 : Epoch 0016 | Step 0100 / 196 | Loss 0.4265 | Time 6.6439
22:04:17 : Epoch 0016 | Step 0150 / 196 | Loss 0.4283 | Time 10.0777


1it [00:00,  7.84it/s]

22:04:20 : Epoch 0016 | Step 0196 / 196 | Loss 0.4297 | Time 13.0034


25it [00:01, 13.39it/s]


22:04:22 : Current AUC: 0.776154, Best AUC: 0.778540

Current lr : 0.00014073748835532815
22:04:22 : Epoch: 17
22:04:25 : Epoch 0017 | Step 0050 / 196 | Loss 0.4045 | Time 3.2766
22:04:28 : Epoch 0017 | Step 0100 / 196 | Loss 0.4071 | Time 6.5136
22:04:31 : Epoch 0017 | Step 0150 / 196 | Loss 0.4100 | Time 9.7726


2it [00:00, 10.37it/s]

22:04:34 : Epoch 0017 | Step 0196 / 196 | Loss 0.4132 | Time 12.7591


25it [00:01, 13.82it/s]


22:04:36 : Current AUC: 0.773039, Best AUC: 0.778540

Current lr : 0.00011258999068426252
22:04:36 : Epoch: 18
22:04:40 : Epoch 0018 | Step 0050 / 196 | Loss 0.3799 | Time 3.3956
22:04:43 : Epoch 0018 | Step 0100 / 196 | Loss 0.3816 | Time 6.6887
22:04:46 : Epoch 0018 | Step 0150 / 196 | Loss 0.3844 | Time 9.9062


2it [00:00, 14.63it/s]

22:04:49 : Epoch 0018 | Step 0196 / 196 | Loss 0.3868 | Time 12.9281


25it [00:01, 13.54it/s]


22:04:51 : Current AUC: 0.766611, Best AUC: 0.778540

Current lr : 9.007199254741002e-05
22:04:51 : Epoch: 19
22:04:55 : Epoch 0019 | Step 0050 / 196 | Loss 0.3517 | Time 3.4556
22:04:58 : Epoch 0019 | Step 0100 / 196 | Loss 0.3514 | Time 6.7221
22:05:01 : Epoch 0019 | Step 0150 / 196 | Loss 0.3535 | Time 10.0510


2it [00:00, 17.50it/s]

22:05:04 : Epoch 0019 | Step 0196 / 196 | Loss 0.3562 | Time 13.1793


25it [00:01, 13.84it/s]


22:05:06 : Current AUC: 0.759299, Best AUC: 0.778540

Current lr : 7.205759403792802e-05
22:05:06 : Epoch: 20
22:05:10 : Epoch 0020 | Step 0050 / 196 | Loss 0.3275 | Time 3.3265
22:05:13 : Epoch 0020 | Step 0100 / 196 | Loss 0.3261 | Time 6.7422
22:05:16 : Epoch 0020 | Step 0150 / 196 | Loss 0.3286 | Time 10.0633


2it [00:00, 15.53it/s]

22:05:19 : Epoch 0020 | Step 0196 / 196 | Loss 0.3317 | Time 13.1404


25it [00:01, 12.99it/s]


22:05:21 : Current AUC: 0.750790, Best AUC: 0.778540

Current lr : 5.764607523034242e-05
22:05:21 : Epoch: 21
22:05:25 : Epoch 0021 | Step 0050 / 196 | Loss 0.3085 | Time 3.4164
22:05:28 : Epoch 0021 | Step 0100 / 196 | Loss 0.3119 | Time 6.7368
22:05:31 : Epoch 0021 | Step 0150 / 196 | Loss 0.3120 | Time 10.1011


1it [00:00,  8.01it/s]

22:05:35 : Epoch 0021 | Step 0196 / 196 | Loss 0.3129 | Time 13.1960


25it [00:01, 13.68it/s]


22:05:36 : Current AUC: 0.746840, Best AUC: 0.778540

Current lr : 4.611686018427394e-05
22:05:36 : Epoch: 22
22:05:40 : Epoch 0022 | Step 0050 / 196 | Loss 0.2964 | Time 3.3325
22:05:43 : Epoch 0022 | Step 0100 / 196 | Loss 0.2982 | Time 6.5885
22:05:46 : Epoch 0022 | Step 0150 / 196 | Loss 0.2998 | Time 9.8578


2it [00:00, 11.54it/s]

22:05:49 : Epoch 0022 | Step 0196 / 196 | Loss 0.3009 | Time 12.8242


25it [00:01, 13.33it/s]


22:05:51 : Current AUC: 0.739279, Best AUC: 0.778540

Current lr : 3.6893488147419155e-05
22:05:51 : Epoch: 23
22:05:55 : Epoch 0023 | Step 0050 / 196 | Loss 0.2870 | Time 3.3671
22:05:58 : Epoch 0023 | Step 0100 / 196 | Loss 0.2902 | Time 6.6727
22:06:01 : Epoch 0023 | Step 0150 / 196 | Loss 0.2902 | Time 9.9915


2it [00:00, 15.32it/s]

22:06:04 : Epoch 0023 | Step 0196 / 196 | Loss 0.2918 | Time 13.1385


25it [00:01, 13.80it/s]


22:06:06 : Current AUC: 0.741273, Best AUC: 0.778540

Current lr : 2.9514790517935324e-05
22:06:06 : Epoch: 24
22:06:10 : Epoch 0024 | Step 0050 / 196 | Loss 0.2817 | Time 3.3512
22:06:13 : Epoch 0024 | Step 0100 / 196 | Loss 0.2824 | Time 6.6314
22:06:16 : Epoch 0024 | Step 0150 / 196 | Loss 0.2837 | Time 10.0089


2it [00:00, 17.81it/s]

22:06:19 : Epoch 0024 | Step 0196 / 196 | Loss 0.2853 | Time 13.0348


25it [00:01, 13.89it/s]


22:06:21 : Current AUC: 0.737856, Best AUC: 0.778540

Current lr : 2.361183241434826e-05
22:06:21 : Epoch: 25
22:06:24 : Epoch 0025 | Step 0050 / 196 | Loss 0.2781 | Time 3.2894
22:06:28 : Epoch 0025 | Step 0100 / 196 | Loss 0.2776 | Time 6.7927
22:06:31 : Epoch 0025 | Step 0150 / 196 | Loss 0.2795 | Time 10.0497


2it [00:00, 17.01it/s]

22:06:34 : Epoch 0025 | Step 0196 / 196 | Loss 0.2796 | Time 13.0914


25it [00:01, 13.80it/s]


22:06:36 : Current AUC: 0.736271, Best AUC: 0.778540

Current lr : 1.888946593147861e-05
22:06:36 : Epoch: 26
22:06:39 : Epoch 0026 | Step 0050 / 196 | Loss 0.2715 | Time 3.2947
22:06:43 : Epoch 0026 | Step 0100 / 196 | Loss 0.2746 | Time 6.5905
22:06:46 : Epoch 0026 | Step 0150 / 196 | Loss 0.2749 | Time 9.9500


1it [00:00,  8.12it/s]

22:06:49 : Epoch 0026 | Step 0196 / 196 | Loss 0.2764 | Time 12.9666


25it [00:01, 13.15it/s]


22:06:51 : Current AUC: 0.735598, Best AUC: 0.778540

Current lr : 1.5111572745182888e-05
22:06:51 : Epoch: 27
22:06:54 : Epoch 0027 | Step 0050 / 196 | Loss 0.2739 | Time 3.3461
22:06:58 : Epoch 0027 | Step 0100 / 196 | Loss 0.2726 | Time 6.7172
22:07:01 : Epoch 0027 | Step 0150 / 196 | Loss 0.2726 | Time 10.0863


2it [00:00, 10.60it/s]

22:07:04 : Epoch 0027 | Step 0196 / 196 | Loss 0.2738 | Time 13.1563


25it [00:01, 13.65it/s]


22:07:06 : Current AUC: 0.734483, Best AUC: 0.778540

Current lr : 1.2089258196146311e-05
22:07:06 : Epoch: 28
22:07:09 : Epoch 0028 | Step 0050 / 196 | Loss 0.2683 | Time 3.3470
22:07:13 : Epoch 0028 | Step 0100 / 196 | Loss 0.2702 | Time 6.6261
22:07:16 : Epoch 0028 | Step 0150 / 196 | Loss 0.2721 | Time 9.9179


2it [00:00, 15.41it/s]

22:07:19 : Epoch 0028 | Step 0196 / 196 | Loss 0.2723 | Time 12.9251


25it [00:01, 13.55it/s]


22:07:21 : Current AUC: 0.734460, Best AUC: 0.778540

Current lr : 9.67140655691705e-06
22:07:21 : Epoch: 29
22:07:24 : Epoch 0029 | Step 0050 / 196 | Loss 0.2689 | Time 3.2952
22:07:27 : Epoch 0029 | Step 0100 / 196 | Loss 0.2688 | Time 6.6833
22:07:31 : Epoch 0029 | Step 0150 / 196 | Loss 0.2697 | Time 9.9066


2it [00:00, 17.57it/s]

22:07:34 : Epoch 0029 | Step 0196 / 196 | Loss 0.2695 | Time 13.0595


25it [00:01, 14.13it/s]


22:07:36 : Current AUC: 0.733417, Best AUC: 0.778540

Current lr : 7.73712524553364e-06
22:07:36 : Epoch: 30
22:07:39 : Epoch 0030 | Step 0050 / 196 | Loss 0.2684 | Time 3.2587
22:07:42 : Epoch 0030 | Step 0100 / 196 | Loss 0.2675 | Time 6.5317
22:07:45 : Epoch 0030 | Step 0150 / 196 | Loss 0.2683 | Time 9.7743


2it [00:00, 17.58it/s]

22:07:49 : Epoch 0030 | Step 0196 / 196 | Loss 0.2685 | Time 12.8603


25it [00:01, 13.96it/s]

22:07:50 : Current AUC: 0.733560, Best AUC: 0.778540




