In [2]:
import random, os
import torch
import pandas as pd
import numpy as np
def reset_seeds(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True

In [3]:
SEED = 42
DATA_PATH = "../data/"

In [4]:
train = pd.read_csv(f"{DATA_PATH}campaign_train.csv")
test = pd.read_csv(f"{DATA_PATH}campaign_test.csv")
train.shape, test.shape

((1344, 27), (896, 26))

In [5]:
train.head()

Unnamed: 0,ID,출생연도,고객_교육수준,고객_결혼여부,고객_소득,고객_자녀수,고객_청소년수,고객_가입날짜,고객_최신구매일_경과기간,고객_와인_구매금액,...,고객_카탈로그_통한_구매횟수,고객_매장방문_구매횟수,고객_지난달_회사사이트_방문횟수,캠페인1_수락여부,캠페인2_수락여부,캠페인3_수락여부,캠페인4_수락여부,캠페인5_수락여부,불만제기,target
0,train_0,1969,박사,기혼,30396.0,1,0,2014-04-30,22,15,...,1,2,7,0,0,1,0,0,0,1
1,train_1,1967,박사,기혼,36947.0,1,1,2012-08-07,49,88,...,0,4,9,0,0,0,0,0,0,0
2,train_2,1949,석사,사별,47570.0,1,1,2013-05-29,3,67,...,2,2,7,0,0,0,0,0,0,1
3,train_3,1976,석사,기혼,81929.0,1,0,2012-09-29,60,1486,...,4,10,6,0,0,1,0,1,0,1
4,train_4,1982,고졸,기혼,57937.0,0,1,2014-02-16,56,261,...,4,9,3,0,0,0,0,0,0,0


In [6]:
train_ft = train.iloc[:, 1:-1].copy()
test_ft = test.iloc[:, 1:].copy()

train_ft.shape , test_ft.shape

((1344, 25), (896, 25))

# 결측치 처리

In [7]:
mask = train_ft.isnull().sum() > 0
cols = train_ft.isnull().sum()[mask].index
cols

Index(['고객_교육수준', '고객_결혼여부', '고객_소득'], dtype='object')

In [8]:
e_level, m_status  = train_ft["고객_교육수준"].mode()[0], train_ft["고객_결혼여부"].mode()[0]
s_mean = train_ft["고객_소득"].mean()

In [9]:
train_ft["고객_교육수준"] = train_ft["고객_교육수준"].fillna(e_level)
train_ft["고객_결혼여부"] = train_ft["고객_결혼여부"].fillna(m_status)
train_ft["고객_소득"] = train_ft["고객_소득"].fillna(s_mean)

test_ft["고객_교육수준"] = test_ft["고객_교육수준"].fillna(e_level)
test_ft["고객_결혼여부"] = test_ft["고객_결혼여부"].fillna(m_status)
test_ft["고객_소득"] = test_ft["고객_소득"].fillna(s_mean)

In [10]:
train_ft.isnull().sum().sum(), test_ft.isnull().sum().sum()

(0, 0)

In [11]:
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler

In [12]:
train_ft = train_ft.drop(columns=["고객_가입날짜"])
test_ft = test_ft.drop(columns=["고객_가입날짜"])

In [13]:
cols = train_ft.select_dtypes("object").columns
cols

Index(['고객_교육수준', '고객_결혼여부'], dtype='object')

In [14]:
enc = OneHotEncoder(handle_unknown="ignore")
train_ft[enc.get_feature_names_out()] = enc.fit_transform(train_ft[cols]).A

In [15]:
test_ft[enc.get_feature_names_out()] = enc.transform(test_ft[cols]).A

In [16]:
train_ft = train_ft.drop(columns=cols)
test_ft = test_ft.drop(columns=cols)
train_ft.shape, test_ft.shape

((1344, 32), (896, 32))

In [17]:
scaler = MinMaxScaler()
train_ft = scaler.fit_transform(train_ft)
test_ft = scaler.transform(test_ft)

In [18]:
train_ft

array([[0.71875   , 0.04311092, 0.5       , ..., 0.        , 0.        ,
        0.        ],
       [0.69791667, 0.05296299, 0.5       , ..., 0.        , 0.        ,
        0.        ],
       [0.51041667, 0.06893897, 0.5       , ..., 0.        , 1.        ,
        0.        ],
       ...,
       [0.69791667, 0.05069811, 0.5       , ..., 0.        , 0.        ,
        0.        ],
       [0.86458333, 0.05272688, 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.84375   , 0.05787174, 0.5       , ..., 0.        , 0.        ,
        0.        ]])

In [19]:
target = train["target"].to_numpy().reshape(-1, 1).astype("float32")
target.shape

(1344, 1)

In [20]:
from datasets import Dataset

In [21]:
train_dict = {"x": train_ft, "label": target}
train_dt = Dataset.from_dict(train_dict)
train_dt

Dataset({
    features: ['x', 'label'],
    num_rows: 1344
})

In [22]:
train_dt.select([0,1,2])

Dataset({
    features: ['x', 'label'],
    num_rows: 3
})

# 모델 클래스

In [23]:
class Net(torch.nn.Module):
    def __init__(self, in_features, hidden_size=16):
        super().__init__()
        self.seq = torch.nn.Sequential(
            torch.nn.Linear(in_features, hidden_size),
            torch.nn.ReLU(),
            torch.nn.Dropout(0.3),
            torch.nn.Linear(hidden_size, hidden_size // 2),
            torch.nn.ReLU(),
            torch.nn.Linear(hidden_size // 2, 1),
        )
        self.loss_fn = torch.nn.BCEWithLogitsLoss()

    def forward(self, x, labels=None):
        result = {"logits": self.seq(x)} # 예측값

        if labels is not None:
            result["loss"] = self.loss_fn(result["logits"], labels)

        return result

In [24]:
x = torch.Tensor(train_ft[:2])
x.shape

torch.Size([2, 32])

In [25]:
model = Net(train_ft.shape[1])
model(x, torch.Tensor(target[:2]))

{'logits': tensor([[-0.0416],
         [-0.0211]], grad_fn=<AddmmBackward0>),
 'loss': tensor(0.6984, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)}

# compute_metrics 파라미터에 전달할 콜백함수

In [26]:
from sklearn.metrics import roc_auc_score

def compute_metrics(eval_pred):
    true = eval_pred.label_ids
    pred = eval_pred.predictions
    sig = torch.nn.Sigmoid()
    pred = sig(torch.Tensor(pred)).numpy()
    return {"auc": roc_auc_score(true, pred)}

In [27]:
from transformers import TrainingArguments, Trainer

In [32]:
train_args_params = {
    "output_dir": "../output",
    "per_device_train_batch_size": 32,
    "per_device_eval_batch_size": 32,
    "learning_rate": 0.001,
    "num_train_epochs": 100,
    "lr_scheduler_type": "reduce_lr_on_plateau",
    "eval_strategy": "epoch",
    "logging_strategy": "epoch",
    "save_strategy": "epoch",
    "load_best_model_at_end": True,
    "metric_for_best_model": "auc",
    "greater_is_better": True,
    "report_to": "none",
}

train_args = TrainingArguments(**train_args_params)

In [33]:
from sklearn.model_selection import KFold

cv = KFold(5, shuffle=True, random_state=SEED)

In [34]:
from transformers import EarlyStoppingCallback

es_cb = EarlyStoppingCallback(10)

In [35]:
reset_seeds(SEED)
trainer_list = []

for i, (tri, vai) in enumerate(cv.split(train_ft)):
    x_train = train_dt.select(tri)
    x_valid = train_dt.select(vai)
    model = Net(train_ft.shape[1])
    trainer = Trainer(
        model,
        train_args,
        train_dataset=x_train,
        eval_dataset=x_valid,
        compute_metrics=compute_metrics,
        callbacks=[es_cb],
    )
    trainer.train()
    trainer_list.append(trainer)

  0%|          | 0/3400 [00:00<?, ?it/s]

{'loss': 0.7966, 'grad_norm': 0.48407790064811707, 'learning_rate': 0.001, 'epoch': 1.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.7443380355834961, 'eval_auc': 0.5346508330379298, 'eval_runtime': 0.029, 'eval_samples_per_second': 9275.924, 'eval_steps_per_second': 310.347, 'epoch': 1.0}
{'loss': 0.7091, 'grad_norm': 0.5499278903007507, 'learning_rate': 0.001, 'epoch': 2.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.655297040939331, 'eval_auc': 0.49135944700460826, 'eval_runtime': 0.029, 'eval_samples_per_second': 9275.696, 'eval_steps_per_second': 310.339, 'epoch': 2.0}
{'loss': 0.5964, 'grad_norm': 0.6038154363632202, 'learning_rate': 0.001, 'epoch': 3.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.5562451481819153, 'eval_auc': 0.4655264090747962, 'eval_runtime': 0.031, 'eval_samples_per_second': 8677.715, 'eval_steps_per_second': 290.332, 'epoch': 3.0}
{'loss': 0.472, 'grad_norm': 0.28793027997016907, 'learning_rate': 0.001, 'epoch': 4.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.4988579750061035, 'eval_auc': 0.4836937256292095, 'eval_runtime': 0.029, 'eval_samples_per_second': 9274.857, 'eval_steps_per_second': 310.311, 'epoch': 4.0}
{'loss': 0.4168, 'grad_norm': 0.17005252838134766, 'learning_rate': 0.001, 'epoch': 5.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.4938058853149414, 'eval_auc': 0.5615030131159163, 'eval_runtime': 0.026, 'eval_samples_per_second': 10346.999, 'eval_steps_per_second': 346.182, 'epoch': 5.0}
{'loss': 0.3856, 'grad_norm': 0.19093284010887146, 'learning_rate': 0.001, 'epoch': 6.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.48432791233062744, 'eval_auc': 0.6535802906770648, 'eval_runtime': 0.029, 'eval_samples_per_second': 9276.229, 'eval_steps_per_second': 310.357, 'epoch': 6.0}
{'loss': 0.382, 'grad_norm': 0.281551331281662, 'learning_rate': 0.001, 'epoch': 7.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.47270306944847107, 'eval_auc': 0.7215526409074796, 'eval_runtime': 0.029, 'eval_samples_per_second': 9276.306, 'eval_steps_per_second': 310.36, 'epoch': 7.0}
{'loss': 0.3702, 'grad_norm': 0.4026000201702118, 'learning_rate': 0.001, 'epoch': 8.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.45761653780937195, 'eval_auc': 0.7647997164126197, 'eval_runtime': 0.03, 'eval_samples_per_second': 8966.96, 'eval_steps_per_second': 300.01, 'epoch': 8.0}
{'loss': 0.3499, 'grad_norm': 0.33086517453193665, 'learning_rate': 0.001, 'epoch': 9.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.45371153950691223, 'eval_auc': 0.782435306628855, 'eval_runtime': 0.031, 'eval_samples_per_second': 8677.581, 'eval_steps_per_second': 290.328, 'epoch': 9.0}
{'loss': 0.3553, 'grad_norm': 0.3336843252182007, 'learning_rate': 0.001, 'epoch': 10.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.4402434825897217, 'eval_auc': 0.7967919177596596, 'eval_runtime': 0.029, 'eval_samples_per_second': 9275.924, 'eval_steps_per_second': 310.347, 'epoch': 10.0}
{'loss': 0.3375, 'grad_norm': 0.14876165986061096, 'learning_rate': 0.001, 'epoch': 11.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.42829060554504395, 'eval_auc': 0.8072492024104928, 'eval_runtime': 0.045, 'eval_samples_per_second': 5977.388, 'eval_steps_per_second': 199.987, 'epoch': 11.0}
{'loss': 0.3301, 'grad_norm': 0.37041229009628296, 'learning_rate': 0.001, 'epoch': 12.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.43134641647338867, 'eval_auc': 0.8080467919177596, 'eval_runtime': 0.029, 'eval_samples_per_second': 9276.077, 'eval_steps_per_second': 310.352, 'epoch': 12.0}
{'loss': 0.3222, 'grad_norm': 0.4277331829071045, 'learning_rate': 0.001, 'epoch': 13.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.4164610803127289, 'eval_auc': 0.8165544133286069, 'eval_runtime': 0.026, 'eval_samples_per_second': 10344.627, 'eval_steps_per_second': 346.103, 'epoch': 13.0}
{'loss': 0.315, 'grad_norm': 0.3666844367980957, 'learning_rate': 0.001, 'epoch': 14.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.414499968290329, 'eval_auc': 0.8182382133995038, 'eval_runtime': 0.027, 'eval_samples_per_second': 9962.629, 'eval_steps_per_second': 333.322, 'epoch': 14.0}
{'loss': 0.3143, 'grad_norm': 0.2171667367219925, 'learning_rate': 0.0001, 'epoch': 15.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.4133386015892029, 'eval_auc': 0.8185040765685927, 'eval_runtime': 0.03, 'eval_samples_per_second': 8966.746, 'eval_steps_per_second': 300.003, 'epoch': 15.0}
{'loss': 0.3122, 'grad_norm': 0.23132532835006714, 'learning_rate': 0.0001, 'epoch': 16.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.41280436515808105, 'eval_auc': 0.8185926976249556, 'eval_runtime': 0.029, 'eval_samples_per_second': 9275.314, 'eval_steps_per_second': 310.326, 'epoch': 16.0}
{'loss': 0.3222, 'grad_norm': 0.238907128572464, 'learning_rate': 0.0001, 'epoch': 17.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.41263967752456665, 'eval_auc': 0.8189471818504077, 'eval_runtime': 0.057, 'eval_samples_per_second': 4719.522, 'eval_steps_per_second': 157.902, 'epoch': 17.0}
{'loss': 0.3177, 'grad_norm': 0.2973310947418213, 'learning_rate': 0.0001, 'epoch': 18.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.41168293356895447, 'eval_auc': 0.8190358029067707, 'eval_runtime': 0.027, 'eval_samples_per_second': 9962.453, 'eval_steps_per_second': 333.316, 'epoch': 18.0}
{'loss': 0.3136, 'grad_norm': 0.24041499197483063, 'learning_rate': 0.0001, 'epoch': 19.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.4107148051261902, 'eval_auc': 0.8194789081885856, 'eval_runtime': 0.029, 'eval_samples_per_second': 9275.619, 'eval_steps_per_second': 310.337, 'epoch': 19.0}
{'loss': 0.3114, 'grad_norm': 0.4996499717235565, 'learning_rate': 0.0001, 'epoch': 20.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.41030651330947876, 'eval_auc': 0.8199220134704006, 'eval_runtime': 0.029, 'eval_samples_per_second': 9275.162, 'eval_steps_per_second': 310.321, 'epoch': 20.0}
{'loss': 0.3231, 'grad_norm': 0.6758785843849182, 'learning_rate': 0.0001, 'epoch': 21.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.41018301248550415, 'eval_auc': 0.8200106345267635, 'eval_runtime': 0.028, 'eval_samples_per_second': 9607.267, 'eval_steps_per_second': 321.433, 'epoch': 21.0}
{'loss': 0.3088, 'grad_norm': 0.178353413939476, 'learning_rate': 0.0001, 'epoch': 22.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.40968945622444153, 'eval_auc': 0.8207196029776676, 'eval_runtime': 0.0318, 'eval_samples_per_second': 8449.102, 'eval_steps_per_second': 282.684, 'epoch': 22.0}
{'loss': 0.301, 'grad_norm': 0.21912285685539246, 'learning_rate': 0.0001, 'epoch': 23.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.40911340713500977, 'eval_auc': 0.8211627082594825, 'eval_runtime': 0.028, 'eval_samples_per_second': 9606.121, 'eval_steps_per_second': 321.394, 'epoch': 23.0}
{'loss': 0.3101, 'grad_norm': 0.29347747564315796, 'learning_rate': 0.0001, 'epoch': 24.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.4089583456516266, 'eval_auc': 0.8213399503722085, 'eval_runtime': 0.028, 'eval_samples_per_second': 9606.612, 'eval_steps_per_second': 321.411, 'epoch': 24.0}
{'loss': 0.3047, 'grad_norm': 0.19717052578926086, 'learning_rate': 0.0001, 'epoch': 25.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.4087071120738983, 'eval_auc': 0.8213399503722084, 'eval_runtime': 0.027, 'eval_samples_per_second': 9963.773, 'eval_steps_per_second': 333.36, 'epoch': 25.0}
{'loss': 0.3048, 'grad_norm': 0.3636682629585266, 'learning_rate': 1e-05, 'epoch': 26.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.40872085094451904, 'eval_auc': 0.8213399503722084, 'eval_runtime': 0.029, 'eval_samples_per_second': 9276.916, 'eval_steps_per_second': 310.38, 'epoch': 26.0}
{'loss': 0.3122, 'grad_norm': 0.37282732129096985, 'learning_rate': 1e-05, 'epoch': 27.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.40864452719688416, 'eval_auc': 0.8213399503722084, 'eval_runtime': 0.027, 'eval_samples_per_second': 9962.805, 'eval_steps_per_second': 333.328, 'epoch': 27.0}
{'loss': 0.3169, 'grad_norm': 0.3418703079223633, 'learning_rate': 1e-05, 'epoch': 28.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.40854412317276, 'eval_auc': 0.8213399503722084, 'eval_runtime': 0.028, 'eval_samples_per_second': 9606.285, 'eval_steps_per_second': 321.4, 'epoch': 28.0}
{'loss': 0.3058, 'grad_norm': 0.38661518692970276, 'learning_rate': 1e-05, 'epoch': 29.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.4085138142108917, 'eval_auc': 0.8213399503722084, 'eval_runtime': 0.035, 'eval_samples_per_second': 7685.853, 'eval_steps_per_second': 257.147, 'epoch': 29.0}
{'loss': 0.3104, 'grad_norm': 0.5050496459007263, 'learning_rate': 1e-05, 'epoch': 30.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.4084683656692505, 'eval_auc': 0.8214285714285714, 'eval_runtime': 0.026, 'eval_samples_per_second': 10346.145, 'eval_steps_per_second': 346.154, 'epoch': 30.0}
{'loss': 0.3095, 'grad_norm': 0.22034478187561035, 'learning_rate': 1e-05, 'epoch': 31.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.4084058701992035, 'eval_auc': 0.8214285714285714, 'eval_runtime': 0.026, 'eval_samples_per_second': 10346.999, 'eval_steps_per_second': 346.182, 'epoch': 31.0}
{'loss': 0.3115, 'grad_norm': 0.29551661014556885, 'learning_rate': 1e-05, 'epoch': 32.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.4083503484725952, 'eval_auc': 0.8214285714285714, 'eval_runtime': 0.027, 'eval_samples_per_second': 9962.19, 'eval_steps_per_second': 333.307, 'epoch': 32.0}
{'loss': 0.3166, 'grad_norm': 1.1539727449417114, 'learning_rate': 1e-05, 'epoch': 33.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.4084305763244629, 'eval_auc': 0.8214285714285714, 'eval_runtime': 0.031, 'eval_samples_per_second': 8677.114, 'eval_steps_per_second': 290.312, 'epoch': 33.0}
{'loss': 0.3136, 'grad_norm': 0.17946229875087738, 'learning_rate': 1e-05, 'epoch': 34.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.4083050489425659, 'eval_auc': 0.8213399503722084, 'eval_runtime': 0.026, 'eval_samples_per_second': 10346.05, 'eval_steps_per_second': 346.15, 'epoch': 34.0}
{'loss': 0.3106, 'grad_norm': 0.21357201039791107, 'learning_rate': 1e-05, 'epoch': 35.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.40825891494750977, 'eval_auc': 0.8212513293158454, 'eval_runtime': 0.027, 'eval_samples_per_second': 9962.19, 'eval_steps_per_second': 333.307, 'epoch': 35.0}
{'loss': 0.3081, 'grad_norm': 0.20491591095924377, 'learning_rate': 1e-05, 'epoch': 36.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.4082389771938324, 'eval_auc': 0.8213399503722085, 'eval_runtime': 0.03, 'eval_samples_per_second': 8966.461, 'eval_steps_per_second': 299.993, 'epoch': 36.0}
{'loss': 0.3139, 'grad_norm': 0.388141393661499, 'learning_rate': 1.0000000000000002e-06, 'epoch': 37.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.40823930501937866, 'eval_auc': 0.8213399503722085, 'eval_runtime': 0.028, 'eval_samples_per_second': 9606.776, 'eval_steps_per_second': 321.416, 'epoch': 37.0}
{'loss': 0.3112, 'grad_norm': 0.21616004407405853, 'learning_rate': 1.0000000000000002e-06, 'epoch': 38.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.40823328495025635, 'eval_auc': 0.8213399503722085, 'eval_runtime': 0.029, 'eval_samples_per_second': 9275.772, 'eval_steps_per_second': 310.342, 'epoch': 38.0}
{'loss': 0.3184, 'grad_norm': 0.2287006974220276, 'learning_rate': 1.0000000000000002e-06, 'epoch': 39.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.4082280397415161, 'eval_auc': 0.8213399503722085, 'eval_runtime': 0.028, 'eval_samples_per_second': 9607.512, 'eval_steps_per_second': 321.441, 'epoch': 39.0}
{'loss': 0.3131, 'grad_norm': 0.37016329169273376, 'learning_rate': 1.0000000000000002e-06, 'epoch': 40.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.40822693705558777, 'eval_auc': 0.8213399503722085, 'eval_runtime': 0.027, 'eval_samples_per_second': 9962.453, 'eval_steps_per_second': 333.316, 'epoch': 40.0}
{'train_runtime': 7.799, 'train_samples_per_second': 13783.894, 'train_steps_per_second': 435.956, 'train_loss': 0.35636348373749677, 'epoch': 40.0}


  0%|          | 0/3400 [00:00<?, ?it/s]

{'loss': 0.5579, 'grad_norm': 0.2147006094455719, 'learning_rate': 0.001, 'epoch': 1.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.5602144598960876, 'eval_auc': 0.5681777298075193, 'eval_runtime': 0.025, 'eval_samples_per_second': 10760.985, 'eval_steps_per_second': 360.033, 'epoch': 1.0}
{'loss': 0.502, 'grad_norm': 0.4346275329589844, 'learning_rate': 0.001, 'epoch': 2.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.5086501240730286, 'eval_auc': 0.6054146429213887, 'eval_runtime': 0.038, 'eval_samples_per_second': 7078.883, 'eval_steps_per_second': 236.84, 'epoch': 2.0}
{'loss': 0.4348, 'grad_norm': 0.39131447672843933, 'learning_rate': 0.001, 'epoch': 3.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.4719909727573395, 'eval_auc': 0.6819571865443425, 'eval_runtime': 0.027, 'eval_samples_per_second': 9962.805, 'eval_steps_per_second': 333.328, 'epoch': 3.0}
{'loss': 0.389, 'grad_norm': 0.26212596893310547, 'learning_rate': 0.001, 'epoch': 4.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.46687257289886475, 'eval_auc': 0.7534628530311207, 'eval_runtime': 0.028, 'eval_samples_per_second': 9606.858, 'eval_steps_per_second': 321.419, 'epoch': 4.0}
{'loss': 0.3804, 'grad_norm': 0.1896594613790512, 'learning_rate': 0.001, 'epoch': 5.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.4580327272415161, 'eval_auc': 0.7866522755891348, 'eval_runtime': 0.027, 'eval_samples_per_second': 9962.102, 'eval_steps_per_second': 333.305, 'epoch': 5.0}
{'loss': 0.3691, 'grad_norm': 0.12559135258197784, 'learning_rate': 0.001, 'epoch': 6.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.442923903465271, 'eval_auc': 0.796096420219464, 'eval_runtime': 0.028, 'eval_samples_per_second': 9605.958, 'eval_steps_per_second': 321.389, 'epoch': 6.0}
{'loss': 0.3517, 'grad_norm': 0.3402172923088074, 'learning_rate': 0.001, 'epoch': 7.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.4378795325756073, 'eval_auc': 0.8029321820471306, 'eval_runtime': 0.026, 'eval_samples_per_second': 10346.05, 'eval_steps_per_second': 346.15, 'epoch': 7.0}
{'loss': 0.344, 'grad_norm': 0.21113881468772888, 'learning_rate': 0.001, 'epoch': 8.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.42650681734085083, 'eval_auc': 0.8011332973556394, 'eval_runtime': 0.026, 'eval_samples_per_second': 10345.67, 'eval_steps_per_second': 346.138, 'epoch': 8.0}
{'loss': 0.3339, 'grad_norm': 0.37507566809654236, 'learning_rate': 0.001, 'epoch': 9.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.4171946942806244, 'eval_auc': 0.8038316243928764, 'eval_runtime': 0.026, 'eval_samples_per_second': 10345.386, 'eval_steps_per_second': 346.128, 'epoch': 9.0}
{'loss': 0.3295, 'grad_norm': 0.27322664856910706, 'learning_rate': 0.001, 'epoch': 10.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.4109800159931183, 'eval_auc': 0.801852851232236, 'eval_runtime': 0.027, 'eval_samples_per_second': 9962.19, 'eval_steps_per_second': 333.307, 'epoch': 10.0}
{'loss': 0.3195, 'grad_norm': 0.3512864112854004, 'learning_rate': 0.001, 'epoch': 11.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.4106331169605255, 'eval_auc': 0.8053606763806439, 'eval_runtime': 0.031, 'eval_samples_per_second': 8677.581, 'eval_steps_per_second': 290.328, 'epoch': 11.0}
{'loss': 0.3232, 'grad_norm': 0.19924834370613098, 'learning_rate': 0.001, 'epoch': 12.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.4023212492465973, 'eval_auc': 0.806799784133837, 'eval_runtime': 0.028, 'eval_samples_per_second': 9607.921, 'eval_steps_per_second': 321.455, 'epoch': 12.0}
{'loss': 0.3142, 'grad_norm': 0.16982193291187286, 'learning_rate': 0.0001, 'epoch': 13.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.4017624855041504, 'eval_auc': 0.8069796726029861, 'eval_runtime': 0.039, 'eval_samples_per_second': 6897.303, 'eval_steps_per_second': 230.765, 'epoch': 13.0}
{'loss': 0.3149, 'grad_norm': 0.27732911705970764, 'learning_rate': 0.0001, 'epoch': 14.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.4019775390625, 'eval_auc': 0.8071595610721353, 'eval_runtime': 0.028, 'eval_samples_per_second': 9606.53, 'eval_steps_per_second': 321.408, 'epoch': 14.0}
{'loss': 0.3111, 'grad_norm': 0.20207875967025757, 'learning_rate': 0.0001, 'epoch': 15.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.40164634585380554, 'eval_auc': 0.8071595610721353, 'eval_runtime': 0.026, 'eval_samples_per_second': 10346.334, 'eval_steps_per_second': 346.16, 'epoch': 15.0}
{'loss': 0.3159, 'grad_norm': 0.2682812213897705, 'learning_rate': 0.0001, 'epoch': 16.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.4015727639198303, 'eval_auc': 0.8077891707141572, 'eval_runtime': 0.028, 'eval_samples_per_second': 9606.449, 'eval_steps_per_second': 321.405, 'epoch': 16.0}
{'loss': 0.3096, 'grad_norm': 0.22218216955661774, 'learning_rate': 0.0001, 'epoch': 17.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.4012059271335602, 'eval_auc': 0.8083288361216048, 'eval_runtime': 0.031, 'eval_samples_per_second': 8676.914, 'eval_steps_per_second': 290.306, 'epoch': 17.0}
{'loss': 0.3089, 'grad_norm': 0.19955873489379883, 'learning_rate': 0.0001, 'epoch': 18.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.40097710490226746, 'eval_auc': 0.808868501529052, 'eval_runtime': 0.026, 'eval_samples_per_second': 10344.627, 'eval_steps_per_second': 346.103, 'epoch': 18.0}
{'loss': 0.3154, 'grad_norm': 0.1941005438566208, 'learning_rate': 0.0001, 'epoch': 19.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.4006139636039734, 'eval_auc': 0.8089584457636265, 'eval_runtime': 0.026, 'eval_samples_per_second': 10345.291, 'eval_steps_per_second': 346.125, 'epoch': 19.0}
{'loss': 0.31, 'grad_norm': 0.24860087037086487, 'learning_rate': 0.0001, 'epoch': 20.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.4000551700592041, 'eval_auc': 0.808868501529052, 'eval_runtime': 0.025, 'eval_samples_per_second': 10759.138, 'eval_steps_per_second': 359.971, 'epoch': 20.0}
{'loss': 0.3086, 'grad_norm': 0.17947182059288025, 'learning_rate': 0.0001, 'epoch': 21.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.40009990334510803, 'eval_auc': 0.8094081669364994, 'eval_runtime': 0.027, 'eval_samples_per_second': 9963.245, 'eval_steps_per_second': 333.343, 'epoch': 21.0}
{'loss': 0.3114, 'grad_norm': 0.22896628081798553, 'learning_rate': 0.0001, 'epoch': 22.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.39984744787216187, 'eval_auc': 0.8092282784673503, 'eval_runtime': 0.028, 'eval_samples_per_second': 9607.103, 'eval_steps_per_second': 321.427, 'epoch': 22.0}
{'loss': 0.3151, 'grad_norm': 0.35334518551826477, 'learning_rate': 0.0001, 'epoch': 23.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.39935460686683655, 'eval_auc': 0.8095880554056485, 'eval_runtime': 0.03, 'eval_samples_per_second': 8966.96, 'eval_steps_per_second': 300.01, 'epoch': 23.0}
{'loss': 0.3095, 'grad_norm': 0.18589147925376892, 'learning_rate': 1e-05, 'epoch': 24.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.3993333578109741, 'eval_auc': 0.809677999640223, 'eval_runtime': 0.026, 'eval_samples_per_second': 10346.24, 'eval_steps_per_second': 346.157, 'epoch': 24.0}
{'loss': 0.3092, 'grad_norm': 0.21480371057987213, 'learning_rate': 1e-05, 'epoch': 25.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.39928242564201355, 'eval_auc': 0.8097679438747976, 'eval_runtime': 0.027, 'eval_samples_per_second': 9961.75, 'eval_steps_per_second': 333.293, 'epoch': 25.0}
{'loss': 0.3103, 'grad_norm': 0.20534084737300873, 'learning_rate': 1e-05, 'epoch': 26.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.39925989508628845, 'eval_auc': 0.8097679438747977, 'eval_runtime': 0.027, 'eval_samples_per_second': 9961.574, 'eval_steps_per_second': 333.287, 'epoch': 26.0}
{'loss': 0.309, 'grad_norm': 0.19830818474292755, 'learning_rate': 1e-05, 'epoch': 27.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.3992778956890106, 'eval_auc': 0.8097679438747976, 'eval_runtime': 0.027, 'eval_samples_per_second': 9962.805, 'eval_steps_per_second': 333.328, 'epoch': 27.0}
{'loss': 0.3088, 'grad_norm': 0.2758598327636719, 'learning_rate': 1e-05, 'epoch': 28.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.39923322200775146, 'eval_auc': 0.8097679438747976, 'eval_runtime': 0.025, 'eval_samples_per_second': 10760.472, 'eval_steps_per_second': 360.016, 'epoch': 28.0}
{'loss': 0.3119, 'grad_norm': 0.3850826323032379, 'learning_rate': 1e-05, 'epoch': 29.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.39921727776527405, 'eval_auc': 0.8097679438747976, 'eval_runtime': 0.029, 'eval_samples_per_second': 9276.84, 'eval_steps_per_second': 310.378, 'epoch': 29.0}
{'loss': 0.309, 'grad_norm': 0.3939700126647949, 'learning_rate': 1e-05, 'epoch': 30.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.3991384506225586, 'eval_auc': 0.8097679438747976, 'eval_runtime': 0.027, 'eval_samples_per_second': 9963.157, 'eval_steps_per_second': 333.34, 'epoch': 30.0}
{'loss': 0.3164, 'grad_norm': 0.38220471143722534, 'learning_rate': 1e-05, 'epoch': 31.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.399127334356308, 'eval_auc': 0.8098578881093722, 'eval_runtime': 0.028, 'eval_samples_per_second': 9606.612, 'eval_steps_per_second': 321.411, 'epoch': 31.0}
{'loss': 0.312, 'grad_norm': 0.42100784182548523, 'learning_rate': 1e-05, 'epoch': 32.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.3990853428840637, 'eval_auc': 0.8097679438747977, 'eval_runtime': 0.031, 'eval_samples_per_second': 8677.448, 'eval_steps_per_second': 290.324, 'epoch': 32.0}
{'loss': 0.3113, 'grad_norm': 0.5128201842308044, 'learning_rate': 1e-05, 'epoch': 33.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.3990480303764343, 'eval_auc': 0.8097679438747977, 'eval_runtime': 0.026, 'eval_samples_per_second': 10346.05, 'eval_steps_per_second': 346.15, 'epoch': 33.0}
{'loss': 0.3155, 'grad_norm': 0.25249338150024414, 'learning_rate': 1e-05, 'epoch': 34.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.3990202248096466, 'eval_auc': 0.8097679438747977, 'eval_runtime': 0.028, 'eval_samples_per_second': 9606.776, 'eval_steps_per_second': 321.416, 'epoch': 34.0}
{'loss': 0.3088, 'grad_norm': 0.18581248819828033, 'learning_rate': 1.0000000000000002e-06, 'epoch': 35.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.39901626110076904, 'eval_auc': 0.8097679438747977, 'eval_runtime': 0.026, 'eval_samples_per_second': 10346.24, 'eval_steps_per_second': 346.157, 'epoch': 35.0}
{'loss': 0.3115, 'grad_norm': 0.2539944648742676, 'learning_rate': 1.0000000000000002e-06, 'epoch': 36.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.3990117311477661, 'eval_auc': 0.8097679438747977, 'eval_runtime': 0.026, 'eval_samples_per_second': 10345.765, 'eval_steps_per_second': 346.141, 'epoch': 36.0}
{'loss': 0.3066, 'grad_norm': 0.3311654329299927, 'learning_rate': 1.0000000000000002e-06, 'epoch': 37.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.399007111787796, 'eval_auc': 0.8097679438747977, 'eval_runtime': 0.027, 'eval_samples_per_second': 9962.453, 'eval_steps_per_second': 333.316, 'epoch': 37.0}
{'loss': 0.304, 'grad_norm': 0.16892357170581818, 'learning_rate': 1.0000000000000002e-06, 'epoch': 38.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.3990074694156647, 'eval_auc': 0.8097679438747977, 'eval_runtime': 0.026, 'eval_samples_per_second': 10345.576, 'eval_steps_per_second': 346.134, 'epoch': 38.0}
{'loss': 0.31, 'grad_norm': 0.18083573877811432, 'learning_rate': 1.0000000000000002e-06, 'epoch': 39.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.3990045487880707, 'eval_auc': 0.8097679438747977, 'eval_runtime': 0.027, 'eval_samples_per_second': 9963.245, 'eval_steps_per_second': 333.343, 'epoch': 39.0}
{'loss': 0.3157, 'grad_norm': 0.33120763301849365, 'learning_rate': 1.0000000000000002e-06, 'epoch': 40.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.39900273084640503, 'eval_auc': 0.8097679438747977, 'eval_runtime': 0.03, 'eval_samples_per_second': 8966.461, 'eval_steps_per_second': 299.993, 'epoch': 40.0}
{'loss': 0.3085, 'grad_norm': 0.13396255671977997, 'learning_rate': 1.0000000000000002e-06, 'epoch': 41.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.3990015685558319, 'eval_auc': 0.8097679438747977, 'eval_runtime': 0.029, 'eval_samples_per_second': 9274.857, 'eval_steps_per_second': 310.311, 'epoch': 41.0}
{'train_runtime': 7.889, 'train_samples_per_second': 13626.548, 'train_steps_per_second': 430.979, 'train_loss': 0.3331194826999046, 'epoch': 41.0}


  0%|          | 0/3400 [00:00<?, ?it/s]

{'loss': 0.5743, 'grad_norm': 0.07757870852947235, 'learning_rate': 0.001, 'epoch': 1.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.5317915081977844, 'eval_auc': 0.5964473056747734, 'eval_runtime': 0.03, 'eval_samples_per_second': 8965.963, 'eval_steps_per_second': 299.976, 'epoch': 1.0}
{'loss': 0.5058, 'grad_norm': 0.20444805920124054, 'learning_rate': 0.001, 'epoch': 2.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.4503386616706848, 'eval_auc': 0.615999046256557, 'eval_runtime': 0.027, 'eval_samples_per_second': 9963.157, 'eval_steps_per_second': 333.34, 'epoch': 2.0}
{'loss': 0.4461, 'grad_norm': 0.2722700536251068, 'learning_rate': 0.001, 'epoch': 3.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.39580366015434265, 'eval_auc': 0.6578445398187888, 'eval_runtime': 0.03, 'eval_samples_per_second': 8966.889, 'eval_steps_per_second': 300.007, 'epoch': 3.0}
{'loss': 0.4207, 'grad_norm': 0.27190008759498596, 'learning_rate': 0.001, 'epoch': 4.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.3744781017303467, 'eval_auc': 0.721864568431092, 'eval_runtime': 0.028, 'eval_samples_per_second': 9606.939, 'eval_steps_per_second': 321.422, 'epoch': 4.0}
{'loss': 0.3997, 'grad_norm': 0.35146158933639526, 'learning_rate': 0.001, 'epoch': 5.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.3619261682033539, 'eval_auc': 0.7600143061516452, 'eval_runtime': 0.029, 'eval_samples_per_second': 9275.086, 'eval_steps_per_second': 310.319, 'epoch': 5.0}
{'loss': 0.3855, 'grad_norm': 0.2285628467798233, 'learning_rate': 0.001, 'epoch': 6.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.35127705335617065, 'eval_auc': 0.7805197901764425, 'eval_runtime': 0.027, 'eval_samples_per_second': 9962.365, 'eval_steps_per_second': 333.313, 'epoch': 6.0}
{'loss': 0.3724, 'grad_norm': 0.2544596195220947, 'learning_rate': 0.001, 'epoch': 7.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.3400638997554779, 'eval_auc': 0.7929184549356224, 'eval_runtime': 0.028, 'eval_samples_per_second': 9607.185, 'eval_steps_per_second': 321.43, 'epoch': 7.0}
{'loss': 0.3674, 'grad_norm': 0.21590176224708557, 'learning_rate': 0.001, 'epoch': 8.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.32891854643821716, 'eval_auc': 0.8079399141630902, 'eval_runtime': 0.0282, 'eval_samples_per_second': 9527.438, 'eval_steps_per_second': 318.762, 'epoch': 8.0}
{'loss': 0.3572, 'grad_norm': 0.22488732635974884, 'learning_rate': 0.001, 'epoch': 9.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.31815603375434875, 'eval_auc': 0.8214115402956604, 'eval_runtime': 0.027, 'eval_samples_per_second': 9961.662, 'eval_steps_per_second': 333.29, 'epoch': 9.0}
{'loss': 0.3456, 'grad_norm': 0.2301149219274521, 'learning_rate': 0.001, 'epoch': 10.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.30996671319007874, 'eval_auc': 0.8254649499284693, 'eval_runtime': 0.031, 'eval_samples_per_second': 8677.448, 'eval_steps_per_second': 290.324, 'epoch': 10.0}
{'loss': 0.3443, 'grad_norm': 0.25102168321609497, 'learning_rate': 0.001, 'epoch': 11.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.30289700627326965, 'eval_auc': 0.8346447305674773, 'eval_runtime': 0.025, 'eval_samples_per_second': 10759.959, 'eval_steps_per_second': 359.999, 'epoch': 11.0}
{'loss': 0.3236, 'grad_norm': 0.26803770661354065, 'learning_rate': 0.001, 'epoch': 12.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.29501020908355713, 'eval_auc': 0.8384597043395327, 'eval_runtime': 0.027, 'eval_samples_per_second': 9963.245, 'eval_steps_per_second': 333.343, 'epoch': 12.0}
{'loss': 0.3233, 'grad_norm': 0.2785767614841461, 'learning_rate': 0.0001, 'epoch': 13.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.2943804860115051, 'eval_auc': 0.8384597043395327, 'eval_runtime': 0.025, 'eval_samples_per_second': 10761.396, 'eval_steps_per_second': 360.047, 'epoch': 13.0}
{'loss': 0.3212, 'grad_norm': 0.2890479564666748, 'learning_rate': 0.0001, 'epoch': 14.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.29381194710731506, 'eval_auc': 0.8392942298521697, 'eval_runtime': 0.041, 'eval_samples_per_second': 6561.222, 'eval_steps_per_second': 219.52, 'epoch': 14.0}
{'loss': 0.3275, 'grad_norm': 0.26530691981315613, 'learning_rate': 0.0001, 'epoch': 15.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.29340821504592896, 'eval_auc': 0.8392942298521697, 'eval_runtime': 0.027, 'eval_samples_per_second': 9962.805, 'eval_steps_per_second': 333.328, 'epoch': 15.0}
{'loss': 0.3254, 'grad_norm': 0.3439389169216156, 'learning_rate': 0.0001, 'epoch': 16.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.2930430769920349, 'eval_auc': 0.8401287553648069, 'eval_runtime': 0.029, 'eval_samples_per_second': 9276.077, 'eval_steps_per_second': 310.352, 'epoch': 16.0}
{'loss': 0.3252, 'grad_norm': 0.38646990060806274, 'learning_rate': 0.0001, 'epoch': 17.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.29285722970962524, 'eval_auc': 0.840486409155937, 'eval_runtime': 0.026, 'eval_samples_per_second': 10345.67, 'eval_steps_per_second': 346.138, 'epoch': 17.0}
{'loss': 0.33, 'grad_norm': 0.9767679572105408, 'learning_rate': 0.0001, 'epoch': 18.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.29244089126586914, 'eval_auc': 0.8403671912255604, 'eval_runtime': 0.027, 'eval_samples_per_second': 9963.333, 'eval_steps_per_second': 333.346, 'epoch': 18.0}
{'loss': 0.3199, 'grad_norm': 0.28472983837127686, 'learning_rate': 0.0001, 'epoch': 19.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.2921961843967438, 'eval_auc': 0.8408440629470673, 'eval_runtime': 0.028, 'eval_samples_per_second': 9606.776, 'eval_steps_per_second': 321.416, 'epoch': 19.0}
{'loss': 0.3251, 'grad_norm': 0.30794671177864075, 'learning_rate': 0.0001, 'epoch': 20.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.2919117510318756, 'eval_auc': 0.8407248450166905, 'eval_runtime': 0.031, 'eval_samples_per_second': 8678.449, 'eval_steps_per_second': 290.357, 'epoch': 20.0}
{'loss': 0.3294, 'grad_norm': 0.42205923795700073, 'learning_rate': 0.0001, 'epoch': 21.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.29173964262008667, 'eval_auc': 0.8416785884597044, 'eval_runtime': 0.025, 'eval_samples_per_second': 10760.472, 'eval_steps_per_second': 360.016, 'epoch': 21.0}
{'loss': 0.3238, 'grad_norm': 0.27276259660720825, 'learning_rate': 0.0001, 'epoch': 22.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.2913646996021271, 'eval_auc': 0.8421554601812112, 'eval_runtime': 0.028, 'eval_samples_per_second': 9606.694, 'eval_steps_per_second': 321.414, 'epoch': 22.0}
{'loss': 0.3213, 'grad_norm': 0.45504146814346313, 'learning_rate': 0.0001, 'epoch': 23.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.29095473885536194, 'eval_auc': 0.8421554601812112, 'eval_runtime': 0.027, 'eval_samples_per_second': 9962.717, 'eval_steps_per_second': 333.325, 'epoch': 23.0}
{'loss': 0.3208, 'grad_norm': 0.34583181142807007, 'learning_rate': 1e-05, 'epoch': 24.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.2909161448478699, 'eval_auc': 0.8421554601812112, 'eval_runtime': 0.025, 'eval_samples_per_second': 10760.985, 'eval_steps_per_second': 360.033, 'epoch': 24.0}
{'loss': 0.3189, 'grad_norm': 0.40577349066734314, 'learning_rate': 1e-05, 'epoch': 25.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.2908831834793091, 'eval_auc': 0.8421554601812112, 'eval_runtime': 0.028, 'eval_samples_per_second': 9608.248, 'eval_steps_per_second': 321.466, 'epoch': 25.0}
{'loss': 0.3246, 'grad_norm': 0.29304632544517517, 'learning_rate': 1e-05, 'epoch': 26.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.2908420264720917, 'eval_auc': 0.8420362422508346, 'eval_runtime': 0.027, 'eval_samples_per_second': 9962.453, 'eval_steps_per_second': 333.316, 'epoch': 26.0}
{'loss': 0.3143, 'grad_norm': 0.703245222568512, 'learning_rate': 1e-05, 'epoch': 27.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.29078882932662964, 'eval_auc': 0.8420362422508346, 'eval_runtime': 0.03, 'eval_samples_per_second': 8966.39, 'eval_steps_per_second': 299.991, 'epoch': 27.0}
{'loss': 0.314, 'grad_norm': 0.36058130860328674, 'learning_rate': 1e-05, 'epoch': 28.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.29075342416763306, 'eval_auc': 0.8420362422508346, 'eval_runtime': 0.027, 'eval_samples_per_second': 9962.981, 'eval_steps_per_second': 333.334, 'epoch': 28.0}
{'loss': 0.3185, 'grad_norm': 0.2869873344898224, 'learning_rate': 1e-05, 'epoch': 29.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.2907239496707916, 'eval_auc': 0.8420362422508346, 'eval_runtime': 0.027, 'eval_samples_per_second': 9962.453, 'eval_steps_per_second': 333.316, 'epoch': 29.0}
{'loss': 0.3253, 'grad_norm': 0.342040091753006, 'learning_rate': 1e-05, 'epoch': 30.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.2907012104988098, 'eval_auc': 0.8420362422508346, 'eval_runtime': 0.026, 'eval_samples_per_second': 10346.24, 'eval_steps_per_second': 346.157, 'epoch': 30.0}
{'loss': 0.3178, 'grad_norm': 0.26181933283805847, 'learning_rate': 1e-05, 'epoch': 31.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.29066771268844604, 'eval_auc': 0.8421554601812113, 'eval_runtime': 0.031, 'eval_samples_per_second': 8677.247, 'eval_steps_per_second': 290.317, 'epoch': 31.0}
{'loss': 0.3182, 'grad_norm': 0.26158884167671204, 'learning_rate': 1e-05, 'epoch': 32.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.29064157605171204, 'eval_auc': 0.8421554601812113, 'eval_runtime': 0.027, 'eval_samples_per_second': 9961.926, 'eval_steps_per_second': 333.299, 'epoch': 32.0}
{'loss': 0.327, 'grad_norm': 0.34857383370399475, 'learning_rate': 1e-05, 'epoch': 33.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.2906181812286377, 'eval_auc': 0.842274678111588, 'eval_runtime': 0.027, 'eval_samples_per_second': 9962.19, 'eval_steps_per_second': 333.307, 'epoch': 33.0}
{'loss': 0.32, 'grad_norm': 0.25624406337738037, 'learning_rate': 1e-05, 'epoch': 34.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.290592223405838, 'eval_auc': 0.842274678111588, 'eval_runtime': 0.029, 'eval_samples_per_second': 9275.619, 'eval_steps_per_second': 310.337, 'epoch': 34.0}
{'loss': 0.3195, 'grad_norm': 0.3287242650985718, 'learning_rate': 1.0000000000000002e-06, 'epoch': 35.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.29059073328971863, 'eval_auc': 0.842274678111588, 'eval_runtime': 0.028, 'eval_samples_per_second': 9606.449, 'eval_steps_per_second': 321.405, 'epoch': 35.0}
{'loss': 0.319, 'grad_norm': 0.5124474763870239, 'learning_rate': 1.0000000000000002e-06, 'epoch': 36.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.2905861437320709, 'eval_auc': 0.842274678111588, 'eval_runtime': 0.038, 'eval_samples_per_second': 7078.883, 'eval_steps_per_second': 236.84, 'epoch': 36.0}
{'loss': 0.3187, 'grad_norm': 0.4080304503440857, 'learning_rate': 1.0000000000000002e-06, 'epoch': 37.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.290583074092865, 'eval_auc': 0.842274678111588, 'eval_runtime': 0.029, 'eval_samples_per_second': 9276.229, 'eval_steps_per_second': 310.357, 'epoch': 37.0}
{'loss': 0.3168, 'grad_norm': 0.2519524097442627, 'learning_rate': 1.0000000000000002e-06, 'epoch': 38.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.2905809283256531, 'eval_auc': 0.842274678111588, 'eval_runtime': 0.027, 'eval_samples_per_second': 9961.574, 'eval_steps_per_second': 333.287, 'epoch': 38.0}
{'loss': 0.3164, 'grad_norm': 0.31445494294166565, 'learning_rate': 1.0000000000000002e-06, 'epoch': 39.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.29057765007019043, 'eval_auc': 0.842274678111588, 'eval_runtime': 0.028, 'eval_samples_per_second': 9607.267, 'eval_steps_per_second': 321.433, 'epoch': 39.0}
{'loss': 0.3241, 'grad_norm': 0.38108792901039124, 'learning_rate': 1.0000000000000002e-06, 'epoch': 40.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.29057419300079346, 'eval_auc': 0.842274678111588, 'eval_runtime': 0.029, 'eval_samples_per_second': 9275.543, 'eval_steps_per_second': 310.334, 'epoch': 40.0}
{'loss': 0.3171, 'grad_norm': 0.4707827866077423, 'learning_rate': 1.0000000000000002e-06, 'epoch': 41.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.2905712425708771, 'eval_auc': 0.842274678111588, 'eval_runtime': 0.028, 'eval_samples_per_second': 9607.185, 'eval_steps_per_second': 321.43, 'epoch': 41.0}
{'loss': 0.3244, 'grad_norm': 0.36207491159439087, 'learning_rate': 1.0000000000000002e-06, 'epoch': 42.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.2905680537223816, 'eval_auc': 0.842274678111588, 'eval_runtime': 0.031, 'eval_samples_per_second': 8677.314, 'eval_steps_per_second': 290.319, 'epoch': 42.0}
{'loss': 0.3217, 'grad_norm': 0.2636401653289795, 'learning_rate': 1.0000000000000002e-06, 'epoch': 43.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.29056596755981445, 'eval_auc': 0.842274678111588, 'eval_runtime': 0.028, 'eval_samples_per_second': 9607.594, 'eval_steps_per_second': 321.444, 'epoch': 43.0}
{'train_runtime': 8.3808, 'train_samples_per_second': 12826.932, 'train_steps_per_second': 405.689, 'train_loss': 0.344458820718747, 'epoch': 43.0}


  0%|          | 0/3400 [00:00<?, ?it/s]

{'loss': 0.6007, 'grad_norm': 0.21411705017089844, 'learning_rate': 0.001, 'epoch': 1.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.5442813038825989, 'eval_auc': 0.4365079365079365, 'eval_runtime': 0.027, 'eval_samples_per_second': 9962.278, 'eval_steps_per_second': 333.31, 'epoch': 1.0}
{'loss': 0.5088, 'grad_norm': 0.34969303011894226, 'learning_rate': 0.001, 'epoch': 2.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.43698710203170776, 'eval_auc': 0.5534798534798535, 'eval_runtime': 0.031, 'eval_samples_per_second': 8677.247, 'eval_steps_per_second': 290.317, 'epoch': 2.0}
{'loss': 0.4385, 'grad_norm': 0.34250181913375854, 'learning_rate': 0.001, 'epoch': 3.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.3805520236492157, 'eval_auc': 0.6921855921855922, 'eval_runtime': 0.027, 'eval_samples_per_second': 9963.597, 'eval_steps_per_second': 333.355, 'epoch': 3.0}
{'loss': 0.4045, 'grad_norm': 0.18349561095237732, 'learning_rate': 0.001, 'epoch': 4.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.3645758330821991, 'eval_auc': 0.742979242979243, 'eval_runtime': 0.031, 'eval_samples_per_second': 8676.714, 'eval_steps_per_second': 290.299, 'epoch': 4.0}
{'loss': 0.3955, 'grad_norm': 0.2970419228076935, 'learning_rate': 0.001, 'epoch': 5.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.3533629775047302, 'eval_auc': 0.7540903540903541, 'eval_runtime': 0.029, 'eval_samples_per_second': 9276.763, 'eval_steps_per_second': 310.375, 'epoch': 5.0}
{'loss': 0.3699, 'grad_norm': 0.24552224576473236, 'learning_rate': 0.001, 'epoch': 6.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.3444630205631256, 'eval_auc': 0.7578754578754578, 'eval_runtime': 0.03, 'eval_samples_per_second': 8966.034, 'eval_steps_per_second': 299.979, 'epoch': 6.0}
{'loss': 0.3618, 'grad_norm': 0.3838513493537903, 'learning_rate': 0.001, 'epoch': 7.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.3369000256061554, 'eval_auc': 0.763980463980464, 'eval_runtime': 0.032, 'eval_samples_per_second': 8406.17, 'eval_steps_per_second': 281.247, 'epoch': 7.0}
{'loss': 0.3547, 'grad_norm': 0.41746804118156433, 'learning_rate': 0.001, 'epoch': 8.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.3310006856918335, 'eval_auc': 0.7716727716727716, 'eval_runtime': 0.028, 'eval_samples_per_second': 9606.939, 'eval_steps_per_second': 321.422, 'epoch': 8.0}
{'loss': 0.3357, 'grad_norm': 0.26023125648498535, 'learning_rate': 0.001, 'epoch': 9.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.32553887367248535, 'eval_auc': 0.7818070818070818, 'eval_runtime': 0.028, 'eval_samples_per_second': 9606.694, 'eval_steps_per_second': 321.414, 'epoch': 9.0}
{'loss': 0.3325, 'grad_norm': 0.42028331756591797, 'learning_rate': 0.001, 'epoch': 10.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.3221527934074402, 'eval_auc': 0.7875457875457876, 'eval_runtime': 0.029, 'eval_samples_per_second': 9275.162, 'eval_steps_per_second': 310.321, 'epoch': 10.0}
{'loss': 0.3367, 'grad_norm': 0.2843736708164215, 'learning_rate': 0.001, 'epoch': 11.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.31964555382728577, 'eval_auc': 0.7925518925518925, 'eval_runtime': 0.027, 'eval_samples_per_second': 9963.509, 'eval_steps_per_second': 333.352, 'epoch': 11.0}
{'loss': 0.3262, 'grad_norm': 0.3426964282989502, 'learning_rate': 0.001, 'epoch': 12.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.31577280163764954, 'eval_auc': 0.7970695970695971, 'eval_runtime': 0.027, 'eval_samples_per_second': 9962.893, 'eval_steps_per_second': 333.331, 'epoch': 12.0}
{'loss': 0.3182, 'grad_norm': 0.48290619254112244, 'learning_rate': 0.0001, 'epoch': 13.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.31537699699401855, 'eval_auc': 0.798046398046398, 'eval_runtime': 0.028, 'eval_samples_per_second': 9607.267, 'eval_steps_per_second': 321.433, 'epoch': 13.0}
{'loss': 0.3169, 'grad_norm': 0.2748687267303467, 'learning_rate': 0.0001, 'epoch': 14.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.31507018208503723, 'eval_auc': 0.7973137973137974, 'eval_runtime': 0.029, 'eval_samples_per_second': 9276.382, 'eval_steps_per_second': 310.362, 'epoch': 14.0}
{'loss': 0.3196, 'grad_norm': 0.39880460500717163, 'learning_rate': 0.0001, 'epoch': 15.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.314668208360672, 'eval_auc': 0.7980463980463981, 'eval_runtime': 0.031, 'eval_samples_per_second': 8676.847, 'eval_steps_per_second': 290.303, 'epoch': 15.0}
{'loss': 0.3277, 'grad_norm': 0.505814254283905, 'learning_rate': 0.0001, 'epoch': 16.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.314256876707077, 'eval_auc': 0.7987789987789988, 'eval_runtime': 0.033, 'eval_samples_per_second': 8151.223, 'eval_steps_per_second': 272.717, 'epoch': 16.0}
{'loss': 0.328, 'grad_norm': 0.39419519901275635, 'learning_rate': 0.0001, 'epoch': 17.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.31407663226127625, 'eval_auc': 0.7987789987789988, 'eval_runtime': 0.032, 'eval_samples_per_second': 8406.609, 'eval_steps_per_second': 281.262, 'epoch': 17.0}
{'loss': 0.3219, 'grad_norm': 0.6815794110298157, 'learning_rate': 0.0001, 'epoch': 18.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.3137947618961334, 'eval_auc': 0.7990231990231991, 'eval_runtime': 0.031, 'eval_samples_per_second': 8677.247, 'eval_steps_per_second': 290.317, 'epoch': 18.0}
{'loss': 0.3272, 'grad_norm': 0.3147534728050232, 'learning_rate': 0.0001, 'epoch': 19.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.3136213719844818, 'eval_auc': 0.7992673992673993, 'eval_runtime': 0.034, 'eval_samples_per_second': 7904.578, 'eval_steps_per_second': 264.465, 'epoch': 19.0}
{'loss': 0.3209, 'grad_norm': 0.3456777036190033, 'learning_rate': 0.0001, 'epoch': 20.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.3134456276893616, 'eval_auc': 0.7993894993894993, 'eval_runtime': 0.028, 'eval_samples_per_second': 9606.285, 'eval_steps_per_second': 321.4, 'epoch': 20.0}
{'loss': 0.3193, 'grad_norm': 0.3592134714126587, 'learning_rate': 0.0001, 'epoch': 21.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.3132040202617645, 'eval_auc': 0.8002442002442003, 'eval_runtime': 0.027, 'eval_samples_per_second': 9962.453, 'eval_steps_per_second': 333.316, 'epoch': 21.0}
{'loss': 0.3285, 'grad_norm': 0.4152067005634308, 'learning_rate': 0.0001, 'epoch': 22.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.3128615617752075, 'eval_auc': 0.8007326007326008, 'eval_runtime': 0.029, 'eval_samples_per_second': 9275.086, 'eval_steps_per_second': 310.319, 'epoch': 22.0}
{'loss': 0.3209, 'grad_norm': 0.48039674758911133, 'learning_rate': 0.0001, 'epoch': 23.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.3126329481601715, 'eval_auc': 0.8003663003663004, 'eval_runtime': 0.027, 'eval_samples_per_second': 9962.102, 'eval_steps_per_second': 333.305, 'epoch': 23.0}
{'loss': 0.3154, 'grad_norm': 0.2654661536216736, 'learning_rate': 1e-05, 'epoch': 24.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.31260475516319275, 'eval_auc': 0.8003663003663004, 'eval_runtime': 0.028, 'eval_samples_per_second': 9606.121, 'eval_steps_per_second': 321.394, 'epoch': 24.0}
{'loss': 0.3232, 'grad_norm': 0.2761572003364563, 'learning_rate': 1e-05, 'epoch': 25.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.31258389353752136, 'eval_auc': 0.8004884004884005, 'eval_runtime': 0.031, 'eval_samples_per_second': 8677.114, 'eval_steps_per_second': 290.312, 'epoch': 25.0}
{'loss': 0.3198, 'grad_norm': 0.40500667691230774, 'learning_rate': 1e-05, 'epoch': 26.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.3125544786453247, 'eval_auc': 0.8006105006105007, 'eval_runtime': 0.027, 'eval_samples_per_second': 9962.278, 'eval_steps_per_second': 333.31, 'epoch': 26.0}
{'loss': 0.3261, 'grad_norm': 0.628402054309845, 'learning_rate': 1e-05, 'epoch': 27.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.31253427267074585, 'eval_auc': 0.8006105006105007, 'eval_runtime': 0.03, 'eval_samples_per_second': 8965.963, 'eval_steps_per_second': 299.976, 'epoch': 27.0}
{'loss': 0.317, 'grad_norm': 0.3797178268432617, 'learning_rate': 1e-05, 'epoch': 28.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.31250685453414917, 'eval_auc': 0.8008547008547009, 'eval_runtime': 0.029, 'eval_samples_per_second': 9275.696, 'eval_steps_per_second': 310.339, 'epoch': 28.0}
{'loss': 0.3193, 'grad_norm': 0.24055086076259613, 'learning_rate': 1e-05, 'epoch': 29.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.31248706579208374, 'eval_auc': 0.8008547008547009, 'eval_runtime': 0.028, 'eval_samples_per_second': 9606.121, 'eval_steps_per_second': 321.394, 'epoch': 29.0}
{'loss': 0.3245, 'grad_norm': 0.3753252625465393, 'learning_rate': 1e-05, 'epoch': 30.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.3124677240848541, 'eval_auc': 0.8008547008547009, 'eval_runtime': 0.029, 'eval_samples_per_second': 9274.781, 'eval_steps_per_second': 310.309, 'epoch': 30.0}
{'loss': 0.321, 'grad_norm': 0.34531667828559875, 'learning_rate': 1e-05, 'epoch': 31.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.31243929266929626, 'eval_auc': 0.800976800976801, 'eval_runtime': 0.027, 'eval_samples_per_second': 9963.333, 'eval_steps_per_second': 333.346, 'epoch': 31.0}
{'loss': 0.3232, 'grad_norm': 0.30616071820259094, 'learning_rate': 1e-05, 'epoch': 32.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.3124230206012726, 'eval_auc': 0.8008547008547009, 'eval_runtime': 0.0288, 'eval_samples_per_second': 9331.622, 'eval_steps_per_second': 312.21, 'epoch': 32.0}
{'loss': 0.3229, 'grad_norm': 0.28089672327041626, 'learning_rate': 1e-05, 'epoch': 33.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.31241437792778015, 'eval_auc': 0.8008547008547009, 'eval_runtime': 0.03, 'eval_samples_per_second': 8965.891, 'eval_steps_per_second': 299.974, 'epoch': 33.0}
{'loss': 0.323, 'grad_norm': 0.47080954909324646, 'learning_rate': 1e-05, 'epoch': 34.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.3123864233493805, 'eval_auc': 0.8007326007326008, 'eval_runtime': 0.028, 'eval_samples_per_second': 9606.53, 'eval_steps_per_second': 321.408, 'epoch': 34.0}
{'loss': 0.3274, 'grad_norm': 0.6396613717079163, 'learning_rate': 1.0000000000000002e-06, 'epoch': 35.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.3123859167098999, 'eval_auc': 0.8008547008547009, 'eval_runtime': 0.028, 'eval_samples_per_second': 9606.858, 'eval_steps_per_second': 321.419, 'epoch': 35.0}
{'loss': 0.3167, 'grad_norm': 0.2689070999622345, 'learning_rate': 1.0000000000000002e-06, 'epoch': 36.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.31238245964050293, 'eval_auc': 0.8008547008547009, 'eval_runtime': 0.034, 'eval_samples_per_second': 7911.95, 'eval_steps_per_second': 264.712, 'epoch': 36.0}
{'loss': 0.319, 'grad_norm': 0.30350223183631897, 'learning_rate': 1.0000000000000002e-06, 'epoch': 37.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.3123804032802582, 'eval_auc': 0.8008547008547009, 'eval_runtime': 0.026, 'eval_samples_per_second': 10344.722, 'eval_steps_per_second': 346.106, 'epoch': 37.0}
{'loss': 0.3139, 'grad_norm': 0.2918669283390045, 'learning_rate': 1.0000000000000002e-06, 'epoch': 38.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.3123769164085388, 'eval_auc': 0.8007326007326008, 'eval_runtime': 0.026, 'eval_samples_per_second': 10346.24, 'eval_steps_per_second': 346.157, 'epoch': 38.0}
{'loss': 0.3199, 'grad_norm': 0.229884073138237, 'learning_rate': 1.0000000000000002e-06, 'epoch': 39.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.3123750686645508, 'eval_auc': 0.8007326007326008, 'eval_runtime': 0.03, 'eval_samples_per_second': 8965.321, 'eval_steps_per_second': 299.955, 'epoch': 39.0}
{'loss': 0.3223, 'grad_norm': 0.2626439034938812, 'learning_rate': 1.0000000000000002e-06, 'epoch': 40.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.3123721182346344, 'eval_auc': 0.8007326007326008, 'eval_runtime': 0.03, 'eval_samples_per_second': 8966.818, 'eval_steps_per_second': 300.005, 'epoch': 40.0}
{'loss': 0.3192, 'grad_norm': 0.40234845876693726, 'learning_rate': 1.0000000000000002e-06, 'epoch': 41.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.31237003207206726, 'eval_auc': 0.8007326007326008, 'eval_runtime': 0.025, 'eval_samples_per_second': 10760.164, 'eval_steps_per_second': 360.005, 'epoch': 41.0}
{'train_runtime': 8.0446, 'train_samples_per_second': 13362.943, 'train_steps_per_second': 422.642, 'train_loss': 0.343622163173287, 'epoch': 41.0}


  0%|          | 0/3400 [00:00<?, ?it/s]

{'loss': 0.5778, 'grad_norm': 0.11649471521377563, 'learning_rate': 0.001, 'epoch': 1.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.5120256543159485, 'eval_auc': 0.6993006993006993, 'eval_runtime': 0.027, 'eval_samples_per_second': 9924.805, 'eval_steps_per_second': 333.296, 'epoch': 1.0}
{'loss': 0.5117, 'grad_norm': 0.5667338371276855, 'learning_rate': 0.001, 'epoch': 2.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.4144202768802643, 'eval_auc': 0.7091544818817546, 'eval_runtime': 0.027, 'eval_samples_per_second': 9926.382, 'eval_steps_per_second': 333.349, 'epoch': 2.0}
{'loss': 0.4542, 'grad_norm': 0.2648465633392334, 'learning_rate': 0.001, 'epoch': 3.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.3401172161102295, 'eval_auc': 0.7347425301970756, 'eval_runtime': 0.039, 'eval_samples_per_second': 6871.663, 'eval_steps_per_second': 230.765, 'epoch': 3.0}
{'loss': 0.4324, 'grad_norm': 0.20265477895736694, 'learning_rate': 0.001, 'epoch': 4.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.31592878699302673, 'eval_auc': 0.7774952320406866, 'eval_runtime': 0.027, 'eval_samples_per_second': 9925.068, 'eval_steps_per_second': 333.305, 'epoch': 4.0}
{'loss': 0.4102, 'grad_norm': 0.19445228576660156, 'learning_rate': 0.001, 'epoch': 5.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.29702889919281006, 'eval_auc': 0.8219961856325493, 'eval_runtime': 0.028, 'eval_samples_per_second': 9570.085, 'eval_steps_per_second': 321.383, 'epoch': 5.0}
{'loss': 0.3954, 'grad_norm': 0.3072047233581543, 'learning_rate': 0.001, 'epoch': 6.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.28233200311660767, 'eval_auc': 0.8418626827717737, 'eval_runtime': 0.026, 'eval_samples_per_second': 10307.778, 'eval_steps_per_second': 346.157, 'epoch': 6.0}
{'loss': 0.3829, 'grad_norm': 0.22941668331623077, 'learning_rate': 0.001, 'epoch': 7.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.27069953083992004, 'eval_auc': 0.847743165924984, 'eval_runtime': 0.035, 'eval_samples_per_second': 7657.437, 'eval_steps_per_second': 257.153, 'epoch': 7.0}
{'loss': 0.374, 'grad_norm': 0.27769744396209717, 'learning_rate': 0.001, 'epoch': 8.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.26548516750335693, 'eval_auc': 0.8571201525746981, 'eval_runtime': 0.027, 'eval_samples_per_second': 9925.857, 'eval_steps_per_second': 333.331, 'epoch': 8.0}
{'loss': 0.3613, 'grad_norm': 0.24106380343437195, 'learning_rate': 0.001, 'epoch': 9.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.25656476616859436, 'eval_auc': 0.865702479338843, 'eval_runtime': 0.027, 'eval_samples_per_second': 9925.857, 'eval_steps_per_second': 333.331, 'epoch': 9.0}
{'loss': 0.3526, 'grad_norm': 0.331400990486145, 'learning_rate': 0.001, 'epoch': 10.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.2476680874824524, 'eval_auc': 0.875079465988557, 'eval_runtime': 0.03, 'eval_samples_per_second': 8933.2, 'eval_steps_per_second': 299.996, 'epoch': 10.0}
{'loss': 0.3552, 'grad_norm': 0.38988861441612244, 'learning_rate': 0.001, 'epoch': 11.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.25020095705986023, 'eval_auc': 0.8780991735537189, 'eval_runtime': 0.0282, 'eval_samples_per_second': 9506.148, 'eval_steps_per_second': 319.236, 'epoch': 11.0}
{'loss': 0.3456, 'grad_norm': 0.2547082304954529, 'learning_rate': 0.001, 'epoch': 12.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.23636271059513092, 'eval_auc': 0.8833439287984742, 'eval_runtime': 0.03, 'eval_samples_per_second': 8933.484, 'eval_steps_per_second': 300.005, 'epoch': 12.0}
{'loss': 0.3473, 'grad_norm': 0.31276485323905945, 'learning_rate': 0.0001, 'epoch': 13.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.23632413148880005, 'eval_auc': 0.883502860775588, 'eval_runtime': 0.027, 'eval_samples_per_second': 9925.769, 'eval_steps_per_second': 333.328, 'epoch': 13.0}
{'loss': 0.3338, 'grad_norm': 0.2963481843471527, 'learning_rate': 0.0001, 'epoch': 14.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.23715181648731232, 'eval_auc': 0.8828671328671329, 'eval_runtime': 0.027, 'eval_samples_per_second': 9925.681, 'eval_steps_per_second': 333.325, 'epoch': 14.0}
{'loss': 0.328, 'grad_norm': 0.29110416769981384, 'learning_rate': 0.0001, 'epoch': 15.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.2375069409608841, 'eval_auc': 0.8831849968213604, 'eval_runtime': 0.031, 'eval_samples_per_second': 8644.325, 'eval_steps_per_second': 290.295, 'epoch': 15.0}
{'loss': 0.3408, 'grad_norm': 0.3884955048561096, 'learning_rate': 0.0001, 'epoch': 16.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.23730072379112244, 'eval_auc': 0.8833439287984743, 'eval_runtime': 0.028, 'eval_samples_per_second': 9571.389, 'eval_steps_per_second': 321.427, 'epoch': 16.0}
{'loss': 0.3446, 'grad_norm': 0.24859999120235443, 'learning_rate': 0.0001, 'epoch': 17.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.23775582015514374, 'eval_auc': 0.8836617927527018, 'eval_runtime': 0.03, 'eval_samples_per_second': 8932.845, 'eval_steps_per_second': 299.984, 'epoch': 17.0}
{'loss': 0.3395, 'grad_norm': 0.7652255296707153, 'learning_rate': 0.0001, 'epoch': 18.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.2383134514093399, 'eval_auc': 0.8836617927527017, 'eval_runtime': 0.026, 'eval_samples_per_second': 10308.345, 'eval_steps_per_second': 346.176, 'epoch': 18.0}
{'loss': 0.3404, 'grad_norm': 0.36500903964042664, 'learning_rate': 0.0001, 'epoch': 19.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.2388351559638977, 'eval_auc': 0.8833439287984743, 'eval_runtime': 0.025, 'eval_samples_per_second': 10719.857, 'eval_steps_per_second': 359.995, 'epoch': 19.0}
{'loss': 0.336, 'grad_norm': 0.23655402660369873, 'learning_rate': 0.0001, 'epoch': 20.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.2395981103181839, 'eval_auc': 0.883661792752702, 'eval_runtime': 0.028, 'eval_samples_per_second': 9571.389, 'eval_steps_per_second': 321.427, 'epoch': 20.0}
{'loss': 0.3374, 'grad_norm': 0.33183467388153076, 'learning_rate': 0.0001, 'epoch': 21.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.239741250872612, 'eval_auc': 0.8835028607755882, 'eval_runtime': 0.03, 'eval_samples_per_second': 8933.555, 'eval_steps_per_second': 300.007, 'epoch': 21.0}
{'loss': 0.3349, 'grad_norm': 0.23713535070419312, 'learning_rate': 0.0001, 'epoch': 22.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.23939970135688782, 'eval_auc': 0.8838207247298157, 'eval_runtime': 0.027, 'eval_samples_per_second': 9926.032, 'eval_steps_per_second': 333.337, 'epoch': 22.0}
{'loss': 0.3377, 'grad_norm': 0.3823864161968231, 'learning_rate': 0.0001, 'epoch': 23.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.23886217176914215, 'eval_auc': 0.8841385886840433, 'eval_runtime': 0.027, 'eval_samples_per_second': 9925.944, 'eval_steps_per_second': 333.334, 'epoch': 23.0}
{'loss': 0.3427, 'grad_norm': 0.5408858060836792, 'learning_rate': 1e-05, 'epoch': 24.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.2387315332889557, 'eval_auc': 0.8844564526382709, 'eval_runtime': 0.029, 'eval_samples_per_second': 9240.682, 'eval_steps_per_second': 310.321, 'epoch': 24.0}
{'loss': 0.3363, 'grad_norm': 0.3778921365737915, 'learning_rate': 1e-05, 'epoch': 25.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.23882333934307098, 'eval_auc': 0.8844564526382709, 'eval_runtime': 0.039, 'eval_samples_per_second': 6871.495, 'eval_steps_per_second': 230.759, 'epoch': 25.0}
{'loss': 0.3339, 'grad_norm': 0.3096606135368347, 'learning_rate': 1e-05, 'epoch': 26.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.23877345025539398, 'eval_auc': 0.8844564526382709, 'eval_runtime': 0.028, 'eval_samples_per_second': 9571.552, 'eval_steps_per_second': 321.433, 'epoch': 26.0}
{'loss': 0.3332, 'grad_norm': 0.43213486671447754, 'learning_rate': 1e-05, 'epoch': 27.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.2386687695980072, 'eval_auc': 0.8844564526382709, 'eval_runtime': 0.044, 'eval_samples_per_second': 6090.888, 'eval_steps_per_second': 204.545, 'epoch': 27.0}
{'loss': 0.3321, 'grad_norm': 0.258098840713501, 'learning_rate': 1e-05, 'epoch': 28.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.23868735134601593, 'eval_auc': 0.8844564526382709, 'eval_runtime': 0.028, 'eval_samples_per_second': 9570.818, 'eval_steps_per_second': 321.408, 'epoch': 28.0}
{'loss': 0.3326, 'grad_norm': 0.48125824332237244, 'learning_rate': 1e-05, 'epoch': 29.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.2386094331741333, 'eval_auc': 0.8844564526382709, 'eval_runtime': 0.046, 'eval_samples_per_second': 5825.875, 'eval_steps_per_second': 195.645, 'epoch': 29.0}
{'loss': 0.3395, 'grad_norm': 0.24965451657772064, 'learning_rate': 1e-05, 'epoch': 30.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.23872488737106323, 'eval_auc': 0.8844564526382709, 'eval_runtime': 0.029, 'eval_samples_per_second': 9241.365, 'eval_steps_per_second': 310.344, 'epoch': 30.0}
{'loss': 0.3364, 'grad_norm': 0.263850599527359, 'learning_rate': 1e-05, 'epoch': 31.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.2386605143547058, 'eval_auc': 0.8844564526382709, 'eval_runtime': 0.042, 'eval_samples_per_second': 6380.908, 'eval_steps_per_second': 214.284, 'epoch': 31.0}
{'loss': 0.3252, 'grad_norm': 0.3092857897281647, 'learning_rate': 1e-05, 'epoch': 32.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.23866738379001617, 'eval_auc': 0.8844564526382709, 'eval_runtime': 0.028, 'eval_samples_per_second': 9571.47, 'eval_steps_per_second': 321.43, 'epoch': 32.0}
{'loss': 0.3253, 'grad_norm': 0.32626873254776, 'learning_rate': 1e-05, 'epoch': 33.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.23875656723976135, 'eval_auc': 0.8846153846153846, 'eval_runtime': 0.037, 'eval_samples_per_second': 7243.395, 'eval_steps_per_second': 243.248, 'epoch': 33.0}
{'loss': 0.3346, 'grad_norm': 0.3541235029697418, 'learning_rate': 1e-05, 'epoch': 34.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.23860247433185577, 'eval_auc': 0.8847743165924984, 'eval_runtime': 0.026, 'eval_samples_per_second': 10307.967, 'eval_steps_per_second': 346.163, 'epoch': 34.0}
{'loss': 0.3386, 'grad_norm': 0.5292523503303528, 'learning_rate': 1.0000000000000002e-06, 'epoch': 35.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.23860378563404083, 'eval_auc': 0.8847743165924984, 'eval_runtime': 0.026, 'eval_samples_per_second': 10304.848, 'eval_steps_per_second': 346.058, 'epoch': 35.0}
{'loss': 0.3353, 'grad_norm': 0.21270228922367096, 'learning_rate': 1.0000000000000002e-06, 'epoch': 36.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.23860663175582886, 'eval_auc': 0.8847743165924984, 'eval_runtime': 0.027, 'eval_samples_per_second': 9926.207, 'eval_steps_per_second': 333.343, 'epoch': 36.0}
{'loss': 0.3304, 'grad_norm': 0.37426719069480896, 'learning_rate': 1.0000000000000002e-06, 'epoch': 37.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.2385886162519455, 'eval_auc': 0.8847743165924984, 'eval_runtime': 0.029, 'eval_samples_per_second': 9241.441, 'eval_steps_per_second': 310.347, 'epoch': 37.0}
{'loss': 0.3273, 'grad_norm': 0.2710532546043396, 'learning_rate': 1.0000000000000002e-06, 'epoch': 38.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.23859359323978424, 'eval_auc': 0.8847743165924984, 'eval_runtime': 0.026, 'eval_samples_per_second': 10307.4, 'eval_steps_per_second': 346.144, 'epoch': 38.0}
{'loss': 0.3304, 'grad_norm': 0.3380688726902008, 'learning_rate': 1.0000000000000002e-06, 'epoch': 39.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.23858541250228882, 'eval_auc': 0.8847743165924984, 'eval_runtime': 0.025, 'eval_samples_per_second': 10719.755, 'eval_steps_per_second': 359.992, 'epoch': 39.0}
{'loss': 0.328, 'grad_norm': 0.2857165038585663, 'learning_rate': 1.0000000000000002e-06, 'epoch': 40.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.238581120967865, 'eval_auc': 0.8847743165924985, 'eval_runtime': 0.029, 'eval_samples_per_second': 9241.213, 'eval_steps_per_second': 310.339, 'epoch': 40.0}
{'loss': 0.3349, 'grad_norm': 0.3158630132675171, 'learning_rate': 1.0000000000000002e-06, 'epoch': 41.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.23858414590358734, 'eval_auc': 0.8847743165924984, 'eval_runtime': 0.026, 'eval_samples_per_second': 10307.683, 'eval_steps_per_second': 346.154, 'epoch': 41.0}
{'loss': 0.3324, 'grad_norm': 0.23203760385513306, 'learning_rate': 1.0000000000000002e-06, 'epoch': 42.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.23857936263084412, 'eval_auc': 0.8847743165924984, 'eval_runtime': 0.028, 'eval_samples_per_second': 9570.981, 'eval_steps_per_second': 321.414, 'epoch': 42.0}
{'loss': 0.3333, 'grad_norm': 0.3704831898212433, 'learning_rate': 1.0000000000000002e-06, 'epoch': 43.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.23856791853904724, 'eval_auc': 0.8847743165924985, 'eval_runtime': 0.029, 'eval_samples_per_second': 9241.517, 'eval_steps_per_second': 310.349, 'epoch': 43.0}
{'loss': 0.3371, 'grad_norm': 0.3859098553657532, 'learning_rate': 1.0000000000000002e-06, 'epoch': 44.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.23857001960277557, 'eval_auc': 0.8847743165924984, 'eval_runtime': 0.03, 'eval_samples_per_second': 8932.632, 'eval_steps_per_second': 299.976, 'epoch': 44.0}
{'loss': 0.3309, 'grad_norm': 0.4461885392665863, 'learning_rate': 1.0000000000000002e-06, 'epoch': 45.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.2385542392730713, 'eval_auc': 0.8847743165924985, 'eval_runtime': 0.026, 'eval_samples_per_second': 10307.683, 'eval_steps_per_second': 346.154, 'epoch': 45.0}
{'loss': 0.3331, 'grad_norm': 0.41812556982040405, 'learning_rate': 1.0000000000000002e-07, 'epoch': 46.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.238555446267128, 'eval_auc': 0.8847743165924984, 'eval_runtime': 0.034, 'eval_samples_per_second': 7882.261, 'eval_steps_per_second': 264.703, 'epoch': 46.0}
{'loss': 0.3347, 'grad_norm': 0.25684085488319397, 'learning_rate': 1.0000000000000002e-07, 'epoch': 47.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.2385544627904892, 'eval_auc': 0.8847743165924985, 'eval_runtime': 0.027, 'eval_samples_per_second': 9926.382, 'eval_steps_per_second': 333.349, 'epoch': 47.0}
{'loss': 0.3345, 'grad_norm': 0.6610632538795471, 'learning_rate': 1.0000000000000002e-07, 'epoch': 48.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.2385524958372116, 'eval_auc': 0.8847743165924984, 'eval_runtime': 0.028, 'eval_samples_per_second': 9571.878, 'eval_steps_per_second': 321.444, 'epoch': 48.0}
{'loss': 0.3349, 'grad_norm': 0.2811409831047058, 'learning_rate': 1.0000000000000002e-07, 'epoch': 49.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.2385532110929489, 'eval_auc': 0.8847743165924985, 'eval_runtime': 0.026, 'eval_samples_per_second': 10307.683, 'eval_steps_per_second': 346.154, 'epoch': 49.0}
{'loss': 0.3377, 'grad_norm': 0.3841450810432434, 'learning_rate': 1.0000000000000002e-07, 'epoch': 50.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.23855243623256683, 'eval_auc': 0.8847743165924985, 'eval_runtime': 0.027, 'eval_samples_per_second': 9925.769, 'eval_steps_per_second': 333.328, 'epoch': 50.0}
{'train_runtime': 9.772, 'train_samples_per_second': 11011.105, 'train_steps_per_second': 347.935, 'train_loss': 0.3535710003796746, 'epoch': 50.0}


In [36]:
scores = []
for trainer in trainer_list:
    scores.append(trainer.state.best_metric)

np.mean(scores)

0.8318624510437662

# 테스트 데이터 예측

In [37]:
test_dict = {"x": test_ft}
test_dt = Dataset.from_dict(test_dict)
test_dt

Dataset({
    features: ['x'],
    num_rows: 896
})

In [38]:
sig = torch.nn.Sigmoid()
pred_list = []
for trainer in trainer_list:
    pred_output = trainer.predict(test_dt)
    pred = torch.Tensor(pred_output.predictions)
    pred = sig(pred).numpy()
    pred_list.append(pred)

  0%|          | 0/28 [00:00<?, ?it/s]

  0%|          | 0/28 [00:00<?, ?it/s]

  0%|          | 0/28 [00:00<?, ?it/s]

  0%|          | 0/28 [00:00<?, ?it/s]

  0%|          | 0/28 [00:00<?, ?it/s]

In [39]:
pred = np.mean(pred_list, axis=0)
pred

array([[0.05044533],
       [0.09840681],
       [0.0909398 ],
       [0.33907896],
       [0.09959547],
       [0.3112548 ],
       [0.5630649 ],
       [0.14410111],
       [0.1128155 ],
       [0.07603491],
       [0.07350561],
       [0.0881794 ],
       [0.1505562 ],
       [0.08533502],
       [0.38068622],
       [0.05456564],
       [0.12584177],
       [0.49073964],
       [0.05581699],
       [0.16177419],
       [0.10844004],
       [0.17539969],
       [0.03216618],
       [0.05652187],
       [0.06908792],
       [0.2228659 ],
       [0.10119226],
       [0.12001918],
       [0.1363337 ],
       [0.21725789],
       [0.1224153 ],
       [0.26481122],
       [0.08616831],
       [0.4805251 ],
       [0.08482625],
       [0.0258268 ],
       [0.38198194],
       [0.06233369],
       [0.06362274],
       [0.0603743 ],
       [0.41983086],
       [0.03229585],
       [0.18673691],
       [0.03786109],
       [0.05390849],
       [0.02575592],
       [0.04876714],
       [0.048