In [None]:
# Colab-only: mount Google Drive so the /content/drive/... paths used in Args resolve.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
from tqdm import tqdm
from nltk.tokenize import word_tokenize
from collections import Counter
import numpy as np
import random
import logging
import os
import torch.optim as optim
import torch.distributed as dist
from torch.utils.data import DataLoader
from pathlib import Path



In [None]:

from dataclasses import dataclass
from typing import Optional

@dataclass
class Args:
    """Run configuration for the notebook (stands in for command-line arguments).

    Every field has a default, so ``Args()`` yields a complete configuration.
    """
    # --- run control ---
    # presumably the nGPU value handed to the prepare_* helpers, which write one file per rank — confirm at call site
    nGPU: int = 1
    # seed passed to random.seed() in the entry-point cell
    seed: int = 0
    # NOTE(review): `prepare` and `mode` are not read by the code visible here — confirm usage
    prepare: bool = True
    mode: str = "train"
    # --- data locations ---
    # train() reads `news.tsv` and `behaviors_np{npratio}_{rank}.tsv` from train_data_dir
    train_data_dir: str = "/content/drive/MyDrive/Colab Notebooks/NewsRecommendation/data/MINDsmall_train"
    test_data_dir: str = "/content/drive/MyDrive/Colab Notebooks/NewsRecommendation/data/MINDsmall_dev"
    # JSON file loaded by read_news() when use_custom_abstract is True
    train_abstract_dir: str = '/content/genAbs0.json'
    # "/content/drive/MyDrive/Colab Notebooks/NewsRecommendation/data/train_gen_abs.json"
    test_abstract_dir: str = '/content/genAbs0.json'
    # "/content/drive/MyDrive/Colab Notebooks/NewsRecommendation/data/Dev_gen_abs.json"
    # directory where train() writes and looks up checkpoints
    model_dir: str = '/content/drive/MyDrive/Colab Notebooks/NewsRecommendation/NRMS_title'
    # --- optimisation ---
    batch_size: int = 32
    # DatasetTrain draws the positive's slot in [0, npratio]; also part of the training file name
    npratio: int = 4
    # when True, train() moves the model and batches to cuda(rank)
    enable_gpu: bool = True
    # read_news() keeps only words whose title count is strictly greater than this
    filter_num: int = 3
    # train() logs running loss/accuracy every log_steps batches
    log_steps: int = 100
    epochs: int = 5
    lr: float = 0.0003
    # --- input sizes ---
    # column counts of the title / abstract id matrices built by get_doc_input()
    num_words_title: int = 20
    num_words_abstract: int = 50
    # click histories are padded/truncated to this length in the datasets
    user_log_length: int = 50
    # --- model ---
    word_embedding_dim: int = 300
    glove_embedding_path: str = '/content/drive/MyDrive/Colab Notebooks/NewsRecommendation/data/glove.840B.300d.txt'
    freeze_embedding: bool = False
    news_dim: int = 400
    news_query_vector_dim: int = 200
    user_query_vector_dim: int = 200
    # MultiHeadSelfAttention asserts that its model dimension is divisible by this
    num_attention_heads: int = 15
    # NOTE(review): only referenced from commented-out code in UserEncoder — confirm
    user_log_mask: bool = False
    drop_rate: float = 0.2
    # --- checkpointing ---
    # train() saves an intermediate checkpoint every save_steps batches (rank 0 only)
    save_steps: int = 10000
    start_epoch: int = 0
    # when set, train() loads this file from model_dir before training
    load_ckpt_name: Optional[str] = None
    # --- feature switches ---
    use_category: bool = True
    use_subcategory: bool = True
    use_abstract: bool = True
    use_custom_abstract: bool = True
    # NOTE(review): not read by the code visible here — confirm usage
    category_emb_dim: int = 100

def parse_args():
    """Return the run configuration built purely from the ``Args`` defaults."""
    default_config = Args()
    return default_config


**Dataset.py**

In [None]:
from torch.utils.data import IterableDataset, Dataset
import numpy as np
import random


class DatasetTrain(IterableDataset):
    """Streams training samples from a pre-processed, tab-separated behaviors file.

    Each line is expected to carry the click history in column 3, the positive
    news id(s) in column 4 and the sampled negative ids in column 5 (the layout
    written by ``prepare_training_data``).

    Args:
        filename: path of the behaviors file to stream.
        news_index: mapping news id -> row index; unknown ids map to row 0.
        news_combined: array indexed by row index that holds the per-news features.
        args: configuration object; ``user_log_length`` and ``npratio`` are read.
    """

    def __init__(self, filename, news_index, news_combined, args):
        # `super(DatasetTrain).__init__()` only builds an unbound super object and
        # never runs the base-class initializer; the zero-argument form does.
        super().__init__()
        self.filename = filename
        self.news_index = news_index
        self.news_combined = news_combined
        self.args = args

    def trans_to_nindex(self, nids):
        """Translate news ids to integer row indices (0 for ids not in the index)."""
        return [int(self.news_index[i]) if i in self.news_index else 0 for i in nids]

    def pad_to_fix_len(self, x, fix_length, padding_front=True, padding_value=0):
        """Pad or truncate ``x`` to ``fix_length``, keeping its most recent items.

        Returns:
            (padded_list, mask) where mask is a float32 array with 1 on real
            entries and 0 on padding.
        """
        # A negative multiplier yields an empty list, so over-long inputs are
        # simply truncated to their last `fix_length` items.
        if padding_front:
            pad_x = [padding_value] * (fix_length - len(x)) + x[-fix_length:]
            mask = [0] * (fix_length - len(x)) + [1] * min(fix_length, len(x))
        else:
            pad_x = x[-fix_length:] + [padding_value] * (fix_length - len(x))
            mask = [1] * min(fix_length, len(x)) + [0] * (fix_length - len(x))
        return pad_x, np.array(mask, dtype='float32')

    def line_mapper(self, line):
        """Turn one behaviors line into ``(user_feature, news_feature, label)``.

        ``label`` is the randomly drawn position of the positive news inside the
        candidate list.
        """
        line = line.strip().split('\t')
        click_docs = line[3].split()
        sess_pos = line[4].split()
        sess_neg = line[5].split()

        # The padding mask is not part of the returned sample.
        click_docs, _ = self.pad_to_fix_len(self.trans_to_nindex(click_docs), self.args.user_log_length)
        user_feature = self.news_combined[click_docs]

        pos = self.trans_to_nindex(sess_pos)
        neg = self.trans_to_nindex(sess_neg)

        # Insert the positive at a random slot so its position carries no signal.
        label = random.randint(0, self.args.npratio)
        sample_news = neg[:label] + pos + neg[label:]
        news_feature = self.news_combined[sample_news]

        return user_feature, news_feature, label

    def __iter__(self):
        # Generator form so the file handle is closed once the stream is exhausted
        # (the previous `map(..., open(...))` never closed it).
        with open(self.filename) as file_iter:
            for line in file_iter:
                yield self.line_mapper(line)


class DatasetTest(DatasetTrain):
    """Streams evaluation samples from a raw behaviors file.

    Column 3 of each line holds the click history and column 4 the impression
    list formatted as ``<news_id>-<label>`` items.

    Args:
        filename: path of the behaviors file to stream.
        news_index: mapping news id -> row index; unknown ids map to row 0.
        news_scoring: array indexed by row index that holds the per-news features.
        args: configuration object; ``user_log_length`` is read.
    """

    def __init__(self, filename, news_index, news_scoring, args):
        # Skip DatasetTrain.__init__ on purpose (this class stores `news_scoring`,
        # not `news_combined`) but do run the IterableDataset initializer; the old
        # `super(DatasetTrain).__init__()` created an unbound super and ran neither.
        super(DatasetTrain, self).__init__()
        self.filename = filename
        self.news_index = news_index
        self.news_scoring = news_scoring
        self.args = args

    def line_mapper(self, line):
        """Turn one behaviors line into ``(user_feature, news_feature, labels)``."""
        line = line.strip().split('\t')
        click_docs = line[3].split()
        # The padding mask is not part of the returned sample.
        click_docs, _ = self.pad_to_fix_len(self.trans_to_nindex(click_docs), self.args.user_log_length)
        user_feature = self.news_scoring[click_docs]

        # Split every "<news_id>-<label>" item once and reuse both halves.
        impressions = [item.split('-') for item in line[4].split()]
        candidate_news = self.trans_to_nindex([parts[0] for parts in impressions])
        labels = np.array([int(parts[1]) for parts in impressions])
        news_feature = self.news_scoring[candidate_news]

        return user_feature, news_feature, labels

    def __iter__(self):
        # Generator form so the file handle is closed once the stream is exhausted.
        with open(self.filename) as file_iter:
            for line in file_iter:
                yield self.line_mapper(line)


class NewsDataset(Dataset):
    """Map-style dataset that exposes the rows of an array-like object."""

    def __init__(self, data):
        # `data` must support integer indexing and expose `.shape`.
        self.data = data

    def __getitem__(self, idx):
        """Return the entry stored at position ``idx``."""
        row = self.data[idx]
        return row

    def __len__(self):
        """Number of entries, taken from the first dimension of ``data``."""
        dims = self.data.shape
        return dims[0]


**Metric.py**

In [None]:
from sklearn.metrics import roc_auc_score
import numpy as np


def dcg_score(y_true, y_score, k=10):
    """Discounted cumulative gain of the ``k`` highest-scored items.

    Items are ranked by descending ``y_score``; each contributes
    ``(2**label - 1) / log2(rank + 1)`` with ranks starting at 1.
    """
    top_k = np.argsort(y_score)[::-1][:k]
    ranked_labels = np.take(y_true, top_k)
    gains = 2**ranked_labels - 1
    # arange(2, n + 2) is log2's argument for ranks 1..n
    discounts = np.log2(np.arange(2, len(ranked_labels) + 2))
    return (gains / discounts).sum()


def ndcg_score(y_true, y_score, k=10):
    """DCG@k of ``y_score`` divided by the DCG@k of the ideal ordering.

    No guard is applied when the ideal DCG is zero (no relevant item), so the
    division yields a non-finite value in that case.
    """
    ideal_dcg = dcg_score(y_true, y_true, k)
    achieved_dcg = dcg_score(y_true, y_score, k)
    return achieved_dcg / ideal_dcg


def mrr_score(y_true, y_score):
    """Mean reciprocal rank of the positive labels under the ``y_score`` ranking.

    The sum of ``label / rank`` is divided by the sum of labels; no guard is
    applied when that sum is zero.
    """
    ranking = np.argsort(y_score)[::-1]
    ranked_labels = np.take(y_true, ranking)
    ranks = np.arange(1, len(ranked_labels) + 1)
    return (ranked_labels / ranks).sum() / ranked_labels.sum()


def ctr_score(y_true, y_score, k=1):
    """Mean label of the ``k`` items with the highest ``y_score``."""
    top_k = np.argsort(y_score)[::-1][:k]
    return np.take(y_true, top_k).mean()

def acc(y_true, y_hat):
    """Fraction of rows whose arg-max over the last axis of ``y_hat`` equals ``y_true``.

    Returns a float tensor (not a Python number).
    """
    predicted = y_hat.argmax(dim=-1)
    batch = y_true.shape[0]
    correct = (y_true == predicted).sum()
    return correct.data.float() * 1.0 / batch



**Utils.py**

In [None]:
import logging
import argparse
import sys

def setuplogger():
    """Route INFO-level logging to stdout through exactly one handler.

    The root logger's existing handlers are removed first. Without this, every
    re-run of the cell stacks another handler, and any handler that was already
    installed on the root logger keeps emitting too, so each message is printed
    more than once.
    """
    root = logging.getLogger()
    root.setLevel(logging.INFO)
    # Iterate over a copy: removeHandler mutates root.handlers.
    for stale_handler in list(root.handlers):
        root.removeHandler(stale_handler)
        stale_handler.close()
    handler = logging.StreamHandler(sys.stdout)
    handler.setLevel(logging.INFO)
    formatter = logging.Formatter("[%(levelname)s %(asctime)s] %(message)s")
    handler.setFormatter(formatter)
    root.addHandler(handler)


def dump_args(args):
    """Log every attribute of ``args`` whose name does not start with an underscore."""
    public_names = [name for name in dir(args) if not name.startswith("_")]
    for arg in public_names:
        logging.info(f"args[{arg}]={getattr(args, arg)}")

def load_matrix(embedding_file_path, word_dict, word_embedding_dim):
    """Build an embedding matrix for ``word_dict`` from a text embedding file.

    Args:
        embedding_file_path: path of a file with one ``<token> <v1> ... <vN>`` entry
            per line, or ``None`` to get an all-zero matrix.
        word_dict: mapping word -> row index (row 0 is left as zeros).
        word_embedding_dim: number of vector components per line.

    Returns:
        (embedding_matrix, have_word): a ``(len(word_dict) + 1, word_embedding_dim)``
        array and the list of words for which a vector was found.
    """
    embedding_matrix = np.zeros(shape=(len(word_dict) + 1, word_embedding_dim))
    have_word = []
    if embedding_file_path is None:
        return embedding_matrix, have_word

    with open(embedding_file_path, 'rb') as f:
        for raw_line in f:
            fields = raw_line.split()
            split_at = len(fields) - word_embedding_dim
            # Blank lines and header lines carry no full vector: skip them.
            if split_at < 1:
                continue
            # The vector is always the trailing `word_embedding_dim` fields. Tokens
            # that themselves contain whitespace (e.g. ". . .") would otherwise put
            # token fragments into the float conversion and crash the load.
            word = b' '.join(fields[:split_at]).decode()
            if word in word_dict:
                embedding_matrix[word_dict[word]] = np.array(
                    [float(x) for x in fields[split_at:]])
                have_word.append(word)
    return embedding_matrix, have_word


def get_checkpoint(directory, ckpt_name):
    """Return the path ``directory/ckpt_name`` if it exists on disk, else ``None``."""
    candidate = os.path.join(directory, ckpt_name)
    return candidate if os.path.exists(candidate) else None


**preprocess.py**

In [None]:
from collections import Counter
from tqdm import tqdm
import numpy as np
from nltk.tokenize import word_tokenize
import json


def update_dict(dict, key, value=None):
    """Insert ``key`` into ``dict`` unless it is already present.

    When ``value`` is None the key receives the next 1-based index
    (``len(dict) + 1``); otherwise it receives ``value``. Existing keys are
    left untouched.
    """
    if key in dict:
        return
    dict[key] = (len(dict) + 1) if value is None else value


def read_custom_abstract(news_file, custom_abstract_dict):
    """Parse a tab-separated news file, substituting custom abstracts where given.

    Each line must hold exactly eight tab-separated fields. Titles and abstracts
    are split on single spaces (no lower-casing, no tokenizer).

    Returns:
        (news, news_index, category_dict, subcategory_dict, word_cnt) where
        ``news`` maps doc id -> [title_words, category, subcategory, abstract_words],
        the index/category dicts assign 1-based ids, and ``word_cnt`` counts every
        title and abstract word.
    """
    news, news_index = {}, {}
    category_dict, subcategory_dict = {}, {}
    word_cnt = {}

    with open(news_file, 'r', encoding='utf-8') as f:
        for line in f:
            (doc_id, category, subcategory, title,
             abstract, url, entity_title, entity_abstract) = line.strip('\n').split('\t')

            # A custom abstract, when present, replaces the one from the file.
            abstract = custom_abstract_dict[doc_id] if doc_id in custom_abstract_dict else abstract

            title_words = title.split(' ')
            abstract_words = abstract.split(' ')
            news[doc_id] = [title_words, category, subcategory, abstract_words]
            news_index[doc_id] = len(news_index) + 1

            for word in title_words + abstract_words:
                word_cnt[word] = word_cnt.get(word, 0) + 1

            category_dict.setdefault(category, len(category_dict) + 1)
            subcategory_dict.setdefault(subcategory, len(subcategory_dict) + 1)

    return news, news_index, category_dict, subcategory_dict, word_cnt

def read_news(news_path, abstract_path, args, mode='train'):
    """Read a tab-separated news file into tokenized per-document records.

    Args:
        news_path: path of the news file (eight tab-separated fields per line).
        abstract_path: path of a JSON object mapping doc id -> replacement
            abstract; only opened when ``args.use_custom_abstract`` is true.
        args: configuration; reads ``use_custom_abstract``, ``use_category``,
            ``use_subcategory`` and ``filter_num``.
        mode: ``'train'`` or ``'test'``.

    Returns:
        In train mode ``(news, news_index, category_dict, subcategory_dict, word_dict)``,
        in test mode ``(news, news_index)``. ``news`` maps doc id ->
        ``[title_tokens, category, subcategory, abstract_tokens]``.

    Raises:
        AssertionError: if ``mode`` is neither ``'train'`` nor ``'test'``.
    """
    # Reject a bad mode before reading anything, and do it with an explicit raise
    # so the check survives `python -O`.
    if mode not in ('train', 'test'):
        raise AssertionError('Wrong mode!')

    news = {}
    category_dict = {}
    subcategory_dict = {}
    news_index = {}
    word_cnt = Counter()

    # Default to "no overrides". Previously the lookup fell through to the builtin
    # `abs` when custom abstracts were disabled and raised TypeError.
    custom_abstracts = {}
    if args.use_custom_abstract:
        with open(abstract_path, 'r', encoding='utf-8') as f:
            custom_abstracts = json.load(f)

    with open(news_path, 'r', encoding='utf-8') as f:
        for line in tqdm(f):
            splited = line.strip('\n').split('\t')
            doc_id, category, subcategory, title, abstract, url, _, _ = splited
            update_dict(news_index, doc_id)

            title = word_tokenize(title.lower(), language='english', preserve_line=True)

            if doc_id in custom_abstracts:
                abstract = custom_abstracts[doc_id]
            # get_doc_input indexes the abstract position by position and looks each
            # item up in the word dictionary, so it must be a token list like the
            # title; a raw string would be consumed character by character.
            if isinstance(abstract, str):
                abstract = word_tokenize(abstract.lower(), language='english', preserve_line=True)

            update_dict(news, doc_id, [title, category, subcategory, abstract])
            if mode == 'train':
                if args.use_category:
                    update_dict(category_dict, category)
                if args.use_subcategory:
                    update_dict(subcategory_dict, subcategory)
                # NOTE(review): the vocabulary is built from titles only, so abstract
                # tokens that never occur in a title map to id 0 — confirm intended.
                word_cnt.update(title)

    if mode == 'test':
        return news, news_index

    # Keep words seen strictly more than `filter_num` times; ids start at 1.
    word = [k for k, v in word_cnt.items() if v > args.filter_num]
    word_dict = {k: v for k, v in zip(word, range(1, len(word) + 1))}
    return news, news_index, category_dict, subcategory_dict, word_dict


def get_doc_input(news, news_index, category_dict, subcategory_dict, word_dict, args):
    """Encode every news record as fixed-width int32 id arrays.

    Row 0 of each array stays zero; a document is written to row
    ``news_index[doc_id]``. Words, categories and subcategories missing from
    their dictionaries are encoded as 0.

    Returns:
        (news_title, news_category, news_subcategory, news_abstract); the last
        three are ``None`` when the matching ``args.use_*`` switch is off.
    """
    n_rows = len(news) + 1
    news_title = np.zeros((n_rows, args.num_words_title), dtype='int32')
    news_category = None
    if args.use_category:
        news_category = np.zeros((n_rows, 1), dtype='int32')
    news_subcategory = None
    if args.use_subcategory:
        news_subcategory = np.zeros((n_rows, 1), dtype='int32')
    news_abstract = None
    if args.use_abstract:
        news_abstract = np.zeros((n_rows, args.num_words_abstract), dtype='int32')

    for key in tqdm(news):
        title, category, subcategory, abstract = news[key]
        row = news_index[key]

        # Only the first num_words_title positions are encoded.
        for position, token in enumerate(title[:args.num_words_title]):
            if token in word_dict:
                news_title[row, position] = word_dict[token]

        if args.use_category:
            news_category[row, 0] = category_dict.get(category, 0)
        if args.use_subcategory:
            news_subcategory[row, 0] = subcategory_dict.get(subcategory, 0)
        if args.use_abstract:
            for position, token in enumerate(abstract[:args.num_words_abstract]):
                if token in word_dict:
                    news_abstract[row, position] = word_dict[token]

    return news_title, news_category, news_subcategory, news_abstract

**prepare_data.py**

In [None]:
import os
from tqdm import tqdm
import random
import logging


def get_sample(all_elements, num_sample):
    """Draw ``num_sample`` items from ``all_elements`` with ``random.sample``.

    When more items are requested than available, the list is repeated just
    often enough to make the draw possible, so duplicates may appear.
    """
    population = all_elements
    if num_sample > len(all_elements):
        repeats = num_sample // len(all_elements) + 1
        population = all_elements * repeats
    return random.sample(population, num_sample)


def prepare_training_data(train_data_dir, nGPU, npratio, seed):
    """Expand ``behaviors.tsv`` into shuffled training lines, one file per rank.

    For every clicked item of an impression, one line is emitted holding the
    impression's id, user, time and history plus that positive id and
    ``npratio`` sampled negative ids. Impressions lacking either a positive or
    a negative are dropped. Lines are shuffled and dealt round-robin into
    ``behaviors_np{npratio}_{i}.tsv`` for ``i`` in ``range(nGPU)``.

    Note: this seeds the global ``random`` module with ``seed``.

    Returns:
        Total number of training lines produced.
    """
    random.seed(seed)
    source_path = os.path.join(train_data_dir, 'behaviors.tsv')

    behaviors = []
    with open(source_path, 'r', encoding='utf-8') as f:
        for line in tqdm(f):
            iid, uid, time, history, imp = line.strip().split('\t')

            # Each impression item looks like "<news_id>-<label>".
            pos, neg = [], []
            for news_ID, label in [item.split('-') for item in imp.split(' ')]:
                if label == '0':
                    neg.append(news_ID)
                elif label == '1':
                    pos.append(news_ID)
            if not pos or not neg:
                continue

            shared_columns = [iid, uid, time, history]
            for pos_id in pos:
                neg_str = ' '.join(get_sample(neg, npratio))
                behaviors.append('\t'.join(shared_columns + [pos_id, neg_str]) + '\n')

    random.shuffle(behaviors)

    # Deal the shuffled lines round-robin across the per-rank buckets.
    shards = [[] for _ in range(nGPU)]
    for position, row in enumerate(behaviors):
        shards[position % nGPU].append(row)

    logging.info('Writing files...')
    for i, shard in enumerate(shards):
        target_path = os.path.join(train_data_dir, f'behaviors_np{npratio}_{i}.tsv')
        with open(target_path, 'w') as f:
            f.writelines(shard)

    return len(behaviors)


def prepare_testing_data(test_data_dir, nGPU):
    """Split ``behaviors.tsv`` round-robin into ``behaviors_{i}.tsv``, one per rank.

    Lines are copied unchanged and in their original relative order.

    Returns:
        Total number of lines written across all files.
    """
    shards = [[] for _ in range(nGPU)]

    source_path = os.path.join(test_data_dir, 'behaviors.tsv')
    with open(source_path, 'r', encoding='utf-8') as f:
        for line_no, line in enumerate(tqdm(f)):
            shards[line_no % nGPU].append(line)

    logging.info('Writing files...')
    for i, shard in enumerate(shards):
        target_path = os.path.join(test_data_dir, f'behaviors_{i}.tsv')
        with open(target_path, 'w') as f:
            f.writelines(shard)

    return sum(len(shard) for shard in shards)


In [None]:
def _save_checkpoint(model, is_distributed, category_dict, subcategory_dict, word_dict, ckpt_path):
    """Write the model weights and the three vocabularies to ``ckpt_path``."""
    state_dict = model.state_dict()
    if is_distributed:
        # Drop the first dotted component of every key (the wrapper prefix added
        # by DistributedDataParallel) so the weights load into a bare model.
        state_dict = {'.'.join(k.split('.')[1:]): v for k, v in state_dict.items()}
    torch.save(
        {
            'model_state_dict': state_dict,
            'category_dict': category_dict,
            'subcategory_dict': subcategory_dict,
            'word_dict': word_dict,
        }, ckpt_path)
    logging.info(f"Model saved to {ckpt_path}.")


def train(rank, args):
    """Train the model on the rank's pre-processed behaviors file.

    Args:
        rank: device index; also selects ``behaviors_np{npratio}_{rank}.tsv``.
        args: run configuration (see ``Args``).

    Raises:
        FileNotFoundError: if ``args.load_ckpt_name`` is set but the file is not
            found in ``args.model_dir``.
    """
    is_distributed = False
    # Only touch CUDA when the GPU is actually requested, so a CPU-only run works.
    if args.enable_gpu:
        torch.cuda.set_device(rank)

    news, news_index, category_dict, subcategory_dict, word_dict = read_news(
        os.path.join(args.train_data_dir, 'news.tsv'), args.train_abstract_dir, args, mode='train')

    news_title, news_category, news_subcategory, news_abstract = get_doc_input(
        news, news_index, category_dict, subcategory_dict, word_dict, args)
    # Concatenate the enabled feature arrays column-wise into one row per news.
    news_combined = np.concatenate(
        [x for x in [news_title, news_category, news_subcategory, news_abstract] if x is not None],
        axis=-1)

    if rank == 0:
        logging.info('Initializing word embedding matrix...')

    embedding_matrix, have_word = load_matrix(args.glove_embedding_path,
                                              word_dict,
                                              args.word_embedding_dim)
    if rank == 0:
        logging.info(f'Word dict length: {len(word_dict)}')
        logging.info(f'Have words: {len(have_word)}')
        # max(..., 1) keeps the log line from raising on an empty vocabulary.
        logging.info(f'Missing rate: {(len(word_dict) - len(have_word)) / max(len(word_dict), 1)}')

    model = Model(args, embedding_matrix, len(category_dict), len(subcategory_dict))

    if args.load_ckpt_name is not None:
        ckpt_path = get_checkpoint(args.model_dir, args.load_ckpt_name)
        if ckpt_path is None:
            # get_checkpoint returns None for a missing file; fail with a clear
            # message instead of handing None to torch.load.
            raise FileNotFoundError(
                f"Checkpoint {args.load_ckpt_name!r} not found in {args.model_dir!r}.")
        checkpoint = torch.load(ckpt_path, map_location='cpu')
        model.load_state_dict(checkpoint['model_state_dict'])
        logging.info(f"Model loaded from {ckpt_path}.")

    if args.enable_gpu:
        model = model.cuda(rank)

    # Build the optimizer once the parameters sit on their final device.
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    if is_distributed:
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[rank])

    if rank == 0:
        # torch.save does not create missing directories.
        os.makedirs(args.model_dir, exist_ok=True)

    data_file_path = os.path.join(args.train_data_dir, f'behaviors_np{args.npratio}_{rank}.tsv')

    dataset = DatasetTrain(data_file_path, news_index, news_combined, args)
    dataloader = DataLoader(dataset, batch_size=args.batch_size)

    logging.info('Training...')
    for ep in range(args.start_epoch, args.epochs):
        loss = 0.0
        accuary = 0.0
        # NOTE(review): DatasetTrain.line_mapper in this notebook yields three items
        # (user_feature, news_feature, label) and no log_mask, so this four-way
        # unpack will fail unless the dataset or this loop is changed — confirm
        # against Model.forward, which is not defined in the visible cells.
        for cnt, (log_ids, log_mask, input_ids, targets) in enumerate(dataloader):
            if args.enable_gpu:
                log_ids = log_ids.cuda(rank, non_blocking=True)
                log_mask = log_mask.cuda(rank, non_blocking=True)
                input_ids = input_ids.cuda(rank, non_blocking=True)
                targets = targets.cuda(rank, non_blocking=True)

            bz_loss, y_hat = model(log_ids, log_mask, input_ids, targets)
            loss += bz_loss.data.float()
            accuary += acc(targets, y_hat)
            optimizer.zero_grad()
            bz_loss.backward()
            optimizer.step()

            if cnt % args.log_steps == 0:
                # `cnt` is zero-based: cnt + 1 batches have been processed, which
                # also avoids dividing by zero on the very first log line.
                steps_done = cnt + 1
                logging.info(
                    '[{}] Ed: {}, train_loss: {:.5f}, acc: {:.5f}'.format(
                        rank, steps_done * args.batch_size,
                        float(loss) / steps_done, float(accuary) / steps_done)
                )

            if rank == 0 and cnt != 0 and cnt % args.save_steps == 0:
                ckpt_path = os.path.join(args.model_dir, f'epoch-{ep+1}-{cnt}.pt')
                _save_checkpoint(model, is_distributed, category_dict,
                                 subcategory_dict, word_dict, ckpt_path)

        if rank == 0:
            ckpt_path = os.path.join(args.model_dir, f'epoch-{ep+1}.pt')
            _save_checkpoint(model, is_distributed, category_dict,
                             subcategory_dict, word_dict, ckpt_path)

    # Logged once after the last epoch (it used to be emitted after every epoch).
    logging.info('Training finish.')



In [None]:

    # Entry-point cell: configure logging, build the default configuration, log
    # every setting, and seed Python's `random` module.
    # NOTE(review): only the stdlib `random` generator is seeded here; no numpy or
    # torch seed is set in the visible cells — confirm whether that is intended.
    # NOTE(review): `subprocess` is not used in the visible part of the notebook.
    import subprocess
    setuplogger()
    args = parse_args()
    dump_args(args)
    random.seed(args.seed)





INFO:root:args[batch_size]=32


[INFO 2025-03-02 18:49:03,622] args[batch_size]=32


INFO:root:args[category_emb_dim]=100


[INFO 2025-03-02 18:49:03,624] args[category_emb_dim]=100


INFO:root:args[drop_rate]=0.2


[INFO 2025-03-02 18:49:03,630] args[drop_rate]=0.2


INFO:root:args[enable_gpu]=True


[INFO 2025-03-02 18:49:03,634] args[enable_gpu]=True


INFO:root:args[epochs]=5


[INFO 2025-03-02 18:49:03,638] args[epochs]=5


INFO:root:args[filter_num]=3


[INFO 2025-03-02 18:49:03,643] args[filter_num]=3


INFO:root:args[freeze_embedding]=False


[INFO 2025-03-02 18:49:03,648] args[freeze_embedding]=False


INFO:root:args[glove_embedding_path]=/content/drive/MyDrive/Colab Notebooks/NewsRecommendation/data/glove.840B.300d.txt


[INFO 2025-03-02 18:49:03,650] args[glove_embedding_path]=/content/drive/MyDrive/Colab Notebooks/NewsRecommendation/data/glove.840B.300d.txt


INFO:root:args[load_ckpt_name]=None


[INFO 2025-03-02 18:49:03,655] args[load_ckpt_name]=None


INFO:root:args[log_steps]=100


[INFO 2025-03-02 18:49:03,658] args[log_steps]=100


INFO:root:args[lr]=0.0003


[INFO 2025-03-02 18:49:03,662] args[lr]=0.0003


INFO:root:args[mode]=train


[INFO 2025-03-02 18:49:03,665] args[mode]=train


INFO:root:args[model_dir]=/content/model


[INFO 2025-03-02 18:49:03,669] args[model_dir]=/content/model


INFO:root:args[nGPU]=1


[INFO 2025-03-02 18:49:03,672] args[nGPU]=1


INFO:root:args[news_dim]=400


[INFO 2025-03-02 18:49:03,676] args[news_dim]=400


INFO:root:args[news_query_vector_dim]=200


[INFO 2025-03-02 18:49:03,680] args[news_query_vector_dim]=200


INFO:root:args[npratio]=4


[INFO 2025-03-02 18:49:03,684] args[npratio]=4


INFO:root:args[num_attention_heads]=15


[INFO 2025-03-02 18:49:03,687] args[num_attention_heads]=15


INFO:root:args[num_words_abstract]=50


[INFO 2025-03-02 18:49:03,691] args[num_words_abstract]=50


INFO:root:args[num_words_title]=20


[INFO 2025-03-02 18:49:03,694] args[num_words_title]=20


INFO:root:args[prepare]=True


[INFO 2025-03-02 18:49:03,696] args[prepare]=True


INFO:root:args[save_steps]=10000


[INFO 2025-03-02 18:49:03,699] args[save_steps]=10000


INFO:root:args[seed]=0


[INFO 2025-03-02 18:49:03,701] args[seed]=0


INFO:root:args[start_epoch]=0


[INFO 2025-03-02 18:49:03,703] args[start_epoch]=0


INFO:root:args[test_abstract_dir]=/content/genAbs0.json


[INFO 2025-03-02 18:49:03,705] args[test_abstract_dir]=/content/genAbs0.json


INFO:root:args[test_data_dir]=/content/drive/MyDrive/Colab Notebooks/NewsRecommendation/data/MINDsmall_dev


[INFO 2025-03-02 18:49:03,707] args[test_data_dir]=/content/drive/MyDrive/Colab Notebooks/NewsRecommendation/data/MINDsmall_dev


INFO:root:args[train_abstract_dir]=/content/genAbs0.json


[INFO 2025-03-02 18:49:03,709] args[train_abstract_dir]=/content/genAbs0.json


INFO:root:args[train_data_dir]=/content/drive/MyDrive/Colab Notebooks/NewsRecommendation/data/MINDsmall_train


[INFO 2025-03-02 18:49:03,711] args[train_data_dir]=/content/drive/MyDrive/Colab Notebooks/NewsRecommendation/data/MINDsmall_train


INFO:root:args[use_abstract]=True


[INFO 2025-03-02 18:49:03,713] args[use_abstract]=True


INFO:root:args[use_category]=True


[INFO 2025-03-02 18:49:03,715] args[use_category]=True


INFO:root:args[use_custom_abstract]=True


[INFO 2025-03-02 18:49:03,717] args[use_custom_abstract]=True


INFO:root:args[use_subcategory]=True


[INFO 2025-03-02 18:49:03,719] args[use_subcategory]=True


INFO:root:args[user_log_length]=50


[INFO 2025-03-02 18:49:03,721] args[user_log_length]=50


INFO:root:args[user_log_mask]=False


[INFO 2025-03-02 18:49:03,723] args[user_log_mask]=False


INFO:root:args[user_query_vector_dim]=200


[INFO 2025-03-02 18:49:03,726] args[user_query_vector_dim]=200


INFO:root:args[word_embedding_dim]=300


[INFO 2025-03-02 18:49:03,728] args[word_embedding_dim]=300


# **NRMS**

In [None]:
import torch
import torch.nn as nn
class DotProductClickPredictor(torch.nn.Module):
    """Scores each candidate by its dot product with the user vector."""

    def __init__(self):
        super().__init__()

    def forward(self, candidate_news_vector, user_vector):
        """
        Args:
            candidate_news_vector: batch_size, candidate_size, X
            user_vector: batch_size, X
        Returns:
            (shape): batch_size, candidate_size
        """
        # batch_size, X, 1
        user_column = user_vector.unsqueeze(dim=-1)
        # batch_size, candidate_size, 1
        scores = torch.bmm(candidate_news_vector, user_column)
        # batch_size, candidate_size
        return scores.squeeze(dim=-1)
class AdditiveAttention(torch.nn.Module):
    """
    A general additive attention module.
    Originally for NAML.

    Candidates are projected to the query dimension, squashed with tanh, scored
    against a learned query vector, and pooled with the resulting softmax weights.
    """
    def __init__(self,
                 query_vector_dim,
                 candidate_vector_dim,
                 writer=None,
                 tag=None,
                 names=None):
        super().__init__()
        self.linear = nn.Linear(candidate_vector_dim, query_vector_dim)
        query_init = torch.empty(query_vector_dim).uniform_(-0.1, 0.1)
        self.attention_query_vector = nn.Parameter(query_init)
        # For tensorboard
        self.writer, self.tag, self.names = writer, tag, names
        self.local_step = 1

    def _log_weights(self, candidate_weights):
        """Send batch-averaged weights to the writer on every 10th call."""
        assert candidate_weights.size(1) == len(self.names)
        if self.local_step % 10 == 0:
            mean_weights = candidate_weights.mean(dim=0)
            self.writer.add_scalars(
                self.tag, {x: y for x, y in zip(self.names, mean_weights)},
                self.local_step)
        self.local_step += 1

    def forward(self, candidate_vector):
        """
        Args:
            candidate_vector: batch_size, candidate_size, candidate_vector_dim
        Returns:
            (shape) batch_size, candidate_vector_dim
        """
        # batch_size, candidate_size, query_vector_dim
        projected = torch.tanh(self.linear(candidate_vector))
        # batch_size, candidate_size
        logits = torch.matmul(projected, self.attention_query_vector)
        candidate_weights = F.softmax(logits, dim=1)
        if self.writer is not None:
            self._log_weights(candidate_weights)
        # batch_size, 1, candidate_vector_dim
        pooled = torch.bmm(candidate_weights.unsqueeze(dim=1), candidate_vector)
        # batch_size, candidate_vector_dim
        return pooled.squeeze(dim=1)

class ScaledDotProductAttention(nn.Module):
    """Scaled dot-product attention with a hand-written, mask-aware softmax.

    The exponential is applied directly to the scaled scores (no maximum is
    subtracted first) and masked positions are zeroed by multiplication before
    normalising.
    """

    def __init__(self, d_k):
        super().__init__()
        self.d_k = d_k

    def forward(self, Q, K, V, attn_mask=None):
        """Return ``(context, attn)`` for queries ``Q``, keys ``K`` and values ``V``.

        ``attn_mask``, when given, is multiplied element-wise into the
        exponentiated scores, so zeros remove positions.
        """
        scale = np.sqrt(self.d_k)
        weights = torch.exp(torch.matmul(Q, K.transpose(-1, -2)) / scale)
        if attn_mask is not None:
            weights = weights * attn_mask
        # The small constant keeps fully masked rows from dividing by zero.
        normaliser = weights.sum(dim=-1, keepdim=True) + 1e-8
        attn = weights / normaliser

        return torch.matmul(attn, V), attn



class MultiHeadSelfAttention(nn.Module):
    """Multi-head attention whose concatenated heads have width ``d_model``.

    Args:
        d_model: input and output feature size; must be divisible by
            ``num_attention_heads``.
        num_attention_heads: number of heads.
    """

    def __init__(self, d_model, num_attention_heads):
        super(MultiHeadSelfAttention, self).__init__()
        self.d_model = d_model
        self.num_attention_heads = num_attention_heads
        assert d_model % num_attention_heads == 0
        self.d_k = d_model // num_attention_heads
        self.d_v = d_model // num_attention_heads

        self.W_Q = nn.Linear(d_model, d_model)
        self.W_K = nn.Linear(d_model, d_model)
        self.W_V = nn.Linear(d_model, d_model)

        self._initialize_weights()

    def _initialize_weights(self):
        """Xavier-initialize the weight of every linear projection (biases untouched)."""
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight, gain=1)

    def forward(self, Q, K=None, V=None, length=None):
        """Attend from ``Q`` to ``K``/``V`` (both default to ``Q``).

        Args:
            Q: batch_size, seq_len, d_model
            K, V: same layout as ``Q``; default to ``Q`` when omitted.
            length: optional tensor holding one valid length per batch row;
                key positions at or beyond that length are masked out.
        Returns:
            (shape) batch_size, seq_len, d_model
        """
        if K is None:
            K = Q
        if V is None:
            V = Q
        batch_size = Q.size(0)

        # Project, then split the feature axis into heads: batch, heads, seq, d_k
        q_s = self.W_Q(Q).view(batch_size, -1, self.num_attention_heads,
                               self.d_k).transpose(1, 2)
        k_s = self.W_K(K).view(batch_size, -1, self.num_attention_heads,
                               self.d_k).transpose(1, 2)
        v_s = self.W_V(V).view(batch_size, -1, self.num_attention_heads,
                               self.d_v).transpose(1, 2)

        if length is not None:
            maxlen = Q.size(1)
            # Create the position index on the same device as the inputs; a
            # CPU-side arange cannot be compared with lengths living on the GPU.
            positions = torch.arange(maxlen, device=Q.device).expand(
                batch_size, maxlen)
            attn_mask = positions < length.to(Q.device).view(-1, 1)
            # batch, seq -> batch, seq, seq -> batch, heads, seq, seq
            attn_mask = attn_mask.unsqueeze(1).expand(batch_size, maxlen,
                                                      maxlen)
            attn_mask = attn_mask.unsqueeze(1).repeat(1,
                                                      self.num_attention_heads,
                                                      1, 1)
        else:
            attn_mask = None

        context, attn = ScaledDotProductAttention(self.d_k)(q_s, k_s, v_s,
                                                            attn_mask)
        # Merge the heads back: batch, seq, heads * d_v
        context = context.transpose(1, 2).contiguous().view(
            batch_size, -1, self.num_attention_heads * self.d_v)
        return context

In [None]:
import torch
from torch import nn
import torch.nn.functional as F



class NewsEncoder(nn.Module):
    """Encodes a batch of word-id sequences into one vector per news item.

    Pipeline: embedding lookup -> dropout -> multi-head self-attention ->
    dropout -> additive attention pooling.
    """

    def __init__(self, args, embedding_matrix):
        super().__init__()
        # `embedding_matrix` is called like a module on the id tensor in forward().
        self.embedding_matrix = embedding_matrix
        self.drop_rate = args.drop_rate
        self.dim_per_head = args.news_dim // args.num_attention_heads
        self.multi_head_self_attn = MultiHeadSelfAttention(
            args.word_embedding_dim, args.num_attention_heads)
        self.attn = AdditiveAttention(args.news_query_vector_dim, args.word_embedding_dim)

    def _dropout(self, tensor):
        """Dropout at the configured rate, active only in training mode."""
        return F.dropout(tensor, p=self.drop_rate, training=self.training)

    def forward(self, x, mask=None):
        '''
            x: batch_size, word_num
            mask: forwarded unchanged as the fourth positional argument
                  (`length`) of the self-attention layer
        '''
        word_vecs = self._dropout(self.embedding_matrix(x.long()))
        attended = self.multi_head_self_attn(word_vecs, word_vecs, word_vecs, mask)
        return self.attn(self._dropout(attended))


class UserEncoder(nn.Module):
    """Pools the vectors of a user's clicked news into a single user vector.

    Pipeline: multi-head self-attention over the clicked-news axis followed by
    additive attention pooling.
    """

    def __init__(self, args):
        super().__init__()
        self.args = args
        self.dim_per_head = args.news_dim // args.num_attention_heads
        self.multi_head_self_attn = MultiHeadSelfAttention(
            args.word_embedding_dim, args.num_attention_heads)
        self.attn = AdditiveAttention(args.user_query_vector_dim, args.word_embedding_dim)
        # Only the commented-out forward below reads pad_doc; the active forward does not.
        pad_init = torch.empty(1, args.news_dim).uniform_(-1, 1)
        self.pad_doc = nn.Parameter(pad_init).type(torch.FloatTensor)

    def forward(self, user_vector):
        """
        Args:
            user_vector: batch_size, num_clicked_news_a_user, word_embedding_dim
        Returns:
            (shape) batch_size, word_embedding_dim
        """
        # batch_size, num_clicked_news_a_user, word_embedding_dim
        attended_clicks = self.multi_head_self_attn(user_vector)
        # batch_size, word_embedding_dim
        return self.attn(attended_clicks)

    # def forward(self, news_vecs, log_mask=None):
    #     '''
    #         news_vecs: batch_size, history_num, news_dim
    #         log_mask: batch_size, history_num
    #     '''
    #     bz = news_vecs.shape[0]
    #     if self.args.user_log_mask:


    #         news_vecs = self.multi_head_self_attn(news_vecs, news_vecs, news_vecs, log_mask)
    #         user_vec = self.attn(news_vecs, log_mask)
    #     else:
    #         padding_doc = self.pad_doc.unsqueeze(dim=0).expand(bz, self.args.user_log_length, -1)
    #         news_vecs = news_vecs * log_mask.unsqueeze(dim=-1) + padding_doc * (1 - log_mask.unsqueeze(dim=-1))
    #         news_vecs = self.multi_head_self_attn(news_vecs, news_vecs, news_vecs)
    #         user_vec = self.attn(news_vecs)
    #     return user_vec



class NRMS(torch.nn.Module):
    """
    NRMS network.
    Input 1 + K candidate news and a list of user clicked news, produce the
    click scores together with the training loss.

    The model is assembled from a ``NewsEncoder`` (shared by candidate and
    clicked news), a ``UserEncoder`` and a ``DotProductClickPredictor``; the
    loss is ``nn.CrossEntropyLoss`` applied to the predictor output.
    """
    def __init__(self, config, embedding_matrix):
        """
        Args:
            config: configuration object. ``freeze_embedding`` is read here;
                the object is also stored on ``self.config`` and handed to
                both encoders.
            embedding_matrix: numpy array with the pretrained word vectors,
                one row per word id. Row 0 is used as the padding entry.
        """
        super(NRMS, self).__init__()
        self.config = config
        word_embedding = torch.from_numpy(embedding_matrix).float()
        # Read the flag from the config passed in, not from a notebook-level
        # global, so the model can be built from any configuration object.
        pretrained_word_embedding = nn.Embedding.from_pretrained(
            word_embedding,
            freeze=config.freeze_embedding,
            padding_idx=0)

        self.news_encoder = NewsEncoder(config, pretrained_word_embedding)
        self.user_encoder = UserEncoder(config)
        self.click_predictor = DotProductClickPredictor()
        self.loss_fn = nn.CrossEntropyLoss()

    def forward(self, clicked_news, candidate_news, label):
        """
        Args:
            clicked_news: iterable over the user history; every item is
                encoded by the news encoder and the results are stacked.
            candidate_news: iterable over the candidates; handled the same
                way as ``clicked_news``.
            label: target handed to ``nn.CrossEntropyLoss`` together with the
                predictor output.

            NOTE(review): the original annotations describe both inputs as
            lists of ``{"title": batch_size * num_words_title}`` dicts, while
            the training cell passes batched tensors -- confirm the expected
            layout against ``NewsEncoder``.
        Returns:
            (loss, click_probability), where click_probability is annotated
            as batch_size, 1 + K.
        """
        # batch_size, 1 + K, word_embedding_dim (per original annotation)
        candidate_news_vector = torch.stack(
            [self.news_encoder(x) for x in candidate_news])

        # batch_size, num_clicked_news_a_user, word_embedding_dim
        clicked_news_vector = torch.stack(
            [self.news_encoder(x) for x in clicked_news])

        # batch_size, word_embedding_dim
        user_vector = self.user_encoder(clicked_news_vector)

        # batch_size, 1 + K
        # NOTE(review): this is fed straight into CrossEntropyLoss, which
        # expects unnormalised scores -- confirm the predictor applies no
        # softmax despite the "probability" name.
        click_probability = self.click_predictor(candidate_news_vector,
                                                 user_vector)
        loss = self.loss_fn(click_probability, label)
        return loss, click_probability

    def get_news_vector(self, news):
        """
        Encode news with the shared news encoder.

        Args:
            news: input accepted by the news encoder (originally annotated as
                {"title": batch_size * num_words_title})
        Returns:
            (shape) batch_size, word_embedding_dim
        """
        # batch_size, word_embedding_dim
        return self.news_encoder(news)

    def get_user_vector(self, clicked_news_vector):
        """
        Encode already-encoded clicked news into user vectors.

        Args:
            clicked_news_vector: batch_size, num_clicked_news_a_user, word_embedding_dim
        Returns:
            (shape) batch_size, word_embedding_dim
        """
        # batch_size, word_embedding_dim
        return self.user_encoder(clicked_news_vector)

    def get_prediction(self, news_vector, user_vector):
        """
        Score the candidates of a single user.

        Args:
            news_vector: candidate_size, word_embedding_dim
            user_vector: word_embedding_dim
        Returns:
            click_probability: candidate_size
        """
        # Add a leading batch axis of size 1 for the predictor, then drop it
        # again from the result.
        # candidate_size
        return self.click_predictor(
            news_vector.unsqueeze(dim=0),
            user_vector.unsqueeze(dim=0)).squeeze(dim=0)

In [None]:
# Select the run mode; the data-preparation cell only runs when 'train' is
# contained in this string.
args.mode = 'train'

In [None]:
# Prepare the per-GPU training shards, or count the samples of shards that
# already exist, and log the resulting sample/batch totals.
if 'train' in args.mode:
    if args.prepare:
        logging.info('Preparing training data...')
        total_sample_num = prepare_training_data(args.train_data_dir, args.nGPU, args.npratio, args.seed)
    else:
        total_sample_num = 0
        for i in range(args.nGPU):
            data_file_path = os.path.join(args.train_data_dir, f'behaviors_np{args.npratio}_{i}.tsv')
            print(data_file_path)
            if not os.path.exists(data_file_path):
                missing_shard_message = f'Splited training data {data_file_path} for GPU {i} does not exist. Please set the parameter --prepare as True and rerun the code.'
                logging.error(missing_shard_message)
                # Raise instead of calling exit(): inside a notebook exit()
                # does not reliably stop the running cell, so the line count
                # below would still be attempted on the missing file.
                raise FileNotFoundError(missing_shard_message)
            # Count lines in Python rather than via `wc -l` in a shell: the
            # configured data directory contains a space, which splits the
            # unquoted shell argument and makes the int() parse fail.
            with open(data_file_path, 'rb') as shard_file:
                total_sample_num += sum(1 for _ in shard_file)
        logging.info('Skip training data preparation.')
    logging.info(f'{total_sample_num} training samples, {total_sample_num // args.batch_size // args.nGPU} batches in total.')



INFO:root:Preparing training data...


[INFO 2025-03-02 18:49:54,311] Preparing training data...


156965it [00:02, 61776.50it/s]
INFO:root:Writing files...


[INFO 2025-03-02 18:49:56,994] Writing files...


INFO:root:236344 training samples, 7385 batches in total.


[INFO 2025-03-02 18:49:57,448] 236344 training samples, 7385 batches in total.


In [None]:
# Load the training news, turn them into model input arrays and build the
# pretrained word-embedding matrix for the resulting vocabulary.
rank = 0

train_news_path = os.path.join(args.train_data_dir, 'news.tsv')
news, news_index, category_dict, subcategory_dict, word_dict = read_news(
    train_news_path, args.train_abstract_dir, args, mode='train')

news_title, news_category, news_subcategory, news_abstract = get_doc_input(
    news, news_index, category_dict, subcategory_dict, word_dict, args)

# Join whichever feature arrays are present (entries that are None are
# skipped) along the last axis into one array per news item.
news_combined = np.concatenate(
    tuple(
        feature
        for feature in (news_title, news_category, news_subcategory, news_abstract)
        if feature is not None
    ),
    axis=-1,
)

if rank == 0:
    logging.info('Initializing word embedding matrix...')

embedding_matrix, have_word = load_matrix(
    args.glove_embedding_path, word_dict, args.word_embedding_dim)

# Report how much of the vocabulary was found in the pretrained vectors.
if rank == 0:
    logging.info(f'Word dict length: {len(word_dict)}')
    logging.info(f'Have words: {len(have_word)}')
    logging.info(f'Missing rate: {(len(word_dict) - len(have_word)) / len(word_dict)}')

51282it [00:06, 8054.94it/s] 
100%|██████████| 51282/51282 [00:00<00:00, 191114.33it/s]
INFO:root:Initializing word embedding matrix...


[INFO 2025-03-02 18:50:04,705] Initializing word embedding matrix...


INFO:root:Word dict length: 12519


[INFO 2025-03-02 18:51:26,471] Word dict length: 12519


INFO:root:Have words: 11960


[INFO 2025-03-02 18:51:26,475] Have words: 11960


INFO:root:Missing rate: 0.0446521287642783


[INFO 2025-03-02 18:51:26,478] Missing rate: 0.0446521287642783


In [None]:
import torch.optim as optim


def _save_checkpoint(model, ckpt_path, is_distributed, category_dict, subcategory_dict, word_dict):
    """Write the model weights and the three vocabularies to ``ckpt_path``.

    Args:
        model: the (possibly DistributedDataParallel-wrapped) model.
        ckpt_path: destination file.
        is_distributed: when True, the first dotted component of every
            state-dict key (the prefix added by the DistributedDataParallel
            wrapper) is dropped so the checkpoint loads into a bare model.
        category_dict, subcategory_dict, word_dict: vocabularies stored next
            to the weights.
    """
    state_dict = model.state_dict()
    if is_distributed:
        state_dict = {'.'.join(k.split('.')[1:]): v for k, v in state_dict.items()}
    torch.save(
        {
            'model_state_dict': state_dict,
            'category_dict': category_dict,
            'subcategory_dict': subcategory_dict,
            'word_dict': word_dict,
        }, ckpt_path)
    logging.info(f"Model saved to {ckpt_path}.")


# Build the model, optionally restoring weights from an earlier checkpoint.
model = NRMS(args, embedding_matrix)
is_distributed = False
if args.load_ckpt_name is not None:
    ckpt_path = get_checkpoint(args.model_dir, args.load_ckpt_name)
    checkpoint = torch.load(ckpt_path, map_location='cpu')
    model.load_state_dict(checkpoint['model_state_dict'])
    logging.info(f"Model loaded from {ckpt_path}.")

optimizer = optim.Adam(model.parameters(), lr=args.lr)

if args.enable_gpu:
    model = model.cuda(rank)

if is_distributed:
    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[rank])

data_file_path = os.path.join(args.train_data_dir, f'behaviors_np{args.npratio}_{rank}.tsv')

dataset = DatasetTrain(data_file_path, news_index, news_combined, args)
dataloader = DataLoader(dataset, batch_size=args.batch_size)

# torch.save does not create missing directories.
os.makedirs(args.model_dir, exist_ok=True)

logging.info('Training...')
for ep in range(args.start_epoch, args.epochs):
    # Running sums over the current epoch, reported as per-batch averages.
    loss = 0.0
    accuary = 0.0
    for cnt, (log_ids, input_ids, targets) in enumerate(dataloader):
        if args.enable_gpu:
            log_ids = log_ids.cuda(rank, non_blocking=True)
            input_ids = input_ids.cuda(rank, non_blocking=True)
            targets = targets.cuda(rank, non_blocking=True)

        bz_loss, y_hat = model(log_ids, input_ids, targets)
        # detach() keeps the running sum out of the autograd graph.
        loss += bz_loss.detach().float()
        accuary += acc(targets, y_hat)
        optimizer.zero_grad()
        bz_loss.backward()
        optimizer.step()

        # Average over the number of batches actually accumulated; dividing
        # by the zero-based index produced `inf` on the first logged step
        # and a slightly inflated mean afterwards.
        steps_done = cnt + 1
        if steps_done % args.log_steps == 0:
            logging.info(
                '[{}] Ed: {}, train_loss: {:.5f}, acc: {:.5f}'.format(
                    rank, steps_done * args.batch_size, loss / steps_done, accuary / steps_done)
            )

        # Intermediate checkpoint every `save_steps` batches (not at batch 0).
        if rank == 0 and cnt != 0 and cnt % args.save_steps == 0:
            ckpt_path = os.path.join(args.model_dir, f'epoch-{ep+1}-{cnt}.pt')
            _save_checkpoint(model, ckpt_path, is_distributed,
                             category_dict, subcategory_dict, word_dict)

    # End-of-epoch checkpoint.
    if rank == 0:
        ckpt_path = os.path.join(args.model_dir, f'epoch-{ep+1}.pt')
        _save_checkpoint(model, ckpt_path, is_distributed,
                         category_dict, subcategory_dict, word_dict)

# Logged once, after the last epoch (previously emitted after every epoch).
logging.info('Training finish.')



INFO:root:Training...


[INFO 2025-03-02 18:51:31,332] Training...


INFO:root:[0] Ed: 0, train_loss: inf, acc: inf


[INFO 2025-03-02 18:51:32,934] [0] Ed: 0, train_loss: inf, acc: inf


INFO:root:[0] Ed: 3200, train_loss: 1.60300, acc: 0.27906


[INFO 2025-03-02 18:51:58,215] [0] Ed: 3200, train_loss: 1.60300, acc: 0.27906


INFO:root:[0] Ed: 6400, train_loss: 1.57256, acc: 0.30281


[INFO 2025-03-02 18:52:23,567] [0] Ed: 6400, train_loss: 1.57256, acc: 0.30281


INFO:root:[0] Ed: 9600, train_loss: 1.56084, acc: 0.31135


[INFO 2025-03-02 18:52:49,144] [0] Ed: 9600, train_loss: 1.56084, acc: 0.31135


INFO:root:[0] Ed: 12800, train_loss: 1.55049, acc: 0.31773


[INFO 2025-03-02 18:53:15,089] [0] Ed: 12800, train_loss: 1.55049, acc: 0.31773


INFO:root:[0] Ed: 16000, train_loss: 1.54209, acc: 0.32381


[INFO 2025-03-02 18:53:41,231] [0] Ed: 16000, train_loss: 1.54209, acc: 0.32381


INFO:root:[0] Ed: 19200, train_loss: 1.53520, acc: 0.32964


[INFO 2025-03-02 18:54:07,224] [0] Ed: 19200, train_loss: 1.53520, acc: 0.32964


INFO:root:[0] Ed: 22400, train_loss: 1.53005, acc: 0.33531


[INFO 2025-03-02 18:54:33,147] [0] Ed: 22400, train_loss: 1.53005, acc: 0.33531


INFO:root:[0] Ed: 25600, train_loss: 1.52700, acc: 0.33773


[INFO 2025-03-02 18:54:59,170] [0] Ed: 25600, train_loss: 1.52700, acc: 0.33773


INFO:root:[0] Ed: 28800, train_loss: 1.52139, acc: 0.34208


[INFO 2025-03-02 18:55:25,140] [0] Ed: 28800, train_loss: 1.52139, acc: 0.34208


INFO:root:[0] Ed: 32000, train_loss: 1.51721, acc: 0.34513


[INFO 2025-03-02 18:55:51,096] [0] Ed: 32000, train_loss: 1.51721, acc: 0.34513


INFO:root:[0] Ed: 35200, train_loss: 1.51507, acc: 0.34563


[INFO 2025-03-02 18:56:17,087] [0] Ed: 35200, train_loss: 1.51507, acc: 0.34563


INFO:root:[0] Ed: 38400, train_loss: 1.51126, acc: 0.34823


[INFO 2025-03-02 18:56:43,107] [0] Ed: 38400, train_loss: 1.51126, acc: 0.34823


INFO:root:[0] Ed: 41600, train_loss: 1.50776, acc: 0.35089


[INFO 2025-03-02 18:57:09,051] [0] Ed: 41600, train_loss: 1.50776, acc: 0.35089


INFO:root:[0] Ed: 44800, train_loss: 1.50456, acc: 0.35201


[INFO 2025-03-02 18:57:35,003] [0] Ed: 44800, train_loss: 1.50456, acc: 0.35201


INFO:root:[0] Ed: 48000, train_loss: 1.50253, acc: 0.35317


[INFO 2025-03-02 18:58:01,081] [0] Ed: 48000, train_loss: 1.50253, acc: 0.35317


INFO:root:[0] Ed: 51200, train_loss: 1.49876, acc: 0.35561


[INFO 2025-03-02 18:58:27,137] [0] Ed: 51200, train_loss: 1.49876, acc: 0.35561


INFO:root:[0] Ed: 54400, train_loss: 1.49493, acc: 0.35741


[INFO 2025-03-02 18:58:53,297] [0] Ed: 54400, train_loss: 1.49493, acc: 0.35741


INFO:root:[0] Ed: 57600, train_loss: 1.49080, acc: 0.36026


[INFO 2025-03-02 18:59:19,289] [0] Ed: 57600, train_loss: 1.49080, acc: 0.36026


INFO:root:[0] Ed: 60800, train_loss: 1.48745, acc: 0.36222


[INFO 2025-03-02 18:59:45,275] [0] Ed: 60800, train_loss: 1.48745, acc: 0.36222


INFO:root:[0] Ed: 64000, train_loss: 1.48355, acc: 0.36459


[INFO 2025-03-02 19:00:11,207] [0] Ed: 64000, train_loss: 1.48355, acc: 0.36459


INFO:root:[0] Ed: 67200, train_loss: 1.47989, acc: 0.36689


[INFO 2025-03-02 19:00:37,170] [0] Ed: 67200, train_loss: 1.47989, acc: 0.36689


INFO:root:[0] Ed: 70400, train_loss: 1.47650, acc: 0.36902


[INFO 2025-03-02 19:01:03,220] [0] Ed: 70400, train_loss: 1.47650, acc: 0.36902


INFO:root:[0] Ed: 73600, train_loss: 1.47349, acc: 0.37083


[INFO 2025-03-02 19:01:29,251] [0] Ed: 73600, train_loss: 1.47349, acc: 0.37083


INFO:root:[0] Ed: 76800, train_loss: 1.46981, acc: 0.37327


[INFO 2025-03-02 19:01:55,526] [0] Ed: 76800, train_loss: 1.46981, acc: 0.37327


INFO:root:[0] Ed: 80000, train_loss: 1.46763, acc: 0.37439


[INFO 2025-03-02 19:02:21,569] [0] Ed: 80000, train_loss: 1.46763, acc: 0.37439


INFO:root:[0] Ed: 83200, train_loss: 1.46446, acc: 0.37602


[INFO 2025-03-02 19:02:47,623] [0] Ed: 83200, train_loss: 1.46446, acc: 0.37602


INFO:root:[0] Ed: 86400, train_loss: 1.46186, acc: 0.37753


[INFO 2025-03-02 19:03:13,591] [0] Ed: 86400, train_loss: 1.46186, acc: 0.37753


INFO:root:[0] Ed: 89600, train_loss: 1.45861, acc: 0.37940


[INFO 2025-03-02 19:03:39,581] [0] Ed: 89600, train_loss: 1.45861, acc: 0.37940


INFO:root:[0] Ed: 92800, train_loss: 1.45650, acc: 0.38099


[INFO 2025-03-02 19:04:05,579] [0] Ed: 92800, train_loss: 1.45650, acc: 0.38099


INFO:root:[0] Ed: 96000, train_loss: 1.45410, acc: 0.38249


[INFO 2025-03-02 19:04:31,567] [0] Ed: 96000, train_loss: 1.45410, acc: 0.38249


INFO:root:[0] Ed: 99200, train_loss: 1.45241, acc: 0.38350


[INFO 2025-03-02 19:04:57,549] [0] Ed: 99200, train_loss: 1.45241, acc: 0.38350


INFO:root:[0] Ed: 102400, train_loss: 1.44978, acc: 0.38515


[INFO 2025-03-02 19:05:23,516] [0] Ed: 102400, train_loss: 1.44978, acc: 0.38515


INFO:root:[0] Ed: 105600, train_loss: 1.44743, acc: 0.38692


[INFO 2025-03-02 19:05:49,424] [0] Ed: 105600, train_loss: 1.44743, acc: 0.38692


INFO:root:[0] Ed: 108800, train_loss: 1.44564, acc: 0.38797


[INFO 2025-03-02 19:06:15,393] [0] Ed: 108800, train_loss: 1.44564, acc: 0.38797


INFO:root:[0] Ed: 112000, train_loss: 1.44370, acc: 0.38927


[INFO 2025-03-02 19:06:41,366] [0] Ed: 112000, train_loss: 1.44370, acc: 0.38927


INFO:root:[0] Ed: 115200, train_loss: 1.44189, acc: 0.39030


[INFO 2025-03-02 19:07:07,240] [0] Ed: 115200, train_loss: 1.44189, acc: 0.39030


INFO:root:[0] Ed: 118400, train_loss: 1.43974, acc: 0.39138


[INFO 2025-03-02 19:07:33,220] [0] Ed: 118400, train_loss: 1.43974, acc: 0.39138


INFO:root:[0] Ed: 121600, train_loss: 1.43817, acc: 0.39243


[INFO 2025-03-02 19:07:59,114] [0] Ed: 121600, train_loss: 1.43817, acc: 0.39243


INFO:root:[0] Ed: 124800, train_loss: 1.43639, acc: 0.39353


[INFO 2025-03-02 19:08:25,099] [0] Ed: 124800, train_loss: 1.43639, acc: 0.39353


INFO:root:[0] Ed: 128000, train_loss: 1.43515, acc: 0.39439


[INFO 2025-03-02 19:08:51,061] [0] Ed: 128000, train_loss: 1.43515, acc: 0.39439


INFO:root:[0] Ed: 131200, train_loss: 1.43340, acc: 0.39532


[INFO 2025-03-02 19:09:16,977] [0] Ed: 131200, train_loss: 1.43340, acc: 0.39532


INFO:root:[0] Ed: 134400, train_loss: 1.43196, acc: 0.39628


[INFO 2025-03-02 19:09:42,992] [0] Ed: 134400, train_loss: 1.43196, acc: 0.39628


INFO:root:[0] Ed: 137600, train_loss: 1.43069, acc: 0.39719


[INFO 2025-03-02 19:10:08,974] [0] Ed: 137600, train_loss: 1.43069, acc: 0.39719


INFO:root:[0] Ed: 140800, train_loss: 1.42899, acc: 0.39834


[INFO 2025-03-02 19:10:34,884] [0] Ed: 140800, train_loss: 1.42899, acc: 0.39834


INFO:root:[0] Ed: 144000, train_loss: 1.42756, acc: 0.39904


[INFO 2025-03-02 19:11:00,871] [0] Ed: 144000, train_loss: 1.42756, acc: 0.39904


INFO:root:[0] Ed: 147200, train_loss: 1.42585, acc: 0.40013


[INFO 2025-03-02 19:11:26,982] [0] Ed: 147200, train_loss: 1.42585, acc: 0.40013


INFO:root:[0] Ed: 150400, train_loss: 1.42431, acc: 0.40075


[INFO 2025-03-02 19:11:52,959] [0] Ed: 150400, train_loss: 1.42431, acc: 0.40075


INFO:root:[0] Ed: 153600, train_loss: 1.42288, acc: 0.40149


[INFO 2025-03-02 19:12:18,891] [0] Ed: 153600, train_loss: 1.42288, acc: 0.40149


INFO:root:[0] Ed: 156800, train_loss: 1.42151, acc: 0.40235


[INFO 2025-03-02 19:12:44,901] [0] Ed: 156800, train_loss: 1.42151, acc: 0.40235


INFO:root:[0] Ed: 160000, train_loss: 1.42019, acc: 0.40322


[INFO 2025-03-02 19:13:10,763] [0] Ed: 160000, train_loss: 1.42019, acc: 0.40322


INFO:root:[0] Ed: 163200, train_loss: 1.41899, acc: 0.40379


[INFO 2025-03-02 19:13:36,773] [0] Ed: 163200, train_loss: 1.41899, acc: 0.40379


INFO:root:[0] Ed: 166400, train_loss: 1.41773, acc: 0.40479


[INFO 2025-03-02 19:14:02,830] [0] Ed: 166400, train_loss: 1.41773, acc: 0.40479


INFO:root:[0] Ed: 169600, train_loss: 1.41653, acc: 0.40560


[INFO 2025-03-02 19:14:28,825] [0] Ed: 169600, train_loss: 1.41653, acc: 0.40560


INFO:root:[0] Ed: 172800, train_loss: 1.41525, acc: 0.40647


[INFO 2025-03-02 19:14:54,739] [0] Ed: 172800, train_loss: 1.41525, acc: 0.40647


INFO:root:[0] Ed: 176000, train_loss: 1.41423, acc: 0.40714


[INFO 2025-03-02 19:15:20,757] [0] Ed: 176000, train_loss: 1.41423, acc: 0.40714


INFO:root:[0] Ed: 179200, train_loss: 1.41306, acc: 0.40776


[INFO 2025-03-02 19:15:46,769] [0] Ed: 179200, train_loss: 1.41306, acc: 0.40776


INFO:root:[0] Ed: 182400, train_loss: 1.41221, acc: 0.40832


[INFO 2025-03-02 19:16:12,755] [0] Ed: 182400, train_loss: 1.41221, acc: 0.40832


INFO:root:[0] Ed: 185600, train_loss: 1.41103, acc: 0.40897


[INFO 2025-03-02 19:16:38,672] [0] Ed: 185600, train_loss: 1.41103, acc: 0.40897


INFO:root:[0] Ed: 188800, train_loss: 1.40995, acc: 0.40949


[INFO 2025-03-02 19:17:04,651] [0] Ed: 188800, train_loss: 1.40995, acc: 0.40949


INFO:root:[0] Ed: 192000, train_loss: 1.40916, acc: 0.40979


[INFO 2025-03-02 19:17:30,620] [0] Ed: 192000, train_loss: 1.40916, acc: 0.40979


INFO:root:[0] Ed: 195200, train_loss: 1.40817, acc: 0.41032


[INFO 2025-03-02 19:17:56,551] [0] Ed: 195200, train_loss: 1.40817, acc: 0.41032


INFO:root:[0] Ed: 198400, train_loss: 1.40731, acc: 0.41100


[INFO 2025-03-02 19:18:22,563] [0] Ed: 198400, train_loss: 1.40731, acc: 0.41100


INFO:root:[0] Ed: 201600, train_loss: 1.40650, acc: 0.41145


[INFO 2025-03-02 19:18:48,443] [0] Ed: 201600, train_loss: 1.40650, acc: 0.41145


INFO:root:[0] Ed: 204800, train_loss: 1.40588, acc: 0.41186


[INFO 2025-03-02 19:19:14,416] [0] Ed: 204800, train_loss: 1.40588, acc: 0.41186


INFO:root:[0] Ed: 208000, train_loss: 1.40524, acc: 0.41230


[INFO 2025-03-02 19:19:40,408] [0] Ed: 208000, train_loss: 1.40524, acc: 0.41230


INFO:root:[0] Ed: 211200, train_loss: 1.40442, acc: 0.41277


[INFO 2025-03-02 19:20:06,326] [0] Ed: 211200, train_loss: 1.40442, acc: 0.41277


INFO:root:[0] Ed: 214400, train_loss: 1.40372, acc: 0.41333


[INFO 2025-03-02 19:20:32,288] [0] Ed: 214400, train_loss: 1.40372, acc: 0.41333


INFO:root:[0] Ed: 217600, train_loss: 1.40297, acc: 0.41365


[INFO 2025-03-02 19:20:58,219] [0] Ed: 217600, train_loss: 1.40297, acc: 0.41365


INFO:root:[0] Ed: 220800, train_loss: 1.40203, acc: 0.41410


[INFO 2025-03-02 19:21:24,418] [0] Ed: 220800, train_loss: 1.40203, acc: 0.41410


INFO:root:[0] Ed: 224000, train_loss: 1.40106, acc: 0.41463


[INFO 2025-03-02 19:21:50,380] [0] Ed: 224000, train_loss: 1.40106, acc: 0.41463


INFO:root:[0] Ed: 227200, train_loss: 1.40008, acc: 0.41526


[INFO 2025-03-02 19:22:16,315] [0] Ed: 227200, train_loss: 1.40008, acc: 0.41526


INFO:root:[0] Ed: 230400, train_loss: 1.39912, acc: 0.41590


[INFO 2025-03-02 19:22:42,285] [0] Ed: 230400, train_loss: 1.39912, acc: 0.41590


INFO:root:[0] Ed: 233600, train_loss: 1.39797, acc: 0.41658


[INFO 2025-03-02 19:23:08,247] [0] Ed: 233600, train_loss: 1.39797, acc: 0.41658


INFO:root:Training finish.


[INFO 2025-03-02 19:23:30,330] Training finish.


INFO:root:Model saved to /content/model/epoch-1.pt.


[INFO 2025-03-02 19:23:30,386] Model saved to /content/model/epoch-1.pt.


INFO:root:[0] Ed: 0, train_loss: inf, acc: inf


[INFO 2025-03-02 19:23:30,671] [0] Ed: 0, train_loss: inf, acc: inf


INFO:root:[0] Ed: 3200, train_loss: 1.36322, acc: 0.45125


[INFO 2025-03-02 19:23:56,625] [0] Ed: 3200, train_loss: 1.36322, acc: 0.45125


INFO:root:[0] Ed: 6400, train_loss: 1.34776, acc: 0.45531


[INFO 2025-03-02 19:24:22,570] [0] Ed: 6400, train_loss: 1.34776, acc: 0.45531


INFO:root:[0] Ed: 9600, train_loss: 1.34390, acc: 0.45146


[INFO 2025-03-02 19:24:48,592] [0] Ed: 9600, train_loss: 1.34390, acc: 0.45146


INFO:root:[0] Ed: 12800, train_loss: 1.34202, acc: 0.45141


[INFO 2025-03-02 19:25:14,602] [0] Ed: 12800, train_loss: 1.34202, acc: 0.45141


INFO:root:[0] Ed: 16000, train_loss: 1.34370, acc: 0.45019


[INFO 2025-03-02 19:25:40,602] [0] Ed: 16000, train_loss: 1.34370, acc: 0.45019


INFO:root:[0] Ed: 19200, train_loss: 1.34374, acc: 0.45021


[INFO 2025-03-02 19:26:06,551] [0] Ed: 19200, train_loss: 1.34374, acc: 0.45021


INFO:root:[0] Ed: 22400, train_loss: 1.34305, acc: 0.44991


[INFO 2025-03-02 19:26:32,430] [0] Ed: 22400, train_loss: 1.34305, acc: 0.44991


INFO:root:[0] Ed: 25600, train_loss: 1.34376, acc: 0.44926


[INFO 2025-03-02 19:26:58,318] [0] Ed: 25600, train_loss: 1.34376, acc: 0.44926


INFO:root:[0] Ed: 28800, train_loss: 1.34246, acc: 0.45083


[INFO 2025-03-02 19:27:24,121] [0] Ed: 28800, train_loss: 1.34246, acc: 0.45083


INFO:root:[0] Ed: 32000, train_loss: 1.34191, acc: 0.45081


[INFO 2025-03-02 19:27:49,958] [0] Ed: 32000, train_loss: 1.34191, acc: 0.45081


INFO:root:[0] Ed: 35200, train_loss: 1.34347, acc: 0.44952


[INFO 2025-03-02 19:28:15,882] [0] Ed: 35200, train_loss: 1.34347, acc: 0.44952


INFO:root:[0] Ed: 38400, train_loss: 1.34188, acc: 0.45055


[INFO 2025-03-02 19:28:41,796] [0] Ed: 38400, train_loss: 1.34188, acc: 0.45055


INFO:root:[0] Ed: 41600, train_loss: 1.34026, acc: 0.45175


[INFO 2025-03-02 19:29:07,810] [0] Ed: 41600, train_loss: 1.34026, acc: 0.45175


INFO:root:[0] Ed: 44800, train_loss: 1.33942, acc: 0.45223


[INFO 2025-03-02 19:29:33,919] [0] Ed: 44800, train_loss: 1.33942, acc: 0.45223


INFO:root:[0] Ed: 48000, train_loss: 1.33982, acc: 0.45167


[INFO 2025-03-02 19:29:59,889] [0] Ed: 48000, train_loss: 1.33982, acc: 0.45167


INFO:root:[0] Ed: 51200, train_loss: 1.33882, acc: 0.45186


[INFO 2025-03-02 19:30:25,862] [0] Ed: 51200, train_loss: 1.33882, acc: 0.45186


INFO:root:[0] Ed: 54400, train_loss: 1.33780, acc: 0.45285


[INFO 2025-03-02 19:30:51,788] [0] Ed: 54400, train_loss: 1.33780, acc: 0.45285


INFO:root:[0] Ed: 57600, train_loss: 1.33682, acc: 0.45342


[INFO 2025-03-02 19:31:18,041] [0] Ed: 57600, train_loss: 1.33682, acc: 0.45342


INFO:root:[0] Ed: 60800, train_loss: 1.33648, acc: 0.45334


[INFO 2025-03-02 19:31:44,020] [0] Ed: 60800, train_loss: 1.33648, acc: 0.45334


INFO:root:[0] Ed: 64000, train_loss: 1.33522, acc: 0.45386


[INFO 2025-03-02 19:32:10,001] [0] Ed: 64000, train_loss: 1.33522, acc: 0.45386


INFO:root:[0] Ed: 67200, train_loss: 1.33450, acc: 0.45464


[INFO 2025-03-02 19:32:35,919] [0] Ed: 67200, train_loss: 1.33450, acc: 0.45464


INFO:root:[0] Ed: 70400, train_loss: 1.33364, acc: 0.45496


[INFO 2025-03-02 19:33:01,908] [0] Ed: 70400, train_loss: 1.33364, acc: 0.45496


INFO:root:[0] Ed: 73600, train_loss: 1.33325, acc: 0.45489


[INFO 2025-03-02 19:33:27,819] [0] Ed: 73600, train_loss: 1.33325, acc: 0.45489


INFO:root:[0] Ed: 76800, train_loss: 1.33225, acc: 0.45536


[INFO 2025-03-02 19:33:53,760] [0] Ed: 76800, train_loss: 1.33225, acc: 0.45536


INFO:root:[0] Ed: 80000, train_loss: 1.33245, acc: 0.45511


[INFO 2025-03-02 19:34:19,738] [0] Ed: 80000, train_loss: 1.33245, acc: 0.45511


INFO:root:[0] Ed: 83200, train_loss: 1.33147, acc: 0.45585


[INFO 2025-03-02 19:34:45,624] [0] Ed: 83200, train_loss: 1.33147, acc: 0.45585


INFO:root:[0] Ed: 86400, train_loss: 1.33095, acc: 0.45582


[INFO 2025-03-02 19:35:11,586] [0] Ed: 86400, train_loss: 1.33095, acc: 0.45582


INFO:root:[0] Ed: 89600, train_loss: 1.32974, acc: 0.45640


[INFO 2025-03-02 19:35:37,471] [0] Ed: 89600, train_loss: 1.32974, acc: 0.45640


INFO:root:[0] Ed: 92800, train_loss: 1.32986, acc: 0.45609


[INFO 2025-03-02 19:36:03,465] [0] Ed: 92800, train_loss: 1.32986, acc: 0.45609


INFO:root:[0] Ed: 96000, train_loss: 1.32896, acc: 0.45660


[INFO 2025-03-02 19:36:29,409] [0] Ed: 96000, train_loss: 1.32896, acc: 0.45660


INFO:root:[0] Ed: 99200, train_loss: 1.32893, acc: 0.45640


[INFO 2025-03-02 19:36:55,311] [0] Ed: 99200, train_loss: 1.32893, acc: 0.45640


INFO:root:[0] Ed: 102400, train_loss: 1.32792, acc: 0.45672


[INFO 2025-03-02 19:37:21,270] [0] Ed: 102400, train_loss: 1.32792, acc: 0.45672


INFO:root:[0] Ed: 105600, train_loss: 1.32672, acc: 0.45742


[INFO 2025-03-02 19:37:47,275] [0] Ed: 105600, train_loss: 1.32672, acc: 0.45742


INFO:root:[0] Ed: 108800, train_loss: 1.32654, acc: 0.45754


[INFO 2025-03-02 19:38:13,209] [0] Ed: 108800, train_loss: 1.32654, acc: 0.45754


INFO:root:[0] Ed: 112000, train_loss: 1.32643, acc: 0.45729


[INFO 2025-03-02 19:38:39,196] [0] Ed: 112000, train_loss: 1.32643, acc: 0.45729


INFO:root:[0] Ed: 115200, train_loss: 1.32633, acc: 0.45722


[INFO 2025-03-02 19:39:05,142] [0] Ed: 115200, train_loss: 1.32633, acc: 0.45722


INFO:root:[0] Ed: 118400, train_loss: 1.32544, acc: 0.45762


[INFO 2025-03-02 19:39:31,071] [0] Ed: 118400, train_loss: 1.32544, acc: 0.45762


INFO:root:[0] Ed: 121600, train_loss: 1.32539, acc: 0.45773


[INFO 2025-03-02 19:39:57,070] [0] Ed: 121600, train_loss: 1.32539, acc: 0.45773


INFO:root:[0] Ed: 124800, train_loss: 1.32510, acc: 0.45773


[INFO 2025-03-02 19:40:22,954] [0] Ed: 124800, train_loss: 1.32510, acc: 0.45773


INFO:root:[0] Ed: 128000, train_loss: 1.32527, acc: 0.45766


[INFO 2025-03-02 19:40:49,213] [0] Ed: 128000, train_loss: 1.32527, acc: 0.45766


INFO:root:[0] Ed: 131200, train_loss: 1.32479, acc: 0.45812


[INFO 2025-03-02 19:41:15,279] [0] Ed: 131200, train_loss: 1.32479, acc: 0.45812


INFO:root:[0] Ed: 134400, train_loss: 1.32475, acc: 0.45815


[INFO 2025-03-02 19:41:41,338] [0] Ed: 134400, train_loss: 1.32475, acc: 0.45815


INFO:root:[0] Ed: 137600, train_loss: 1.32483, acc: 0.45828


[INFO 2025-03-02 19:42:07,423] [0] Ed: 137600, train_loss: 1.32483, acc: 0.45828


INFO:root:[0] Ed: 140800, train_loss: 1.32430, acc: 0.45882


[INFO 2025-03-02 19:42:33,381] [0] Ed: 140800, train_loss: 1.32430, acc: 0.45882


INFO:root:[0] Ed: 144000, train_loss: 1.32393, acc: 0.45885


[INFO 2025-03-02 19:42:59,285] [0] Ed: 144000, train_loss: 1.32393, acc: 0.45885


INFO:root:[0] Ed: 147200, train_loss: 1.32338, acc: 0.45923


[INFO 2025-03-02 19:43:25,206] [0] Ed: 147200, train_loss: 1.32338, acc: 0.45923


INFO:root:[0] Ed: 150400, train_loss: 1.32274, acc: 0.45932


[INFO 2025-03-02 19:43:51,181] [0] Ed: 150400, train_loss: 1.32274, acc: 0.45932


INFO:root:[0] Ed: 153600, train_loss: 1.32232, acc: 0.45939


[INFO 2025-03-02 19:44:17,202] [0] Ed: 153600, train_loss: 1.32232, acc: 0.45939


INFO:root:[0] Ed: 156800, train_loss: 1.32181, acc: 0.45978


[INFO 2025-03-02 19:44:43,324] [0] Ed: 156800, train_loss: 1.32181, acc: 0.45978


INFO:root:[0] Ed: 160000, train_loss: 1.32146, acc: 0.45988


[INFO 2025-03-02 19:45:09,390] [0] Ed: 160000, train_loss: 1.32146, acc: 0.45988


INFO:root:[0] Ed: 163200, train_loss: 1.32118, acc: 0.45993


[INFO 2025-03-02 19:45:35,404] [0] Ed: 163200, train_loss: 1.32118, acc: 0.45993


INFO:root:[0] Ed: 166400, train_loss: 1.32094, acc: 0.46016


[INFO 2025-03-02 19:46:01,430] [0] Ed: 166400, train_loss: 1.32094, acc: 0.46016


INFO:root:[0] Ed: 169600, train_loss: 1.32075, acc: 0.46034


[INFO 2025-03-02 19:46:27,454] [0] Ed: 169600, train_loss: 1.32075, acc: 0.46034


INFO:root:[0] Ed: 172800, train_loss: 1.32023, acc: 0.46071


[INFO 2025-03-02 19:46:53,366] [0] Ed: 172800, train_loss: 1.32023, acc: 0.46071


INFO:root:[0] Ed: 176000, train_loss: 1.31997, acc: 0.46085


[INFO 2025-03-02 19:47:19,399] [0] Ed: 176000, train_loss: 1.31997, acc: 0.46085


INFO:root:[0] Ed: 179200, train_loss: 1.31972, acc: 0.46099


[INFO 2025-03-02 19:47:45,467] [0] Ed: 179200, train_loss: 1.31972, acc: 0.46099


INFO:root:[0] Ed: 182400, train_loss: 1.31959, acc: 0.46119


[INFO 2025-03-02 19:48:11,499] [0] Ed: 182400, train_loss: 1.31959, acc: 0.46119


INFO:root:[0] Ed: 185600, train_loss: 1.31920, acc: 0.46137


[INFO 2025-03-02 19:48:37,512] [0] Ed: 185600, train_loss: 1.31920, acc: 0.46137


INFO:root:[0] Ed: 188800, train_loss: 1.31885, acc: 0.46153


[INFO 2025-03-02 19:49:03,486] [0] Ed: 188800, train_loss: 1.31885, acc: 0.46153


INFO:root:[0] Ed: 192000, train_loss: 1.31882, acc: 0.46144


[INFO 2025-03-02 19:49:29,545] [0] Ed: 192000, train_loss: 1.31882, acc: 0.46144


INFO:root:[0] Ed: 195200, train_loss: 1.31865, acc: 0.46165


[INFO 2025-03-02 19:49:55,657] [0] Ed: 195200, train_loss: 1.31865, acc: 0.46165


INFO:root:[0] Ed: 198400, train_loss: 1.31848, acc: 0.46180


[INFO 2025-03-02 19:50:21,837] [0] Ed: 198400, train_loss: 1.31848, acc: 0.46180


INFO:root:[0] Ed: 201600, train_loss: 1.31824, acc: 0.46192


[INFO 2025-03-02 19:50:48,297] [0] Ed: 201600, train_loss: 1.31824, acc: 0.46192


INFO:root:[0] Ed: 204800, train_loss: 1.31822, acc: 0.46184


[INFO 2025-03-02 19:51:14,370] [0] Ed: 204800, train_loss: 1.31822, acc: 0.46184


INFO:root:[0] Ed: 208000, train_loss: 1.31830, acc: 0.46185


[INFO 2025-03-02 19:51:40,502] [0] Ed: 208000, train_loss: 1.31830, acc: 0.46185


INFO:root:[0] Ed: 211200, train_loss: 1.31815, acc: 0.46187


[INFO 2025-03-02 19:52:06,571] [0] Ed: 211200, train_loss: 1.31815, acc: 0.46187


INFO:root:[0] Ed: 214400, train_loss: 1.31802, acc: 0.46199


[INFO 2025-03-02 19:52:32,780] [0] Ed: 214400, train_loss: 1.31802, acc: 0.46199


INFO:root:[0] Ed: 217600, train_loss: 1.31779, acc: 0.46208


[INFO 2025-03-02 19:52:58,821] [0] Ed: 217600, train_loss: 1.31779, acc: 0.46208


INFO:root:[0] Ed: 220800, train_loss: 1.31741, acc: 0.46233


[INFO 2025-03-02 19:53:24,852] [0] Ed: 220800, train_loss: 1.31741, acc: 0.46233


INFO:root:[0] Ed: 224000, train_loss: 1.31704, acc: 0.46264


[INFO 2025-03-02 19:53:50,953] [0] Ed: 224000, train_loss: 1.31704, acc: 0.46264


INFO:root:[0] Ed: 227200, train_loss: 1.31659, acc: 0.46302


[INFO 2025-03-02 19:54:17,107] [0] Ed: 227200, train_loss: 1.31659, acc: 0.46302


INFO:root:[0] Ed: 230400, train_loss: 1.31619, acc: 0.46336


[INFO 2025-03-02 19:54:43,168] [0] Ed: 230400, train_loss: 1.31619, acc: 0.46336


INFO:root:[0] Ed: 233600, train_loss: 1.31563, acc: 0.46360


[INFO 2025-03-02 19:55:09,190] [0] Ed: 233600, train_loss: 1.31563, acc: 0.46360


INFO:root:Training finish.


[INFO 2025-03-02 19:55:31,142] Training finish.


INFO:root:Model saved to /content/model/epoch-2.pt.


[INFO 2025-03-02 19:55:31,209] Model saved to /content/model/epoch-2.pt.


INFO:root:[0] Ed: 0, train_loss: inf, acc: inf


[INFO 2025-03-02 19:55:31,519] [0] Ed: 0, train_loss: inf, acc: inf


INFO:root:[0] Ed: 3200, train_loss: 1.32168, acc: 0.46937


[INFO 2025-03-02 19:55:57,568] [0] Ed: 3200, train_loss: 1.32168, acc: 0.46937


INFO:root:[0] Ed: 6400, train_loss: 1.30593, acc: 0.46719


[INFO 2025-03-02 19:56:23,596] [0] Ed: 6400, train_loss: 1.30593, acc: 0.46719


INFO:root:[0] Ed: 9600, train_loss: 1.30141, acc: 0.47104


[INFO 2025-03-02 19:56:49,605] [0] Ed: 9600, train_loss: 1.30141, acc: 0.47104


INFO:root:[0] Ed: 12800, train_loss: 1.29787, acc: 0.47297


[INFO 2025-03-02 19:57:15,612] [0] Ed: 12800, train_loss: 1.29787, acc: 0.47297


INFO:root:[0] Ed: 16000, train_loss: 1.30191, acc: 0.46988


[INFO 2025-03-02 19:57:41,676] [0] Ed: 16000, train_loss: 1.30191, acc: 0.46988


INFO:root:[0] Ed: 19200, train_loss: 1.30310, acc: 0.46932


[INFO 2025-03-02 19:58:07,720] [0] Ed: 19200, train_loss: 1.30310, acc: 0.46932


INFO:root:[0] Ed: 22400, train_loss: 1.30209, acc: 0.47103


[INFO 2025-03-02 19:58:33,796] [0] Ed: 22400, train_loss: 1.30209, acc: 0.47103


INFO:root:[0] Ed: 25600, train_loss: 1.30267, acc: 0.47070


[INFO 2025-03-02 19:58:59,753] [0] Ed: 25600, train_loss: 1.30267, acc: 0.47070


INFO:root:[0] Ed: 28800, train_loss: 1.30229, acc: 0.47132


[INFO 2025-03-02 19:59:25,786] [0] Ed: 28800, train_loss: 1.30229, acc: 0.47132


INFO:root:[0] Ed: 32000, train_loss: 1.30291, acc: 0.47069


[INFO 2025-03-02 19:59:51,810] [0] Ed: 32000, train_loss: 1.30291, acc: 0.47069


INFO:root:[0] Ed: 35200, train_loss: 1.30486, acc: 0.46963


[INFO 2025-03-02 20:00:17,854] [0] Ed: 35200, train_loss: 1.30486, acc: 0.46963


INFO:root:[0] Ed: 38400, train_loss: 1.30392, acc: 0.46964


[INFO 2025-03-02 20:00:44,186] [0] Ed: 38400, train_loss: 1.30392, acc: 0.46964


INFO:root:[0] Ed: 41600, train_loss: 1.30192, acc: 0.47084


[INFO 2025-03-02 20:01:10,249] [0] Ed: 41600, train_loss: 1.30192, acc: 0.47084


INFO:root:[0] Ed: 44800, train_loss: 1.30056, acc: 0.47158


[INFO 2025-03-02 20:01:36,283] [0] Ed: 44800, train_loss: 1.30056, acc: 0.47158


INFO:root:[0] Ed: 48000, train_loss: 1.30065, acc: 0.47117


[INFO 2025-03-02 20:02:02,220] [0] Ed: 48000, train_loss: 1.30065, acc: 0.47117


INFO:root:[0] Ed: 51200, train_loss: 1.29990, acc: 0.47148


[INFO 2025-03-02 20:02:28,220] [0] Ed: 51200, train_loss: 1.29990, acc: 0.47148


INFO:root:[0] Ed: 54400, train_loss: 1.29894, acc: 0.47211


[INFO 2025-03-02 20:02:54,313] [0] Ed: 54400, train_loss: 1.29894, acc: 0.47211


INFO:root:[0] Ed: 57600, train_loss: 1.29851, acc: 0.47220


[INFO 2025-03-02 20:03:20,326] [0] Ed: 57600, train_loss: 1.29851, acc: 0.47220


INFO:root:[0] Ed: 60800, train_loss: 1.29856, acc: 0.47260


[INFO 2025-03-02 20:03:46,344] [0] Ed: 60800, train_loss: 1.29856, acc: 0.47260


INFO:root:[0] Ed: 64000, train_loss: 1.29750, acc: 0.47364


[INFO 2025-03-02 20:04:12,285] [0] Ed: 64000, train_loss: 1.29750, acc: 0.47364


INFO:root:[0] Ed: 67200, train_loss: 1.29691, acc: 0.47400


[INFO 2025-03-02 20:04:38,206] [0] Ed: 67200, train_loss: 1.29691, acc: 0.47400


INFO:root:[0] Ed: 70400, train_loss: 1.29629, acc: 0.47476


[INFO 2025-03-02 20:05:04,256] [0] Ed: 70400, train_loss: 1.29629, acc: 0.47476


INFO:root:[0] Ed: 73600, train_loss: 1.29608, acc: 0.47457


[INFO 2025-03-02 20:05:30,340] [0] Ed: 73600, train_loss: 1.29608, acc: 0.47457


INFO:root:[0] Ed: 76800, train_loss: 1.29509, acc: 0.47548


[INFO 2025-03-02 20:05:56,323] [0] Ed: 76800, train_loss: 1.29509, acc: 0.47548


INFO:root:[0] Ed: 80000, train_loss: 1.29525, acc: 0.47577


[INFO 2025-03-02 20:06:22,387] [0] Ed: 80000, train_loss: 1.29525, acc: 0.47577


INFO:root:[0] Ed: 83200, train_loss: 1.29412, acc: 0.47659


[INFO 2025-03-02 20:06:48,454] [0] Ed: 83200, train_loss: 1.29412, acc: 0.47659


INFO:root:[0] Ed: 86400, train_loss: 1.29355, acc: 0.47662


[INFO 2025-03-02 20:07:14,485] [0] Ed: 86400, train_loss: 1.29355, acc: 0.47662


INFO:root:[0] Ed: 89600, train_loss: 1.29273, acc: 0.47695


[INFO 2025-03-02 20:07:40,510] [0] Ed: 89600, train_loss: 1.29273, acc: 0.47695


INFO:root:[0] Ed: 92800, train_loss: 1.29311, acc: 0.47658


[INFO 2025-03-02 20:08:06,402] [0] Ed: 92800, train_loss: 1.29311, acc: 0.47658


INFO:root:[0] Ed: 96000, train_loss: 1.29246, acc: 0.47698


[INFO 2025-03-02 20:08:32,351] [0] Ed: 96000, train_loss: 1.29246, acc: 0.47698


INFO:root:[0] Ed: 99200, train_loss: 1.29221, acc: 0.47655


[INFO 2025-03-02 20:08:58,315] [0] Ed: 99200, train_loss: 1.29221, acc: 0.47655


INFO:root:[0] Ed: 102400, train_loss: 1.29128, acc: 0.47674


[INFO 2025-03-02 20:09:24,277] [0] Ed: 102400, train_loss: 1.29128, acc: 0.47674


INFO:root:[0] Ed: 105600, train_loss: 1.29018, acc: 0.47736


[INFO 2025-03-02 20:09:50,202] [0] Ed: 105600, train_loss: 1.29018, acc: 0.47736


INFO:root:[0] Ed: 108800, train_loss: 1.29026, acc: 0.47724


[INFO 2025-03-02 20:10:16,423] [0] Ed: 108800, train_loss: 1.29026, acc: 0.47724


INFO:root:[0] Ed: 112000, train_loss: 1.29041, acc: 0.47708


[INFO 2025-03-02 20:10:42,398] [0] Ed: 112000, train_loss: 1.29041, acc: 0.47708


INFO:root:[0] Ed: 115200, train_loss: 1.29038, acc: 0.47707


[INFO 2025-03-02 20:11:08,316] [0] Ed: 115200, train_loss: 1.29038, acc: 0.47707


INFO:root:[0] Ed: 118400, train_loss: 1.28954, acc: 0.47759


[INFO 2025-03-02 20:11:34,307] [0] Ed: 118400, train_loss: 1.28954, acc: 0.47759


INFO:root:[0] Ed: 121600, train_loss: 1.28976, acc: 0.47752


[INFO 2025-03-02 20:12:00,260] [0] Ed: 121600, train_loss: 1.28976, acc: 0.47752


INFO:root:[0] Ed: 124800, train_loss: 1.28962, acc: 0.47756


[INFO 2025-03-02 20:12:26,177] [0] Ed: 124800, train_loss: 1.28962, acc: 0.47756


INFO:root:[0] Ed: 128000, train_loss: 1.28999, acc: 0.47740


[INFO 2025-03-02 20:12:52,119] [0] Ed: 128000, train_loss: 1.28999, acc: 0.47740


INFO:root:[0] Ed: 131200, train_loss: 1.28966, acc: 0.47764


[INFO 2025-03-02 20:13:18,103] [0] Ed: 131200, train_loss: 1.28966, acc: 0.47764


INFO:root:[0] Ed: 134400, train_loss: 1.28973, acc: 0.47758


[INFO 2025-03-02 20:13:44,039] [0] Ed: 134400, train_loss: 1.28973, acc: 0.47758


INFO:root:[0] Ed: 137600, train_loss: 1.28992, acc: 0.47766


[INFO 2025-03-02 20:14:10,015] [0] Ed: 137600, train_loss: 1.28992, acc: 0.47766


INFO:root:[0] Ed: 140800, train_loss: 1.28950, acc: 0.47812


[INFO 2025-03-02 20:14:35,979] [0] Ed: 140800, train_loss: 1.28950, acc: 0.47812


INFO:root:[0] Ed: 144000, train_loss: 1.28911, acc: 0.47791


[INFO 2025-03-02 20:15:01,907] [0] Ed: 144000, train_loss: 1.28911, acc: 0.47791


INFO:root:[0] Ed: 147200, train_loss: 1.28866, acc: 0.47811


[INFO 2025-03-02 20:15:27,879] [0] Ed: 147200, train_loss: 1.28866, acc: 0.47811


INFO:root:[0] Ed: 150400, train_loss: 1.28800, acc: 0.47827


[INFO 2025-03-02 20:15:53,899] [0] Ed: 150400, train_loss: 1.28800, acc: 0.47827


INFO:root:[0] Ed: 153600, train_loss: 1.28741, acc: 0.47826


[INFO 2025-03-02 20:16:19,799] [0] Ed: 153600, train_loss: 1.28741, acc: 0.47826


INFO:root:[0] Ed: 156800, train_loss: 1.28695, acc: 0.47842


[INFO 2025-03-02 20:16:45,772] [0] Ed: 156800, train_loss: 1.28695, acc: 0.47842


INFO:root:[0] Ed: 160000, train_loss: 1.28670, acc: 0.47860


[INFO 2025-03-02 20:17:11,692] [0] Ed: 160000, train_loss: 1.28670, acc: 0.47860


INFO:root:[0] Ed: 163200, train_loss: 1.28660, acc: 0.47869


[INFO 2025-03-02 20:17:37,707] [0] Ed: 163200, train_loss: 1.28660, acc: 0.47869


INFO:root:[0] Ed: 166400, train_loss: 1.28660, acc: 0.47872


[INFO 2025-03-02 20:18:03,666] [0] Ed: 166400, train_loss: 1.28660, acc: 0.47872


INFO:root:[0] Ed: 169600, train_loss: 1.28659, acc: 0.47892


[INFO 2025-03-02 20:18:29,761] [0] Ed: 169600, train_loss: 1.28659, acc: 0.47892


INFO:root:[0] Ed: 172800, train_loss: 1.28614, acc: 0.47903


[INFO 2025-03-02 20:18:55,715] [0] Ed: 172800, train_loss: 1.28614, acc: 0.47903


INFO:root:[0] Ed: 176000, train_loss: 1.28592, acc: 0.47931


[INFO 2025-03-02 20:19:21,697] [0] Ed: 176000, train_loss: 1.28592, acc: 0.47931


INFO:root:[0] Ed: 179200, train_loss: 1.28568, acc: 0.47950


[INFO 2025-03-02 20:19:47,692] [0] Ed: 179200, train_loss: 1.28568, acc: 0.47950


INFO:root:[0] Ed: 182400, train_loss: 1.28563, acc: 0.47954


[INFO 2025-03-02 20:20:13,869] [0] Ed: 182400, train_loss: 1.28563, acc: 0.47954


INFO:root:[0] Ed: 185600, train_loss: 1.28524, acc: 0.47976


[INFO 2025-03-02 20:20:39,906] [0] Ed: 185600, train_loss: 1.28524, acc: 0.47976


INFO:root:[0] Ed: 188800, train_loss: 1.28506, acc: 0.47993


[INFO 2025-03-02 20:21:05,919] [0] Ed: 188800, train_loss: 1.28506, acc: 0.47993


INFO:root:[0] Ed: 192000, train_loss: 1.28516, acc: 0.47976


[INFO 2025-03-02 20:21:31,967] [0] Ed: 192000, train_loss: 1.28516, acc: 0.47976


INFO:root:[0] Ed: 195200, train_loss: 1.28507, acc: 0.47994


[INFO 2025-03-02 20:21:58,047] [0] Ed: 195200, train_loss: 1.28507, acc: 0.47994


INFO:root:[0] Ed: 198400, train_loss: 1.28506, acc: 0.48010


[INFO 2025-03-02 20:22:24,163] [0] Ed: 198400, train_loss: 1.28506, acc: 0.48010


INFO:root:[0] Ed: 201600, train_loss: 1.28490, acc: 0.48020


[INFO 2025-03-02 20:22:50,263] [0] Ed: 201600, train_loss: 1.28490, acc: 0.48020


INFO:root:[0] Ed: 204800, train_loss: 1.28499, acc: 0.48002


[INFO 2025-03-02 20:23:16,284] [0] Ed: 204800, train_loss: 1.28499, acc: 0.48002


INFO:root:[0] Ed: 208000, train_loss: 1.28510, acc: 0.48000


[INFO 2025-03-02 20:23:42,372] [0] Ed: 208000, train_loss: 1.28510, acc: 0.48000


INFO:root:[0] Ed: 211200, train_loss: 1.28503, acc: 0.48004


[INFO 2025-03-02 20:24:08,524] [0] Ed: 211200, train_loss: 1.28503, acc: 0.48004


INFO:root:[0] Ed: 214400, train_loss: 1.28485, acc: 0.48026


[INFO 2025-03-02 20:24:34,598] [0] Ed: 214400, train_loss: 1.28485, acc: 0.48026


INFO:root:[0] Ed: 217600, train_loss: 1.28464, acc: 0.48044


[INFO 2025-03-02 20:25:00,579] [0] Ed: 217600, train_loss: 1.28464, acc: 0.48044


INFO:root:[0] Ed: 220800, train_loss: 1.28436, acc: 0.48060


[INFO 2025-03-02 20:25:26,533] [0] Ed: 220800, train_loss: 1.28436, acc: 0.48060


INFO:root:[0] Ed: 224000, train_loss: 1.28404, acc: 0.48079


[INFO 2025-03-02 20:25:52,463] [0] Ed: 224000, train_loss: 1.28404, acc: 0.48079


INFO:root:[0] Ed: 227200, train_loss: 1.28369, acc: 0.48086


[INFO 2025-03-02 20:26:18,492] [0] Ed: 227200, train_loss: 1.28369, acc: 0.48086


INFO:root:[0] Ed: 230400, train_loss: 1.28336, acc: 0.48108


[INFO 2025-03-02 20:26:44,511] [0] Ed: 230400, train_loss: 1.28336, acc: 0.48108


INFO:root:[0] Ed: 233600, train_loss: 1.28291, acc: 0.48136


[INFO 2025-03-02 20:27:10,478] [0] Ed: 233600, train_loss: 1.28291, acc: 0.48136


INFO:root:Training finish.


[INFO 2025-03-02 20:27:32,366] Training finish.


INFO:root:Model saved to /content/model/epoch-3.pt.


[INFO 2025-03-02 20:27:32,407] Model saved to /content/model/epoch-3.pt.


INFO:root:[0] Ed: 0, train_loss: inf, acc: inf


[INFO 2025-03-02 20:27:32,690] [0] Ed: 0, train_loss: inf, acc: inf


INFO:root:[0] Ed: 3200, train_loss: 1.29305, acc: 0.48562


[INFO 2025-03-02 20:27:58,613] [0] Ed: 3200, train_loss: 1.29305, acc: 0.48562


INFO:root:[0] Ed: 6400, train_loss: 1.27754, acc: 0.48766


[INFO 2025-03-02 20:28:24,608] [0] Ed: 6400, train_loss: 1.27754, acc: 0.48766


INFO:root:[0] Ed: 9600, train_loss: 1.27713, acc: 0.48500


[INFO 2025-03-02 20:28:50,570] [0] Ed: 9600, train_loss: 1.27713, acc: 0.48500


INFO:root:[0] Ed: 12800, train_loss: 1.27346, acc: 0.48750


[INFO 2025-03-02 20:29:16,456] [0] Ed: 12800, train_loss: 1.27346, acc: 0.48750


INFO:root:[0] Ed: 16000, train_loss: 1.27835, acc: 0.48306


[INFO 2025-03-02 20:29:42,684] [0] Ed: 16000, train_loss: 1.27835, acc: 0.48306


INFO:root:[0] Ed: 19200, train_loss: 1.27901, acc: 0.48234


[INFO 2025-03-02 20:30:08,558] [0] Ed: 19200, train_loss: 1.27901, acc: 0.48234


INFO:root:[0] Ed: 22400, train_loss: 1.27803, acc: 0.48438


[INFO 2025-03-02 20:30:34,577] [0] Ed: 22400, train_loss: 1.27803, acc: 0.48438


INFO:root:[0] Ed: 25600, train_loss: 1.27810, acc: 0.48398


[INFO 2025-03-02 20:31:00,539] [0] Ed: 25600, train_loss: 1.27810, acc: 0.48398


INFO:root:[0] Ed: 28800, train_loss: 1.27708, acc: 0.48469


[INFO 2025-03-02 20:31:26,558] [0] Ed: 28800, train_loss: 1.27708, acc: 0.48469


INFO:root:[0] Ed: 32000, train_loss: 1.27773, acc: 0.48384


[INFO 2025-03-02 20:31:52,489] [0] Ed: 32000, train_loss: 1.27773, acc: 0.48384


INFO:root:[0] Ed: 35200, train_loss: 1.28003, acc: 0.48202


[INFO 2025-03-02 20:32:18,484] [0] Ed: 35200, train_loss: 1.28003, acc: 0.48202


INFO:root:[0] Ed: 38400, train_loss: 1.27855, acc: 0.48305


[INFO 2025-03-02 20:32:44,388] [0] Ed: 38400, train_loss: 1.27855, acc: 0.48305


INFO:root:[0] Ed: 41600, train_loss: 1.27669, acc: 0.48389


[INFO 2025-03-02 20:33:10,426] [0] Ed: 41600, train_loss: 1.27669, acc: 0.48389


INFO:root:[0] Ed: 44800, train_loss: 1.27587, acc: 0.48404


[INFO 2025-03-02 20:33:36,385] [0] Ed: 44800, train_loss: 1.27587, acc: 0.48404


INFO:root:[0] Ed: 48000, train_loss: 1.27583, acc: 0.48390


[INFO 2025-03-02 20:34:02,365] [0] Ed: 48000, train_loss: 1.27583, acc: 0.48390


INFO:root:[0] Ed: 51200, train_loss: 1.27472, acc: 0.48428


[INFO 2025-03-02 20:34:28,356] [0] Ed: 51200, train_loss: 1.27472, acc: 0.48428


INFO:root:[0] Ed: 54400, train_loss: 1.27373, acc: 0.48471


[INFO 2025-03-02 20:34:54,335] [0] Ed: 54400, train_loss: 1.27373, acc: 0.48471


INFO:root:[0] Ed: 57600, train_loss: 1.27328, acc: 0.48470


[INFO 2025-03-02 20:35:20,293] [0] Ed: 57600, train_loss: 1.27328, acc: 0.48470


INFO:root:[0] Ed: 60800, train_loss: 1.27357, acc: 0.48462


[INFO 2025-03-02 20:35:46,250] [0] Ed: 60800, train_loss: 1.27357, acc: 0.48462


INFO:root:[0] Ed: 64000, train_loss: 1.27259, acc: 0.48556


[INFO 2025-03-02 20:36:12,277] [0] Ed: 64000, train_loss: 1.27259, acc: 0.48556


INFO:root:[0] Ed: 67200, train_loss: 1.27216, acc: 0.48644


[INFO 2025-03-02 20:36:38,289] [0] Ed: 67200, train_loss: 1.27216, acc: 0.48644


INFO:root:[0] Ed: 70400, train_loss: 1.27128, acc: 0.48703


[INFO 2025-03-02 20:37:04,274] [0] Ed: 70400, train_loss: 1.27128, acc: 0.48703


INFO:root:[0] Ed: 73600, train_loss: 1.27142, acc: 0.48686


[INFO 2025-03-02 20:37:30,257] [0] Ed: 73600, train_loss: 1.27142, acc: 0.48686


INFO:root:[0] Ed: 76800, train_loss: 1.27041, acc: 0.48734


[INFO 2025-03-02 20:37:56,215] [0] Ed: 76800, train_loss: 1.27041, acc: 0.48734


INFO:root:[0] Ed: 80000, train_loss: 1.27075, acc: 0.48734


[INFO 2025-03-02 20:38:22,134] [0] Ed: 80000, train_loss: 1.27075, acc: 0.48734


INFO:root:[0] Ed: 83200, train_loss: 1.26970, acc: 0.48812


[INFO 2025-03-02 20:38:48,069] [0] Ed: 83200, train_loss: 1.26970, acc: 0.48812


INFO:root:[0] Ed: 86400, train_loss: 1.26908, acc: 0.48825


[INFO 2025-03-02 20:39:14,067] [0] Ed: 86400, train_loss: 1.26908, acc: 0.48825


INFO:root:[0] Ed: 89600, train_loss: 1.26811, acc: 0.48860


[INFO 2025-03-02 20:39:40,231] [0] Ed: 89600, train_loss: 1.26811, acc: 0.48860


INFO:root:[0] Ed: 92800, train_loss: 1.26834, acc: 0.48820


[INFO 2025-03-02 20:40:06,194] [0] Ed: 92800, train_loss: 1.26834, acc: 0.48820


INFO:root:[0] Ed: 96000, train_loss: 1.26750, acc: 0.48864


[INFO 2025-03-02 20:40:32,203] [0] Ed: 96000, train_loss: 1.26750, acc: 0.48864


INFO:root:[0] Ed: 99200, train_loss: 1.26725, acc: 0.48844


[INFO 2025-03-02 20:40:58,118] [0] Ed: 99200, train_loss: 1.26725, acc: 0.48844


INFO:root:[0] Ed: 102400, train_loss: 1.26604, acc: 0.48884


[INFO 2025-03-02 20:41:24,118] [0] Ed: 102400, train_loss: 1.26604, acc: 0.48884


INFO:root:[0] Ed: 105600, train_loss: 1.26491, acc: 0.48937


[INFO 2025-03-02 20:41:50,109] [0] Ed: 105600, train_loss: 1.26491, acc: 0.48937


INFO:root:[0] Ed: 108800, train_loss: 1.26488, acc: 0.48951


[INFO 2025-03-02 20:42:16,055] [0] Ed: 108800, train_loss: 1.26488, acc: 0.48951


INFO:root:[0] Ed: 112000, train_loss: 1.26514, acc: 0.48929


[INFO 2025-03-02 20:42:41,975] [0] Ed: 112000, train_loss: 1.26514, acc: 0.48929


INFO:root:[0] Ed: 115200, train_loss: 1.26504, acc: 0.48927


[INFO 2025-03-02 20:43:07,941] [0] Ed: 115200, train_loss: 1.26504, acc: 0.48927


INFO:root:[0] Ed: 118400, train_loss: 1.26411, acc: 0.48973


[INFO 2025-03-02 20:43:33,848] [0] Ed: 118400, train_loss: 1.26411, acc: 0.48973


INFO:root:[0] Ed: 121600, train_loss: 1.26414, acc: 0.48981


[INFO 2025-03-02 20:43:59,825] [0] Ed: 121600, train_loss: 1.26414, acc: 0.48981


INFO:root:[0] Ed: 124800, train_loss: 1.26412, acc: 0.48977


[INFO 2025-03-02 20:44:25,828] [0] Ed: 124800, train_loss: 1.26412, acc: 0.48977


INFO:root:[0] Ed: 128000, train_loss: 1.26453, acc: 0.48953


[INFO 2025-03-02 20:44:51,752] [0] Ed: 128000, train_loss: 1.26453, acc: 0.48953


INFO:root:[0] Ed: 131200, train_loss: 1.26423, acc: 0.48992


[INFO 2025-03-02 20:45:17,765] [0] Ed: 131200, train_loss: 1.26423, acc: 0.48992


INFO:root:[0] Ed: 134400, train_loss: 1.26434, acc: 0.48988


[INFO 2025-03-02 20:45:43,765] [0] Ed: 134400, train_loss: 1.26434, acc: 0.48988


INFO:root:[0] Ed: 137600, train_loss: 1.26451, acc: 0.48996


[INFO 2025-03-02 20:46:09,740] [0] Ed: 137600, train_loss: 1.26451, acc: 0.48996


INFO:root:[0] Ed: 140800, train_loss: 1.26402, acc: 0.49056


[INFO 2025-03-02 20:46:35,672] [0] Ed: 140800, train_loss: 1.26402, acc: 0.49056


INFO:root:[0] Ed: 144000, train_loss: 1.26377, acc: 0.49035


[INFO 2025-03-02 20:47:01,707] [0] Ed: 144000, train_loss: 1.26377, acc: 0.49035


INFO:root:[0] Ed: 147200, train_loss: 1.26337, acc: 0.49063


[INFO 2025-03-02 20:47:27,642] [0] Ed: 147200, train_loss: 1.26337, acc: 0.49063


INFO:root:[0] Ed: 150400, train_loss: 1.26272, acc: 0.49070


[INFO 2025-03-02 20:47:53,601] [0] Ed: 150400, train_loss: 1.26272, acc: 0.49070


INFO:root:[0] Ed: 153600, train_loss: 1.26217, acc: 0.49083


[INFO 2025-03-02 20:48:19,588] [0] Ed: 153600, train_loss: 1.26217, acc: 0.49083


INFO:root:[0] Ed: 156800, train_loss: 1.26171, acc: 0.49104


[INFO 2025-03-02 20:48:45,480] [0] Ed: 156800, train_loss: 1.26171, acc: 0.49104


INFO:root:[0] Ed: 160000, train_loss: 1.26136, acc: 0.49129


[INFO 2025-03-02 20:49:11,740] [0] Ed: 160000, train_loss: 1.26136, acc: 0.49129


INFO:root:[0] Ed: 163200, train_loss: 1.26120, acc: 0.49117


[INFO 2025-03-02 20:49:37,688] [0] Ed: 163200, train_loss: 1.26120, acc: 0.49117


INFO:root:[0] Ed: 166400, train_loss: 1.26136, acc: 0.49102


[INFO 2025-03-02 20:50:03,552] [0] Ed: 166400, train_loss: 1.26136, acc: 0.49102


INFO:root:[0] Ed: 169600, train_loss: 1.26147, acc: 0.49107


[INFO 2025-03-02 20:50:29,417] [0] Ed: 169600, train_loss: 1.26147, acc: 0.49107


INFO:root:[0] Ed: 172800, train_loss: 1.26110, acc: 0.49135


[INFO 2025-03-02 20:50:55,354] [0] Ed: 172800, train_loss: 1.26110, acc: 0.49135


INFO:root:[0] Ed: 176000, train_loss: 1.26085, acc: 0.49159


[INFO 2025-03-02 20:51:21,247] [0] Ed: 176000, train_loss: 1.26085, acc: 0.49159


INFO:root:[0] Ed: 179200, train_loss: 1.26071, acc: 0.49165


[INFO 2025-03-02 20:51:47,265] [0] Ed: 179200, train_loss: 1.26071, acc: 0.49165


INFO:root:[0] Ed: 182400, train_loss: 1.26070, acc: 0.49179


[INFO 2025-03-02 20:52:13,170] [0] Ed: 182400, train_loss: 1.26070, acc: 0.49179


INFO:root:[0] Ed: 185600, train_loss: 1.26028, acc: 0.49197


[INFO 2025-03-02 20:52:39,193] [0] Ed: 185600, train_loss: 1.26028, acc: 0.49197


INFO:root:[0] Ed: 188800, train_loss: 1.26005, acc: 0.49208


[INFO 2025-03-02 20:53:05,190] [0] Ed: 188800, train_loss: 1.26005, acc: 0.49208


INFO:root:[0] Ed: 192000, train_loss: 1.26016, acc: 0.49201


[INFO 2025-03-02 20:53:31,096] [0] Ed: 192000, train_loss: 1.26016, acc: 0.49201


INFO:root:[0] Ed: 195200, train_loss: 1.26000, acc: 0.49211


[INFO 2025-03-02 20:53:57,104] [0] Ed: 195200, train_loss: 1.26000, acc: 0.49211


INFO:root:[0] Ed: 198400, train_loss: 1.26008, acc: 0.49213


[INFO 2025-03-02 20:54:23,039] [0] Ed: 198400, train_loss: 1.26008, acc: 0.49213


INFO:root:[0] Ed: 201600, train_loss: 1.25980, acc: 0.49233


[INFO 2025-03-02 20:54:48,919] [0] Ed: 201600, train_loss: 1.25980, acc: 0.49233


INFO:root:[0] Ed: 204800, train_loss: 1.25995, acc: 0.49228


[INFO 2025-03-02 20:55:14,908] [0] Ed: 204800, train_loss: 1.25995, acc: 0.49228


INFO:root:[0] Ed: 208000, train_loss: 1.26010, acc: 0.49231


[INFO 2025-03-02 20:55:40,895] [0] Ed: 208000, train_loss: 1.26010, acc: 0.49231


INFO:root:[0] Ed: 211200, train_loss: 1.25996, acc: 0.49230


[INFO 2025-03-02 20:56:06,805] [0] Ed: 211200, train_loss: 1.25996, acc: 0.49230


INFO:root:[0] Ed: 214400, train_loss: 1.25991, acc: 0.49235


[INFO 2025-03-02 20:56:32,771] [0] Ed: 214400, train_loss: 1.25991, acc: 0.49235


INFO:root:[0] Ed: 217600, train_loss: 1.25969, acc: 0.49242


[INFO 2025-03-02 20:56:58,759] [0] Ed: 217600, train_loss: 1.25969, acc: 0.49242


INFO:root:[0] Ed: 220800, train_loss: 1.25945, acc: 0.49256


[INFO 2025-03-02 20:57:24,806] [0] Ed: 220800, train_loss: 1.25945, acc: 0.49256


INFO:root:[0] Ed: 224000, train_loss: 1.25912, acc: 0.49279


[INFO 2025-03-02 20:57:50,730] [0] Ed: 224000, train_loss: 1.25912, acc: 0.49279


INFO:root:[0] Ed: 227200, train_loss: 1.25888, acc: 0.49294


[INFO 2025-03-02 20:58:16,658] [0] Ed: 227200, train_loss: 1.25888, acc: 0.49294


INFO:root:[0] Ed: 230400, train_loss: 1.25858, acc: 0.49310


[INFO 2025-03-02 20:58:42,649] [0] Ed: 230400, train_loss: 1.25858, acc: 0.49310


INFO:root:[0] Ed: 233600, train_loss: 1.25819, acc: 0.49336


[INFO 2025-03-02 20:59:08,891] [0] Ed: 233600, train_loss: 1.25819, acc: 0.49336


INFO:root:Training finish.


[INFO 2025-03-02 20:59:30,953] Training finish.


INFO:root:Model saved to /content/model/epoch-4.pt.


[INFO 2025-03-02 20:59:30,997] Model saved to /content/model/epoch-4.pt.


INFO:root:[0] Ed: 0, train_loss: inf, acc: inf


[INFO 2025-03-02 20:59:31,283] [0] Ed: 0, train_loss: inf, acc: inf


INFO:root:[0] Ed: 3200, train_loss: 1.27580, acc: 0.49531


[INFO 2025-03-02 20:59:57,250] [0] Ed: 3200, train_loss: 1.27580, acc: 0.49531


INFO:root:[0] Ed: 6400, train_loss: 1.25757, acc: 0.50266


[INFO 2025-03-02 21:00:23,234] [0] Ed: 6400, train_loss: 1.25757, acc: 0.50266


INFO:root:[0] Ed: 9600, train_loss: 1.25656, acc: 0.50208


[INFO 2025-03-02 21:00:49,233] [0] Ed: 9600, train_loss: 1.25656, acc: 0.50208


INFO:root:[0] Ed: 12800, train_loss: 1.25226, acc: 0.50258


[INFO 2025-03-02 21:01:15,273] [0] Ed: 12800, train_loss: 1.25226, acc: 0.50258


INFO:root:[0] Ed: 16000, train_loss: 1.25657, acc: 0.49888


[INFO 2025-03-02 21:01:41,239] [0] Ed: 16000, train_loss: 1.25657, acc: 0.49888


INFO:root:[0] Ed: 19200, train_loss: 1.25660, acc: 0.49792


[INFO 2025-03-02 21:02:07,188] [0] Ed: 19200, train_loss: 1.25660, acc: 0.49792


INFO:root:[0] Ed: 22400, train_loss: 1.25540, acc: 0.49830


[INFO 2025-03-02 21:02:33,229] [0] Ed: 22400, train_loss: 1.25540, acc: 0.49830


INFO:root:[0] Ed: 25600, train_loss: 1.25469, acc: 0.49793


[INFO 2025-03-02 21:02:59,223] [0] Ed: 25600, train_loss: 1.25469, acc: 0.49793


INFO:root:[0] Ed: 28800, train_loss: 1.25479, acc: 0.49715


[INFO 2025-03-02 21:03:25,104] [0] Ed: 28800, train_loss: 1.25479, acc: 0.49715


INFO:root:[0] Ed: 32000, train_loss: 1.25549, acc: 0.49584


[INFO 2025-03-02 21:03:51,056] [0] Ed: 32000, train_loss: 1.25549, acc: 0.49584


INFO:root:[0] Ed: 35200, train_loss: 1.25723, acc: 0.49437


[INFO 2025-03-02 21:04:17,050] [0] Ed: 35200, train_loss: 1.25723, acc: 0.49437


INFO:root:[0] Ed: 38400, train_loss: 1.25530, acc: 0.49521


[INFO 2025-03-02 21:04:43,078] [0] Ed: 38400, train_loss: 1.25530, acc: 0.49521


INFO:root:[0] Ed: 41600, train_loss: 1.25311, acc: 0.49615


[INFO 2025-03-02 21:05:09,038] [0] Ed: 41600, train_loss: 1.25311, acc: 0.49615


INFO:root:[0] Ed: 44800, train_loss: 1.25243, acc: 0.49627


[INFO 2025-03-02 21:05:35,046] [0] Ed: 44800, train_loss: 1.25243, acc: 0.49627


INFO:root:[0] Ed: 48000, train_loss: 1.25192, acc: 0.49646


[INFO 2025-03-02 21:06:01,014] [0] Ed: 48000, train_loss: 1.25192, acc: 0.49646


INFO:root:[0] Ed: 51200, train_loss: 1.25144, acc: 0.49654


[INFO 2025-03-02 21:06:26,934] [0] Ed: 51200, train_loss: 1.25144, acc: 0.49654


INFO:root:[0] Ed: 54400, train_loss: 1.25020, acc: 0.49704


[INFO 2025-03-02 21:06:52,917] [0] Ed: 54400, train_loss: 1.25020, acc: 0.49704


INFO:root:[0] Ed: 57600, train_loss: 1.24981, acc: 0.49710


[INFO 2025-03-02 21:07:18,904] [0] Ed: 57600, train_loss: 1.24981, acc: 0.49710


INFO:root:[0] Ed: 60800, train_loss: 1.25006, acc: 0.49702


[INFO 2025-03-02 21:07:44,838] [0] Ed: 60800, train_loss: 1.25006, acc: 0.49702


INFO:root:[0] Ed: 64000, train_loss: 1.24915, acc: 0.49783


[INFO 2025-03-02 21:08:10,864] [0] Ed: 64000, train_loss: 1.24915, acc: 0.49783


INFO:root:[0] Ed: 67200, train_loss: 1.24849, acc: 0.49875


[INFO 2025-03-02 21:08:36,838] [0] Ed: 67200, train_loss: 1.24849, acc: 0.49875


INFO:root:[0] Ed: 70400, train_loss: 1.24768, acc: 0.49932


[INFO 2025-03-02 21:09:02,968] [0] Ed: 70400, train_loss: 1.24768, acc: 0.49932


INFO:root:[0] Ed: 73600, train_loss: 1.24775, acc: 0.49895


[INFO 2025-03-02 21:09:28,954] [0] Ed: 73600, train_loss: 1.24775, acc: 0.49895


INFO:root:[0] Ed: 76800, train_loss: 1.24698, acc: 0.49954


[INFO 2025-03-02 21:09:54,906] [0] Ed: 76800, train_loss: 1.24698, acc: 0.49954


INFO:root:[0] Ed: 80000, train_loss: 1.24752, acc: 0.49951


[INFO 2025-03-02 21:10:20,862] [0] Ed: 80000, train_loss: 1.24752, acc: 0.49951


INFO:root:[0] Ed: 83200, train_loss: 1.24687, acc: 0.49993


[INFO 2025-03-02 21:10:46,860] [0] Ed: 83200, train_loss: 1.24687, acc: 0.49993


INFO:root:[0] Ed: 86400, train_loss: 1.24628, acc: 0.49997


[INFO 2025-03-02 21:11:12,885] [0] Ed: 86400, train_loss: 1.24628, acc: 0.49997


INFO:root:[0] Ed: 89600, train_loss: 1.24540, acc: 0.50044


[INFO 2025-03-02 21:11:38,879] [0] Ed: 89600, train_loss: 1.24540, acc: 0.50044


INFO:root:[0] Ed: 92800, train_loss: 1.24593, acc: 0.50031


[INFO 2025-03-02 21:12:04,825] [0] Ed: 92800, train_loss: 1.24593, acc: 0.50031


INFO:root:[0] Ed: 96000, train_loss: 1.24516, acc: 0.50070


[INFO 2025-03-02 21:12:30,846] [0] Ed: 96000, train_loss: 1.24516, acc: 0.50070


INFO:root:[0] Ed: 99200, train_loss: 1.24490, acc: 0.50045


[INFO 2025-03-02 21:12:56,875] [0] Ed: 99200, train_loss: 1.24490, acc: 0.50045


INFO:root:[0] Ed: 102400, train_loss: 1.24408, acc: 0.50051


[INFO 2025-03-02 21:13:22,780] [0] Ed: 102400, train_loss: 1.24408, acc: 0.50051


INFO:root:[0] Ed: 105600, train_loss: 1.24282, acc: 0.50101


[INFO 2025-03-02 21:13:48,756] [0] Ed: 105600, train_loss: 1.24282, acc: 0.50101


INFO:root:[0] Ed: 108800, train_loss: 1.24268, acc: 0.50124


[INFO 2025-03-02 21:14:14,755] [0] Ed: 108800, train_loss: 1.24268, acc: 0.50124


INFO:root:[0] Ed: 112000, train_loss: 1.24287, acc: 0.50089


[INFO 2025-03-02 21:14:40,653] [0] Ed: 112000, train_loss: 1.24287, acc: 0.50089


INFO:root:[0] Ed: 115200, train_loss: 1.24269, acc: 0.50089


[INFO 2025-03-02 21:15:06,657] [0] Ed: 115200, train_loss: 1.24269, acc: 0.50089


INFO:root:[0] Ed: 118400, train_loss: 1.24169, acc: 0.50147


[INFO 2025-03-02 21:15:32,663] [0] Ed: 118400, train_loss: 1.24169, acc: 0.50147


INFO:root:[0] Ed: 121600, train_loss: 1.24145, acc: 0.50157


[INFO 2025-03-02 21:15:58,588] [0] Ed: 121600, train_loss: 1.24145, acc: 0.50157


INFO:root:[0] Ed: 124800, train_loss: 1.24137, acc: 0.50149


[INFO 2025-03-02 21:16:24,536] [0] Ed: 124800, train_loss: 1.24137, acc: 0.50149


INFO:root:[0] Ed: 128000, train_loss: 1.24178, acc: 0.50126


[INFO 2025-03-02 21:16:50,541] [0] Ed: 128000, train_loss: 1.24178, acc: 0.50126


INFO:root:[0] Ed: 131200, train_loss: 1.24150, acc: 0.50146


[INFO 2025-03-02 21:17:16,514] [0] Ed: 131200, train_loss: 1.24150, acc: 0.50146


INFO:root:[0] Ed: 134400, train_loss: 1.24145, acc: 0.50158


[INFO 2025-03-02 21:17:42,511] [0] Ed: 134400, train_loss: 1.24145, acc: 0.50158


INFO:root:[0] Ed: 137600, train_loss: 1.24153, acc: 0.50149


[INFO 2025-03-02 21:18:08,504] [0] Ed: 137600, train_loss: 1.24153, acc: 0.50149


INFO:root:[0] Ed: 140800, train_loss: 1.24098, acc: 0.50197


[INFO 2025-03-02 21:18:34,751] [0] Ed: 140800, train_loss: 1.24098, acc: 0.50197


INFO:root:[0] Ed: 144000, train_loss: 1.24073, acc: 0.50174


[INFO 2025-03-02 21:19:00,737] [0] Ed: 144000, train_loss: 1.24073, acc: 0.50174


INFO:root:[0] Ed: 147200, train_loss: 1.24035, acc: 0.50198


[INFO 2025-03-02 21:19:26,644] [0] Ed: 147200, train_loss: 1.24035, acc: 0.50198


INFO:root:[0] Ed: 150400, train_loss: 1.23949, acc: 0.50220


[INFO 2025-03-02 21:19:52,624] [0] Ed: 150400, train_loss: 1.23949, acc: 0.50220


INFO:root:[0] Ed: 153600, train_loss: 1.23895, acc: 0.50228


[INFO 2025-03-02 21:20:18,629] [0] Ed: 153600, train_loss: 1.23895, acc: 0.50228


INFO:root:[0] Ed: 156800, train_loss: 1.23850, acc: 0.50239


[INFO 2025-03-02 21:20:44,532] [0] Ed: 156800, train_loss: 1.23850, acc: 0.50239


INFO:root:[0] Ed: 160000, train_loss: 1.23839, acc: 0.50266


[INFO 2025-03-02 21:21:10,521] [0] Ed: 160000, train_loss: 1.23839, acc: 0.50266


INFO:root:[0] Ed: 163200, train_loss: 1.23808, acc: 0.50275


[INFO 2025-03-02 21:21:36,505] [0] Ed: 163200, train_loss: 1.23808, acc: 0.50275


INFO:root:[0] Ed: 166400, train_loss: 1.23815, acc: 0.50257


[INFO 2025-03-02 21:22:02,436] [0] Ed: 166400, train_loss: 1.23815, acc: 0.50257


INFO:root:[0] Ed: 169600, train_loss: 1.23835, acc: 0.50249


[INFO 2025-03-02 21:22:28,445] [0] Ed: 169600, train_loss: 1.23835, acc: 0.50249


INFO:root:[0] Ed: 172800, train_loss: 1.23797, acc: 0.50286


[INFO 2025-03-02 21:22:54,481] [0] Ed: 172800, train_loss: 1.23797, acc: 0.50286


INFO:root:[0] Ed: 176000, train_loss: 1.23781, acc: 0.50302


[INFO 2025-03-02 21:23:20,426] [0] Ed: 176000, train_loss: 1.23781, acc: 0.50302


INFO:root:[0] Ed: 179200, train_loss: 1.23771, acc: 0.50307


[INFO 2025-03-02 21:23:46,436] [0] Ed: 179200, train_loss: 1.23771, acc: 0.50307


INFO:root:[0] Ed: 182400, train_loss: 1.23767, acc: 0.50322


[INFO 2025-03-02 21:24:12,427] [0] Ed: 182400, train_loss: 1.23767, acc: 0.50322


INFO:root:[0] Ed: 185600, train_loss: 1.23734, acc: 0.50337


[INFO 2025-03-02 21:24:38,367] [0] Ed: 185600, train_loss: 1.23734, acc: 0.50337


INFO:root:[0] Ed: 188800, train_loss: 1.23710, acc: 0.50355


[INFO 2025-03-02 21:25:04,345] [0] Ed: 188800, train_loss: 1.23710, acc: 0.50355


INFO:root:[0] Ed: 192000, train_loss: 1.23714, acc: 0.50354


[INFO 2025-03-02 21:25:30,346] [0] Ed: 192000, train_loss: 1.23714, acc: 0.50354


INFO:root:[0] Ed: 195200, train_loss: 1.23708, acc: 0.50362


[INFO 2025-03-02 21:25:56,356] [0] Ed: 195200, train_loss: 1.23708, acc: 0.50362


INFO:root:[0] Ed: 198400, train_loss: 1.23719, acc: 0.50357


[INFO 2025-03-02 21:26:22,366] [0] Ed: 198400, train_loss: 1.23719, acc: 0.50357


INFO:root:[0] Ed: 201600, train_loss: 1.23705, acc: 0.50364


[INFO 2025-03-02 21:26:48,271] [0] Ed: 201600, train_loss: 1.23705, acc: 0.50364


INFO:root:[0] Ed: 204800, train_loss: 1.23725, acc: 0.50346


[INFO 2025-03-02 21:27:14,230] [0] Ed: 204800, train_loss: 1.23725, acc: 0.50346


INFO:root:[0] Ed: 208000, train_loss: 1.23744, acc: 0.50341


[INFO 2025-03-02 21:27:40,215] [0] Ed: 208000, train_loss: 1.23744, acc: 0.50341


INFO:root:[0] Ed: 211200, train_loss: 1.23747, acc: 0.50340


[INFO 2025-03-02 21:28:06,158] [0] Ed: 211200, train_loss: 1.23747, acc: 0.50340


INFO:root:[0] Ed: 214400, train_loss: 1.23742, acc: 0.50345


[INFO 2025-03-02 21:28:32,356] [0] Ed: 214400, train_loss: 1.23742, acc: 0.50345


INFO:root:[0] Ed: 217600, train_loss: 1.23728, acc: 0.50339


[INFO 2025-03-02 21:28:58,314] [0] Ed: 217600, train_loss: 1.23728, acc: 0.50339


INFO:root:[0] Ed: 220800, train_loss: 1.23705, acc: 0.50347


[INFO 2025-03-02 21:29:24,236] [0] Ed: 220800, train_loss: 1.23705, acc: 0.50347


INFO:root:[0] Ed: 224000, train_loss: 1.23672, acc: 0.50364


[INFO 2025-03-02 21:29:50,212] [0] Ed: 224000, train_loss: 1.23672, acc: 0.50364


INFO:root:[0] Ed: 227200, train_loss: 1.23642, acc: 0.50377


[INFO 2025-03-02 21:30:16,154] [0] Ed: 227200, train_loss: 1.23642, acc: 0.50377


INFO:root:[0] Ed: 230400, train_loss: 1.23617, acc: 0.50382


[INFO 2025-03-02 21:30:42,137] [0] Ed: 230400, train_loss: 1.23617, acc: 0.50382


INFO:root:[0] Ed: 233600, train_loss: 1.23579, acc: 0.50413


[INFO 2025-03-02 21:31:08,156] [0] Ed: 233600, train_loss: 1.23579, acc: 0.50413


INFO:root:Training finish.


[INFO 2025-03-02 21:31:30,161] Training finish.


INFO:root:Model saved to /content/model/epoch-5.pt.


[INFO 2025-03-02 21:31:30,207] Model saved to /content/model/epoch-5.pt.


In [None]:
# Fetch the final checkpoint produced by the training run; the training log
# reports "Model saved to /content/model/epoch-5.pt".
# NOTE(review): `from google.colab import files` is the only import the call
# below needs; the bare `import google.colab` additionally binds the name
# `google` in the notebook namespace — confirm no other cell relies on it
# before removing it.
import google.colab
from google.colab import files
# The checkpoint path is hard-coded to the epoch-5 file, so it has to be edited
# by hand if the number of epochs or the checkpoint directory changes.
files.download('/content/model/epoch-5.pt')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
def test(rank, args):
    """Evaluate a saved NRMS checkpoint on one shard of the test behaviors.

    Steps performed:
      1. Load the checkpoint named by ``args.load_ckpt_name`` and rebuild the
         model around a zero embedding matrix sized from the checkpoint's
         ``word_dict`` (the real weights come from the state dict).
      2. Encode every news item with ``model.news_encoder``.
      3. On rank 0, log the mean cosine similarity of randomly sampled pairs
         of news vectors ("doc-sim").
      4. Encode each user's click history with ``model.user_encoder``, score
         the candidates by dot product, and log AUC / MRR / nDCG@5 / nDCG@10.

    Args:
        rank: Index of the GPU / data shard; selects ``behaviors_{rank}.tsv``
            under ``args.test_data_dir``.
        args: Run configuration. Fields read here: ``enable_gpu``,
            ``load_ckpt_name``, ``model_dir``, ``word_embedding_dim``,
            ``train_data_dir``, ``test_abstract_dir``, ``test_data_dir``,
            ``batch_size`` and ``log_steps``.

    Raises:
        AssertionError: If no checkpoint path could be resolved.
    """
    is_distributed = False

    # Only touch CUDA when the run is configured for it.
    if args.enable_gpu:
        torch.cuda.set_device(rank)

    # Initialise so a missing checkpoint name reaches the assert below
    # instead of failing with a NameError.
    ckpt_path = None
    if args.load_ckpt_name is not None:
        ckpt_path = get_checkpoint(args.model_dir, args.load_ckpt_name)

    assert ckpt_path is not None, 'No checkpoint found.'
    checkpoint = torch.load(ckpt_path, map_location='cpu')

    subcategory_dict = checkpoint['subcategory_dict']
    category_dict = checkpoint['category_dict']
    word_dict = checkpoint['word_dict']

    # Placeholder weights: only the shape matters, load_state_dict overwrites them.
    dummy_embedding_matrix = np.zeros((len(word_dict) + 1, args.word_embedding_dim))
    model = NRMS(args, dummy_embedding_matrix)
    model.load_state_dict(checkpoint['model_state_dict'])
    logging.info(f"Model loaded from {ckpt_path}")

    if args.enable_gpu:
        model.cuda(rank)

    model.eval()
    # Autograd is disabled with scoped `torch.no_grad()` blocks below rather
    # than a global `torch.set_grad_enabled(False)`, which would stay in
    # effect after this function returns and break any later training.

    # Keep the dictionaries from the checkpoint: the embedding matrix above is
    # sized from them, so ids produced by freshly rebuilt dictionaries could
    # disagree with the trained embedding rows.
    # NOTE(review): news is read from `train_data_dir` with mode='train' even
    # though this is the test path — confirm this is intended and that
    # read_news supports the test split.
    news, news_index, _, _, _ = read_news(
        os.path.join(args.train_data_dir, 'news.tsv'), args.test_abstract_dir, args, mode='train')
    news_title, news_category, news_subcategory, news_abstract = get_doc_input(
        news, news_index, category_dict, subcategory_dict, word_dict, args)
    # Disabled features come back as None and are simply left out.
    news_combined = np.concatenate(
        [x for x in [news_title, news_category, news_subcategory, news_abstract] if x is not None],
        axis=-1)

    news_dataset = NewsDataset(news_combined)
    news_dataloader = DataLoader(news_dataset,
                                 batch_size=args.batch_size,
                                 num_workers=4)

    news_scoring = []
    with torch.no_grad():
        for input_ids in tqdm(news_dataloader):
            if args.enable_gpu:
                input_ids = input_ids.cuda(rank)
            news_vec = model.news_encoder(input_ids)
            news_vec = news_vec.to(torch.device("cpu")).detach().numpy()
            news_scoring.extend(news_vec)

    news_scoring = np.array(news_scoring)
    logging.info("news scoring num: {}".format(news_scoring.shape[0]))

    if rank == 0:
        num_trials = 1000000
        num_news = len(news_scoring)
        # Index 0 is never sampled, so at least two other rows are needed to
        # form a pair of distinct items.
        if num_news > 2:
            # Norms are loop-invariant; compute them once.
            norms = np.linalg.norm(news_scoring, axis=1)
            doc_sim = 0.0
            valid_pairs = 0
            for _ in tqdm(range(num_trials)):
                i = random.randrange(1, num_news)
                j = random.randrange(1, num_news)
                if i != j:
                    doc_sim += np.dot(news_scoring[i], news_scoring[j]) / (norms[i] * norms[j])
                    valid_pairs += 1
            # Average over the pairs that actually contributed, not over all
            # trials (draws with i == j are skipped).
            logging.info(f'News doc-sim: {doc_sim / max(valid_pairs, 1)}')
        else:
            logging.info('News doc-sim: skipped (fewer than two news vectors to compare)')

    data_file_path = os.path.join(args.test_data_dir, f'behaviors_{rank}.tsv')

    def collate_fn(tuple_list):
        # Each dataset item is indexed as (log_vec, <unused>, news_vec, label).
        # Candidates and labels are kept as per-sample lists, not stacked.
        log_vecs = torch.FloatTensor([x[0] for x in tuple_list])
        # log_mask = torch.FloatTensor([x[1] for x in tuple_list])
        news_vecs = [x[2] for x in tuple_list]
        labels = [x[3] for x in tuple_list]
        return (log_vecs, news_vecs, labels)

    dataset = DatasetTest(data_file_path, news_index, news_scoring, args)
    dataloader = DataLoader(dataset, batch_size=args.batch_size, collate_fn=collate_fn)

    AUC = []
    MRR = []
    nDCG5 = []
    nDCG10 = []

    def print_metrics(rank, cnt, x):
        # Metrics are logged as percentages with two decimals, tab separated.
        logging.info("[{}] {} samples: {}".format(rank, cnt, '\t'.join(["{:0.2f}".format(i * 100) for i in x])))

    def get_mean(arr):
        return [np.array(i).mean() for i in arr]

    def get_sum(arr):
        return [np.array(i).sum() for i in arr]

    local_sample_num = 0

    for cnt, (log_vecs, news_vecs, labels) in enumerate(dataloader):
        local_sample_num += log_vecs.shape[0]

        if args.enable_gpu:
            log_vecs = log_vecs.cuda(rank, non_blocking=True)

        with torch.no_grad():
            user_vecs = model.user_encoder(log_vecs).to(torch.device("cpu")).detach().numpy()

        for user_vec, news_vec, label in zip(user_vecs, news_vecs, labels):
            # Skip impressions whose labels are all 0 or all 1.
            label_mean = label.mean()
            if label_mean == 0 or label_mean == 1:
                continue

            score = np.dot(news_vec, user_vec)

            auc = roc_auc_score(label, score)
            mrr = mrr_score(label, score)
            ndcg5 = ndcg_score(label, score, k=5)
            ndcg10 = ndcg_score(label, score, k=10)

            AUC.append(auc)
            MRR.append(mrr)
            nDCG5.append(ndcg5)
            nDCG10.append(ndcg10)

        if cnt % args.log_steps == 0:
            print_metrics(rank, local_sample_num, get_mean([AUC, MRR, nDCG5, nDCG10]))

    logging.info('[{}] local_sample_num: {}'.format(rank, local_sample_num))
    if is_distributed:
        # Not reached while is_distributed is hard-coded to False above.
        local_sample_num = torch.tensor(local_sample_num).cuda(rank)
        dist.reduce(local_sample_num, dst=0, op=dist.ReduceOp.SUM)
        local_metrics_sum = torch.FloatTensor(get_sum([AUC, MRR, nDCG5, nDCG10])).cuda(rank)
        dist.reduce(local_metrics_sum, dst=0, op=dist.ReduceOp.SUM)
        if rank == 0:
            print_metrics('*', local_sample_num, local_metrics_sum / local_sample_num)
    else:
        print_metrics('*', local_sample_num, get_mean([AUC, MRR, nDCG5, nDCG10]))


In [None]:
# Switch the shared run configuration over to evaluation.
args.mode = 'test'
args.user_log_mask = True
args.batch_size = 128
args.load_ckpt_name = 'epoch-5.pt'
args.prepare = True

if 'test' in args.mode:
    if args.prepare:
        logging.info('Preparing testing data...')
        total_sample_num = prepare_testing_data(args.test_data_dir, args.nGPU)
    else:
        # Reuse shards written by an earlier run: one behaviors_{i}.tsv per GPU.
        total_sample_num = 0
        for i in range(args.nGPU):
            data_file_path = os.path.join(args.test_data_dir, f'behaviors_{i}.tsv')
            if not os.path.exists(data_file_path):
                message = (f'Splited testing data {data_file_path} for GPU {i} does not exist. '
                           'Please set the parameter --prepare as True and rerun the code.')
                logging.error(message)
                # Raise instead of calling exit(), which would shut down the
                # notebook kernel.
                raise FileNotFoundError(message)
            # Count lines in Python: the data directory may contain spaces,
            # which breaks an unquoted `wc -l <path>` shell command.
            with open(data_file_path, 'rb') as shard:
                total_sample_num += sum(1 for _ in shard)
        logging.info('Skip testing data preparation.')
    logging.info(f'{total_sample_num} testing samples in total.')



INFO:root:Preparing testing data...


[INFO 2025-02-18 14:25:36,690] Preparing testing data...
[INFO 2025-02-18 14:25:36,690] Preparing testing data...


73152it [00:00, 1011058.00it/s]
INFO:root:Writing files...


[INFO 2025-02-18 14:25:36,777] Writing files...
[INFO 2025-02-18 14:25:36,777] Writing files...


INFO:root:73152 testing samples in total.


[INFO 2025-02-18 14:25:36,971] 73152 testing samples in total.
[INFO 2025-02-18 14:25:36,971] 73152 testing samples in total.


# Test


In [None]:
# Evaluate on shard 0 using the configuration set in the cell above.
test(rank=0, args=args)

  checkpoint = torch.load(ckpt_path, map_location='cpu')
INFO:root:Model loaded from /content/model/epoch-5.pt


[INFO 2025-02-18 14:34:16,715] Model loaded from /content/model/epoch-5.pt
[INFO 2025-02-18 14:34:16,715] Model loaded from /content/model/epoch-5.pt


51282it [00:04, 12674.23it/s]
100%|██████████| 51282/51282 [00:00<00:00, 172832.50it/s]
100%|██████████| 401/401 [00:02<00:00, 192.52it/s]
INFO:root:news scoring num: 51283


[INFO 2025-02-18 14:34:24,059] news scoring num: 51283
[INFO 2025-02-18 14:34:24,059] news scoring num: 51283


100%|██████████| 1000000/1000000 [00:10<00:00, 98701.88it/s]
INFO:root:News doc-sim: 0.09012434340446164


[INFO 2025-02-18 14:34:34,199] News doc-sim: 0.09012434340446164
[INFO 2025-02-18 14:34:34,199] News doc-sim: 0.09012434340446164


INFO:root:[0] 128 samples: 64.78	30.16	33.72	39.86


[INFO 2025-02-18 14:34:34,786] [0] 128 samples: 64.78	30.16	33.72	39.86
[INFO 2025-02-18 14:34:34,786] [0] 128 samples: 64.78	30.16	33.72	39.86


INFO:root:[0] 12928 samples: 64.21	30.83	33.66	39.80


[INFO 2025-02-18 14:35:37,252] [0] 12928 samples: 64.21	30.83	33.66	39.80
[INFO 2025-02-18 14:35:37,252] [0] 12928 samples: 64.21	30.83	33.66	39.80


INFO:root:[0] 25728 samples: 64.33	31.06	33.89	39.92


[INFO 2025-02-18 14:36:39,724] [0] 25728 samples: 64.33	31.06	33.89	39.92
[INFO 2025-02-18 14:36:39,724] [0] 25728 samples: 64.33	31.06	33.89	39.92


INFO:root:[0] 38528 samples: 64.31	30.89	33.74	39.79


[INFO 2025-02-18 14:37:42,161] [0] 38528 samples: 64.31	30.89	33.74	39.79
[INFO 2025-02-18 14:37:42,161] [0] 38528 samples: 64.31	30.89	33.74	39.79


INFO:root:[0] 51328 samples: 64.23	30.69	33.50	39.60


[INFO 2025-02-18 14:38:44,514] [0] 51328 samples: 64.23	30.69	33.50	39.60
[INFO 2025-02-18 14:38:44,514] [0] 51328 samples: 64.23	30.69	33.50	39.60


INFO:root:[0] 64128 samples: 64.22	30.71	33.52	39.62


[INFO 2025-02-18 14:39:46,838] [0] 64128 samples: 64.22	30.71	33.52	39.62
[INFO 2025-02-18 14:39:46,838] [0] 64128 samples: 64.22	30.71	33.52	39.62


INFO:root:[0] local_sample_num: 73152


[INFO 2025-02-18 14:40:31,017] [0] local_sample_num: 73152
[INFO 2025-02-18 14:40:31,017] [0] local_sample_num: 73152


INFO:root:[*] 73152 samples: 64.22	30.76	33.60	39.67


[INFO 2025-02-18 14:40:31,050] [*] 73152 samples: 64.22	30.76	33.60	39.67
[INFO 2025-02-18 14:40:31,050] [*] 73152 samples: 64.22	30.76	33.60	39.67


# Original version: title only


In [None]:
import torch.optim as optim


def _save_training_checkpoint(model, ckpt_path, is_distributed,
                              category_dict, subcategory_dict, word_dict):
    """Write the model weights and the vocabularies to ``ckpt_path``.

    When ``is_distributed`` is true the first dotted component of every
    state-dict key is dropped (the prefix added by the
    DistributedDataParallel wrapper), so the file loads into a bare model.
    """
    state_dict = model.state_dict()
    if is_distributed:
        state_dict = {'.'.join(k.split('.')[1:]): v for k, v in state_dict.items()}
    torch.save(
        {
            'model_state_dict': state_dict,
            'category_dict': category_dict,
            'subcategory_dict': subcategory_dict,
            'word_dict': word_dict,
        }, ckpt_path)
    logging.info(f"Model saved to {ckpt_path}.")


# Single-GPU notebook run: define the device/shard index here instead of
# relying on a `rank` left over from another cell.
rank = 0
is_distributed = False

# The evaluation function defined earlier in this notebook switches autograd
# off globally; make sure it is on again before training.
torch.set_grad_enabled(True)

# Title-only baseline: the news representation is just the title token ids.
news_combined = np.concatenate([x for x in [news_title] if x is not None], axis=-1)

model = NRMS(args, embedding_matrix)
if args.enable_gpu:
    model = model.cuda(rank)

if args.load_ckpt_name is not None:
    ckpt_path = get_checkpoint(args.model_dir, args.load_ckpt_name)
    checkpoint = torch.load(ckpt_path, map_location='cpu')
    model.load_state_dict(checkpoint['model_state_dict'])
    logging.info(f"Model loaded from {ckpt_path}.")

optimizer = optim.Adam(model.parameters(), lr=args.lr)

if is_distributed:
    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[rank])

data_file_path = os.path.join(args.train_data_dir, f'behaviors_np{args.npratio}_{rank}.tsv')

dataset = DatasetTrain(data_file_path, news_index, news_combined, args)
dataloader = DataLoader(dataset, batch_size=args.batch_size)

# Make sure the checkpoint directory exists before the first save.
os.makedirs(args.model_dir, exist_ok=True)

logging.info('Training...')
for ep in range(args.start_epoch, args.epochs):
    # Running sums over the batches of the current epoch.
    loss = 0.0
    accuary = 0.0
    for cnt, (log_ids, input_ids, targets) in enumerate(dataloader):
        if args.enable_gpu:
            log_ids = log_ids.cuda(rank, non_blocking=True)
            # log_mask = log_mask.cuda(rank, non_blocking=True)
            input_ids = input_ids.cuda(rank, non_blocking=True)
            targets = targets.cuda(rank, non_blocking=True)

        bz_loss, y_hat = model(log_ids, input_ids, targets)
        # Detach so the running sum does not keep the autograd graph alive.
        loss += bz_loss.detach().float()
        accuary += acc(targets, y_hat)
        optimizer.zero_grad()
        bz_loss.backward()
        optimizer.step()

        if cnt % args.log_steps == 0:
            # cnt + 1 batches have been accumulated so far; dividing by cnt
            # yields inf at step 0 and over-estimates afterwards.
            seen_batches = cnt + 1
            logging.info(
                '[{}] Ed: {}, train_loss: {:.5f}, acc: {:.5f}'.format(
                    rank, cnt * args.batch_size, loss / seen_batches, accuary / seen_batches)
            )

        # Periodic mid-epoch checkpoint.
        if rank == 0 and cnt != 0 and cnt % args.save_steps == 0:
            ckpt_path = os.path.join(args.model_dir, f'epoch-{ep+1}-{cnt}.pt')
            _save_training_checkpoint(model, ckpt_path, is_distributed,
                                      category_dict, subcategory_dict, word_dict)

    # End-of-epoch checkpoint.
    if rank == 0:
        ckpt_path = os.path.join(args.model_dir, f'epoch-{ep+1}.pt')
        _save_training_checkpoint(model, ckpt_path, is_distributed,
                                  category_dict, subcategory_dict, word_dict)

# Logged once, after the last epoch, rather than after every epoch.
logging.info('Training finish.')



INFO:root:Training...


[INFO 2025-02-19 16:07:13,177] Training...
[INFO 2025-02-19 16:07:13,177] Training...


INFO:root:[0] Ed: 0, train_loss: inf, acc: inf


[INFO 2025-02-19 16:07:13,353] [0] Ed: 0, train_loss: inf, acc: inf
[INFO 2025-02-19 16:07:13,353] [0] Ed: 0, train_loss: inf, acc: inf


INFO:root:[0] Ed: 3200, train_loss: 1.59732, acc: 0.28375


[INFO 2025-02-19 16:07:29,700] [0] Ed: 3200, train_loss: 1.59732, acc: 0.28375
[INFO 2025-02-19 16:07:29,700] [0] Ed: 3200, train_loss: 1.59732, acc: 0.28375


INFO:root:[0] Ed: 6400, train_loss: 1.56525, acc: 0.30484


[INFO 2025-02-19 16:07:45,056] [0] Ed: 6400, train_loss: 1.56525, acc: 0.30484
[INFO 2025-02-19 16:07:45,056] [0] Ed: 6400, train_loss: 1.56525, acc: 0.30484


INFO:root:[0] Ed: 9600, train_loss: 1.55043, acc: 0.31854


[INFO 2025-02-19 16:08:00,283] [0] Ed: 9600, train_loss: 1.55043, acc: 0.31854
[INFO 2025-02-19 16:08:00,283] [0] Ed: 9600, train_loss: 1.55043, acc: 0.31854


INFO:root:[0] Ed: 12800, train_loss: 1.53782, acc: 0.32656


[INFO 2025-02-19 16:08:15,429] [0] Ed: 12800, train_loss: 1.53782, acc: 0.32656
[INFO 2025-02-19 16:08:15,429] [0] Ed: 12800, train_loss: 1.53782, acc: 0.32656


INFO:root:[0] Ed: 16000, train_loss: 1.52480, acc: 0.33425


[INFO 2025-02-19 16:08:30,639] [0] Ed: 16000, train_loss: 1.52480, acc: 0.33425
[INFO 2025-02-19 16:08:30,639] [0] Ed: 16000, train_loss: 1.52480, acc: 0.33425


INFO:root:[0] Ed: 19200, train_loss: 1.51392, acc: 0.34422


[INFO 2025-02-19 16:08:46,170] [0] Ed: 19200, train_loss: 1.51392, acc: 0.34422
[INFO 2025-02-19 16:08:46,170] [0] Ed: 19200, train_loss: 1.51392, acc: 0.34422


INFO:root:[0] Ed: 22400, train_loss: 1.50393, acc: 0.35290


[INFO 2025-02-19 16:09:01,554] [0] Ed: 22400, train_loss: 1.50393, acc: 0.35290
[INFO 2025-02-19 16:09:01,554] [0] Ed: 22400, train_loss: 1.50393, acc: 0.35290


INFO:root:[0] Ed: 25600, train_loss: 1.49807, acc: 0.35703


[INFO 2025-02-19 16:09:16,497] [0] Ed: 25600, train_loss: 1.49807, acc: 0.35703
[INFO 2025-02-19 16:09:16,497] [0] Ed: 25600, train_loss: 1.49807, acc: 0.35703


INFO:root:[0] Ed: 28800, train_loss: 1.48914, acc: 0.36240


[INFO 2025-02-19 16:09:31,671] [0] Ed: 28800, train_loss: 1.48914, acc: 0.36240
[INFO 2025-02-19 16:09:31,671] [0] Ed: 28800, train_loss: 1.48914, acc: 0.36240


INFO:root:[0] Ed: 32000, train_loss: 1.48337, acc: 0.36650


[INFO 2025-02-19 16:09:46,915] [0] Ed: 32000, train_loss: 1.48337, acc: 0.36650
[INFO 2025-02-19 16:09:46,915] [0] Ed: 32000, train_loss: 1.48337, acc: 0.36650


INFO:root:[0] Ed: 35200, train_loss: 1.47883, acc: 0.36957


[INFO 2025-02-19 16:10:02,447] [0] Ed: 35200, train_loss: 1.47883, acc: 0.36957
[INFO 2025-02-19 16:10:02,447] [0] Ed: 35200, train_loss: 1.47883, acc: 0.36957


INFO:root:[0] Ed: 38400, train_loss: 1.47340, acc: 0.37232


[INFO 2025-02-19 16:10:18,241] [0] Ed: 38400, train_loss: 1.47340, acc: 0.37232
[INFO 2025-02-19 16:10:18,241] [0] Ed: 38400, train_loss: 1.47340, acc: 0.37232


INFO:root:[0] Ed: 41600, train_loss: 1.46793, acc: 0.37644


[INFO 2025-02-19 16:10:33,612] [0] Ed: 41600, train_loss: 1.46793, acc: 0.37644
[INFO 2025-02-19 16:10:33,612] [0] Ed: 41600, train_loss: 1.46793, acc: 0.37644


INFO:root:[0] Ed: 44800, train_loss: 1.46322, acc: 0.37951


[INFO 2025-02-19 16:10:48,869] [0] Ed: 44800, train_loss: 1.46322, acc: 0.37951
[INFO 2025-02-19 16:10:48,869] [0] Ed: 44800, train_loss: 1.46322, acc: 0.37951


INFO:root:[0] Ed: 48000, train_loss: 1.46011, acc: 0.38142


[INFO 2025-02-19 16:11:03,948] [0] Ed: 48000, train_loss: 1.46011, acc: 0.38142
[INFO 2025-02-19 16:11:03,948] [0] Ed: 48000, train_loss: 1.46011, acc: 0.38142


INFO:root:[0] Ed: 51200, train_loss: 1.45632, acc: 0.38297


[INFO 2025-02-19 16:11:19,793] [0] Ed: 51200, train_loss: 1.45632, acc: 0.38297
[INFO 2025-02-19 16:11:19,793] [0] Ed: 51200, train_loss: 1.45632, acc: 0.38297


INFO:root:[0] Ed: 54400, train_loss: 1.45307, acc: 0.38450


[INFO 2025-02-19 16:11:34,733] [0] Ed: 54400, train_loss: 1.45307, acc: 0.38450
[INFO 2025-02-19 16:11:34,733] [0] Ed: 54400, train_loss: 1.45307, acc: 0.38450


INFO:root:[0] Ed: 57600, train_loss: 1.44957, acc: 0.38682


[INFO 2025-02-19 16:11:49,849] [0] Ed: 57600, train_loss: 1.44957, acc: 0.38682
[INFO 2025-02-19 16:11:49,849] [0] Ed: 57600, train_loss: 1.44957, acc: 0.38682


INFO:root:[0] Ed: 60800, train_loss: 1.44668, acc: 0.38877


[INFO 2025-02-19 16:12:04,997] [0] Ed: 60800, train_loss: 1.44668, acc: 0.38877
[INFO 2025-02-19 16:12:04,997] [0] Ed: 60800, train_loss: 1.44668, acc: 0.38877


INFO:root:[0] Ed: 64000, train_loss: 1.44335, acc: 0.39083


[INFO 2025-02-19 16:12:19,990] [0] Ed: 64000, train_loss: 1.44335, acc: 0.39083
[INFO 2025-02-19 16:12:19,990] [0] Ed: 64000, train_loss: 1.44335, acc: 0.39083


INFO:root:[0] Ed: 67200, train_loss: 1.44048, acc: 0.39310


[INFO 2025-02-19 16:12:35,793] [0] Ed: 67200, train_loss: 1.44048, acc: 0.39310
[INFO 2025-02-19 16:12:35,793] [0] Ed: 67200, train_loss: 1.44048, acc: 0.39310


INFO:root:[0] Ed: 70400, train_loss: 1.43761, acc: 0.39466


[INFO 2025-02-19 16:12:50,842] [0] Ed: 70400, train_loss: 1.43761, acc: 0.39466
[INFO 2025-02-19 16:12:50,842] [0] Ed: 70400, train_loss: 1.43761, acc: 0.39466


INFO:root:[0] Ed: 73600, train_loss: 1.43565, acc: 0.39601


[INFO 2025-02-19 16:13:05,779] [0] Ed: 73600, train_loss: 1.43565, acc: 0.39601
[INFO 2025-02-19 16:13:05,779] [0] Ed: 73600, train_loss: 1.43565, acc: 0.39601


INFO:root:[0] Ed: 76800, train_loss: 1.43299, acc: 0.39745


[INFO 2025-02-19 16:13:20,773] [0] Ed: 76800, train_loss: 1.43299, acc: 0.39745
[INFO 2025-02-19 16:13:20,773] [0] Ed: 76800, train_loss: 1.43299, acc: 0.39745


INFO:root:[0] Ed: 80000, train_loss: 1.43119, acc: 0.39849


[INFO 2025-02-19 16:13:35,691] [0] Ed: 80000, train_loss: 1.43119, acc: 0.39849
[INFO 2025-02-19 16:13:35,691] [0] Ed: 80000, train_loss: 1.43119, acc: 0.39849


INFO:root:[0] Ed: 83200, train_loss: 1.42873, acc: 0.40011


[INFO 2025-02-19 16:13:51,180] [0] Ed: 83200, train_loss: 1.42873, acc: 0.40011
[INFO 2025-02-19 16:13:51,180] [0] Ed: 83200, train_loss: 1.42873, acc: 0.40011


INFO:root:[0] Ed: 86400, train_loss: 1.42655, acc: 0.40128


[INFO 2025-02-19 16:14:06,454] [0] Ed: 86400, train_loss: 1.42655, acc: 0.40128
[INFO 2025-02-19 16:14:06,454] [0] Ed: 86400, train_loss: 1.42655, acc: 0.40128


INFO:root:[0] Ed: 89600, train_loss: 1.42401, acc: 0.40246


[INFO 2025-02-19 16:14:21,402] [0] Ed: 89600, train_loss: 1.42401, acc: 0.40246
[INFO 2025-02-19 16:14:21,402] [0] Ed: 89600, train_loss: 1.42401, acc: 0.40246


INFO:root:[0] Ed: 92800, train_loss: 1.42276, acc: 0.40324


[INFO 2025-02-19 16:14:36,354] [0] Ed: 92800, train_loss: 1.42276, acc: 0.40324
[INFO 2025-02-19 16:14:36,354] [0] Ed: 92800, train_loss: 1.42276, acc: 0.40324


INFO:root:[0] Ed: 96000, train_loss: 1.42100, acc: 0.40448


[INFO 2025-02-19 16:14:51,320] [0] Ed: 96000, train_loss: 1.42100, acc: 0.40448
[INFO 2025-02-19 16:14:51,320] [0] Ed: 96000, train_loss: 1.42100, acc: 0.40448


INFO:root:[0] Ed: 99200, train_loss: 1.41971, acc: 0.40479


[INFO 2025-02-19 16:15:06,522] [0] Ed: 99200, train_loss: 1.41971, acc: 0.40479
[INFO 2025-02-19 16:15:06,522] [0] Ed: 99200, train_loss: 1.41971, acc: 0.40479


INFO:root:[0] Ed: 102400, train_loss: 1.41728, acc: 0.40588


[INFO 2025-02-19 16:15:22,007] [0] Ed: 102400, train_loss: 1.41728, acc: 0.40588
[INFO 2025-02-19 16:15:22,007] [0] Ed: 102400, train_loss: 1.41728, acc: 0.40588


INFO:root:[0] Ed: 105600, train_loss: 1.41519, acc: 0.40714


[INFO 2025-02-19 16:15:36,914] [0] Ed: 105600, train_loss: 1.41519, acc: 0.40714
[INFO 2025-02-19 16:15:36,914] [0] Ed: 105600, train_loss: 1.41519, acc: 0.40714


INFO:root:[0] Ed: 108800, train_loss: 1.41406, acc: 0.40763


[INFO 2025-02-19 16:15:51,870] [0] Ed: 108800, train_loss: 1.41406, acc: 0.40763
[INFO 2025-02-19 16:15:51,870] [0] Ed: 108800, train_loss: 1.41406, acc: 0.40763


INFO:root:[0] Ed: 112000, train_loss: 1.41269, acc: 0.40848


[INFO 2025-02-19 16:16:06,774] [0] Ed: 112000, train_loss: 1.41269, acc: 0.40848
[INFO 2025-02-19 16:16:06,774] [0] Ed: 112000, train_loss: 1.41269, acc: 0.40848


INFO:root:[0] Ed: 115200, train_loss: 1.41144, acc: 0.40910


[INFO 2025-02-19 16:16:21,851] [0] Ed: 115200, train_loss: 1.41144, acc: 0.40910
[INFO 2025-02-19 16:16:21,851] [0] Ed: 115200, train_loss: 1.41144, acc: 0.40910


INFO:root:[0] Ed: 118400, train_loss: 1.40970, acc: 0.40985


[INFO 2025-02-19 16:16:37,475] [0] Ed: 118400, train_loss: 1.40970, acc: 0.40985
[INFO 2025-02-19 16:16:37,475] [0] Ed: 118400, train_loss: 1.40970, acc: 0.40985


INFO:root:[0] Ed: 121600, train_loss: 1.40874, acc: 0.41052


[INFO 2025-02-19 16:16:52,415] [0] Ed: 121600, train_loss: 1.40874, acc: 0.41052
[INFO 2025-02-19 16:16:52,415] [0] Ed: 121600, train_loss: 1.40874, acc: 0.41052


INFO:root:[0] Ed: 124800, train_loss: 1.40738, acc: 0.41122


[INFO 2025-02-19 16:17:07,329] [0] Ed: 124800, train_loss: 1.40738, acc: 0.41122
[INFO 2025-02-19 16:17:07,329] [0] Ed: 124800, train_loss: 1.40738, acc: 0.41122


INFO:root:[0] Ed: 128000, train_loss: 1.40671, acc: 0.41168


[INFO 2025-02-19 16:17:22,325] [0] Ed: 128000, train_loss: 1.40671, acc: 0.41168
[INFO 2025-02-19 16:17:22,325] [0] Ed: 128000, train_loss: 1.40671, acc: 0.41168


INFO:root:[0] Ed: 131200, train_loss: 1.40541, acc: 0.41244


[INFO 2025-02-19 16:17:37,201] [0] Ed: 131200, train_loss: 1.40541, acc: 0.41244
[INFO 2025-02-19 16:17:37,201] [0] Ed: 131200, train_loss: 1.40541, acc: 0.41244


INFO:root:[0] Ed: 134400, train_loss: 1.40436, acc: 0.41335


[INFO 2025-02-19 16:17:53,190] [0] Ed: 134400, train_loss: 1.40436, acc: 0.41335
[INFO 2025-02-19 16:17:53,190] [0] Ed: 134400, train_loss: 1.40436, acc: 0.41335


INFO:root:[0] Ed: 137600, train_loss: 1.40357, acc: 0.41416


[INFO 2025-02-19 16:18:08,083] [0] Ed: 137600, train_loss: 1.40357, acc: 0.41416
[INFO 2025-02-19 16:18:08,083] [0] Ed: 137600, train_loss: 1.40357, acc: 0.41416


INFO:root:[0] Ed: 140800, train_loss: 1.40219, acc: 0.41531


[INFO 2025-02-19 16:18:22,957] [0] Ed: 140800, train_loss: 1.40219, acc: 0.41531
[INFO 2025-02-19 16:18:22,957] [0] Ed: 140800, train_loss: 1.40219, acc: 0.41531


INFO:root:[0] Ed: 144000, train_loss: 1.40119, acc: 0.41581


[INFO 2025-02-19 16:18:37,852] [0] Ed: 144000, train_loss: 1.40119, acc: 0.41581
[INFO 2025-02-19 16:18:37,852] [0] Ed: 144000, train_loss: 1.40119, acc: 0.41581


INFO:root:[0] Ed: 147200, train_loss: 1.40004, acc: 0.41674


[INFO 2025-02-19 16:18:52,801] [0] Ed: 147200, train_loss: 1.40004, acc: 0.41674
[INFO 2025-02-19 16:18:52,801] [0] Ed: 147200, train_loss: 1.40004, acc: 0.41674


INFO:root:[0] Ed: 150400, train_loss: 1.39881, acc: 0.41746


[INFO 2025-02-19 16:19:08,326] [0] Ed: 150400, train_loss: 1.39881, acc: 0.41746
[INFO 2025-02-19 16:19:08,326] [0] Ed: 150400, train_loss: 1.39881, acc: 0.41746


INFO:root:[0] Ed: 153600, train_loss: 1.39755, acc: 0.41818


[INFO 2025-02-19 16:19:23,651] [0] Ed: 153600, train_loss: 1.39755, acc: 0.41818
[INFO 2025-02-19 16:19:23,651] [0] Ed: 153600, train_loss: 1.39755, acc: 0.41818


INFO:root:[0] Ed: 156800, train_loss: 1.39660, acc: 0.41883


[INFO 2025-02-19 16:19:38,627] [0] Ed: 156800, train_loss: 1.39660, acc: 0.41883
[INFO 2025-02-19 16:19:38,627] [0] Ed: 156800, train_loss: 1.39660, acc: 0.41883


INFO:root:[0] Ed: 160000, train_loss: 1.39550, acc: 0.41951


[INFO 2025-02-19 16:19:53,681] [0] Ed: 160000, train_loss: 1.39550, acc: 0.41951
[INFO 2025-02-19 16:19:53,681] [0] Ed: 160000, train_loss: 1.39550, acc: 0.41951


INFO:root:[0] Ed: 163200, train_loss: 1.39451, acc: 0.42006


[INFO 2025-02-19 16:20:08,654] [0] Ed: 163200, train_loss: 1.39451, acc: 0.42006
[INFO 2025-02-19 16:20:08,654] [0] Ed: 163200, train_loss: 1.39451, acc: 0.42006


INFO:root:[0] Ed: 166400, train_loss: 1.39372, acc: 0.42049


[INFO 2025-02-19 16:20:23,999] [0] Ed: 166400, train_loss: 1.39372, acc: 0.42049
[INFO 2025-02-19 16:20:23,999] [0] Ed: 166400, train_loss: 1.39372, acc: 0.42049


INFO:root:[0] Ed: 169600, train_loss: 1.39275, acc: 0.42117


[INFO 2025-02-19 16:20:39,326] [0] Ed: 169600, train_loss: 1.39275, acc: 0.42117
[INFO 2025-02-19 16:20:39,326] [0] Ed: 169600, train_loss: 1.39275, acc: 0.42117


INFO:root:[0] Ed: 172800, train_loss: 1.39170, acc: 0.42193


[INFO 2025-02-19 16:20:54,298] [0] Ed: 172800, train_loss: 1.39170, acc: 0.42193
[INFO 2025-02-19 16:20:54,298] [0] Ed: 172800, train_loss: 1.39170, acc: 0.42193


INFO:root:[0] Ed: 176000, train_loss: 1.39080, acc: 0.42258


[INFO 2025-02-19 16:21:09,227] [0] Ed: 176000, train_loss: 1.39080, acc: 0.42258
[INFO 2025-02-19 16:21:09,227] [0] Ed: 176000, train_loss: 1.39080, acc: 0.42258


INFO:root:[0] Ed: 179200, train_loss: 1.38981, acc: 0.42311


[INFO 2025-02-19 16:21:24,157] [0] Ed: 179200, train_loss: 1.38981, acc: 0.42311
[INFO 2025-02-19 16:21:24,157] [0] Ed: 179200, train_loss: 1.38981, acc: 0.42311


INFO:root:[0] Ed: 182400, train_loss: 1.38909, acc: 0.42368


[INFO 2025-02-19 16:21:39,062] [0] Ed: 182400, train_loss: 1.38909, acc: 0.42368
[INFO 2025-02-19 16:21:39,062] [0] Ed: 182400, train_loss: 1.38909, acc: 0.42368


INFO:root:[0] Ed: 185600, train_loss: 1.38801, acc: 0.42419


[INFO 2025-02-19 16:21:54,831] [0] Ed: 185600, train_loss: 1.38801, acc: 0.42419
[INFO 2025-02-19 16:21:54,831] [0] Ed: 185600, train_loss: 1.38801, acc: 0.42419


INFO:root:[0] Ed: 188800, train_loss: 1.38722, acc: 0.42451


[INFO 2025-02-19 16:22:09,775] [0] Ed: 188800, train_loss: 1.38722, acc: 0.42451
[INFO 2025-02-19 16:22:09,775] [0] Ed: 188800, train_loss: 1.38722, acc: 0.42451


INFO:root:[0] Ed: 192000, train_loss: 1.38664, acc: 0.42467


[INFO 2025-02-19 16:22:24,767] [0] Ed: 192000, train_loss: 1.38664, acc: 0.42467
[INFO 2025-02-19 16:22:24,767] [0] Ed: 192000, train_loss: 1.38664, acc: 0.42467


INFO:root:[0] Ed: 195200, train_loss: 1.38585, acc: 0.42514


[INFO 2025-02-19 16:22:39,708] [0] Ed: 195200, train_loss: 1.38585, acc: 0.42514
[INFO 2025-02-19 16:22:39,708] [0] Ed: 195200, train_loss: 1.38585, acc: 0.42514


INFO:root:[0] Ed: 198400, train_loss: 1.38519, acc: 0.42570


[INFO 2025-02-19 16:22:54,548] [0] Ed: 198400, train_loss: 1.38519, acc: 0.42570
[INFO 2025-02-19 16:22:54,548] [0] Ed: 198400, train_loss: 1.38519, acc: 0.42570


INFO:root:[0] Ed: 201600, train_loss: 1.38434, acc: 0.42606


[INFO 2025-02-19 16:23:09,948] [0] Ed: 201600, train_loss: 1.38434, acc: 0.42606
[INFO 2025-02-19 16:23:09,948] [0] Ed: 201600, train_loss: 1.38434, acc: 0.42606


INFO:root:[0] Ed: 204800, train_loss: 1.38386, acc: 0.42633


[INFO 2025-02-19 16:23:25,180] [0] Ed: 204800, train_loss: 1.38386, acc: 0.42633
[INFO 2025-02-19 16:23:25,180] [0] Ed: 204800, train_loss: 1.38386, acc: 0.42633


INFO:root:[0] Ed: 208000, train_loss: 1.38342, acc: 0.42664


[INFO 2025-02-19 16:23:40,372] [0] Ed: 208000, train_loss: 1.38342, acc: 0.42664
[INFO 2025-02-19 16:23:40,372] [0] Ed: 208000, train_loss: 1.38342, acc: 0.42664


INFO:root:[0] Ed: 211200, train_loss: 1.38282, acc: 0.42708


[INFO 2025-02-19 16:23:55,343] [0] Ed: 211200, train_loss: 1.38282, acc: 0.42708
[INFO 2025-02-19 16:23:55,343] [0] Ed: 211200, train_loss: 1.38282, acc: 0.42708


INFO:root:[0] Ed: 214400, train_loss: 1.38221, acc: 0.42752


[INFO 2025-02-19 16:24:10,197] [0] Ed: 214400, train_loss: 1.38221, acc: 0.42752
[INFO 2025-02-19 16:24:10,197] [0] Ed: 214400, train_loss: 1.38221, acc: 0.42752


INFO:root:[0] Ed: 217600, train_loss: 1.38164, acc: 0.42773


[INFO 2025-02-19 16:24:25,531] [0] Ed: 217600, train_loss: 1.38164, acc: 0.42773
[INFO 2025-02-19 16:24:25,531] [0] Ed: 217600, train_loss: 1.38164, acc: 0.42773


INFO:root:[0] Ed: 220800, train_loss: 1.38085, acc: 0.42814


[INFO 2025-02-19 16:24:40,716] [0] Ed: 220800, train_loss: 1.38085, acc: 0.42814
[INFO 2025-02-19 16:24:40,716] [0] Ed: 220800, train_loss: 1.38085, acc: 0.42814


INFO:root:[0] Ed: 224000, train_loss: 1.38003, acc: 0.42856


[INFO 2025-02-19 16:24:55,761] [0] Ed: 224000, train_loss: 1.38003, acc: 0.42856
[INFO 2025-02-19 16:24:55,761] [0] Ed: 224000, train_loss: 1.38003, acc: 0.42856


INFO:root:[0] Ed: 227200, train_loss: 1.37925, acc: 0.42907


[INFO 2025-02-19 16:25:10,668] [0] Ed: 227200, train_loss: 1.37925, acc: 0.42907
[INFO 2025-02-19 16:25:10,668] [0] Ed: 227200, train_loss: 1.37925, acc: 0.42907


INFO:root:[0] Ed: 230400, train_loss: 1.37837, acc: 0.42953


[INFO 2025-02-19 16:25:25,653] [0] Ed: 230400, train_loss: 1.37837, acc: 0.42953
[INFO 2025-02-19 16:25:25,653] [0] Ed: 230400, train_loss: 1.37837, acc: 0.42953


INFO:root:[0] Ed: 233600, train_loss: 1.37742, acc: 0.43009


[INFO 2025-02-19 16:25:40,755] [0] Ed: 233600, train_loss: 1.37742, acc: 0.43009
[INFO 2025-02-19 16:25:40,755] [0] Ed: 233600, train_loss: 1.37742, acc: 0.43009


INFO:root:Training finish.


[INFO 2025-02-19 16:25:53,542] Training finish.
[INFO 2025-02-19 16:25:53,542] Training finish.


INFO:root:Model saved to /content/model/epoch-1.pt.


[INFO 2025-02-19 16:25:53,609] Model saved to /content/model/epoch-1.pt.
[INFO 2025-02-19 16:25:53,609] Model saved to /content/model/epoch-1.pt.


INFO:root:[0] Ed: 0, train_loss: inf, acc: inf


[INFO 2025-02-19 16:25:53,837] [0] Ed: 0, train_loss: inf, acc: inf
[INFO 2025-02-19 16:25:53,837] [0] Ed: 0, train_loss: inf, acc: inf


INFO:root:[0] Ed: 3200, train_loss: 1.34824, acc: 0.46125


[INFO 2025-02-19 16:26:09,310] [0] Ed: 3200, train_loss: 1.34824, acc: 0.46125
[INFO 2025-02-19 16:26:09,310] [0] Ed: 3200, train_loss: 1.34824, acc: 0.46125


INFO:root:[0] Ed: 6400, train_loss: 1.33333, acc: 0.46203


[INFO 2025-02-19 16:26:24,298] [0] Ed: 6400, train_loss: 1.33333, acc: 0.46203
[INFO 2025-02-19 16:26:24,298] [0] Ed: 6400, train_loss: 1.33333, acc: 0.46203


INFO:root:[0] Ed: 9600, train_loss: 1.33269, acc: 0.45896


[INFO 2025-02-19 16:26:39,183] [0] Ed: 9600, train_loss: 1.33269, acc: 0.45896
[INFO 2025-02-19 16:26:39,183] [0] Ed: 9600, train_loss: 1.33269, acc: 0.45896


INFO:root:[0] Ed: 12800, train_loss: 1.32964, acc: 0.46094


[INFO 2025-02-19 16:26:54,165] [0] Ed: 12800, train_loss: 1.32964, acc: 0.46094
[INFO 2025-02-19 16:26:54,165] [0] Ed: 12800, train_loss: 1.32964, acc: 0.46094


INFO:root:[0] Ed: 16000, train_loss: 1.33154, acc: 0.45819


[INFO 2025-02-19 16:27:09,304] [0] Ed: 16000, train_loss: 1.33154, acc: 0.45819
[INFO 2025-02-19 16:27:09,304] [0] Ed: 16000, train_loss: 1.33154, acc: 0.45819


INFO:root:[0] Ed: 19200, train_loss: 1.33051, acc: 0.45661


[INFO 2025-02-19 16:27:24,989] [0] Ed: 19200, train_loss: 1.33051, acc: 0.45661
[INFO 2025-02-19 16:27:24,989] [0] Ed: 19200, train_loss: 1.33051, acc: 0.45661


INFO:root:[0] Ed: 22400, train_loss: 1.32930, acc: 0.45790


[INFO 2025-02-19 16:27:39,925] [0] Ed: 22400, train_loss: 1.32930, acc: 0.45790
[INFO 2025-02-19 16:27:39,925] [0] Ed: 22400, train_loss: 1.32930, acc: 0.45790


INFO:root:[0] Ed: 25600, train_loss: 1.32882, acc: 0.45895


[INFO 2025-02-19 16:27:54,993] [0] Ed: 25600, train_loss: 1.32882, acc: 0.45895
[INFO 2025-02-19 16:27:54,993] [0] Ed: 25600, train_loss: 1.32882, acc: 0.45895


INFO:root:[0] Ed: 28800, train_loss: 1.32776, acc: 0.46035


[INFO 2025-02-19 16:28:09,998] [0] Ed: 28800, train_loss: 1.32776, acc: 0.46035
[INFO 2025-02-19 16:28:09,998] [0] Ed: 28800, train_loss: 1.32776, acc: 0.46035


INFO:root:[0] Ed: 32000, train_loss: 1.32698, acc: 0.46097


[INFO 2025-02-19 16:28:25,198] [0] Ed: 32000, train_loss: 1.32698, acc: 0.46097
[INFO 2025-02-19 16:28:25,198] [0] Ed: 32000, train_loss: 1.32698, acc: 0.46097


INFO:root:[0] Ed: 35200, train_loss: 1.32796, acc: 0.45986


[INFO 2025-02-19 16:28:40,895] [0] Ed: 35200, train_loss: 1.32796, acc: 0.45986
[INFO 2025-02-19 16:28:40,895] [0] Ed: 35200, train_loss: 1.32796, acc: 0.45986


INFO:root:[0] Ed: 38400, train_loss: 1.32706, acc: 0.45922


[INFO 2025-02-19 16:28:55,762] [0] Ed: 38400, train_loss: 1.32706, acc: 0.45922
[INFO 2025-02-19 16:28:55,762] [0] Ed: 38400, train_loss: 1.32706, acc: 0.45922


INFO:root:[0] Ed: 41600, train_loss: 1.32452, acc: 0.46077


[INFO 2025-02-19 16:29:10,891] [0] Ed: 41600, train_loss: 1.32452, acc: 0.46077
[INFO 2025-02-19 16:29:10,891] [0] Ed: 41600, train_loss: 1.32452, acc: 0.46077


INFO:root:[0] Ed: 44800, train_loss: 1.32372, acc: 0.46065


[INFO 2025-02-19 16:29:25,866] [0] Ed: 44800, train_loss: 1.32372, acc: 0.46065
[INFO 2025-02-19 16:29:25,866] [0] Ed: 44800, train_loss: 1.32372, acc: 0.46065


INFO:root:[0] Ed: 48000, train_loss: 1.32377, acc: 0.46046


[INFO 2025-02-19 16:29:40,743] [0] Ed: 48000, train_loss: 1.32377, acc: 0.46046
[INFO 2025-02-19 16:29:40,743] [0] Ed: 48000, train_loss: 1.32377, acc: 0.46046


INFO:root:[0] Ed: 51200, train_loss: 1.32247, acc: 0.46129


[INFO 2025-02-19 16:29:56,717] [0] Ed: 51200, train_loss: 1.32247, acc: 0.46129
[INFO 2025-02-19 16:29:56,717] [0] Ed: 51200, train_loss: 1.32247, acc: 0.46129


INFO:root:[0] Ed: 54400, train_loss: 1.32143, acc: 0.46199


[INFO 2025-02-19 16:30:11,914] [0] Ed: 54400, train_loss: 1.32143, acc: 0.46199
[INFO 2025-02-19 16:30:11,914] [0] Ed: 54400, train_loss: 1.32143, acc: 0.46199


INFO:root:[0] Ed: 57600, train_loss: 1.32072, acc: 0.46313


[INFO 2025-02-19 16:30:27,186] [0] Ed: 57600, train_loss: 1.32072, acc: 0.46313
[INFO 2025-02-19 16:30:27,186] [0] Ed: 57600, train_loss: 1.32072, acc: 0.46313


INFO:root:[0] Ed: 60800, train_loss: 1.32036, acc: 0.46326


[INFO 2025-02-19 16:30:42,449] [0] Ed: 60800, train_loss: 1.32036, acc: 0.46326
[INFO 2025-02-19 16:30:42,449] [0] Ed: 60800, train_loss: 1.32036, acc: 0.46326


INFO:root:[0] Ed: 64000, train_loss: 1.31943, acc: 0.46422


[INFO 2025-02-19 16:30:57,782] [0] Ed: 64000, train_loss: 1.31943, acc: 0.46422
[INFO 2025-02-19 16:30:57,782] [0] Ed: 64000, train_loss: 1.31943, acc: 0.46422


INFO:root:[0] Ed: 67200, train_loss: 1.31850, acc: 0.46503


[INFO 2025-02-19 16:31:13,338] [0] Ed: 67200, train_loss: 1.31850, acc: 0.46503
[INFO 2025-02-19 16:31:13,338] [0] Ed: 67200, train_loss: 1.31850, acc: 0.46503


INFO:root:[0] Ed: 70400, train_loss: 1.31756, acc: 0.46547


[INFO 2025-02-19 16:31:28,653] [0] Ed: 70400, train_loss: 1.31756, acc: 0.46547
[INFO 2025-02-19 16:31:28,653] [0] Ed: 70400, train_loss: 1.31756, acc: 0.46547


INFO:root:[0] Ed: 73600, train_loss: 1.31731, acc: 0.46538


[INFO 2025-02-19 16:31:43,537] [0] Ed: 73600, train_loss: 1.31731, acc: 0.46538
[INFO 2025-02-19 16:31:43,537] [0] Ed: 73600, train_loss: 1.31731, acc: 0.46538


INFO:root:[0] Ed: 76800, train_loss: 1.31585, acc: 0.46600


[INFO 2025-02-19 16:31:58,531] [0] Ed: 76800, train_loss: 1.31585, acc: 0.46600
[INFO 2025-02-19 16:31:58,531] [0] Ed: 76800, train_loss: 1.31585, acc: 0.46600


INFO:root:[0] Ed: 80000, train_loss: 1.31584, acc: 0.46576


[INFO 2025-02-19 16:32:13,737] [0] Ed: 80000, train_loss: 1.31584, acc: 0.46576
[INFO 2025-02-19 16:32:13,737] [0] Ed: 80000, train_loss: 1.31584, acc: 0.46576


INFO:root:[0] Ed: 83200, train_loss: 1.31477, acc: 0.46641


[INFO 2025-02-19 16:32:29,548] [0] Ed: 83200, train_loss: 1.31477, acc: 0.46641
[INFO 2025-02-19 16:32:29,548] [0] Ed: 83200, train_loss: 1.31477, acc: 0.46641


INFO:root:[0] Ed: 86400, train_loss: 1.31410, acc: 0.46637


[INFO 2025-02-19 16:32:44,534] [0] Ed: 86400, train_loss: 1.31410, acc: 0.46637
[INFO 2025-02-19 16:32:44,534] [0] Ed: 86400, train_loss: 1.31410, acc: 0.46637


INFO:root:[0] Ed: 89600, train_loss: 1.31329, acc: 0.46698


[INFO 2025-02-19 16:32:59,559] [0] Ed: 89600, train_loss: 1.31329, acc: 0.46698
[INFO 2025-02-19 16:32:59,559] [0] Ed: 89600, train_loss: 1.31329, acc: 0.46698


INFO:root:[0] Ed: 92800, train_loss: 1.31360, acc: 0.46669


[INFO 2025-02-19 16:33:14,538] [0] Ed: 92800, train_loss: 1.31360, acc: 0.46669
[INFO 2025-02-19 16:33:14,538] [0] Ed: 92800, train_loss: 1.31360, acc: 0.46669


INFO:root:[0] Ed: 96000, train_loss: 1.31288, acc: 0.46733


[INFO 2025-02-19 16:33:29,603] [0] Ed: 96000, train_loss: 1.31288, acc: 0.46733
[INFO 2025-02-19 16:33:29,603] [0] Ed: 96000, train_loss: 1.31288, acc: 0.46733


INFO:root:[0] Ed: 99200, train_loss: 1.31283, acc: 0.46728


[INFO 2025-02-19 16:33:45,279] [0] Ed: 99200, train_loss: 1.31283, acc: 0.46728
[INFO 2025-02-19 16:33:45,279] [0] Ed: 99200, train_loss: 1.31283, acc: 0.46728


INFO:root:[0] Ed: 102400, train_loss: 1.31168, acc: 0.46782


[INFO 2025-02-19 16:34:00,319] [0] Ed: 102400, train_loss: 1.31168, acc: 0.46782
[INFO 2025-02-19 16:34:00,319] [0] Ed: 102400, train_loss: 1.31168, acc: 0.46782


INFO:root:[0] Ed: 105600, train_loss: 1.31041, acc: 0.46847


[INFO 2025-02-19 16:34:15,326] [0] Ed: 105600, train_loss: 1.31041, acc: 0.46847
[INFO 2025-02-19 16:34:15,326] [0] Ed: 105600, train_loss: 1.31041, acc: 0.46847


INFO:root:[0] Ed: 108800, train_loss: 1.31035, acc: 0.46846


[INFO 2025-02-19 16:34:30,347] [0] Ed: 108800, train_loss: 1.31035, acc: 0.46846
[INFO 2025-02-19 16:34:30,347] [0] Ed: 108800, train_loss: 1.31035, acc: 0.46846


INFO:root:[0] Ed: 112000, train_loss: 1.31011, acc: 0.46854


[INFO 2025-02-19 16:34:45,259] [0] Ed: 112000, train_loss: 1.31011, acc: 0.46854
[INFO 2025-02-19 16:34:45,259] [0] Ed: 112000, train_loss: 1.31011, acc: 0.46854


INFO:root:[0] Ed: 115200, train_loss: 1.30979, acc: 0.46841


[INFO 2025-02-19 16:35:01,333] [0] Ed: 115200, train_loss: 1.30979, acc: 0.46841
[INFO 2025-02-19 16:35:01,333] [0] Ed: 115200, train_loss: 1.30979, acc: 0.46841


INFO:root:[0] Ed: 118400, train_loss: 1.30890, acc: 0.46910


[INFO 2025-02-19 16:35:16,362] [0] Ed: 118400, train_loss: 1.30890, acc: 0.46910
[INFO 2025-02-19 16:35:16,362] [0] Ed: 118400, train_loss: 1.30890, acc: 0.46910


INFO:root:[0] Ed: 121600, train_loss: 1.30876, acc: 0.46911


[INFO 2025-02-19 16:35:31,379] [0] Ed: 121600, train_loss: 1.30876, acc: 0.46911
[INFO 2025-02-19 16:35:31,379] [0] Ed: 121600, train_loss: 1.30876, acc: 0.46911


INFO:root:[0] Ed: 124800, train_loss: 1.30852, acc: 0.46893


[INFO 2025-02-19 16:35:46,385] [0] Ed: 124800, train_loss: 1.30852, acc: 0.46893
[INFO 2025-02-19 16:35:46,385] [0] Ed: 124800, train_loss: 1.30852, acc: 0.46893


INFO:root:[0] Ed: 128000, train_loss: 1.30882, acc: 0.46886


[INFO 2025-02-19 16:36:01,393] [0] Ed: 128000, train_loss: 1.30882, acc: 0.46886
[INFO 2025-02-19 16:36:01,393] [0] Ed: 128000, train_loss: 1.30882, acc: 0.46886


INFO:root:[0] Ed: 131200, train_loss: 1.30839, acc: 0.46913


[INFO 2025-02-19 16:36:17,153] [0] Ed: 131200, train_loss: 1.30839, acc: 0.46913
[INFO 2025-02-19 16:36:17,153] [0] Ed: 131200, train_loss: 1.30839, acc: 0.46913


INFO:root:[0] Ed: 134400, train_loss: 1.30816, acc: 0.46931


[INFO 2025-02-19 16:36:32,096] [0] Ed: 134400, train_loss: 1.30816, acc: 0.46931
[INFO 2025-02-19 16:36:32,096] [0] Ed: 134400, train_loss: 1.30816, acc: 0.46931


INFO:root:[0] Ed: 137600, train_loss: 1.30828, acc: 0.46933


[INFO 2025-02-19 16:36:46,964] [0] Ed: 137600, train_loss: 1.30828, acc: 0.46933
[INFO 2025-02-19 16:36:46,964] [0] Ed: 137600, train_loss: 1.30828, acc: 0.46933


INFO:root:[0] Ed: 140800, train_loss: 1.30769, acc: 0.46975


[INFO 2025-02-19 16:37:01,992] [0] Ed: 140800, train_loss: 1.30769, acc: 0.46975
[INFO 2025-02-19 16:37:01,992] [0] Ed: 140800, train_loss: 1.30769, acc: 0.46975


INFO:root:[0] Ed: 144000, train_loss: 1.30734, acc: 0.46972


[INFO 2025-02-19 16:37:17,002] [0] Ed: 144000, train_loss: 1.30734, acc: 0.46972
[INFO 2025-02-19 16:37:17,002] [0] Ed: 144000, train_loss: 1.30734, acc: 0.46972


INFO:root:[0] Ed: 147200, train_loss: 1.30705, acc: 0.46980


[INFO 2025-02-19 16:37:32,752] [0] Ed: 147200, train_loss: 1.30705, acc: 0.46980
[INFO 2025-02-19 16:37:32,752] [0] Ed: 147200, train_loss: 1.30705, acc: 0.46980


INFO:root:[0] Ed: 150400, train_loss: 1.30647, acc: 0.46997


[INFO 2025-02-19 16:37:47,782] [0] Ed: 150400, train_loss: 1.30647, acc: 0.46997
[INFO 2025-02-19 16:37:47,782] [0] Ed: 150400, train_loss: 1.30647, acc: 0.46997


INFO:root:[0] Ed: 153600, train_loss: 1.30598, acc: 0.47010


[INFO 2025-02-19 16:38:02,824] [0] Ed: 153600, train_loss: 1.30598, acc: 0.47010
[INFO 2025-02-19 16:38:02,824] [0] Ed: 153600, train_loss: 1.30598, acc: 0.47010


INFO:root:[0] Ed: 156800, train_loss: 1.30543, acc: 0.47050


[INFO 2025-02-19 16:38:17,905] [0] Ed: 156800, train_loss: 1.30543, acc: 0.47050
[INFO 2025-02-19 16:38:17,905] [0] Ed: 156800, train_loss: 1.30543, acc: 0.47050


INFO:root:[0] Ed: 160000, train_loss: 1.30510, acc: 0.47083


[INFO 2025-02-19 16:38:32,905] [0] Ed: 160000, train_loss: 1.30510, acc: 0.47083
[INFO 2025-02-19 16:38:32,905] [0] Ed: 160000, train_loss: 1.30510, acc: 0.47083


INFO:root:[0] Ed: 163200, train_loss: 1.30480, acc: 0.47094


[INFO 2025-02-19 16:38:48,682] [0] Ed: 163200, train_loss: 1.30480, acc: 0.47094
[INFO 2025-02-19 16:38:48,682] [0] Ed: 163200, train_loss: 1.30480, acc: 0.47094


INFO:root:[0] Ed: 166400, train_loss: 1.30484, acc: 0.47082


[INFO 2025-02-19 16:39:03,694] [0] Ed: 166400, train_loss: 1.30484, acc: 0.47082
[INFO 2025-02-19 16:39:03,694] [0] Ed: 166400, train_loss: 1.30484, acc: 0.47082


INFO:root:[0] Ed: 169600, train_loss: 1.30467, acc: 0.47104


[INFO 2025-02-19 16:39:18,641] [0] Ed: 169600, train_loss: 1.30467, acc: 0.47104
[INFO 2025-02-19 16:39:18,641] [0] Ed: 169600, train_loss: 1.30467, acc: 0.47104


INFO:root:[0] Ed: 172800, train_loss: 1.30413, acc: 0.47123


[INFO 2025-02-19 16:39:33,583] [0] Ed: 172800, train_loss: 1.30413, acc: 0.47123
[INFO 2025-02-19 16:39:33,583] [0] Ed: 172800, train_loss: 1.30413, acc: 0.47123


INFO:root:[0] Ed: 176000, train_loss: 1.30390, acc: 0.47132


[INFO 2025-02-19 16:39:48,382] [0] Ed: 176000, train_loss: 1.30390, acc: 0.47132
[INFO 2025-02-19 16:39:48,382] [0] Ed: 176000, train_loss: 1.30390, acc: 0.47132


INFO:root:[0] Ed: 179200, train_loss: 1.30346, acc: 0.47162


[INFO 2025-02-19 16:40:03,749] [0] Ed: 179200, train_loss: 1.30346, acc: 0.47162
[INFO 2025-02-19 16:40:03,749] [0] Ed: 179200, train_loss: 1.30346, acc: 0.47162


INFO:root:[0] Ed: 182400, train_loss: 1.30331, acc: 0.47177


[INFO 2025-02-19 16:40:18,900] [0] Ed: 182400, train_loss: 1.30331, acc: 0.47177
[INFO 2025-02-19 16:40:18,900] [0] Ed: 182400, train_loss: 1.30331, acc: 0.47177


INFO:root:[0] Ed: 185600, train_loss: 1.30285, acc: 0.47199


[INFO 2025-02-19 16:40:33,875] [0] Ed: 185600, train_loss: 1.30285, acc: 0.47199
[INFO 2025-02-19 16:40:33,875] [0] Ed: 185600, train_loss: 1.30285, acc: 0.47199


INFO:root:[0] Ed: 188800, train_loss: 1.30258, acc: 0.47228


[INFO 2025-02-19 16:40:48,966] [0] Ed: 188800, train_loss: 1.30258, acc: 0.47228
[INFO 2025-02-19 16:40:48,966] [0] Ed: 188800, train_loss: 1.30258, acc: 0.47228


INFO:root:[0] Ed: 192000, train_loss: 1.30259, acc: 0.47222


[INFO 2025-02-19 16:41:03,869] [0] Ed: 192000, train_loss: 1.30259, acc: 0.47222
[INFO 2025-02-19 16:41:03,869] [0] Ed: 192000, train_loss: 1.30259, acc: 0.47222


INFO:root:[0] Ed: 195200, train_loss: 1.30231, acc: 0.47248


[INFO 2025-02-19 16:41:19,004] [0] Ed: 195200, train_loss: 1.30231, acc: 0.47248
[INFO 2025-02-19 16:41:19,004] [0] Ed: 195200, train_loss: 1.30231, acc: 0.47248


INFO:root:[0] Ed: 198400, train_loss: 1.30219, acc: 0.47272


[INFO 2025-02-19 16:41:34,392] [0] Ed: 198400, train_loss: 1.30219, acc: 0.47272
[INFO 2025-02-19 16:41:34,392] [0] Ed: 198400, train_loss: 1.30219, acc: 0.47272


INFO:root:[0] Ed: 201600, train_loss: 1.30191, acc: 0.47276


[INFO 2025-02-19 16:41:49,218] [0] Ed: 201600, train_loss: 1.30191, acc: 0.47276
[INFO 2025-02-19 16:41:49,218] [0] Ed: 201600, train_loss: 1.30191, acc: 0.47276


INFO:root:[0] Ed: 204800, train_loss: 1.30191, acc: 0.47269


[INFO 2025-02-19 16:42:04,054] [0] Ed: 204800, train_loss: 1.30191, acc: 0.47269
[INFO 2025-02-19 16:42:04,054] [0] Ed: 204800, train_loss: 1.30191, acc: 0.47269


INFO:root:[0] Ed: 208000, train_loss: 1.30187, acc: 0.47276


[INFO 2025-02-19 16:42:18,940] [0] Ed: 208000, train_loss: 1.30187, acc: 0.47276
[INFO 2025-02-19 16:42:18,940] [0] Ed: 208000, train_loss: 1.30187, acc: 0.47276


INFO:root:[0] Ed: 211200, train_loss: 1.30174, acc: 0.47285


[INFO 2025-02-19 16:42:33,872] [0] Ed: 211200, train_loss: 1.30174, acc: 0.47285
[INFO 2025-02-19 16:42:33,872] [0] Ed: 211200, train_loss: 1.30174, acc: 0.47285


INFO:root:[0] Ed: 214400, train_loss: 1.30160, acc: 0.47288


[INFO 2025-02-19 16:42:49,473] [0] Ed: 214400, train_loss: 1.30160, acc: 0.47288
[INFO 2025-02-19 16:42:49,473] [0] Ed: 214400, train_loss: 1.30160, acc: 0.47288


INFO:root:[0] Ed: 217600, train_loss: 1.30130, acc: 0.47294


[INFO 2025-02-19 16:43:04,355] [0] Ed: 217600, train_loss: 1.30130, acc: 0.47294
[INFO 2025-02-19 16:43:04,355] [0] Ed: 217600, train_loss: 1.30130, acc: 0.47294


INFO:root:[0] Ed: 220800, train_loss: 1.30095, acc: 0.47308


[INFO 2025-02-19 16:43:19,303] [0] Ed: 220800, train_loss: 1.30095, acc: 0.47308
[INFO 2025-02-19 16:43:19,303] [0] Ed: 220800, train_loss: 1.30095, acc: 0.47308


INFO:root:[0] Ed: 224000, train_loss: 1.30053, acc: 0.47326


[INFO 2025-02-19 16:43:34,302] [0] Ed: 224000, train_loss: 1.30053, acc: 0.47326
[INFO 2025-02-19 16:43:34,302] [0] Ed: 224000, train_loss: 1.30053, acc: 0.47326


INFO:root:[0] Ed: 227200, train_loss: 1.30009, acc: 0.47350


[INFO 2025-02-19 16:43:49,262] [0] Ed: 227200, train_loss: 1.30009, acc: 0.47350
[INFO 2025-02-19 16:43:49,262] [0] Ed: 227200, train_loss: 1.30009, acc: 0.47350


INFO:root:[0] Ed: 230400, train_loss: 1.29959, acc: 0.47361


[INFO 2025-02-19 16:44:04,806] [0] Ed: 230400, train_loss: 1.29959, acc: 0.47361
[INFO 2025-02-19 16:44:04,806] [0] Ed: 230400, train_loss: 1.29959, acc: 0.47361


INFO:root:[0] Ed: 233600, train_loss: 1.29898, acc: 0.47399


[INFO 2025-02-19 16:44:19,881] [0] Ed: 233600, train_loss: 1.29898, acc: 0.47399
[INFO 2025-02-19 16:44:19,881] [0] Ed: 233600, train_loss: 1.29898, acc: 0.47399


INFO:root:Training finish.


[INFO 2025-02-19 16:44:32,632] Training finish.
[INFO 2025-02-19 16:44:32,632] Training finish.


INFO:root:Model saved to /content/model/epoch-2.pt.


[INFO 2025-02-19 16:44:32,675] Model saved to /content/model/epoch-2.pt.
[INFO 2025-02-19 16:44:32,675] Model saved to /content/model/epoch-2.pt.


INFO:root:[0] Ed: 0, train_loss: inf, acc: inf


[INFO 2025-02-19 16:44:32,844] [0] Ed: 0, train_loss: inf, acc: inf
[INFO 2025-02-19 16:44:32,844] [0] Ed: 0, train_loss: inf, acc: inf


INFO:root:[0] Ed: 3200, train_loss: 1.30297, acc: 0.48562


[INFO 2025-02-19 16:44:47,708] [0] Ed: 3200, train_loss: 1.30297, acc: 0.48562
[INFO 2025-02-19 16:44:47,708] [0] Ed: 3200, train_loss: 1.30297, acc: 0.48562


INFO:root:[0] Ed: 6400, train_loss: 1.28626, acc: 0.48781


[INFO 2025-02-19 16:45:02,669] [0] Ed: 6400, train_loss: 1.28626, acc: 0.48781
[INFO 2025-02-19 16:45:02,669] [0] Ed: 6400, train_loss: 1.28626, acc: 0.48781


INFO:root:[0] Ed: 9600, train_loss: 1.28647, acc: 0.48615


[INFO 2025-02-19 16:45:17,617] [0] Ed: 9600, train_loss: 1.28647, acc: 0.48615
[INFO 2025-02-19 16:45:17,617] [0] Ed: 9600, train_loss: 1.28647, acc: 0.48615


INFO:root:[0] Ed: 12800, train_loss: 1.28315, acc: 0.48844


[INFO 2025-02-19 16:45:33,211] [0] Ed: 12800, train_loss: 1.28315, acc: 0.48844
[INFO 2025-02-19 16:45:33,211] [0] Ed: 12800, train_loss: 1.28315, acc: 0.48844


INFO:root:[0] Ed: 16000, train_loss: 1.28569, acc: 0.48425


[INFO 2025-02-19 16:45:48,468] [0] Ed: 16000, train_loss: 1.28569, acc: 0.48425
[INFO 2025-02-19 16:45:48,468] [0] Ed: 16000, train_loss: 1.28569, acc: 0.48425


INFO:root:[0] Ed: 19200, train_loss: 1.28506, acc: 0.48448


[INFO 2025-02-19 16:46:03,414] [0] Ed: 19200, train_loss: 1.28506, acc: 0.48448
[INFO 2025-02-19 16:46:03,414] [0] Ed: 19200, train_loss: 1.28506, acc: 0.48448


INFO:root:[0] Ed: 22400, train_loss: 1.28443, acc: 0.48460


[INFO 2025-02-19 16:46:18,297] [0] Ed: 22400, train_loss: 1.28443, acc: 0.48460
[INFO 2025-02-19 16:46:18,297] [0] Ed: 22400, train_loss: 1.28443, acc: 0.48460


INFO:root:[0] Ed: 25600, train_loss: 1.28250, acc: 0.48590


[INFO 2025-02-19 16:46:33,436] [0] Ed: 25600, train_loss: 1.28250, acc: 0.48590
[INFO 2025-02-19 16:46:33,436] [0] Ed: 25600, train_loss: 1.28250, acc: 0.48590


INFO:root:[0] Ed: 28800, train_loss: 1.28214, acc: 0.48601


[INFO 2025-02-19 16:46:48,674] [0] Ed: 28800, train_loss: 1.28214, acc: 0.48601
[INFO 2025-02-19 16:46:48,674] [0] Ed: 28800, train_loss: 1.28214, acc: 0.48601


INFO:root:[0] Ed: 32000, train_loss: 1.28156, acc: 0.48503


[INFO 2025-02-19 16:47:03,961] [0] Ed: 32000, train_loss: 1.28156, acc: 0.48503
[INFO 2025-02-19 16:47:03,961] [0] Ed: 32000, train_loss: 1.28156, acc: 0.48503


INFO:root:[0] Ed: 35200, train_loss: 1.28321, acc: 0.48327


[INFO 2025-02-19 16:47:18,777] [0] Ed: 35200, train_loss: 1.28321, acc: 0.48327
[INFO 2025-02-19 16:47:18,777] [0] Ed: 35200, train_loss: 1.28321, acc: 0.48327


INFO:root:[0] Ed: 38400, train_loss: 1.28229, acc: 0.48346


[INFO 2025-02-19 16:47:33,712] [0] Ed: 38400, train_loss: 1.28229, acc: 0.48346
[INFO 2025-02-19 16:47:33,712] [0] Ed: 38400, train_loss: 1.28229, acc: 0.48346


INFO:root:[0] Ed: 41600, train_loss: 1.28020, acc: 0.48495


[INFO 2025-02-19 16:47:48,572] [0] Ed: 41600, train_loss: 1.28020, acc: 0.48495
[INFO 2025-02-19 16:47:48,572] [0] Ed: 41600, train_loss: 1.28020, acc: 0.48495


INFO:root:[0] Ed: 44800, train_loss: 1.27932, acc: 0.48487


[INFO 2025-02-19 16:48:03,510] [0] Ed: 44800, train_loss: 1.27932, acc: 0.48487
[INFO 2025-02-19 16:48:03,510] [0] Ed: 44800, train_loss: 1.27932, acc: 0.48487


INFO:root:[0] Ed: 48000, train_loss: 1.27910, acc: 0.48479


[INFO 2025-02-19 16:48:19,090] [0] Ed: 48000, train_loss: 1.27910, acc: 0.48479
[INFO 2025-02-19 16:48:19,090] [0] Ed: 48000, train_loss: 1.27910, acc: 0.48479


INFO:root:[0] Ed: 51200, train_loss: 1.27821, acc: 0.48547


[INFO 2025-02-19 16:48:33,985] [0] Ed: 51200, train_loss: 1.27821, acc: 0.48547
[INFO 2025-02-19 16:48:33,985] [0] Ed: 51200, train_loss: 1.27821, acc: 0.48547


INFO:root:[0] Ed: 54400, train_loss: 1.27721, acc: 0.48649


[INFO 2025-02-19 16:48:48,829] [0] Ed: 54400, train_loss: 1.27721, acc: 0.48649
[INFO 2025-02-19 16:48:48,829] [0] Ed: 54400, train_loss: 1.27721, acc: 0.48649


INFO:root:[0] Ed: 57600, train_loss: 1.27669, acc: 0.48712


[INFO 2025-02-19 16:49:03,763] [0] Ed: 57600, train_loss: 1.27669, acc: 0.48712
[INFO 2025-02-19 16:49:03,763] [0] Ed: 57600, train_loss: 1.27669, acc: 0.48712


INFO:root:[0] Ed: 60800, train_loss: 1.27688, acc: 0.48692


[INFO 2025-02-19 16:49:18,637] [0] Ed: 60800, train_loss: 1.27688, acc: 0.48692
[INFO 2025-02-19 16:49:18,637] [0] Ed: 60800, train_loss: 1.27688, acc: 0.48692


INFO:root:[0] Ed: 64000, train_loss: 1.27581, acc: 0.48766


[INFO 2025-02-19 16:49:34,116] [0] Ed: 64000, train_loss: 1.27581, acc: 0.48766
[INFO 2025-02-19 16:49:34,116] [0] Ed: 64000, train_loss: 1.27581, acc: 0.48766


INFO:root:[0] Ed: 67200, train_loss: 1.27512, acc: 0.48844


[INFO 2025-02-19 16:49:49,161] [0] Ed: 67200, train_loss: 1.27512, acc: 0.48844
[INFO 2025-02-19 16:49:49,161] [0] Ed: 67200, train_loss: 1.27512, acc: 0.48844


INFO:root:[0] Ed: 70400, train_loss: 1.27434, acc: 0.48869


[INFO 2025-02-19 16:50:04,042] [0] Ed: 70400, train_loss: 1.27434, acc: 0.48869
[INFO 2025-02-19 16:50:04,042] [0] Ed: 70400, train_loss: 1.27434, acc: 0.48869


INFO:root:[0] Ed: 73600, train_loss: 1.27468, acc: 0.48817


[INFO 2025-02-19 16:50:18,954] [0] Ed: 73600, train_loss: 1.27468, acc: 0.48817
[INFO 2025-02-19 16:50:18,954] [0] Ed: 73600, train_loss: 1.27468, acc: 0.48817


INFO:root:[0] Ed: 76800, train_loss: 1.27374, acc: 0.48865


[INFO 2025-02-19 16:50:33,997] [0] Ed: 76800, train_loss: 1.27374, acc: 0.48865
[INFO 2025-02-19 16:50:33,997] [0] Ed: 76800, train_loss: 1.27374, acc: 0.48865


INFO:root:[0] Ed: 80000, train_loss: 1.27402, acc: 0.48830


[INFO 2025-02-19 16:50:49,275] [0] Ed: 80000, train_loss: 1.27402, acc: 0.48830
[INFO 2025-02-19 16:50:49,275] [0] Ed: 80000, train_loss: 1.27402, acc: 0.48830


INFO:root:[0] Ed: 83200, train_loss: 1.27328, acc: 0.48897


[INFO 2025-02-19 16:51:04,783] [0] Ed: 83200, train_loss: 1.27328, acc: 0.48897
[INFO 2025-02-19 16:51:04,783] [0] Ed: 83200, train_loss: 1.27328, acc: 0.48897


INFO:root:[0] Ed: 86400, train_loss: 1.27237, acc: 0.48931


[INFO 2025-02-19 16:51:19,680] [0] Ed: 86400, train_loss: 1.27237, acc: 0.48931
[INFO 2025-02-19 16:51:19,680] [0] Ed: 86400, train_loss: 1.27237, acc: 0.48931


INFO:root:[0] Ed: 89600, train_loss: 1.27128, acc: 0.49009


[INFO 2025-02-19 16:51:34,556] [0] Ed: 89600, train_loss: 1.27128, acc: 0.49009
[INFO 2025-02-19 16:51:34,556] [0] Ed: 89600, train_loss: 1.27128, acc: 0.49009


INFO:root:[0] Ed: 92800, train_loss: 1.27150, acc: 0.48982


[INFO 2025-02-19 16:51:49,475] [0] Ed: 92800, train_loss: 1.27150, acc: 0.48982
[INFO 2025-02-19 16:51:49,475] [0] Ed: 92800, train_loss: 1.27150, acc: 0.48982


INFO:root:[0] Ed: 96000, train_loss: 1.27111, acc: 0.48991


[INFO 2025-02-19 16:52:05,112] [0] Ed: 96000, train_loss: 1.27111, acc: 0.48991
[INFO 2025-02-19 16:52:05,112] [0] Ed: 96000, train_loss: 1.27111, acc: 0.48991


INFO:root:[0] Ed: 99200, train_loss: 1.27096, acc: 0.48993


[INFO 2025-02-19 16:52:21,098] [0] Ed: 99200, train_loss: 1.27096, acc: 0.48993
[INFO 2025-02-19 16:52:21,098] [0] Ed: 99200, train_loss: 1.27096, acc: 0.48993


INFO:root:[0] Ed: 102400, train_loss: 1.26982, acc: 0.49026


[INFO 2025-02-19 16:52:36,502] [0] Ed: 102400, train_loss: 1.26982, acc: 0.49026
[INFO 2025-02-19 16:52:36,502] [0] Ed: 102400, train_loss: 1.26982, acc: 0.49026


INFO:root:[0] Ed: 105600, train_loss: 1.26851, acc: 0.49095


[INFO 2025-02-19 16:52:51,792] [0] Ed: 105600, train_loss: 1.26851, acc: 0.49095
[INFO 2025-02-19 16:52:51,792] [0] Ed: 105600, train_loss: 1.26851, acc: 0.49095


INFO:root:[0] Ed: 108800, train_loss: 1.26859, acc: 0.49091


[INFO 2025-02-19 16:53:07,023] [0] Ed: 108800, train_loss: 1.26859, acc: 0.49091
[INFO 2025-02-19 16:53:07,023] [0] Ed: 108800, train_loss: 1.26859, acc: 0.49091


INFO:root:[0] Ed: 112000, train_loss: 1.26864, acc: 0.49071


[INFO 2025-02-19 16:53:22,960] [0] Ed: 112000, train_loss: 1.26864, acc: 0.49071
[INFO 2025-02-19 16:53:22,960] [0] Ed: 112000, train_loss: 1.26864, acc: 0.49071


INFO:root:[0] Ed: 115200, train_loss: 1.26825, acc: 0.49052


[INFO 2025-02-19 16:53:38,519] [0] Ed: 115200, train_loss: 1.26825, acc: 0.49052
[INFO 2025-02-19 16:53:38,519] [0] Ed: 115200, train_loss: 1.26825, acc: 0.49052


INFO:root:[0] Ed: 118400, train_loss: 1.26711, acc: 0.49101


[INFO 2025-02-19 16:53:53,682] [0] Ed: 118400, train_loss: 1.26711, acc: 0.49101
[INFO 2025-02-19 16:53:53,682] [0] Ed: 118400, train_loss: 1.26711, acc: 0.49101


INFO:root:[0] Ed: 121600, train_loss: 1.26695, acc: 0.49109


[INFO 2025-02-19 16:54:08,808] [0] Ed: 121600, train_loss: 1.26695, acc: 0.49109
[INFO 2025-02-19 16:54:08,808] [0] Ed: 121600, train_loss: 1.26695, acc: 0.49109


INFO:root:[0] Ed: 124800, train_loss: 1.26687, acc: 0.49111


[INFO 2025-02-19 16:54:23,930] [0] Ed: 124800, train_loss: 1.26687, acc: 0.49111
[INFO 2025-02-19 16:54:23,930] [0] Ed: 124800, train_loss: 1.26687, acc: 0.49111


INFO:root:[0] Ed: 128000, train_loss: 1.26729, acc: 0.49098


[INFO 2025-02-19 16:54:39,887] [0] Ed: 128000, train_loss: 1.26729, acc: 0.49098
[INFO 2025-02-19 16:54:39,887] [0] Ed: 128000, train_loss: 1.26729, acc: 0.49098


INFO:root:[0] Ed: 131200, train_loss: 1.26690, acc: 0.49113


[INFO 2025-02-19 16:54:55,061] [0] Ed: 131200, train_loss: 1.26690, acc: 0.49113
[INFO 2025-02-19 16:54:55,061] [0] Ed: 131200, train_loss: 1.26690, acc: 0.49113


INFO:root:[0] Ed: 134400, train_loss: 1.26695, acc: 0.49087


[INFO 2025-02-19 16:55:10,235] [0] Ed: 134400, train_loss: 1.26695, acc: 0.49087
[INFO 2025-02-19 16:55:10,235] [0] Ed: 134400, train_loss: 1.26695, acc: 0.49087


INFO:root:[0] Ed: 137600, train_loss: 1.26701, acc: 0.49086


[INFO 2025-02-19 16:55:25,275] [0] Ed: 137600, train_loss: 1.26701, acc: 0.49086
[INFO 2025-02-19 16:55:25,275] [0] Ed: 137600, train_loss: 1.26701, acc: 0.49086


INFO:root:[0] Ed: 140800, train_loss: 1.26657, acc: 0.49140


[INFO 2025-02-19 16:55:40,680] [0] Ed: 140800, train_loss: 1.26657, acc: 0.49140
[INFO 2025-02-19 16:55:40,680] [0] Ed: 140800, train_loss: 1.26657, acc: 0.49140


INFO:root:[0] Ed: 144000, train_loss: 1.26633, acc: 0.49120


[INFO 2025-02-19 16:55:56,399] [0] Ed: 144000, train_loss: 1.26633, acc: 0.49120
[INFO 2025-02-19 16:55:56,399] [0] Ed: 144000, train_loss: 1.26633, acc: 0.49120


INFO:root:[0] Ed: 147200, train_loss: 1.26605, acc: 0.49135


[INFO 2025-02-19 16:56:11,581] [0] Ed: 147200, train_loss: 1.26605, acc: 0.49135
[INFO 2025-02-19 16:56:11,581] [0] Ed: 147200, train_loss: 1.26605, acc: 0.49135


INFO:root:[0] Ed: 150400, train_loss: 1.26549, acc: 0.49150


[INFO 2025-02-19 16:56:26,641] [0] Ed: 150400, train_loss: 1.26549, acc: 0.49150
[INFO 2025-02-19 16:56:26,641] [0] Ed: 150400, train_loss: 1.26549, acc: 0.49150


INFO:root:[0] Ed: 153600, train_loss: 1.26506, acc: 0.49158


[INFO 2025-02-19 16:56:41,619] [0] Ed: 153600, train_loss: 1.26506, acc: 0.49158
[INFO 2025-02-19 16:56:41,619] [0] Ed: 153600, train_loss: 1.26506, acc: 0.49158


INFO:root:[0] Ed: 156800, train_loss: 1.26458, acc: 0.49180


[INFO 2025-02-19 16:56:56,805] [0] Ed: 156800, train_loss: 1.26458, acc: 0.49180
[INFO 2025-02-19 16:56:56,805] [0] Ed: 156800, train_loss: 1.26458, acc: 0.49180


INFO:root:[0] Ed: 160000, train_loss: 1.26417, acc: 0.49222


[INFO 2025-02-19 16:57:12,824] [0] Ed: 160000, train_loss: 1.26417, acc: 0.49222
[INFO 2025-02-19 16:57:12,824] [0] Ed: 160000, train_loss: 1.26417, acc: 0.49222


INFO:root:[0] Ed: 163200, train_loss: 1.26406, acc: 0.49224


[INFO 2025-02-19 16:57:28,384] [0] Ed: 163200, train_loss: 1.26406, acc: 0.49224
[INFO 2025-02-19 16:57:28,384] [0] Ed: 163200, train_loss: 1.26406, acc: 0.49224


INFO:root:[0] Ed: 166400, train_loss: 1.26425, acc: 0.49208


[INFO 2025-02-19 16:57:43,805] [0] Ed: 166400, train_loss: 1.26425, acc: 0.49208
[INFO 2025-02-19 16:57:43,805] [0] Ed: 166400, train_loss: 1.26425, acc: 0.49208


INFO:root:[0] Ed: 169600, train_loss: 1.26431, acc: 0.49228


[INFO 2025-02-19 16:57:59,364] [0] Ed: 169600, train_loss: 1.26431, acc: 0.49228
[INFO 2025-02-19 16:57:59,364] [0] Ed: 169600, train_loss: 1.26431, acc: 0.49228


INFO:root:[0] Ed: 172800, train_loss: 1.26397, acc: 0.49239


[INFO 2025-02-19 16:58:15,868] [0] Ed: 172800, train_loss: 1.26397, acc: 0.49239
[INFO 2025-02-19 16:58:15,868] [0] Ed: 172800, train_loss: 1.26397, acc: 0.49239


INFO:root:[0] Ed: 176000, train_loss: 1.26374, acc: 0.49251


[INFO 2025-02-19 16:58:31,296] [0] Ed: 176000, train_loss: 1.26374, acc: 0.49251
[INFO 2025-02-19 16:58:31,296] [0] Ed: 176000, train_loss: 1.26374, acc: 0.49251


INFO:root:[0] Ed: 179200, train_loss: 1.26344, acc: 0.49260


[INFO 2025-02-19 16:58:46,544] [0] Ed: 179200, train_loss: 1.26344, acc: 0.49260
[INFO 2025-02-19 16:58:46,544] [0] Ed: 179200, train_loss: 1.26344, acc: 0.49260


INFO:root:[0] Ed: 182400, train_loss: 1.26334, acc: 0.49270


[INFO 2025-02-19 16:59:01,924] [0] Ed: 182400, train_loss: 1.26334, acc: 0.49270
[INFO 2025-02-19 16:59:01,924] [0] Ed: 182400, train_loss: 1.26334, acc: 0.49270


INFO:root:[0] Ed: 185600, train_loss: 1.26289, acc: 0.49289


[INFO 2025-02-19 16:59:17,575] [0] Ed: 185600, train_loss: 1.26289, acc: 0.49289
[INFO 2025-02-19 16:59:17,575] [0] Ed: 185600, train_loss: 1.26289, acc: 0.49289


INFO:root:[0] Ed: 188800, train_loss: 1.26263, acc: 0.49307


[INFO 2025-02-19 16:59:32,759] [0] Ed: 188800, train_loss: 1.26263, acc: 0.49307
[INFO 2025-02-19 16:59:32,759] [0] Ed: 188800, train_loss: 1.26263, acc: 0.49307


INFO:root:[0] Ed: 192000, train_loss: 1.26268, acc: 0.49304


[INFO 2025-02-19 16:59:47,657] [0] Ed: 192000, train_loss: 1.26268, acc: 0.49304
[INFO 2025-02-19 16:59:47,657] [0] Ed: 192000, train_loss: 1.26268, acc: 0.49304


INFO:root:[0] Ed: 195200, train_loss: 1.26246, acc: 0.49318


[INFO 2025-02-19 17:00:02,581] [0] Ed: 195200, train_loss: 1.26246, acc: 0.49318
[INFO 2025-02-19 17:00:02,581] [0] Ed: 195200, train_loss: 1.26246, acc: 0.49318


INFO:root:[0] Ed: 198400, train_loss: 1.26239, acc: 0.49323


[INFO 2025-02-19 17:00:17,532] [0] Ed: 198400, train_loss: 1.26239, acc: 0.49323
[INFO 2025-02-19 17:00:17,532] [0] Ed: 198400, train_loss: 1.26239, acc: 0.49323


INFO:root:[0] Ed: 201600, train_loss: 1.26211, acc: 0.49333


[INFO 2025-02-19 17:00:33,195] [0] Ed: 201600, train_loss: 1.26211, acc: 0.49333
[INFO 2025-02-19 17:00:33,195] [0] Ed: 201600, train_loss: 1.26211, acc: 0.49333


INFO:root:[0] Ed: 204800, train_loss: 1.26221, acc: 0.49328


[INFO 2025-02-19 17:00:48,713] [0] Ed: 204800, train_loss: 1.26221, acc: 0.49328
[INFO 2025-02-19 17:00:48,713] [0] Ed: 204800, train_loss: 1.26221, acc: 0.49328


INFO:root:[0] Ed: 208000, train_loss: 1.26232, acc: 0.49324


[INFO 2025-02-19 17:01:04,210] [0] Ed: 208000, train_loss: 1.26232, acc: 0.49324
[INFO 2025-02-19 17:01:04,210] [0] Ed: 208000, train_loss: 1.26232, acc: 0.49324


INFO:root:[0] Ed: 211200, train_loss: 1.26228, acc: 0.49316


[INFO 2025-02-19 17:01:19,581] [0] Ed: 211200, train_loss: 1.26228, acc: 0.49316
[INFO 2025-02-19 17:01:19,581] [0] Ed: 211200, train_loss: 1.26228, acc: 0.49316


INFO:root:[0] Ed: 214400, train_loss: 1.26219, acc: 0.49319


[INFO 2025-02-19 17:01:35,356] [0] Ed: 214400, train_loss: 1.26219, acc: 0.49319
[INFO 2025-02-19 17:01:35,356] [0] Ed: 214400, train_loss: 1.26219, acc: 0.49319


INFO:root:[0] Ed: 217600, train_loss: 1.26200, acc: 0.49303


[INFO 2025-02-19 17:01:51,452] [0] Ed: 217600, train_loss: 1.26200, acc: 0.49303
[INFO 2025-02-19 17:01:51,452] [0] Ed: 217600, train_loss: 1.26200, acc: 0.49303


INFO:root:[0] Ed: 220800, train_loss: 1.26179, acc: 0.49301


[INFO 2025-02-19 17:02:06,896] [0] Ed: 220800, train_loss: 1.26179, acc: 0.49301
[INFO 2025-02-19 17:02:06,896] [0] Ed: 220800, train_loss: 1.26179, acc: 0.49301


INFO:root:[0] Ed: 224000, train_loss: 1.26132, acc: 0.49322


[INFO 2025-02-19 17:02:22,385] [0] Ed: 224000, train_loss: 1.26132, acc: 0.49322
[INFO 2025-02-19 17:02:22,385] [0] Ed: 224000, train_loss: 1.26132, acc: 0.49322


INFO:root:[0] Ed: 227200, train_loss: 1.26100, acc: 0.49339


[INFO 2025-02-19 17:02:37,716] [0] Ed: 227200, train_loss: 1.26100, acc: 0.49339
[INFO 2025-02-19 17:02:37,716] [0] Ed: 227200, train_loss: 1.26100, acc: 0.49339


INFO:root:[0] Ed: 230400, train_loss: 1.26060, acc: 0.49352


[INFO 2025-02-19 17:02:53,455] [0] Ed: 230400, train_loss: 1.26060, acc: 0.49352
[INFO 2025-02-19 17:02:53,455] [0] Ed: 230400, train_loss: 1.26060, acc: 0.49352


INFO:root:[0] Ed: 233600, train_loss: 1.26016, acc: 0.49368


[INFO 2025-02-19 17:03:08,710] [0] Ed: 233600, train_loss: 1.26016, acc: 0.49368
[INFO 2025-02-19 17:03:08,710] [0] Ed: 233600, train_loss: 1.26016, acc: 0.49368


INFO:root:Training finish.


[INFO 2025-02-19 17:03:21,699] Training finish.
[INFO 2025-02-19 17:03:21,699] Training finish.


INFO:root:Model saved to /content/model/epoch-3.pt.


[INFO 2025-02-19 17:03:21,745] Model saved to /content/model/epoch-3.pt.
[INFO 2025-02-19 17:03:21,745] Model saved to /content/model/epoch-3.pt.


INFO:root:[0] Ed: 0, train_loss: inf, acc: inf


[INFO 2025-02-19 17:03:21,909] [0] Ed: 0, train_loss: inf, acc: inf
[INFO 2025-02-19 17:03:21,909] [0] Ed: 0, train_loss: inf, acc: inf


INFO:root:[0] Ed: 3200, train_loss: 1.26644, acc: 0.50313


[INFO 2025-02-19 17:03:36,959] [0] Ed: 3200, train_loss: 1.26644, acc: 0.50313
[INFO 2025-02-19 17:03:36,959] [0] Ed: 3200, train_loss: 1.26644, acc: 0.50313


INFO:root:[0] Ed: 6400, train_loss: 1.24762, acc: 0.50516


[INFO 2025-02-19 17:03:51,951] [0] Ed: 6400, train_loss: 1.24762, acc: 0.50516
[INFO 2025-02-19 17:03:51,951] [0] Ed: 6400, train_loss: 1.24762, acc: 0.50516


INFO:root:[0] Ed: 9600, train_loss: 1.24751, acc: 0.50458


[INFO 2025-02-19 17:04:07,163] [0] Ed: 9600, train_loss: 1.24751, acc: 0.50458
[INFO 2025-02-19 17:04:07,163] [0] Ed: 9600, train_loss: 1.24751, acc: 0.50458


INFO:root:[0] Ed: 12800, train_loss: 1.24455, acc: 0.50648


[INFO 2025-02-19 17:04:23,017] [0] Ed: 12800, train_loss: 1.24455, acc: 0.50648
[INFO 2025-02-19 17:04:23,017] [0] Ed: 12800, train_loss: 1.24455, acc: 0.50648


INFO:root:[0] Ed: 16000, train_loss: 1.24895, acc: 0.50144


[INFO 2025-02-19 17:04:38,101] [0] Ed: 16000, train_loss: 1.24895, acc: 0.50144
[INFO 2025-02-19 17:04:38,101] [0] Ed: 16000, train_loss: 1.24895, acc: 0.50144


INFO:root:[0] Ed: 19200, train_loss: 1.24942, acc: 0.49906


[INFO 2025-02-19 17:04:52,984] [0] Ed: 19200, train_loss: 1.24942, acc: 0.49906
[INFO 2025-02-19 17:04:52,984] [0] Ed: 19200, train_loss: 1.24942, acc: 0.49906


INFO:root:[0] Ed: 22400, train_loss: 1.24793, acc: 0.49897


[INFO 2025-02-19 17:05:07,949] [0] Ed: 22400, train_loss: 1.24793, acc: 0.49897
[INFO 2025-02-19 17:05:07,949] [0] Ed: 22400, train_loss: 1.24793, acc: 0.49897


INFO:root:[0] Ed: 25600, train_loss: 1.24553, acc: 0.50102


[INFO 2025-02-19 17:05:22,884] [0] Ed: 25600, train_loss: 1.24553, acc: 0.50102
[INFO 2025-02-19 17:05:22,884] [0] Ed: 25600, train_loss: 1.24553, acc: 0.50102


INFO:root:[0] Ed: 28800, train_loss: 1.24538, acc: 0.50181


[INFO 2025-02-19 17:05:38,501] [0] Ed: 28800, train_loss: 1.24538, acc: 0.50181
[INFO 2025-02-19 17:05:38,501] [0] Ed: 28800, train_loss: 1.24538, acc: 0.50181


INFO:root:[0] Ed: 32000, train_loss: 1.24612, acc: 0.50084


[INFO 2025-02-19 17:05:53,700] [0] Ed: 32000, train_loss: 1.24612, acc: 0.50084
[INFO 2025-02-19 17:05:53,700] [0] Ed: 32000, train_loss: 1.24612, acc: 0.50084


INFO:root:[0] Ed: 35200, train_loss: 1.24773, acc: 0.50014


[INFO 2025-02-19 17:06:08,686] [0] Ed: 35200, train_loss: 1.24773, acc: 0.50014
[INFO 2025-02-19 17:06:08,686] [0] Ed: 35200, train_loss: 1.24773, acc: 0.50014


INFO:root:[0] Ed: 38400, train_loss: 1.24617, acc: 0.50104


[INFO 2025-02-19 17:06:23,811] [0] Ed: 38400, train_loss: 1.24617, acc: 0.50104
[INFO 2025-02-19 17:06:23,811] [0] Ed: 38400, train_loss: 1.24617, acc: 0.50104


INFO:root:[0] Ed: 41600, train_loss: 1.24382, acc: 0.50221


[INFO 2025-02-19 17:06:38,799] [0] Ed: 41600, train_loss: 1.24382, acc: 0.50221
[INFO 2025-02-19 17:06:38,799] [0] Ed: 41600, train_loss: 1.24382, acc: 0.50221


INFO:root:[0] Ed: 44800, train_loss: 1.24292, acc: 0.50259


[INFO 2025-02-19 17:06:54,343] [0] Ed: 44800, train_loss: 1.24292, acc: 0.50259
[INFO 2025-02-19 17:06:54,343] [0] Ed: 44800, train_loss: 1.24292, acc: 0.50259


INFO:root:[0] Ed: 48000, train_loss: 1.24238, acc: 0.50233


[INFO 2025-02-19 17:07:09,461] [0] Ed: 48000, train_loss: 1.24238, acc: 0.50233
[INFO 2025-02-19 17:07:09,461] [0] Ed: 48000, train_loss: 1.24238, acc: 0.50233


INFO:root:[0] Ed: 51200, train_loss: 1.24184, acc: 0.50264


[INFO 2025-02-19 17:07:24,527] [0] Ed: 51200, train_loss: 1.24184, acc: 0.50264
[INFO 2025-02-19 17:07:24,527] [0] Ed: 51200, train_loss: 1.24184, acc: 0.50264


INFO:root:[0] Ed: 54400, train_loss: 1.24123, acc: 0.50301


[INFO 2025-02-19 17:07:39,757] [0] Ed: 54400, train_loss: 1.24123, acc: 0.50301
[INFO 2025-02-19 17:07:39,757] [0] Ed: 54400, train_loss: 1.24123, acc: 0.50301


INFO:root:[0] Ed: 57600, train_loss: 1.24106, acc: 0.50314


[INFO 2025-02-19 17:07:55,114] [0] Ed: 57600, train_loss: 1.24106, acc: 0.50314
[INFO 2025-02-19 17:07:55,114] [0] Ed: 57600, train_loss: 1.24106, acc: 0.50314


INFO:root:[0] Ed: 60800, train_loss: 1.24124, acc: 0.50265


[INFO 2025-02-19 17:08:11,114] [0] Ed: 60800, train_loss: 1.24124, acc: 0.50265
[INFO 2025-02-19 17:08:11,114] [0] Ed: 60800, train_loss: 1.24124, acc: 0.50265


INFO:root:[0] Ed: 64000, train_loss: 1.24027, acc: 0.50298


[INFO 2025-02-19 17:08:26,379] [0] Ed: 64000, train_loss: 1.24027, acc: 0.50298
[INFO 2025-02-19 17:08:26,379] [0] Ed: 64000, train_loss: 1.24027, acc: 0.50298


INFO:root:[0] Ed: 67200, train_loss: 1.23957, acc: 0.50339


[INFO 2025-02-19 17:08:41,593] [0] Ed: 67200, train_loss: 1.23957, acc: 0.50339
[INFO 2025-02-19 17:08:41,593] [0] Ed: 67200, train_loss: 1.23957, acc: 0.50339


INFO:root:[0] Ed: 70400, train_loss: 1.23880, acc: 0.50361


[INFO 2025-02-19 17:08:56,810] [0] Ed: 70400, train_loss: 1.23880, acc: 0.50361
[INFO 2025-02-19 17:08:56,810] [0] Ed: 70400, train_loss: 1.23880, acc: 0.50361


INFO:root:[0] Ed: 73600, train_loss: 1.23942, acc: 0.50306


[INFO 2025-02-19 17:09:12,255] [0] Ed: 73600, train_loss: 1.23942, acc: 0.50306
[INFO 2025-02-19 17:09:12,255] [0] Ed: 73600, train_loss: 1.23942, acc: 0.50306


INFO:root:[0] Ed: 76800, train_loss: 1.23848, acc: 0.50357


[INFO 2025-02-19 17:09:27,795] [0] Ed: 76800, train_loss: 1.23848, acc: 0.50357
[INFO 2025-02-19 17:09:27,795] [0] Ed: 76800, train_loss: 1.23848, acc: 0.50357


INFO:root:[0] Ed: 80000, train_loss: 1.23902, acc: 0.50335


[INFO 2025-02-19 17:09:43,060] [0] Ed: 80000, train_loss: 1.23902, acc: 0.50335
[INFO 2025-02-19 17:09:43,060] [0] Ed: 80000, train_loss: 1.23902, acc: 0.50335


INFO:root:[0] Ed: 83200, train_loss: 1.23830, acc: 0.50399


[INFO 2025-02-19 17:09:58,024] [0] Ed: 83200, train_loss: 1.23830, acc: 0.50399
[INFO 2025-02-19 17:09:58,024] [0] Ed: 83200, train_loss: 1.23830, acc: 0.50399


INFO:root:[0] Ed: 86400, train_loss: 1.23731, acc: 0.50448


[INFO 2025-02-19 17:10:12,842] [0] Ed: 86400, train_loss: 1.23731, acc: 0.50448
[INFO 2025-02-19 17:10:12,842] [0] Ed: 86400, train_loss: 1.23731, acc: 0.50448


INFO:root:[0] Ed: 89600, train_loss: 1.23602, acc: 0.50525


[INFO 2025-02-19 17:10:28,033] [0] Ed: 89600, train_loss: 1.23602, acc: 0.50525
[INFO 2025-02-19 17:10:28,033] [0] Ed: 89600, train_loss: 1.23602, acc: 0.50525


INFO:root:[0] Ed: 92800, train_loss: 1.23652, acc: 0.50494


[INFO 2025-02-19 17:10:43,497] [0] Ed: 92800, train_loss: 1.23652, acc: 0.50494
[INFO 2025-02-19 17:10:43,497] [0] Ed: 92800, train_loss: 1.23652, acc: 0.50494


INFO:root:[0] Ed: 96000, train_loss: 1.23615, acc: 0.50517


[INFO 2025-02-19 17:10:58,467] [0] Ed: 96000, train_loss: 1.23615, acc: 0.50517
[INFO 2025-02-19 17:10:58,467] [0] Ed: 96000, train_loss: 1.23615, acc: 0.50517


INFO:root:[0] Ed: 99200, train_loss: 1.23594, acc: 0.50529


[INFO 2025-02-19 17:11:13,331] [0] Ed: 99200, train_loss: 1.23594, acc: 0.50529
[INFO 2025-02-19 17:11:13,331] [0] Ed: 99200, train_loss: 1.23594, acc: 0.50529


INFO:root:[0] Ed: 102400, train_loss: 1.23477, acc: 0.50557


[INFO 2025-02-19 17:11:28,330] [0] Ed: 102400, train_loss: 1.23477, acc: 0.50557
[INFO 2025-02-19 17:11:28,330] [0] Ed: 102400, train_loss: 1.23477, acc: 0.50557


INFO:root:[0] Ed: 105600, train_loss: 1.23377, acc: 0.50599


[INFO 2025-02-19 17:11:43,242] [0] Ed: 105600, train_loss: 1.23377, acc: 0.50599
[INFO 2025-02-19 17:11:43,242] [0] Ed: 105600, train_loss: 1.23377, acc: 0.50599


INFO:root:[0] Ed: 108800, train_loss: 1.23373, acc: 0.50599


[INFO 2025-02-19 17:11:58,935] [0] Ed: 108800, train_loss: 1.23373, acc: 0.50599
[INFO 2025-02-19 17:11:58,935] [0] Ed: 108800, train_loss: 1.23373, acc: 0.50599


INFO:root:[0] Ed: 112000, train_loss: 1.23371, acc: 0.50621


[INFO 2025-02-19 17:12:14,010] [0] Ed: 112000, train_loss: 1.23371, acc: 0.50621
[INFO 2025-02-19 17:12:14,010] [0] Ed: 112000, train_loss: 1.23371, acc: 0.50621


INFO:root:[0] Ed: 115200, train_loss: 1.23334, acc: 0.50626


[INFO 2025-02-19 17:12:29,014] [0] Ed: 115200, train_loss: 1.23334, acc: 0.50626
[INFO 2025-02-19 17:12:29,014] [0] Ed: 115200, train_loss: 1.23334, acc: 0.50626


INFO:root:[0] Ed: 118400, train_loss: 1.23214, acc: 0.50676


[INFO 2025-02-19 17:12:44,007] [0] Ed: 118400, train_loss: 1.23214, acc: 0.50676
[INFO 2025-02-19 17:12:44,007] [0] Ed: 118400, train_loss: 1.23214, acc: 0.50676


INFO:root:[0] Ed: 121600, train_loss: 1.23187, acc: 0.50687


[INFO 2025-02-19 17:12:59,232] [0] Ed: 121600, train_loss: 1.23187, acc: 0.50687
[INFO 2025-02-19 17:12:59,232] [0] Ed: 121600, train_loss: 1.23187, acc: 0.50687


INFO:root:[0] Ed: 124800, train_loss: 1.23180, acc: 0.50693


[INFO 2025-02-19 17:13:15,117] [0] Ed: 124800, train_loss: 1.23180, acc: 0.50693
[INFO 2025-02-19 17:13:15,117] [0] Ed: 124800, train_loss: 1.23180, acc: 0.50693


INFO:root:[0] Ed: 128000, train_loss: 1.23215, acc: 0.50689


[INFO 2025-02-19 17:13:30,090] [0] Ed: 128000, train_loss: 1.23215, acc: 0.50689
[INFO 2025-02-19 17:13:30,090] [0] Ed: 128000, train_loss: 1.23215, acc: 0.50689


INFO:root:[0] Ed: 131200, train_loss: 1.23171, acc: 0.50727


[INFO 2025-02-19 17:13:44,955] [0] Ed: 131200, train_loss: 1.23171, acc: 0.50727
[INFO 2025-02-19 17:13:44,955] [0] Ed: 131200, train_loss: 1.23171, acc: 0.50727


INFO:root:[0] Ed: 134400, train_loss: 1.23182, acc: 0.50716


[INFO 2025-02-19 17:14:00,026] [0] Ed: 134400, train_loss: 1.23182, acc: 0.50716
[INFO 2025-02-19 17:14:00,026] [0] Ed: 134400, train_loss: 1.23182, acc: 0.50716


INFO:root:[0] Ed: 137600, train_loss: 1.23173, acc: 0.50734


[INFO 2025-02-19 17:14:14,984] [0] Ed: 137600, train_loss: 1.23173, acc: 0.50734
[INFO 2025-02-19 17:14:14,984] [0] Ed: 137600, train_loss: 1.23173, acc: 0.50734


INFO:root:[0] Ed: 140800, train_loss: 1.23120, acc: 0.50763


[INFO 2025-02-19 17:14:30,646] [0] Ed: 140800, train_loss: 1.23120, acc: 0.50763
[INFO 2025-02-19 17:14:30,646] [0] Ed: 140800, train_loss: 1.23120, acc: 0.50763


INFO:root:[0] Ed: 144000, train_loss: 1.23106, acc: 0.50770


[INFO 2025-02-19 17:14:45,585] [0] Ed: 144000, train_loss: 1.23106, acc: 0.50770
[INFO 2025-02-19 17:14:45,585] [0] Ed: 144000, train_loss: 1.23106, acc: 0.50770


INFO:root:[0] Ed: 147200, train_loss: 1.23076, acc: 0.50793


[INFO 2025-02-19 17:15:00,487] [0] Ed: 147200, train_loss: 1.23076, acc: 0.50793
[INFO 2025-02-19 17:15:00,487] [0] Ed: 147200, train_loss: 1.23076, acc: 0.50793


INFO:root:[0] Ed: 150400, train_loss: 1.23010, acc: 0.50830


[INFO 2025-02-19 17:15:15,364] [0] Ed: 150400, train_loss: 1.23010, acc: 0.50830
[INFO 2025-02-19 17:15:15,364] [0] Ed: 150400, train_loss: 1.23010, acc: 0.50830


INFO:root:[0] Ed: 153600, train_loss: 1.22971, acc: 0.50840


[INFO 2025-02-19 17:15:30,549] [0] Ed: 153600, train_loss: 1.22971, acc: 0.50840
[INFO 2025-02-19 17:15:30,549] [0] Ed: 153600, train_loss: 1.22971, acc: 0.50840


INFO:root:[0] Ed: 156800, train_loss: 1.22921, acc: 0.50844


[INFO 2025-02-19 17:15:45,819] [0] Ed: 156800, train_loss: 1.22921, acc: 0.50844
[INFO 2025-02-19 17:15:45,819] [0] Ed: 156800, train_loss: 1.22921, acc: 0.50844


INFO:root:[0] Ed: 160000, train_loss: 1.22872, acc: 0.50892


[INFO 2025-02-19 17:16:01,035] [0] Ed: 160000, train_loss: 1.22872, acc: 0.50892
[INFO 2025-02-19 17:16:01,035] [0] Ed: 160000, train_loss: 1.22872, acc: 0.50892


INFO:root:[0] Ed: 163200, train_loss: 1.22856, acc: 0.50893


[INFO 2025-02-19 17:16:16,181] [0] Ed: 163200, train_loss: 1.22856, acc: 0.50893
[INFO 2025-02-19 17:16:16,181] [0] Ed: 163200, train_loss: 1.22856, acc: 0.50893


INFO:root:[0] Ed: 166400, train_loss: 1.22870, acc: 0.50873


[INFO 2025-02-19 17:16:31,101] [0] Ed: 166400, train_loss: 1.22870, acc: 0.50873
[INFO 2025-02-19 17:16:31,101] [0] Ed: 166400, train_loss: 1.22870, acc: 0.50873


INFO:root:[0] Ed: 169600, train_loss: 1.22892, acc: 0.50857


[INFO 2025-02-19 17:16:46,105] [0] Ed: 169600, train_loss: 1.22892, acc: 0.50857
[INFO 2025-02-19 17:16:46,105] [0] Ed: 169600, train_loss: 1.22892, acc: 0.50857


INFO:root:[0] Ed: 172800, train_loss: 1.22835, acc: 0.50891


[INFO 2025-02-19 17:17:01,678] [0] Ed: 172800, train_loss: 1.22835, acc: 0.50891
[INFO 2025-02-19 17:17:01,678] [0] Ed: 172800, train_loss: 1.22835, acc: 0.50891


INFO:root:[0] Ed: 176000, train_loss: 1.22819, acc: 0.50915


[INFO 2025-02-19 17:17:16,927] [0] Ed: 176000, train_loss: 1.22819, acc: 0.50915
[INFO 2025-02-19 17:17:16,927] [0] Ed: 176000, train_loss: 1.22819, acc: 0.50915


INFO:root:[0] Ed: 179200, train_loss: 1.22792, acc: 0.50942


[INFO 2025-02-19 17:17:31,842] [0] Ed: 179200, train_loss: 1.22792, acc: 0.50942
[INFO 2025-02-19 17:17:31,842] [0] Ed: 179200, train_loss: 1.22792, acc: 0.50942


INFO:root:[0] Ed: 182400, train_loss: 1.22794, acc: 0.50950


[INFO 2025-02-19 17:17:46,670] [0] Ed: 182400, train_loss: 1.22794, acc: 0.50950
[INFO 2025-02-19 17:17:46,670] [0] Ed: 182400, train_loss: 1.22794, acc: 0.50950


INFO:root:[0] Ed: 185600, train_loss: 1.22770, acc: 0.50947


[INFO 2025-02-19 17:18:01,566] [0] Ed: 185600, train_loss: 1.22770, acc: 0.50947
[INFO 2025-02-19 17:18:01,566] [0] Ed: 185600, train_loss: 1.22770, acc: 0.50947


INFO:root:[0] Ed: 188800, train_loss: 1.22738, acc: 0.50977


[INFO 2025-02-19 17:18:16,529] [0] Ed: 188800, train_loss: 1.22738, acc: 0.50977
[INFO 2025-02-19 17:18:16,529] [0] Ed: 188800, train_loss: 1.22738, acc: 0.50977


INFO:root:[0] Ed: 192000, train_loss: 1.22758, acc: 0.50964


[INFO 2025-02-19 17:18:32,093] [0] Ed: 192000, train_loss: 1.22758, acc: 0.50964
[INFO 2025-02-19 17:18:32,093] [0] Ed: 192000, train_loss: 1.22758, acc: 0.50964


INFO:root:[0] Ed: 195200, train_loss: 1.22746, acc: 0.50989


[INFO 2025-02-19 17:18:47,001] [0] Ed: 195200, train_loss: 1.22746, acc: 0.50989
[INFO 2025-02-19 17:18:47,001] [0] Ed: 195200, train_loss: 1.22746, acc: 0.50989


INFO:root:[0] Ed: 198400, train_loss: 1.22747, acc: 0.50992


[INFO 2025-02-19 17:19:01,874] [0] Ed: 198400, train_loss: 1.22747, acc: 0.50992
[INFO 2025-02-19 17:19:01,874] [0] Ed: 198400, train_loss: 1.22747, acc: 0.50992


INFO:root:[0] Ed: 201600, train_loss: 1.22720, acc: 0.51000


[INFO 2025-02-19 17:19:16,738] [0] Ed: 201600, train_loss: 1.22720, acc: 0.51000
[INFO 2025-02-19 17:19:16,738] [0] Ed: 201600, train_loss: 1.22720, acc: 0.51000


INFO:root:[0] Ed: 204800, train_loss: 1.22720, acc: 0.51000


[INFO 2025-02-19 17:19:31,642] [0] Ed: 204800, train_loss: 1.22720, acc: 0.51000
[INFO 2025-02-19 17:19:31,642] [0] Ed: 204800, train_loss: 1.22720, acc: 0.51000


INFO:root:[0] Ed: 208000, train_loss: 1.22743, acc: 0.50994


[INFO 2025-02-19 17:19:47,221] [0] Ed: 208000, train_loss: 1.22743, acc: 0.50994
[INFO 2025-02-19 17:19:47,221] [0] Ed: 208000, train_loss: 1.22743, acc: 0.50994


INFO:root:[0] Ed: 211200, train_loss: 1.22744, acc: 0.50997


[INFO 2025-02-19 17:20:02,169] [0] Ed: 211200, train_loss: 1.22744, acc: 0.50997
[INFO 2025-02-19 17:20:02,169] [0] Ed: 211200, train_loss: 1.22744, acc: 0.50997


INFO:root:[0] Ed: 214400, train_loss: 1.22732, acc: 0.50990


[INFO 2025-02-19 17:20:16,994] [0] Ed: 214400, train_loss: 1.22732, acc: 0.50990
[INFO 2025-02-19 17:20:16,994] [0] Ed: 214400, train_loss: 1.22732, acc: 0.50990


INFO:root:[0] Ed: 217600, train_loss: 1.22713, acc: 0.50991


[INFO 2025-02-19 17:20:31,940] [0] Ed: 217600, train_loss: 1.22713, acc: 0.50991
[INFO 2025-02-19 17:20:31,940] [0] Ed: 217600, train_loss: 1.22713, acc: 0.50991


INFO:root:[0] Ed: 220800, train_loss: 1.22686, acc: 0.50998


[INFO 2025-02-19 17:20:46,768] [0] Ed: 220800, train_loss: 1.22686, acc: 0.50998
[INFO 2025-02-19 17:20:46,768] [0] Ed: 220800, train_loss: 1.22686, acc: 0.50998


INFO:root:[0] Ed: 224000, train_loss: 1.22650, acc: 0.51019


[INFO 2025-02-19 17:21:02,092] [0] Ed: 224000, train_loss: 1.22650, acc: 0.51019
[INFO 2025-02-19 17:21:02,092] [0] Ed: 224000, train_loss: 1.22650, acc: 0.51019


INFO:root:[0] Ed: 227200, train_loss: 1.22620, acc: 0.51029


[INFO 2025-02-19 17:21:17,550] [0] Ed: 227200, train_loss: 1.22620, acc: 0.51029
[INFO 2025-02-19 17:21:17,550] [0] Ed: 227200, train_loss: 1.22620, acc: 0.51029


INFO:root:[0] Ed: 230400, train_loss: 1.22585, acc: 0.51039


[INFO 2025-02-19 17:21:32,500] [0] Ed: 230400, train_loss: 1.22585, acc: 0.51039
[INFO 2025-02-19 17:21:32,500] [0] Ed: 230400, train_loss: 1.22585, acc: 0.51039


INFO:root:[0] Ed: 233600, train_loss: 1.22538, acc: 0.51052


[INFO 2025-02-19 17:21:47,387] [0] Ed: 233600, train_loss: 1.22538, acc: 0.51052
[INFO 2025-02-19 17:21:47,387] [0] Ed: 233600, train_loss: 1.22538, acc: 0.51052


INFO:root:Training finish.


[INFO 2025-02-19 17:22:00,200] Training finish.
[INFO 2025-02-19 17:22:00,200] Training finish.


INFO:root:Model saved to /content/model/epoch-4.pt.


[INFO 2025-02-19 17:22:00,247] Model saved to /content/model/epoch-4.pt.
[INFO 2025-02-19 17:22:00,247] Model saved to /content/model/epoch-4.pt.


INFO:root:[0] Ed: 0, train_loss: inf, acc: inf


[INFO 2025-02-19 17:22:00,425] [0] Ed: 0, train_loss: inf, acc: inf
[INFO 2025-02-19 17:22:00,425] [0] Ed: 0, train_loss: inf, acc: inf


INFO:root:[0] Ed: 3200, train_loss: 1.23119, acc: 0.52812


[INFO 2025-02-19 17:22:15,312] [0] Ed: 3200, train_loss: 1.23119, acc: 0.52812
[INFO 2025-02-19 17:22:15,312] [0] Ed: 3200, train_loss: 1.23119, acc: 0.52812


INFO:root:[0] Ed: 6400, train_loss: 1.21153, acc: 0.52531


[INFO 2025-02-19 17:22:30,750] [0] Ed: 6400, train_loss: 1.21153, acc: 0.52531
[INFO 2025-02-19 17:22:30,750] [0] Ed: 6400, train_loss: 1.21153, acc: 0.52531


INFO:root:[0] Ed: 9600, train_loss: 1.21444, acc: 0.52219


[INFO 2025-02-19 17:22:45,914] [0] Ed: 9600, train_loss: 1.21444, acc: 0.52219
[INFO 2025-02-19 17:22:45,914] [0] Ed: 9600, train_loss: 1.21444, acc: 0.52219


INFO:root:[0] Ed: 12800, train_loss: 1.21100, acc: 0.52508


[INFO 2025-02-19 17:23:00,863] [0] Ed: 12800, train_loss: 1.21100, acc: 0.52508
[INFO 2025-02-19 17:23:00,863] [0] Ed: 12800, train_loss: 1.21100, acc: 0.52508


INFO:root:[0] Ed: 16000, train_loss: 1.21595, acc: 0.51988


[INFO 2025-02-19 17:23:15,835] [0] Ed: 16000, train_loss: 1.21595, acc: 0.51988
[INFO 2025-02-19 17:23:15,835] [0] Ed: 16000, train_loss: 1.21595, acc: 0.51988


INFO:root:[0] Ed: 19200, train_loss: 1.21589, acc: 0.51854


[INFO 2025-02-19 17:23:30,800] [0] Ed: 19200, train_loss: 1.21589, acc: 0.51854
[INFO 2025-02-19 17:23:30,800] [0] Ed: 19200, train_loss: 1.21589, acc: 0.51854


INFO:root:[0] Ed: 22400, train_loss: 1.21387, acc: 0.51929


[INFO 2025-02-19 17:23:45,901] [0] Ed: 22400, train_loss: 1.21387, acc: 0.51929
[INFO 2025-02-19 17:23:45,901] [0] Ed: 22400, train_loss: 1.21387, acc: 0.51929


INFO:root:[0] Ed: 25600, train_loss: 1.21050, acc: 0.52121


[INFO 2025-02-19 17:24:01,408] [0] Ed: 25600, train_loss: 1.21050, acc: 0.52121
[INFO 2025-02-19 17:24:01,408] [0] Ed: 25600, train_loss: 1.21050, acc: 0.52121


INFO:root:[0] Ed: 28800, train_loss: 1.21128, acc: 0.52028


[INFO 2025-02-19 17:24:16,306] [0] Ed: 28800, train_loss: 1.21128, acc: 0.52028
[INFO 2025-02-19 17:24:16,306] [0] Ed: 28800, train_loss: 1.21128, acc: 0.52028


INFO:root:[0] Ed: 32000, train_loss: 1.21166, acc: 0.52003


[INFO 2025-02-19 17:24:31,253] [0] Ed: 32000, train_loss: 1.21166, acc: 0.52003
[INFO 2025-02-19 17:24:31,253] [0] Ed: 32000, train_loss: 1.21166, acc: 0.52003


INFO:root:[0] Ed: 35200, train_loss: 1.21381, acc: 0.51778


[INFO 2025-02-19 17:24:46,097] [0] Ed: 35200, train_loss: 1.21381, acc: 0.51778
[INFO 2025-02-19 17:24:46,097] [0] Ed: 35200, train_loss: 1.21381, acc: 0.51778


INFO:root:[0] Ed: 38400, train_loss: 1.21253, acc: 0.51766


[INFO 2025-02-19 17:25:00,994] [0] Ed: 38400, train_loss: 1.21253, acc: 0.51766
[INFO 2025-02-19 17:25:00,994] [0] Ed: 38400, train_loss: 1.21253, acc: 0.51766


INFO:root:[0] Ed: 41600, train_loss: 1.21048, acc: 0.51820


[INFO 2025-02-19 17:25:16,636] [0] Ed: 41600, train_loss: 1.21048, acc: 0.51820
[INFO 2025-02-19 17:25:16,636] [0] Ed: 41600, train_loss: 1.21048, acc: 0.51820


INFO:root:[0] Ed: 44800, train_loss: 1.21021, acc: 0.51839


[INFO 2025-02-19 17:25:31,596] [0] Ed: 44800, train_loss: 1.21021, acc: 0.51839
[INFO 2025-02-19 17:25:31,596] [0] Ed: 44800, train_loss: 1.21021, acc: 0.51839


INFO:root:[0] Ed: 48000, train_loss: 1.21024, acc: 0.51871


[INFO 2025-02-19 17:25:46,588] [0] Ed: 48000, train_loss: 1.21024, acc: 0.51871
[INFO 2025-02-19 17:25:46,588] [0] Ed: 48000, train_loss: 1.21024, acc: 0.51871


INFO:root:[0] Ed: 51200, train_loss: 1.20972, acc: 0.51861


[INFO 2025-02-19 17:26:01,478] [0] Ed: 51200, train_loss: 1.20972, acc: 0.51861
[INFO 2025-02-19 17:26:01,478] [0] Ed: 51200, train_loss: 1.20972, acc: 0.51861


INFO:root:[0] Ed: 54400, train_loss: 1.20943, acc: 0.51877


[INFO 2025-02-19 17:26:16,361] [0] Ed: 54400, train_loss: 1.20943, acc: 0.51877
[INFO 2025-02-19 17:26:16,361] [0] Ed: 54400, train_loss: 1.20943, acc: 0.51877


INFO:root:[0] Ed: 57600, train_loss: 1.20900, acc: 0.51932


[INFO 2025-02-19 17:26:31,986] [0] Ed: 57600, train_loss: 1.20900, acc: 0.51932
[INFO 2025-02-19 17:26:31,986] [0] Ed: 57600, train_loss: 1.20900, acc: 0.51932


INFO:root:[0] Ed: 60800, train_loss: 1.20897, acc: 0.51936


[INFO 2025-02-19 17:26:47,081] [0] Ed: 60800, train_loss: 1.20897, acc: 0.51936
[INFO 2025-02-19 17:26:47,081] [0] Ed: 60800, train_loss: 1.20897, acc: 0.51936


INFO:root:[0] Ed: 64000, train_loss: 1.20804, acc: 0.52006


[INFO 2025-02-19 17:27:02,390] [0] Ed: 64000, train_loss: 1.20804, acc: 0.52006
[INFO 2025-02-19 17:27:02,390] [0] Ed: 64000, train_loss: 1.20804, acc: 0.52006


INFO:root:[0] Ed: 67200, train_loss: 1.20742, acc: 0.52040


[INFO 2025-02-19 17:27:17,405] [0] Ed: 67200, train_loss: 1.20742, acc: 0.52040
[INFO 2025-02-19 17:27:17,405] [0] Ed: 67200, train_loss: 1.20742, acc: 0.52040


INFO:root:[0] Ed: 70400, train_loss: 1.20670, acc: 0.52050


[INFO 2025-02-19 17:27:32,459] [0] Ed: 70400, train_loss: 1.20670, acc: 0.52050
[INFO 2025-02-19 17:27:32,459] [0] Ed: 70400, train_loss: 1.20670, acc: 0.52050


INFO:root:[0] Ed: 73600, train_loss: 1.20752, acc: 0.52004


[INFO 2025-02-19 17:27:48,119] [0] Ed: 73600, train_loss: 1.20752, acc: 0.52004
[INFO 2025-02-19 17:27:48,119] [0] Ed: 73600, train_loss: 1.20752, acc: 0.52004


INFO:root:[0] Ed: 76800, train_loss: 1.20669, acc: 0.52053


[INFO 2025-02-19 17:28:03,113] [0] Ed: 76800, train_loss: 1.20669, acc: 0.52053
[INFO 2025-02-19 17:28:03,113] [0] Ed: 76800, train_loss: 1.20669, acc: 0.52053


INFO:root:[0] Ed: 80000, train_loss: 1.20699, acc: 0.52046


[INFO 2025-02-19 17:28:18,022] [0] Ed: 80000, train_loss: 1.20699, acc: 0.52046
[INFO 2025-02-19 17:28:18,022] [0] Ed: 80000, train_loss: 1.20699, acc: 0.52046


INFO:root:[0] Ed: 83200, train_loss: 1.20622, acc: 0.52067


[INFO 2025-02-19 17:28:32,938] [0] Ed: 83200, train_loss: 1.20622, acc: 0.52067
[INFO 2025-02-19 17:28:32,938] [0] Ed: 83200, train_loss: 1.20622, acc: 0.52067


INFO:root:[0] Ed: 86400, train_loss: 1.20532, acc: 0.52095


[INFO 2025-02-19 17:28:47,852] [0] Ed: 86400, train_loss: 1.20532, acc: 0.52095
[INFO 2025-02-19 17:28:47,852] [0] Ed: 86400, train_loss: 1.20532, acc: 0.52095


INFO:root:[0] Ed: 89600, train_loss: 1.20433, acc: 0.52166


[INFO 2025-02-19 17:29:03,245] [0] Ed: 89600, train_loss: 1.20433, acc: 0.52166
[INFO 2025-02-19 17:29:03,245] [0] Ed: 89600, train_loss: 1.20433, acc: 0.52166


INFO:root:[0] Ed: 92800, train_loss: 1.20446, acc: 0.52143


[INFO 2025-02-19 17:29:18,482] [0] Ed: 92800, train_loss: 1.20446, acc: 0.52143
[INFO 2025-02-19 17:29:18,482] [0] Ed: 92800, train_loss: 1.20446, acc: 0.52143


INFO:root:[0] Ed: 96000, train_loss: 1.20418, acc: 0.52138


[INFO 2025-02-19 17:29:33,513] [0] Ed: 96000, train_loss: 1.20418, acc: 0.52138
[INFO 2025-02-19 17:29:33,513] [0] Ed: 96000, train_loss: 1.20418, acc: 0.52138


INFO:root:[0] Ed: 99200, train_loss: 1.20422, acc: 0.52124


[INFO 2025-02-19 17:29:48,373] [0] Ed: 99200, train_loss: 1.20422, acc: 0.52124
[INFO 2025-02-19 17:29:48,373] [0] Ed: 99200, train_loss: 1.20422, acc: 0.52124


INFO:root:[0] Ed: 102400, train_loss: 1.20318, acc: 0.52143


[INFO 2025-02-19 17:30:03,454] [0] Ed: 102400, train_loss: 1.20318, acc: 0.52143
[INFO 2025-02-19 17:30:03,454] [0] Ed: 102400, train_loss: 1.20318, acc: 0.52143


INFO:root:[0] Ed: 105600, train_loss: 1.20185, acc: 0.52197


[INFO 2025-02-19 17:30:19,020] [0] Ed: 105600, train_loss: 1.20185, acc: 0.52197
[INFO 2025-02-19 17:30:19,020] [0] Ed: 105600, train_loss: 1.20185, acc: 0.52197


INFO:root:[0] Ed: 108800, train_loss: 1.20179, acc: 0.52204


[INFO 2025-02-19 17:30:34,448] [0] Ed: 108800, train_loss: 1.20179, acc: 0.52204
[INFO 2025-02-19 17:30:34,448] [0] Ed: 108800, train_loss: 1.20179, acc: 0.52204


INFO:root:[0] Ed: 112000, train_loss: 1.20210, acc: 0.52198


[INFO 2025-02-19 17:30:49,315] [0] Ed: 112000, train_loss: 1.20210, acc: 0.52198
[INFO 2025-02-19 17:30:49,315] [0] Ed: 112000, train_loss: 1.20210, acc: 0.52198


INFO:root:[0] Ed: 115200, train_loss: 1.20155, acc: 0.52215


[INFO 2025-02-19 17:31:04,245] [0] Ed: 115200, train_loss: 1.20155, acc: 0.52215
[INFO 2025-02-19 17:31:04,245] [0] Ed: 115200, train_loss: 1.20155, acc: 0.52215


INFO:root:[0] Ed: 118400, train_loss: 1.20034, acc: 0.52242


[INFO 2025-02-19 17:31:19,189] [0] Ed: 118400, train_loss: 1.20034, acc: 0.52242
[INFO 2025-02-19 17:31:19,189] [0] Ed: 118400, train_loss: 1.20034, acc: 0.52242


INFO:root:[0] Ed: 121600, train_loss: 1.20005, acc: 0.52266


[INFO 2025-02-19 17:31:34,417] [0] Ed: 121600, train_loss: 1.20005, acc: 0.52266
[INFO 2025-02-19 17:31:34,417] [0] Ed: 121600, train_loss: 1.20005, acc: 0.52266


INFO:root:[0] Ed: 124800, train_loss: 1.19994, acc: 0.52264


[INFO 2025-02-19 17:31:49,830] [0] Ed: 124800, train_loss: 1.19994, acc: 0.52264
[INFO 2025-02-19 17:31:49,830] [0] Ed: 124800, train_loss: 1.19994, acc: 0.52264


INFO:root:[0] Ed: 128000, train_loss: 1.20014, acc: 0.52255


[INFO 2025-02-19 17:32:04,732] [0] Ed: 128000, train_loss: 1.20014, acc: 0.52255
[INFO 2025-02-19 17:32:04,732] [0] Ed: 128000, train_loss: 1.20014, acc: 0.52255


INFO:root:[0] Ed: 131200, train_loss: 1.19971, acc: 0.52267


[INFO 2025-02-19 17:32:19,813] [0] Ed: 131200, train_loss: 1.19971, acc: 0.52267
[INFO 2025-02-19 17:32:19,813] [0] Ed: 131200, train_loss: 1.19971, acc: 0.52267


INFO:root:[0] Ed: 134400, train_loss: 1.19967, acc: 0.52262


[INFO 2025-02-19 17:32:34,760] [0] Ed: 134400, train_loss: 1.19967, acc: 0.52262
[INFO 2025-02-19 17:32:34,760] [0] Ed: 134400, train_loss: 1.19967, acc: 0.52262


INFO:root:[0] Ed: 137600, train_loss: 1.19961, acc: 0.52282


[INFO 2025-02-19 17:32:50,070] [0] Ed: 137600, train_loss: 1.19961, acc: 0.52282
[INFO 2025-02-19 17:32:50,070] [0] Ed: 137600, train_loss: 1.19961, acc: 0.52282


INFO:root:[0] Ed: 140800, train_loss: 1.19877, acc: 0.52331


[INFO 2025-02-19 17:33:05,612] [0] Ed: 140800, train_loss: 1.19877, acc: 0.52331
[INFO 2025-02-19 17:33:05,612] [0] Ed: 140800, train_loss: 1.19877, acc: 0.52331


INFO:root:[0] Ed: 144000, train_loss: 1.19858, acc: 0.52335


[INFO 2025-02-19 17:33:20,585] [0] Ed: 144000, train_loss: 1.19858, acc: 0.52335
[INFO 2025-02-19 17:33:20,585] [0] Ed: 144000, train_loss: 1.19858, acc: 0.52335


INFO:root:[0] Ed: 147200, train_loss: 1.19827, acc: 0.52345


[INFO 2025-02-19 17:33:35,606] [0] Ed: 147200, train_loss: 1.19827, acc: 0.52345
[INFO 2025-02-19 17:33:35,606] [0] Ed: 147200, train_loss: 1.19827, acc: 0.52345


INFO:root:[0] Ed: 150400, train_loss: 1.19764, acc: 0.52366


[INFO 2025-02-19 17:33:50,451] [0] Ed: 150400, train_loss: 1.19764, acc: 0.52366
[INFO 2025-02-19 17:33:50,451] [0] Ed: 150400, train_loss: 1.19764, acc: 0.52366


INFO:root:[0] Ed: 153600, train_loss: 1.19736, acc: 0.52381


[INFO 2025-02-19 17:34:05,302] [0] Ed: 153600, train_loss: 1.19736, acc: 0.52381
[INFO 2025-02-19 17:34:05,302] [0] Ed: 153600, train_loss: 1.19736, acc: 0.52381


INFO:root:[0] Ed: 156800, train_loss: 1.19708, acc: 0.52379


[INFO 2025-02-19 17:34:20,878] [0] Ed: 156800, train_loss: 1.19708, acc: 0.52379
[INFO 2025-02-19 17:34:20,878] [0] Ed: 156800, train_loss: 1.19708, acc: 0.52379


INFO:root:[0] Ed: 160000, train_loss: 1.19659, acc: 0.52409


[INFO 2025-02-19 17:34:35,646] [0] Ed: 160000, train_loss: 1.19659, acc: 0.52409
[INFO 2025-02-19 17:34:35,646] [0] Ed: 160000, train_loss: 1.19659, acc: 0.52409


INFO:root:[0] Ed: 163200, train_loss: 1.19631, acc: 0.52410


[INFO 2025-02-19 17:34:50,436] [0] Ed: 163200, train_loss: 1.19631, acc: 0.52410
[INFO 2025-02-19 17:34:50,436] [0] Ed: 163200, train_loss: 1.19631, acc: 0.52410


INFO:root:[0] Ed: 166400, train_loss: 1.19659, acc: 0.52394


[INFO 2025-02-19 17:35:05,262] [0] Ed: 166400, train_loss: 1.19659, acc: 0.52394
[INFO 2025-02-19 17:35:05,262] [0] Ed: 166400, train_loss: 1.19659, acc: 0.52394


INFO:root:[0] Ed: 169600, train_loss: 1.19681, acc: 0.52393


[INFO 2025-02-19 17:35:20,031] [0] Ed: 169600, train_loss: 1.19681, acc: 0.52393
[INFO 2025-02-19 17:35:20,031] [0] Ed: 169600, train_loss: 1.19681, acc: 0.52393


INFO:root:[0] Ed: 172800, train_loss: 1.19641, acc: 0.52409


[INFO 2025-02-19 17:35:35,345] [0] Ed: 172800, train_loss: 1.19641, acc: 0.52409
[INFO 2025-02-19 17:35:35,345] [0] Ed: 172800, train_loss: 1.19641, acc: 0.52409


INFO:root:[0] Ed: 176000, train_loss: 1.19613, acc: 0.52413


[INFO 2025-02-19 17:35:50,531] [0] Ed: 176000, train_loss: 1.19613, acc: 0.52413
[INFO 2025-02-19 17:35:50,531] [0] Ed: 176000, train_loss: 1.19613, acc: 0.52413


INFO:root:[0] Ed: 179200, train_loss: 1.19574, acc: 0.52436


[INFO 2025-02-19 17:36:05,348] [0] Ed: 179200, train_loss: 1.19574, acc: 0.52436
[INFO 2025-02-19 17:36:05,348] [0] Ed: 179200, train_loss: 1.19574, acc: 0.52436


INFO:root:[0] Ed: 182400, train_loss: 1.19577, acc: 0.52441


[INFO 2025-02-19 17:36:20,125] [0] Ed: 182400, train_loss: 1.19577, acc: 0.52441
[INFO 2025-02-19 17:36:20,125] [0] Ed: 182400, train_loss: 1.19577, acc: 0.52441


INFO:root:[0] Ed: 185600, train_loss: 1.19548, acc: 0.52448


[INFO 2025-02-19 17:36:34,968] [0] Ed: 185600, train_loss: 1.19548, acc: 0.52448
[INFO 2025-02-19 17:36:34,968] [0] Ed: 185600, train_loss: 1.19548, acc: 0.52448


INFO:root:[0] Ed: 188800, train_loss: 1.19515, acc: 0.52479


[INFO 2025-02-19 17:36:49,711] [0] Ed: 188800, train_loss: 1.19515, acc: 0.52479
[INFO 2025-02-19 17:36:49,711] [0] Ed: 188800, train_loss: 1.19515, acc: 0.52479


INFO:root:[0] Ed: 192000, train_loss: 1.19539, acc: 0.52469


[INFO 2025-02-19 17:37:05,285] [0] Ed: 192000, train_loss: 1.19539, acc: 0.52469
[INFO 2025-02-19 17:37:05,285] [0] Ed: 192000, train_loss: 1.19539, acc: 0.52469


INFO:root:[0] Ed: 195200, train_loss: 1.19517, acc: 0.52490


[INFO 2025-02-19 17:37:20,174] [0] Ed: 195200, train_loss: 1.19517, acc: 0.52490
[INFO 2025-02-19 17:37:20,174] [0] Ed: 195200, train_loss: 1.19517, acc: 0.52490


INFO:root:[0] Ed: 198400, train_loss: 1.19523, acc: 0.52504


[INFO 2025-02-19 17:37:35,158] [0] Ed: 198400, train_loss: 1.19523, acc: 0.52504
[INFO 2025-02-19 17:37:35,158] [0] Ed: 198400, train_loss: 1.19523, acc: 0.52504


INFO:root:[0] Ed: 201600, train_loss: 1.19501, acc: 0.52514


[INFO 2025-02-19 17:37:50,106] [0] Ed: 201600, train_loss: 1.19501, acc: 0.52514
[INFO 2025-02-19 17:37:50,106] [0] Ed: 201600, train_loss: 1.19501, acc: 0.52514


INFO:root:[0] Ed: 204800, train_loss: 1.19505, acc: 0.52506


[INFO 2025-02-19 17:38:04,964] [0] Ed: 204800, train_loss: 1.19505, acc: 0.52506
[INFO 2025-02-19 17:38:04,964] [0] Ed: 204800, train_loss: 1.19505, acc: 0.52506


INFO:root:[0] Ed: 208000, train_loss: 1.19526, acc: 0.52498


[INFO 2025-02-19 17:38:20,271] [0] Ed: 208000, train_loss: 1.19526, acc: 0.52498
[INFO 2025-02-19 17:38:20,271] [0] Ed: 208000, train_loss: 1.19526, acc: 0.52498


INFO:root:[0] Ed: 211200, train_loss: 1.19539, acc: 0.52479


[INFO 2025-02-19 17:38:35,580] [0] Ed: 211200, train_loss: 1.19539, acc: 0.52479
[INFO 2025-02-19 17:38:35,580] [0] Ed: 211200, train_loss: 1.19539, acc: 0.52479


INFO:root:[0] Ed: 214400, train_loss: 1.19528, acc: 0.52485


[INFO 2025-02-19 17:38:50,577] [0] Ed: 214400, train_loss: 1.19528, acc: 0.52485
[INFO 2025-02-19 17:38:50,577] [0] Ed: 214400, train_loss: 1.19528, acc: 0.52485


INFO:root:[0] Ed: 217600, train_loss: 1.19504, acc: 0.52481


[INFO 2025-02-19 17:39:05,395] [0] Ed: 217600, train_loss: 1.19504, acc: 0.52481
[INFO 2025-02-19 17:39:05,395] [0] Ed: 217600, train_loss: 1.19504, acc: 0.52481


INFO:root:[0] Ed: 220800, train_loss: 1.19470, acc: 0.52487


[INFO 2025-02-19 17:39:20,239] [0] Ed: 220800, train_loss: 1.19470, acc: 0.52487
[INFO 2025-02-19 17:39:20,239] [0] Ed: 220800, train_loss: 1.19470, acc: 0.52487


INFO:root:[0] Ed: 224000, train_loss: 1.19426, acc: 0.52503


[INFO 2025-02-19 17:39:35,868] [0] Ed: 224000, train_loss: 1.19426, acc: 0.52503
[INFO 2025-02-19 17:39:35,868] [0] Ed: 224000, train_loss: 1.19426, acc: 0.52503


INFO:root:[0] Ed: 227200, train_loss: 1.19396, acc: 0.52522


[INFO 2025-02-19 17:39:51,400] [0] Ed: 227200, train_loss: 1.19396, acc: 0.52522
[INFO 2025-02-19 17:39:51,400] [0] Ed: 227200, train_loss: 1.19396, acc: 0.52522


INFO:root:[0] Ed: 230400, train_loss: 1.19371, acc: 0.52532


[INFO 2025-02-19 17:40:06,285] [0] Ed: 230400, train_loss: 1.19371, acc: 0.52532
[INFO 2025-02-19 17:40:06,285] [0] Ed: 230400, train_loss: 1.19371, acc: 0.52532


INFO:root:[0] Ed: 233600, train_loss: 1.19323, acc: 0.52553


[INFO 2025-02-19 17:40:21,087] [0] Ed: 233600, train_loss: 1.19323, acc: 0.52553
[INFO 2025-02-19 17:40:21,087] [0] Ed: 233600, train_loss: 1.19323, acc: 0.52553


INFO:root:Training finish.


[INFO 2025-02-19 17:40:33,786] Training finish.
[INFO 2025-02-19 17:40:33,786] Training finish.


INFO:root:Model saved to /content/model/epoch-5.pt.


[INFO 2025-02-19 17:40:33,831] Model saved to /content/model/epoch-5.pt.
[INFO 2025-02-19 17:40:33,831] Model saved to /content/model/epoch-5.pt.


In [None]:
def test(rank, args):
    is_distributed = False

    torch.cuda.set_device(rank)

    if args.load_ckpt_name is not None:
        ckpt_path = get_checkpoint(args.model_dir, args.load_ckpt_name)

    assert ckpt_path is not None, 'No checkpoint found.'
    checkpoint = torch.load(ckpt_path, map_location='cpu')

    subcategory_dict = checkpoint['subcategory_dict']
    category_dict = checkpoint['category_dict']
    word_dict = checkpoint['word_dict']

    dummy_embedding_matrix = np.zeros((len(word_dict) + 1, args.word_embedding_dim))
    model = NRMS(args, dummy_embedding_matrix)
    model.load_state_dict(checkpoint['model_state_dict'])
    logging.info(f"Model loaded from {ckpt_path}")

    if args.enable_gpu:
        model.cuda(rank)

    model.eval()
    torch.set_grad_enabled(False)

    news, news_index, category_dict, subcategory_dict, word_dict = read_news(
        os.path.join(args.train_data_dir, 'news.tsv'), args.test_abstract_dir, args, mode='train')
    news_title, news_category, news_subcategory, news_abstract = get_doc_input(
        news, news_index, category_dict, subcategory_dict, word_dict, args)
    news_combined = np.concatenate([x for x in [news_title] if x is not None], axis=-1)

    news_dataset = NewsDataset(news_combined)
    news_dataloader = DataLoader(news_dataset,
                                 batch_size=args.batch_size,
                                 num_workers=4)

    news_scoring = []
    with torch.no_grad():
        for input_ids in tqdm(news_dataloader):
            input_ids = input_ids.cuda(rank)
            news_vec = model.news_encoder(input_ids)
            news_vec = news_vec.to(torch.device("cpu")).detach().numpy()
            news_scoring.extend(news_vec)

    news_scoring = np.array(news_scoring)
    logging.info("news scoring num: {}".format(news_scoring.shape[0]))

    if rank == 0:
        doc_sim = 0
        for _ in tqdm(range(1000000)):
            i = random.randrange(1, len(news_scoring))
            j = random.randrange(1, len(news_scoring))
            if i != j:
                doc_sim += np.dot(news_scoring[i], news_scoring[j]) / (np.linalg.norm(news_scoring[i]) * np.linalg.norm(news_scoring[j]))
        logging.info(f'News doc-sim: {doc_sim / 1000000}')

    data_file_path = os.path.join(args.test_data_dir, f'behaviors_{rank}.tsv')

    def collate_fn(tuple_list):
        log_vecs = torch.FloatTensor([x[0] for x in tuple_list])
        # log_mask = torch.FloatTensor([x[1] for x in tuple_list])
        news_vecs = [x[1] for x in tuple_list]
        labels = [x[2] for x in tuple_list]
        return (log_vecs, news_vecs, labels)

    dataset = DatasetTest(data_file_path, news_index, news_scoring, args)
    dataloader = DataLoader(dataset, batch_size=args.batch_size, collate_fn=collate_fn)

    AUC = []
    MRR = []
    nDCG5 = []
    nDCG10 = []

    def print_metrics(rank, cnt, x):
        logging.info("[{}] {} samples: {}".format(rank, cnt, '\t'.join(["{:0.2f}".format(i * 100) for i in x])))

    def get_mean(arr):
        return [np.array(i).mean() for i in arr]

    def get_sum(arr):
        return [np.array(i).sum() for i in arr]

    local_sample_num = 0

    for cnt, (log_vecs, news_vecs, labels) in enumerate(dataloader):
        local_sample_num += log_vecs.shape[0]

        if args.enable_gpu:
            log_vecs = log_vecs.cuda(rank, non_blocking=True)

        user_vecs = model.user_encoder(log_vecs).to(torch.device("cpu")).detach().numpy()

        for user_vec, news_vec, label in zip(user_vecs, news_vecs, labels):
            if label.mean() == 0 or label.mean() == 1:
                continue

            score = np.dot(news_vec, user_vec)

            auc = roc_auc_score(label, score)
            mrr = mrr_score(label, score)
            ndcg5 = ndcg_score(label, score, k=5)
            ndcg10 = ndcg_score(label, score, k=10)

            AUC.append(auc)
            MRR.append(mrr)
            nDCG5.append(ndcg5)
            nDCG10.append(ndcg10)

        if cnt % args.log_steps == 0:
            print_metrics(rank, local_sample_num, get_mean([AUC, MRR, nDCG5, nDCG10]))

    logging.info('[{}] local_sample_num: {}'.format(rank, local_sample_num))
    if is_distributed:
        local_sample_num = torch.tensor(local_sample_num).cuda(rank)
        dist.reduce(local_sample_num, dst=0, op=dist.ReduceOp.SUM)
        local_metrics_sum = torch.FloatTensor(get_sum([AUC, MRR, nDCG5, nDCG10])).cuda(rank)
        dist.reduce(local_metrics_sum, dst=0, op=dist.ReduceOp.SUM)
        if rank == 0:
            print_metrics('*', local_sample_num, local_metrics_sum / local_sample_num)
    else:
        print_metrics('*', local_sample_num, get_mean([AUC, MRR, nDCG5, nDCG10]))


In [None]:
# Switch the shared configuration to evaluation and select the checkpoint to load.
args.mode = 'test'
args.user_log_mask=True
args.batch_size=128
args.load_ckpt_name= 'epoch-5.pt'
args.prepare=True
if 'test' in args.mode:
    if args.prepare:
        logging.info('Preparing testing data...')
        # prepare_testing_data's return value is used as the total sample count.
        total_sample_num = prepare_testing_data(args.test_data_dir, args.nGPU)
    else:
        total_sample_num = 0
        for i in range(args.nGPU):
            data_file_path = os.path.join(args.test_data_dir, f'behaviors_{i}.tsv')
            if not os.path.exists(data_file_path):
                message = f'Splited testing data {data_file_path} for GPU {i} does not exist. Please set the parameter --prepare as True and rerun the code.'
                logging.error(message)
                # Raise instead of exit(): exit() would shut down the notebook kernel.
                raise FileNotFoundError(message)
            # Count lines in Python rather than via `wc -l`: the data directory
            # contains spaces, which the unquoted shell command cannot handle.
            with open(data_file_path, 'rb') as shard_file:
                total_sample_num += sum(1 for _ in shard_file)
        logging.info('Skip testing data preparation.')
    logging.info(f'{total_sample_num} testing samples in total.')



INFO:root:Preparing testing data...


[INFO 2025-02-19 17:40:34,938] Preparing testing data...
[INFO 2025-02-19 17:40:34,938] Preparing testing data...


73152it [00:04, 15900.24it/s]
INFO:root:Writing files...


[INFO 2025-02-19 17:40:39,870] Writing files...
[INFO 2025-02-19 17:40:39,870] Writing files...


INFO:root:73152 testing samples in total.


[INFO 2025-02-19 17:40:45,042] 73152 testing samples in total.
[INFO 2025-02-19 17:40:45,042] 73152 testing samples in total.


In [None]:
# Run the evaluation as rank 0 with the settings currently stored in `args`.
test(0, args)

  checkpoint = torch.load(ckpt_path, map_location='cpu')
INFO:root:Model loaded from /content/model/epoch-5.pt


[INFO 2025-02-19 17:43:09,698] Model loaded from /content/model/epoch-5.pt
[INFO 2025-02-19 17:43:09,698] Model loaded from /content/model/epoch-5.pt


51282it [00:04, 12052.85it/s]
100%|██████████| 51282/51282 [00:00<00:00, 187115.04it/s]
100%|██████████| 401/401 [00:01<00:00, 313.05it/s]
INFO:root:news scoring num: 51283


[INFO 2025-02-19 17:43:15,751] news scoring num: 51283
[INFO 2025-02-19 17:43:15,751] news scoring num: 51283


100%|██████████| 1000000/1000000 [00:10<00:00, 97864.41it/s]
INFO:root:News doc-sim: 0.05019143021884577


[INFO 2025-02-19 17:43:25,975] News doc-sim: 0.05019143021884577
[INFO 2025-02-19 17:43:25,975] News doc-sim: 0.05019143021884577


INFO:root:[0] 128 samples: 65.98	31.36	34.26	40.93


[INFO 2025-02-19 17:43:26,536] [0] 128 samples: 65.98	31.36	34.26	40.93
[INFO 2025-02-19 17:43:26,536] [0] 128 samples: 65.98	31.36	34.26	40.93


INFO:root:[0] 12928 samples: 64.16	30.57	33.30	39.53


[INFO 2025-02-19 17:44:32,354] [0] 12928 samples: 64.16	30.57	33.30	39.53
[INFO 2025-02-19 17:44:32,354] [0] 12928 samples: 64.16	30.57	33.30	39.53


INFO:root:[0] 25728 samples: 64.29	30.83	33.59	39.75


[INFO 2025-02-19 17:45:39,188] [0] 25728 samples: 64.29	30.83	33.59	39.75
[INFO 2025-02-19 17:45:39,188] [0] 25728 samples: 64.29	30.83	33.59	39.75


INFO:root:[0] 38528 samples: 64.24	30.72	33.50	39.67


[INFO 2025-02-19 17:46:44,761] [0] 38528 samples: 64.24	30.72	33.50	39.67
[INFO 2025-02-19 17:46:44,761] [0] 38528 samples: 64.24	30.72	33.50	39.67


INFO:root:[0] 51328 samples: 64.15	30.49	33.26	39.43


[INFO 2025-02-19 17:47:50,154] [0] 51328 samples: 64.15	30.49	33.26	39.43
[INFO 2025-02-19 17:47:50,154] [0] 51328 samples: 64.15	30.49	33.26	39.43


INFO:root:[0] 64128 samples: 64.11	30.54	33.30	39.48


[INFO 2025-02-19 17:48:55,845] [0] 64128 samples: 64.11	30.54	33.30	39.48
[INFO 2025-02-19 17:48:55,845] [0] 64128 samples: 64.11	30.54	33.30	39.48


INFO:root:[0] local_sample_num: 73152


[INFO 2025-02-19 17:49:42,951] [0] local_sample_num: 73152
[INFO 2025-02-19 17:49:42,951] [0] local_sample_num: 73152


INFO:root:[*] 73152 samples: 64.10	30.59	33.36	39.52


[INFO 2025-02-19 17:49:43,011] [*] 73152 samples: 64.10	30.59	33.36	39.52
[INFO 2025-02-19 17:49:43,011] [*] 73152 samples: 64.10	30.59	33.36	39.52
