In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
from tqdm import tqdm
from nltk.tokenize import word_tokenize
from collections import Counter
import numpy as np
import random
import logging
import os
import torch.optim as optim
import torch.distributed as dist
from torch.utils.data import DataLoader
from pathlib import Path



In [None]:
from dataclasses import dataclass
from typing import Optional

@dataclass
class Args:
    nGPU: int = 1
    seed: int = 0
    prepare: bool = True
    mode: str = "train"
    train_data_dir: str = "/content/drive/MyDrive/Colab Notebooks/NewsRecommendation/data/MINDsmall_train"
    test_data_dir: str = "/content/drive/MyDrive/Colab Notebooks/NewsRecommendation/data/MINDsmall_dev"
    train_abstract_dir: str = '/content/genAbs0.json'
    # "/content/drive/MyDrive/Colab Notebooks/NewsRecommendation/data/train_gen_abs.json"
    test_abstract_dir: str = '/content/genAbs0.json'
    # "/content/drive/MyDrive/Colab Notebooks/NewsRecommendation/data/Dev_gen_abs.json"
    model_dir: str = '/content/model'
    batch_size: int = 32
    npratio: int = 4
    enable_gpu: bool = True
    filter_num: int = 3
    log_steps: int = 100
    epochs: int = 5
    lr: float = 0.0003
    num_words_title: int = 20
    num_words_abstract: int = 50
    user_log_length: int = 50
    word_embedding_dim: int = 300
    glove_embedding_path: str = '/content/drive/MyDrive/Colab Notebooks/NewsRecommendation/data/glove.840B.300d.txt'
    freeze_embedding: bool = False
    news_dim: int = 400
    news_query_vector_dim: int = 200
    user_query_vector_dim: int = 200
    num_attention_heads: int = 20
    user_log_mask: bool = True # Changed to True to handle user history correctly
    drop_rate: float = 0.2
    save_steps: int = 10000
    start_epoch: int = 0
    load_ckpt_name: Optional[str] = None
    use_category: bool = True
    use_subcategory: bool = True
    use_abstract: bool = True
    use_custom_abstract: bool = True
    category_emb_dim: int = 100

def parse_args():
  return Args()


**Dataset.py**

In [None]:
from torch.utils.data import IterableDataset, Dataset
import numpy as np
import random


class DatasetTrain(IterableDataset):
    def __init__(self, filename, news_index, news_combined, args):
        super(DatasetTrain).__init__()
        self.filename = filename
        self.news_index = news_index
        self.news_combined = news_combined
        self.args = args

    def trans_to_nindex(self, nids):
        return [self.news_index[i] if i in self.news_index else 0 for i in nids]

    def pad_to_fix_len(self, x, fix_length, padding_front=True, padding_value=0):
        if padding_front:
            pad_x = [padding_value] * (fix_length - len(x)) + x[-fix_length:]
            mask = [0] * (fix_length - len(x)) + [1] * min(fix_length, len(x))
        else:
            pad_x = x[-fix_length:] + [padding_value] * (fix_length - len(x))
            mask = [1] * min(fix_length, len(x)) + [0] * (fix_length - len(x))
        return pad_x, np.array(mask, dtype='float32')

    def line_mapper(self, line):
        line = line.strip().split('\t')
        click_docs = line[3].split()
        sess_pos = line[4].split()
        sess_neg = line[5].split()

        click_docs, log_mask = self.pad_to_fix_len(self.trans_to_nindex(click_docs), self.args.user_log_length)
        user_feature = self.news_combined[click_docs]

        pos = self.trans_to_nindex(sess_pos)
        neg = self.trans_to_nindex(sess_neg)

        label = random.randint(0, self.args.npratio)
        sample_news = neg[:label] + pos + neg[label:]
        news_feature = self.news_combined[sample_news]

        return user_feature, log_mask, news_feature, label

    def __iter__(self):
        file_iter = open(self.filename)
        return map(self.line_mapper, file_iter)


class DatasetTest(DatasetTrain):
    def __init__(self, filename, news_index, news_scoring, args):
        super(DatasetTrain).__init__()
        self.filename = filename
        self.news_index = news_index
        self.news_scoring = news_scoring
        self.args = args

    def line_mapper(self, line):
        line = line.strip().split('\t')
        click_docs = line[3].split()
        click_docs, log_mask = self.pad_to_fix_len(self.trans_to_nindex(click_docs), self.args.user_log_length)
        user_feature = self.news_scoring[click_docs]

        candidate_news = self.trans_to_nindex([i.split('-')[0] for i in line[4].split()])
        labels = np.array([int(i.split('-')[1]) for i in line[4].split()])
        news_feature = self.news_scoring[candidate_news]

        return user_feature, log_mask, news_feature, labels

    def __iter__(self):
        file_iter = open(self.filename)
        return map(self.line_mapper, file_iter)


class NewsDataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __getitem__(self, idx):
        return self.data[idx]

    def __len__(self):
        return self.data.shape[0]


**Metric.py**

In [None]:
from sklearn.metrics import roc_auc_score
import numpy as np


def dcg_score(y_true, y_score, k=10):
    order = np.argsort(y_score)[::-1]
    y_true = np.take(y_true, order[:k])
    gains = 2**y_true - 1
    discounts = np.log2(np.arange(len(y_true)) + 2)
    return np.sum(gains / discounts)


def ndcg_score(y_true, y_score, k=10):
    best = dcg_score(y_true, y_true, k)
    actual = dcg_score(y_true, y_score, k)
    return actual / best


def mrr_score(y_true, y_score):
    order = np.argsort(y_score)[::-1]
    y_true = np.take(y_true, order)
    rr_score = y_true / (np.arange(len(y_true)) + 1)
    return np.sum(rr_score) / np.sum(y_true)


def ctr_score(y_true, y_score, k=1):
    order = np.argsort(y_score)[::-1]
    y_true = np.take(y_true, order[:k])
    return np.mean(y_true)

def acc(y_true, y_hat):
    y_hat = torch.argmax(y_hat, dim=-1)
    tot = y_true.shape[0]
    hit = torch.sum(y_true == y_hat)
    return hit.data.float() * 1.0 / tot



**Ultis.py**

In [None]:
import logging
import argparse
import sys

def setuplogger():
    root = logging.getLogger()
    root.setLevel(logging.INFO)
    handler = logging.StreamHandler(sys.stdout)
    handler.setLevel(logging.INFO)
    formatter = logging.Formatter("[%(levelname)s %(asctime)s] %(message)s")
    handler.setFormatter(formatter)
    root.addHandler(handler)


def dump_args(args):
    for arg in dir(args):
        if not arg.startswith("_"):
            logging.info(f"args[{arg}]={getattr(args, arg)}")

def load_matrix(embedding_file_path, word_dict, word_embedding_dim):
    embedding_matrix = np.zeros(shape=(len(word_dict) + 1, word_embedding_dim))
    have_word = []
    if embedding_file_path is not None:
        with open(embedding_file_path, 'rb') as f:
            while True:
                line = f.readline()
                if len(line) == 0:
                    break
                line = line.split()
                word = line[0].decode()
                if word in word_dict:
                    index = word_dict[word]
                    tp = [float(x) for x in line[1:]]
                    embedding_matrix[index] = np.array(tp)
                    have_word.append(word)
    return embedding_matrix, have_word


def get_checkpoint(directory, ckpt_name):
    ckpt_path = os.path.join(directory, ckpt_name)
    if os.path.exists(ckpt_path):
        return ckpt_path
    else:
        return None


**Model_ultis.py**

In [None]:
from torch import nn
class AttentionPooling(nn.Module):
    def __init__(self, emb_size, hidden_size):
        super(AttentionPooling, self).__init__()
        self.att_fc1 = nn.Linear(emb_size, hidden_size)
        self.att_fc2 = nn.Linear(hidden_size, 1)

    def forward(self, x, attn_mask=None):
        """
        Args:
            x: batch_size, candidate_size, emb_dim
            attn_mask: batch_size, candidate_size
        Returns:
            (shape) batch_size, emb_dim
        """
        e = self.att_fc1(x)
        e = nn.Tanh()(e)
        alpha = self.att_fc2(e)
        alpha = torch.exp(alpha)

        if attn_mask is not None:
            alpha = alpha * attn_mask.unsqueeze(2)

        alpha = alpha / (torch.sum(alpha, dim=1, keepdim=True) + 1e-8)
        x = torch.bmm(x.permute(0, 2, 1), alpha).squeeze(dim=-1)
        return x


# NAML.py

In [None]:
import torch
from torch import nn
import torch.nn.functional as F


class NewsEncoder(nn.Module):
    def __init__(self, args, embedding_matrix, num_category, num_subcategory):
        super(NewsEncoder, self).__init__()
        self.embedding_matrix = embedding_matrix
        self.drop_rate = args.drop_rate
        self.num_words_title = args.num_words_title
        self.use_category = args.use_category
        self.use_subcategory = args.use_subcategory
        self.use_abstract = args.use_abstract
        self.num_words_abstract = args.num_words_abstract
        if args.use_category:
            self.category_emb = nn.Embedding(num_category + 1, args.category_emb_dim, padding_idx=0)
            self.category_dense = nn.Linear(args.category_emb_dim, args.news_dim)
        if args.use_subcategory:
            self.subcategory_emb = nn.Embedding(num_subcategory + 1, args.category_emb_dim, padding_idx=0)
            self.subcategory_dense = nn.Linear(args.category_emb_dim, args.news_dim)
        if args.use_category or args.use_subcategory:
            self.final_attn = AttentionPooling(args.news_dim, args.news_query_vector_dim)
        self.cnn = nn.Conv1d(
            in_channels=args.word_embedding_dim,
            out_channels=args.news_dim,
            kernel_size=3,
            padding=1
        )
        self.attn = AttentionPooling(args.news_dim, args.news_query_vector_dim)

        if args.use_abstract:
            self.abstract_cnn = nn.Conv1d(
                in_channels=args.word_embedding_dim,
                out_channels=args.news_dim,
                kernel_size=3,
                padding=1
            )
            self.abstract_attn = AttentionPooling(args.news_dim, args.news_query_vector_dim)


    def forward(self, x, mask=None):
        '''
            x: batch_size, word_num
            mask: batch_size, word_num
        '''
        title = torch.narrow(x, -1, 0, self.num_words_title).long()
        word_vecs = F.dropout(self.embedding_matrix(title),
                              p=self.drop_rate,
                              training=self.training)
        context_word_vecs = self.cnn(word_vecs.transpose(1, 2)).transpose(1, 2)
        title_vecs = self.attn(context_word_vecs, mask)
        all_vecs = [title_vecs]

        start = self.num_words_title
        if self.use_category:
            category = torch.narrow(x, -1, start, 1).squeeze(dim=-1).long()
            category_vecs = self.category_dense(self.category_emb(category))
            all_vecs.append(category_vecs)
            start += 1
        if self.use_subcategory:
            subcategory = torch.narrow(x, -1, start, 1).squeeze(dim=-1).long()
            subcategory_vecs = self.subcategory_dense(self.subcategory_emb(subcategory))
            all_vecs.append(subcategory_vecs)

        if self.use_abstract:
            abstract = torch.narrow(x, -1, start, self.num_words_abstract).long()
            abstract_word_vecs = F.dropout(self.embedding_matrix(abstract),
                                           p=self.drop_rate,
                                           training=self.training)
            abstract_context_word_vecs = self.abstract_cnn(abstract_word_vecs.transpose(1, 2)).transpose(1, 2)
            abstract_vecs = self.abstract_attn(abstract_context_word_vecs, mask)
            all_vecs.append(abstract_vecs)

        if len(all_vecs) == 1:
            news_vecs = all_vecs[0]
        else:
            all_vecs = torch.stack(all_vecs, dim=1)
            news_vecs = self.final_attn(all_vecs)
        return news_vecs

class UserEncoder(nn.Module):
    def __init__(self, args):
        super(UserEncoder, self).__init__()
        self.args = args
        self.attn = AttentionPooling(args.news_dim, args.user_query_vector_dim)
        self.pad_doc = nn.Parameter(torch.empty(1, args.news_dim).uniform_(-1, 1)).type(torch.FloatTensor)

    def forward(self, news_vecs, log_mask=None):
        '''
            news_vecs: batch_size, history_num, news_dim
            log_mask: batch_size, history_num
        '''
        bz = news_vecs.shape[0]
        if self.args.user_log_mask:
            user_vec = self.attn(news_vecs, log_mask)
        else:
            padding_doc = self.pad_doc.unsqueeze(dim=0).expand(bz, self.args.user_log_length, -1)
            news_vecs = news_vecs * log_mask.unsqueeze(dim=-1) + padding_doc * (1 - log_mask.unsqueeze(dim=-1))
            user_vec = self.attn(news_vecs)
        return user_vec


class Model(torch.nn.Module):
    def __init__(self, args, embedding_matrix, num_category, num_subcategory, **kwargs):
        super(Model, self).__init__()
        self.args = args
        pretrained_word_embedding = torch.from_numpy(embedding_matrix).float()
        word_embedding = nn.Embedding.from_pretrained(pretrained_word_embedding,
                                                      freeze=args.freeze_embedding,
                                                      padding_idx=0)

        self.news_encoder = NewsEncoder(args, word_embedding, num_category, num_subcategory)
        self.user_encoder = UserEncoder(args)
        self.loss_fn = nn.CrossEntropyLoss()

    def forward(self, history, history_mask, candidate, label):
        '''
            history: batch_size, history_length, num_word_title
            history_mask: batch_size, history_length
            candidate: batch_size, 1+K, num_word_title
            label: batch_size, 1+K
        '''
        num_words = history.shape[-1]
        candidate_news = candidate.reshape(-1, num_words)
        print("Candidate News Shape Before Encoding:", candidate_news.shape)  # Expected: (batch_size × 5, num_words_title)
        print("Encoded Candidate News Shape:", self.news_encoder(candidate_news).shape)  # What is the actual output?

        candidate_news_vecs = self.news_encoder(candidate_news).reshape(-1, 1 + self.args.npratio, self.args.news_dim)

        history_news = history.reshape(-1, num_words)
        history_news_vecs = self.news_encoder(history_news).reshape(-1, self.args.user_log_length, self.args.news_dim)

        user_vec = self.user_encoder(history_news_vecs, history_mask)
        score = torch.bmm(candidate_news_vecs, user_vec.unsqueeze(dim=-1)).squeeze(dim=-1)
        loss = self.loss_fn(score, label)
        return loss, score


**preprocess.py**

In [None]:
from collections import Counter
from tqdm import tqdm
import numpy as np
from nltk.tokenize import word_tokenize
import json


def update_dict(dict, key, value=None):
    if key not in dict:
        if value is None:
            dict[key] = len(dict) + 1
        else:
            dict[key] = value


def read_custom_abstract(news_file, custom_abstract_dict):
    news = {}
    news_index = {}
    category_dict = {}
    subcategory_dict = {}
    word_cnt = {}

    with open(news_file, 'r', encoding='utf-8') as f:
        for line in f:
            splited = line.strip('\n').split('\t')
            doc_id, category, subcategory, title, abstract, url, entity_title, entity_abstract = splited
            if doc_id in custom_abstract_dict:
                abstract = custom_abstract_dict[doc_id]
            news[doc_id] = [title.split(' '), category, subcategory, abstract.split(' ')]
            news_index[doc_id] = len(news_index) + 1
            for word in title.split(' '):
                if word not in word_cnt:
                    word_cnt[word] = 0
                word_cnt[word] += 1
            for word in abstract.split(' '):
                if word not in word_cnt:
                    word_cnt[word] = 0
                word_cnt[word] += 1
            if category not in category_dict:
                category_dict[category] = len(category_dict) + 1
            if subcategory not in subcategory_dict:
                subcategory_dict[subcategory] = len(subcategory_dict) + 1

    return news, news_index, category_dict, subcategory_dict, word_cnt

def read_news(news_path, abstract_path, args, mode='train'):
    news = {}
    category_dict = {}
    subcategory_dict = {}
    news_index = {}
    word_cnt = Counter()
    if args.use_custom_abstract:
      with open(abstract_path, 'r') as f:
          abs = json.load(f)
    with open(news_path, 'r', encoding='utf-8') as f:
        for line in tqdm(f):
            splited = line.strip('\n').split('\t')
            doc_id, category, subcategory, title, abstract, url, _, _ = splited
            update_dict(news_index, doc_id)

            title = title.lower()
            title = word_tokenize(title, language='english', preserve_line=True)

            update_dict(news, doc_id, [title, category, subcategory, abs[doc_id] if doc_id in abs else abstract])
            if mode == 'train':
                if args.use_category:
                    update_dict(category_dict, category)
                if args.use_subcategory:
                    update_dict(subcategory_dict, subcategory)
                word_cnt.update(title)

    if mode == 'train':
        word = [k for k, v in word_cnt.items() if v > args.filter_num]
        word_dict = {k: v for k, v in zip(word, range(1, len(word) + 1))}
        return news, news_index, category_dict, subcategory_dict, word_dict
    elif mode == 'test':
        return news, news_index
    else:
        assert False, 'Wrong mode!'


def get_doc_input(news, news_index, category_dict, subcategory_dict, word_dict, args):
    news_num = len(news) + 1
    news_title = np.zeros((news_num, args.num_words_title), dtype='int32')
    news_category = np.zeros((news_num, 1), dtype='int32') if args.use_category else None
    news_subcategory = np.zeros((news_num, 1), dtype='int32') if args.use_subcategory else None
    news_abstract = np.zeros((news_num, args.num_words_abstract), dtype='int32') if args.use_abstract else None

    for key in tqdm(news):
        title, category, subcategory, abstract = news[key]
        doc_index = news_index[key]

        for word_id in range(min(args.num_words_title, len(title))):
            if title[word_id] in word_dict:
                news_title[doc_index, word_id] = word_dict[title[word_id]]

        if args.use_category:
            news_category[doc_index, 0] = category_dict[category] if category in category_dict else 0
        if args.use_subcategory:
            news_subcategory[doc_index, 0] = subcategory_dict[subcategory] if subcategory in subcategory_dict else 0
        if args.use_abstract:
            for word_id in range(min(args.num_words_abstract, len(abstract))):
                if abstract[word_id] in word_dict:
                    news_abstract[doc_index, word_id] = word_dict[abstract[word_id]]

    return news_title, news_category, news_subcategory, news_abstract

**prepare_data.py**

In [None]:
import os
from tqdm import tqdm
import random
import logging


def get_sample(all_elements, num_sample):
    if num_sample > len(all_elements):
        return random.sample(all_elements * (num_sample // len(all_elements) + 1), num_sample)
    else:
        return random.sample(all_elements, num_sample)


def prepare_training_data(train_data_dir, nGPU, npratio, seed):
    random.seed(seed)
    behaviors = []

    behavior_file_path = os.path.join(train_data_dir, 'behaviors.tsv')
    with open(behavior_file_path, 'r', encoding='utf-8') as f:
        for line in tqdm(f):
            iid, uid, time, history, imp = line.strip().split('\t')
            impressions = [x.split('-') for x in imp.split(' ')]
            pos, neg = [], []
            for news_ID, label in impressions:
                if label == '0':
                    neg.append(news_ID)
                elif label == '1':
                    pos.append(news_ID)
            if len(pos) == 0 or len(neg) == 0:
                continue
            for pos_id in pos:
                neg_candidate = get_sample(neg, npratio)
                neg_str = ' '.join(neg_candidate)
                new_line = '\t'.join([iid, uid, time, history, pos_id, neg_str]) + '\n'
                behaviors.append(new_line)

    random.shuffle(behaviors)

    behaviors_per_file = [[] for _ in range(nGPU)]
    for i, line in enumerate(behaviors):
        behaviors_per_file[i % nGPU].append(line)

    logging.info('Writing files...')
    for i in range(nGPU):
        processed_file_path = os.path.join(train_data_dir, f'behaviors_np{npratio}_{i}.tsv')
        with open(processed_file_path, 'w') as f:
            f.writelines(behaviors_per_file[i])

    return len(behaviors)


def prepare_testing_data(test_data_dir, nGPU):
    behaviors = [[] for _ in range(nGPU)]

    behavior_file_path = os.path.join(test_data_dir, 'behaviors.tsv')
    with open(behavior_file_path, 'r', encoding='utf-8') as f:
        for i, line in enumerate(tqdm(f)):
            behaviors[i % nGPU].append(line)

    logging.info('Writing files...')
    for i in range(nGPU):
        processed_file_path = os.path.join(test_data_dir, f'behaviors_{i}.tsv')
        with open(processed_file_path, 'w') as f:
            f.writelines(behaviors[i])

    return sum([len(x) for x in behaviors])


In [None]:
def train(rank, args):

    is_distributed = False
    torch.cuda.set_device(rank)


    news, news_index, category_dict, subcategory_dict, word_dict = read_news(
        os.path.join(args.train_data_dir, 'news.tsv'), args.train_abstract_dir, args, mode='train')

    news_title, news_category, news_subcategory, news_abstract = get_doc_input(
        news, news_index, category_dict, subcategory_dict, word_dict, args)
    news_combined = np.concatenate([x for x in [news_title, news_category, news_subcategory, news_abstract] if x is not None], axis=-1)

    if rank == 0:
        logging.info('Initializing word embedding matrix...')

    embedding_matrix, have_word = load_matrix(args.glove_embedding_path,
                                                    word_dict,
                                                    args.word_embedding_dim)
    if rank == 0:
        logging.info(f'Word dict length: {len(word_dict)}')
        logging.info(f'Have words: {len(have_word)}')
        logging.info(f'Missing rate: {(len(word_dict) - len(have_word)) / len(word_dict)}')

    model = Model(args, embedding_matrix, len(category_dict), len(subcategory_dict))

    if args.load_ckpt_name is not None:
        ckpt_path = get_checkpoint(args.model_dir, args.load_ckpt_name)
        checkpoint = torch.load(ckpt_path, map_location='cpu')
        model.load_state_dict(checkpoint['model_state_dict'])
        logging.info(f"Model loaded from {ckpt_path}.")

    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    if args.enable_gpu:
        model = model.cuda(rank)

    if is_distributed:
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[rank])

    # if rank == 0:
    #     print(model)
    #     for name, param in model.named_parameters():
    #         print(name, param.requires_grad)

    data_file_path = os.path.join(args.train_data_dir, f'behaviors_np{args.npratio}_{rank}.tsv')

    dataset = DatasetTrain(data_file_path, news_index, news_combined, args)
    dataloader = DataLoader(dataset, batch_size=args.batch_size)

    logging.info('Training...')
    for ep in range(args.start_epoch, args.epochs):
        loss = 0.0
        accuary = 0.0
        for cnt, (log_ids, log_mask, input_ids, targets) in enumerate(dataloader):
            if args.enable_gpu:
                log_ids = log_ids.cuda(rank, non_blocking=True)
                log_mask = log_mask.cuda(rank, non_blocking=True)
                input_ids = input_ids.cuda(rank, non_blocking=True)
                targets = targets.cuda(rank, non_blocking=True)

            bz_loss, y_hat = model(log_ids, log_mask, input_ids, targets)
            loss += bz_loss.data.float()
            accuary += acc(targets, y_hat)
            optimizer.zero_grad()
            bz_loss.backward()
            optimizer.step()

            if cnt % args.log_steps == 0:
                logging.info(
                    '[{}] Ed: {}, train_loss: {:.5f}, acc: {:.5f}'.format(
                        rank, cnt * args.batch_size, loss.data / cnt, accuary / cnt)
                )

            if rank == 0 and     cnt != 0 and cnt % args.save_steps == 0:
                ckpt_path = os.path.join(args.model_dir, f'epoch-{ep+1}-{cnt}.pt')
                torch.save(
                    {
                        'model_state_dict':
                            {'.'.join(k.split('.')[1:]): v for k, v in model.state_dict().items()}
                            if is_distributed else model.state_dict(),
                        'category_dict': category_dict,
                        'word_dict': word_dict,
                        'subcategory_dict': subcategory_dict
                    }, ckpt_path)
                logging.info(f"Model saved to {ckpt_path}.")

        logging.info('Training finish.')

        if rank == 0:
            ckpt_path = os.path.join(args.model_dir, f'epoch-{ep+1}.pt')
            torch.save(
                {
                    'model_state_dict':
                        {'.'.join(k.split('.')[1:]): v for k, v in model.state_dict().items()}
                        if is_distributed else model.state_dict(),
                    'category_dict': category_dict,
                    'subcategory_dict': subcategory_dict,
                    'word_dict': word_dict,
                }, ckpt_path)
            logging.info(f"Model saved to {ckpt_path}.")



In [None]:

    import subprocess
    setuplogger()
    args = parse_args()
    dump_args(args)
    random.seed(args.seed)
    torch.manual_seed(args.seed)

    Path(args.model_dir).mkdir(parents=True, exist_ok=True)




INFO:root:args[batch_size]=32


[INFO 2025-02-17 09:34:24,006] args[batch_size]=32
[INFO 2025-02-17 09:34:24,006] args[batch_size]=32


INFO:root:args[category_emb_dim]=100


[INFO 2025-02-17 09:34:24,011] args[category_emb_dim]=100
[INFO 2025-02-17 09:34:24,011] args[category_emb_dim]=100


INFO:root:args[drop_rate]=0.2


[INFO 2025-02-17 09:34:24,015] args[drop_rate]=0.2
[INFO 2025-02-17 09:34:24,015] args[drop_rate]=0.2


INFO:root:args[enable_gpu]=True


[INFO 2025-02-17 09:34:24,019] args[enable_gpu]=True
[INFO 2025-02-17 09:34:24,019] args[enable_gpu]=True


INFO:root:args[epochs]=5


[INFO 2025-02-17 09:34:24,022] args[epochs]=5
[INFO 2025-02-17 09:34:24,022] args[epochs]=5


INFO:root:args[filter_num]=3


[INFO 2025-02-17 09:34:24,026] args[filter_num]=3
[INFO 2025-02-17 09:34:24,026] args[filter_num]=3


INFO:root:args[freeze_embedding]=False


[INFO 2025-02-17 09:34:24,030] args[freeze_embedding]=False
[INFO 2025-02-17 09:34:24,030] args[freeze_embedding]=False


INFO:root:args[glove_embedding_path]=/content/drive/MyDrive/Colab Notebooks/NewsRecommendation/data/glove.840B.300d.txt


[INFO 2025-02-17 09:34:24,034] args[glove_embedding_path]=/content/drive/MyDrive/Colab Notebooks/NewsRecommendation/data/glove.840B.300d.txt
[INFO 2025-02-17 09:34:24,034] args[glove_embedding_path]=/content/drive/MyDrive/Colab Notebooks/NewsRecommendation/data/glove.840B.300d.txt


INFO:root:args[load_ckpt_name]=None


[INFO 2025-02-17 09:34:24,037] args[load_ckpt_name]=None
[INFO 2025-02-17 09:34:24,037] args[load_ckpt_name]=None


INFO:root:args[log_steps]=100


[INFO 2025-02-17 09:34:24,041] args[log_steps]=100
[INFO 2025-02-17 09:34:24,041] args[log_steps]=100


INFO:root:args[lr]=0.0003


[INFO 2025-02-17 09:34:24,044] args[lr]=0.0003
[INFO 2025-02-17 09:34:24,044] args[lr]=0.0003


INFO:root:args[mode]=train


[INFO 2025-02-17 09:34:24,047] args[mode]=train
[INFO 2025-02-17 09:34:24,047] args[mode]=train


INFO:root:args[model_dir]=/content/model


[INFO 2025-02-17 09:34:24,050] args[model_dir]=/content/model
[INFO 2025-02-17 09:34:24,050] args[model_dir]=/content/model


INFO:root:args[nGPU]=1


[INFO 2025-02-17 09:34:24,053] args[nGPU]=1
[INFO 2025-02-17 09:34:24,053] args[nGPU]=1


INFO:root:args[news_dim]=400


[INFO 2025-02-17 09:34:24,056] args[news_dim]=400
[INFO 2025-02-17 09:34:24,056] args[news_dim]=400


INFO:root:args[news_query_vector_dim]=200


[INFO 2025-02-17 09:34:24,059] args[news_query_vector_dim]=200
[INFO 2025-02-17 09:34:24,059] args[news_query_vector_dim]=200


INFO:root:args[npratio]=4


[INFO 2025-02-17 09:34:24,063] args[npratio]=4
[INFO 2025-02-17 09:34:24,063] args[npratio]=4


INFO:root:args[num_attention_heads]=20


[INFO 2025-02-17 09:34:24,066] args[num_attention_heads]=20
[INFO 2025-02-17 09:34:24,066] args[num_attention_heads]=20


INFO:root:args[num_words_abstract]=50


[INFO 2025-02-17 09:34:24,069] args[num_words_abstract]=50
[INFO 2025-02-17 09:34:24,069] args[num_words_abstract]=50


INFO:root:args[num_words_title]=20


[INFO 2025-02-17 09:34:24,073] args[num_words_title]=20
[INFO 2025-02-17 09:34:24,073] args[num_words_title]=20


INFO:root:args[prepare]=True


[INFO 2025-02-17 09:34:24,076] args[prepare]=True
[INFO 2025-02-17 09:34:24,076] args[prepare]=True


INFO:root:args[save_steps]=10000


[INFO 2025-02-17 09:34:24,079] args[save_steps]=10000
[INFO 2025-02-17 09:34:24,079] args[save_steps]=10000


INFO:root:args[seed]=0


[INFO 2025-02-17 09:34:24,082] args[seed]=0
[INFO 2025-02-17 09:34:24,082] args[seed]=0


INFO:root:args[start_epoch]=0


[INFO 2025-02-17 09:34:24,102] args[start_epoch]=0
[INFO 2025-02-17 09:34:24,102] args[start_epoch]=0


INFO:root:args[test_abstract_dir]=/content/genAbs0.json


[INFO 2025-02-17 09:34:24,104] args[test_abstract_dir]=/content/genAbs0.json
[INFO 2025-02-17 09:34:24,104] args[test_abstract_dir]=/content/genAbs0.json


INFO:root:args[test_data_dir]=/content/drive/MyDrive/Colab Notebooks/NewsRecommendation/data/MINDsmall_dev


[INFO 2025-02-17 09:34:24,106] args[test_data_dir]=/content/drive/MyDrive/Colab Notebooks/NewsRecommendation/data/MINDsmall_dev
[INFO 2025-02-17 09:34:24,106] args[test_data_dir]=/content/drive/MyDrive/Colab Notebooks/NewsRecommendation/data/MINDsmall_dev


INFO:root:args[train_abstract_dir]=/content/genAbs0.json


[INFO 2025-02-17 09:34:24,109] args[train_abstract_dir]=/content/genAbs0.json
[INFO 2025-02-17 09:34:24,109] args[train_abstract_dir]=/content/genAbs0.json


INFO:root:args[train_data_dir]=/content/drive/MyDrive/Colab Notebooks/NewsRecommendation/data/MINDsmall_train


[INFO 2025-02-17 09:34:24,114] args[train_data_dir]=/content/drive/MyDrive/Colab Notebooks/NewsRecommendation/data/MINDsmall_train
[INFO 2025-02-17 09:34:24,114] args[train_data_dir]=/content/drive/MyDrive/Colab Notebooks/NewsRecommendation/data/MINDsmall_train


INFO:root:args[use_abstract]=True


[INFO 2025-02-17 09:34:24,117] args[use_abstract]=True
[INFO 2025-02-17 09:34:24,117] args[use_abstract]=True


INFO:root:args[use_category]=True


[INFO 2025-02-17 09:34:24,120] args[use_category]=True
[INFO 2025-02-17 09:34:24,120] args[use_category]=True


INFO:root:args[use_custom_abstract]=True


[INFO 2025-02-17 09:34:24,123] args[use_custom_abstract]=True
[INFO 2025-02-17 09:34:24,123] args[use_custom_abstract]=True


INFO:root:args[use_subcategory]=True


[INFO 2025-02-17 09:34:24,126] args[use_subcategory]=True
[INFO 2025-02-17 09:34:24,126] args[use_subcategory]=True


INFO:root:args[user_log_length]=50


[INFO 2025-02-17 09:34:24,129] args[user_log_length]=50
[INFO 2025-02-17 09:34:24,129] args[user_log_length]=50


INFO:root:args[user_log_mask]=True


[INFO 2025-02-17 09:34:24,132] args[user_log_mask]=True
[INFO 2025-02-17 09:34:24,132] args[user_log_mask]=True


INFO:root:args[user_query_vector_dim]=200


[INFO 2025-02-17 09:34:24,136] args[user_query_vector_dim]=200
[INFO 2025-02-17 09:34:24,136] args[user_query_vector_dim]=200


INFO:root:args[word_embedding_dim]=300


[INFO 2025-02-17 09:34:24,139] args[word_embedding_dim]=300
[INFO 2025-02-17 09:34:24,139] args[word_embedding_dim]=300


In [None]:
if 'train' in args.mode:
    if args.prepare:
        logging.info('Preparing training data...')
        total_sample_num = prepare_training_data(args.train_data_dir, args.nGPU, args.npratio, args.seed)
    else:
        total_sample_num = 0
        for i in range(args.nGPU):
            data_file_path = os.path.join(args.train_data_dir, f'behaviors_np{args.npratio}_{i}.tsv')
            print(data_file_path)
            if not os.path.exists(data_file_path):
                logging.error(f'Splited training data {data_file_path} for GPU {i} does not exist. Please set the parameter --prepare as True and rerun the code.')
                exit()
            result = subprocess.getoutput(f'wc -l {data_file_path}')
            total_sample_num += int(result.split(' ')[0])
        logging.info('Skip training data preparation.')
    logging.info(f'{total_sample_num} training samples, {total_sample_num // args.batch_size // args.nGPU} batches in total.')

    # train(0, args)

INFO:root:Preparing training data...


[INFO 2025-02-16 03:27:30,102] Preparing training data...
[INFO 2025-02-16 03:27:30,102] Preparing training data...


156965it [00:03, 42875.46it/s]
INFO:root:Writing files...


[INFO 2025-02-16 03:27:34,691] Writing files...
[INFO 2025-02-16 03:27:34,691] Writing files...


INFO:root:236344 training samples, 7385 batches in total.


[INFO 2025-02-16 03:27:38,591] 236344 training samples, 7385 batches in total.
[INFO 2025-02-16 03:27:38,591] 236344 training samples, 7385 batches in total.


In [None]:
    train(0, args)


51282it [00:04, 10649.21it/s]
100%|██████████| 51282/51282 [00:00<00:00, 188261.30it/s]
INFO:root:Initializing word embedding matrix...


[INFO 2025-02-16 03:28:40,419] Initializing word embedding matrix...
[INFO 2025-02-16 03:28:40,419] Initializing word embedding matrix...


INFO:root:Word dict length: 12519


[INFO 2025-02-16 03:30:25,414] Word dict length: 12519
[INFO 2025-02-16 03:30:25,414] Word dict length: 12519


INFO:root:Have words: 11960


[INFO 2025-02-16 03:30:25,417] Have words: 11960
[INFO 2025-02-16 03:30:25,417] Have words: 11960


INFO:root:Missing rate: 0.0446521287642783


[INFO 2025-02-16 03:30:25,420] Missing rate: 0.0446521287642783
[INFO 2025-02-16 03:30:25,420] Missing rate: 0.0446521287642783


INFO:root:Training...


[INFO 2025-02-16 03:30:30,495] Training...
[INFO 2025-02-16 03:30:30,495] Training...


INFO:root:[0] Ed: 0, train_loss: inf, acc: inf


[INFO 2025-02-16 03:30:32,192] [0] Ed: 0, train_loss: inf, acc: inf
[INFO 2025-02-16 03:30:32,192] [0] Ed: 0, train_loss: inf, acc: inf


INFO:root:[0] Ed: 3200, train_loss: 1.59442, acc: 0.34406


[INFO 2025-02-16 03:30:37,460] [0] Ed: 3200, train_loss: 1.59442, acc: 0.34406
[INFO 2025-02-16 03:30:37,460] [0] Ed: 3200, train_loss: 1.59442, acc: 0.34406


INFO:root:[0] Ed: 6400, train_loss: 1.54301, acc: 0.36000


[INFO 2025-02-16 03:30:42,753] [0] Ed: 6400, train_loss: 1.54301, acc: 0.36000
[INFO 2025-02-16 03:30:42,753] [0] Ed: 6400, train_loss: 1.54301, acc: 0.36000


INFO:root:[0] Ed: 9600, train_loss: 1.52254, acc: 0.36792


[INFO 2025-02-16 03:30:48,065] [0] Ed: 9600, train_loss: 1.52254, acc: 0.36792
[INFO 2025-02-16 03:30:48,065] [0] Ed: 9600, train_loss: 1.52254, acc: 0.36792


INFO:root:[0] Ed: 12800, train_loss: 1.50752, acc: 0.37383


[INFO 2025-02-16 03:30:53,397] [0] Ed: 12800, train_loss: 1.50752, acc: 0.37383
[INFO 2025-02-16 03:30:53,397] [0] Ed: 12800, train_loss: 1.50752, acc: 0.37383


INFO:root:[0] Ed: 16000, train_loss: 1.49704, acc: 0.37756


[INFO 2025-02-16 03:30:58,743] [0] Ed: 16000, train_loss: 1.49704, acc: 0.37756
[INFO 2025-02-16 03:30:58,743] [0] Ed: 16000, train_loss: 1.49704, acc: 0.37756


INFO:root:[0] Ed: 19200, train_loss: 1.48991, acc: 0.38307


[INFO 2025-02-16 03:31:04,098] [0] Ed: 19200, train_loss: 1.48991, acc: 0.38307
[INFO 2025-02-16 03:31:04,098] [0] Ed: 19200, train_loss: 1.48991, acc: 0.38307


INFO:root:[0] Ed: 22400, train_loss: 1.48348, acc: 0.38567


[INFO 2025-02-16 03:31:09,480] [0] Ed: 22400, train_loss: 1.48348, acc: 0.38567
[INFO 2025-02-16 03:31:09,480] [0] Ed: 22400, train_loss: 1.48348, acc: 0.38567


INFO:root:[0] Ed: 25600, train_loss: 1.47890, acc: 0.38746


[INFO 2025-02-16 03:31:14,885] [0] Ed: 25600, train_loss: 1.47890, acc: 0.38746
[INFO 2025-02-16 03:31:14,885] [0] Ed: 25600, train_loss: 1.47890, acc: 0.38746


INFO:root:[0] Ed: 28800, train_loss: 1.47336, acc: 0.39115


[INFO 2025-02-16 03:31:20,309] [0] Ed: 28800, train_loss: 1.47336, acc: 0.39115
[INFO 2025-02-16 03:31:20,309] [0] Ed: 28800, train_loss: 1.47336, acc: 0.39115


INFO:root:[0] Ed: 32000, train_loss: 1.46945, acc: 0.39391


[INFO 2025-02-16 03:31:25,759] [0] Ed: 32000, train_loss: 1.46945, acc: 0.39391
[INFO 2025-02-16 03:31:25,759] [0] Ed: 32000, train_loss: 1.46945, acc: 0.39391


INFO:root:[0] Ed: 35200, train_loss: 1.46685, acc: 0.39520


[INFO 2025-02-16 03:31:31,226] [0] Ed: 35200, train_loss: 1.46685, acc: 0.39520
[INFO 2025-02-16 03:31:31,226] [0] Ed: 35200, train_loss: 1.46685, acc: 0.39520


INFO:root:[0] Ed: 38400, train_loss: 1.46260, acc: 0.39758


[INFO 2025-02-16 03:31:36,709] [0] Ed: 38400, train_loss: 1.46260, acc: 0.39758
[INFO 2025-02-16 03:31:36,709] [0] Ed: 38400, train_loss: 1.46260, acc: 0.39758


INFO:root:[0] Ed: 41600, train_loss: 1.45769, acc: 0.40135


[INFO 2025-02-16 03:31:42,212] [0] Ed: 41600, train_loss: 1.45769, acc: 0.40135
[INFO 2025-02-16 03:31:42,212] [0] Ed: 41600, train_loss: 1.45769, acc: 0.40135


INFO:root:[0] Ed: 44800, train_loss: 1.45385, acc: 0.40362


[INFO 2025-02-16 03:31:47,722] [0] Ed: 44800, train_loss: 1.45385, acc: 0.40362
[INFO 2025-02-16 03:31:47,722] [0] Ed: 44800, train_loss: 1.45385, acc: 0.40362


INFO:root:[0] Ed: 48000, train_loss: 1.45139, acc: 0.40475


[INFO 2025-02-16 03:31:53,259] [0] Ed: 48000, train_loss: 1.45139, acc: 0.40475
[INFO 2025-02-16 03:31:53,259] [0] Ed: 48000, train_loss: 1.45139, acc: 0.40475


INFO:root:[0] Ed: 51200, train_loss: 1.44807, acc: 0.40559


[INFO 2025-02-16 03:31:58,806] [0] Ed: 51200, train_loss: 1.44807, acc: 0.40559
[INFO 2025-02-16 03:31:58,806] [0] Ed: 51200, train_loss: 1.44807, acc: 0.40559


INFO:root:[0] Ed: 54400, train_loss: 1.44555, acc: 0.40706


[INFO 2025-02-16 03:32:04,384] [0] Ed: 54400, train_loss: 1.44555, acc: 0.40706
[INFO 2025-02-16 03:32:04,384] [0] Ed: 54400, train_loss: 1.44555, acc: 0.40706


INFO:root:[0] Ed: 57600, train_loss: 1.44311, acc: 0.40821


[INFO 2025-02-16 03:32:09,983] [0] Ed: 57600, train_loss: 1.44311, acc: 0.40821
[INFO 2025-02-16 03:32:09,983] [0] Ed: 57600, train_loss: 1.44311, acc: 0.40821


INFO:root:[0] Ed: 60800, train_loss: 1.44105, acc: 0.40870


[INFO 2025-02-16 03:32:15,595] [0] Ed: 60800, train_loss: 1.44105, acc: 0.40870
[INFO 2025-02-16 03:32:15,595] [0] Ed: 60800, train_loss: 1.44105, acc: 0.40870


INFO:root:[0] Ed: 64000, train_loss: 1.43818, acc: 0.41036


[INFO 2025-02-16 03:32:21,206] [0] Ed: 64000, train_loss: 1.43818, acc: 0.41036
[INFO 2025-02-16 03:32:21,206] [0] Ed: 64000, train_loss: 1.43818, acc: 0.41036


INFO:root:[0] Ed: 67200, train_loss: 1.43571, acc: 0.41249


[INFO 2025-02-16 03:32:26,800] [0] Ed: 67200, train_loss: 1.43571, acc: 0.41249
[INFO 2025-02-16 03:32:26,800] [0] Ed: 67200, train_loss: 1.43571, acc: 0.41249


INFO:root:[0] Ed: 70400, train_loss: 1.43327, acc: 0.41364


[INFO 2025-02-16 03:32:32,374] [0] Ed: 70400, train_loss: 1.43327, acc: 0.41364
[INFO 2025-02-16 03:32:32,374] [0] Ed: 70400, train_loss: 1.43327, acc: 0.41364


INFO:root:[0] Ed: 73600, train_loss: 1.43077, acc: 0.41519


[INFO 2025-02-16 03:32:37,939] [0] Ed: 73600, train_loss: 1.43077, acc: 0.41519
[INFO 2025-02-16 03:32:37,939] [0] Ed: 73600, train_loss: 1.43077, acc: 0.41519


INFO:root:[0] Ed: 76800, train_loss: 1.42844, acc: 0.41676


[INFO 2025-02-16 03:32:43,484] [0] Ed: 76800, train_loss: 1.42844, acc: 0.41676
[INFO 2025-02-16 03:32:43,484] [0] Ed: 76800, train_loss: 1.42844, acc: 0.41676


INFO:root:[0] Ed: 80000, train_loss: 1.42725, acc: 0.41721


[INFO 2025-02-16 03:32:49,023] [0] Ed: 80000, train_loss: 1.42725, acc: 0.41721
[INFO 2025-02-16 03:32:49,023] [0] Ed: 80000, train_loss: 1.42725, acc: 0.41721


INFO:root:[0] Ed: 83200, train_loss: 1.42484, acc: 0.41813


[INFO 2025-02-16 03:32:54,558] [0] Ed: 83200, train_loss: 1.42484, acc: 0.41813
[INFO 2025-02-16 03:32:54,558] [0] Ed: 83200, train_loss: 1.42484, acc: 0.41813


INFO:root:[0] Ed: 86400, train_loss: 1.42287, acc: 0.41929


[INFO 2025-02-16 03:33:00,089] [0] Ed: 86400, train_loss: 1.42287, acc: 0.41929
[INFO 2025-02-16 03:33:00,089] [0] Ed: 86400, train_loss: 1.42287, acc: 0.41929


INFO:root:[0] Ed: 89600, train_loss: 1.42044, acc: 0.42073


[INFO 2025-02-16 03:33:05,621] [0] Ed: 89600, train_loss: 1.42044, acc: 0.42073
[INFO 2025-02-16 03:33:05,621] [0] Ed: 89600, train_loss: 1.42044, acc: 0.42073


INFO:root:[0] Ed: 92800, train_loss: 1.41934, acc: 0.42089


[INFO 2025-02-16 03:33:11,155] [0] Ed: 92800, train_loss: 1.41934, acc: 0.42089
[INFO 2025-02-16 03:33:11,155] [0] Ed: 92800, train_loss: 1.41934, acc: 0.42089


INFO:root:[0] Ed: 96000, train_loss: 1.41810, acc: 0.42156


[INFO 2025-02-16 03:33:16,690] [0] Ed: 96000, train_loss: 1.41810, acc: 0.42156
[INFO 2025-02-16 03:33:16,690] [0] Ed: 96000, train_loss: 1.41810, acc: 0.42156


INFO:root:[0] Ed: 99200, train_loss: 1.41696, acc: 0.42185


[INFO 2025-02-16 03:33:22,232] [0] Ed: 99200, train_loss: 1.41696, acc: 0.42185
[INFO 2025-02-16 03:33:22,232] [0] Ed: 99200, train_loss: 1.41696, acc: 0.42185


INFO:root:[0] Ed: 102400, train_loss: 1.41502, acc: 0.42266


[INFO 2025-02-16 03:33:27,782] [0] Ed: 102400, train_loss: 1.41502, acc: 0.42266
[INFO 2025-02-16 03:33:27,782] [0] Ed: 102400, train_loss: 1.41502, acc: 0.42266


INFO:root:[0] Ed: 105600, train_loss: 1.41320, acc: 0.42360


[INFO 2025-02-16 03:33:33,334] [0] Ed: 105600, train_loss: 1.41320, acc: 0.42360
[INFO 2025-02-16 03:33:33,334] [0] Ed: 105600, train_loss: 1.41320, acc: 0.42360


INFO:root:[0] Ed: 108800, train_loss: 1.41204, acc: 0.42419


[INFO 2025-02-16 03:33:38,892] [0] Ed: 108800, train_loss: 1.41204, acc: 0.42419
[INFO 2025-02-16 03:33:38,892] [0] Ed: 108800, train_loss: 1.41204, acc: 0.42419


INFO:root:[0] Ed: 112000, train_loss: 1.41094, acc: 0.42465


[INFO 2025-02-16 03:33:44,443] [0] Ed: 112000, train_loss: 1.41094, acc: 0.42465
[INFO 2025-02-16 03:33:44,443] [0] Ed: 112000, train_loss: 1.41094, acc: 0.42465


INFO:root:[0] Ed: 115200, train_loss: 1.40977, acc: 0.42524


[INFO 2025-02-16 03:33:50,003] [0] Ed: 115200, train_loss: 1.40977, acc: 0.42524
[INFO 2025-02-16 03:33:50,003] [0] Ed: 115200, train_loss: 1.40977, acc: 0.42524


INFO:root:[0] Ed: 118400, train_loss: 1.40834, acc: 0.42606


[INFO 2025-02-16 03:33:55,567] [0] Ed: 118400, train_loss: 1.40834, acc: 0.42606
[INFO 2025-02-16 03:33:55,567] [0] Ed: 118400, train_loss: 1.40834, acc: 0.42606


INFO:root:[0] Ed: 121600, train_loss: 1.40742, acc: 0.42648


[INFO 2025-02-16 03:34:01,134] [0] Ed: 121600, train_loss: 1.40742, acc: 0.42648
[INFO 2025-02-16 03:34:01,134] [0] Ed: 121600, train_loss: 1.40742, acc: 0.42648


INFO:root:[0] Ed: 124800, train_loss: 1.40610, acc: 0.42693


[INFO 2025-02-16 03:34:06,696] [0] Ed: 124800, train_loss: 1.40610, acc: 0.42693
[INFO 2025-02-16 03:34:06,696] [0] Ed: 124800, train_loss: 1.40610, acc: 0.42693


INFO:root:[0] Ed: 128000, train_loss: 1.40531, acc: 0.42744


[INFO 2025-02-16 03:34:12,256] [0] Ed: 128000, train_loss: 1.40531, acc: 0.42744
[INFO 2025-02-16 03:34:12,256] [0] Ed: 128000, train_loss: 1.40531, acc: 0.42744


INFO:root:[0] Ed: 131200, train_loss: 1.40402, acc: 0.42836


[INFO 2025-02-16 03:34:17,821] [0] Ed: 131200, train_loss: 1.40402, acc: 0.42836
[INFO 2025-02-16 03:34:17,821] [0] Ed: 131200, train_loss: 1.40402, acc: 0.42836


INFO:root:[0] Ed: 134400, train_loss: 1.40309, acc: 0.42870


[INFO 2025-02-16 03:34:23,375] [0] Ed: 134400, train_loss: 1.40309, acc: 0.42870
[INFO 2025-02-16 03:34:23,375] [0] Ed: 134400, train_loss: 1.40309, acc: 0.42870


INFO:root:[0] Ed: 137600, train_loss: 1.40253, acc: 0.42900


[INFO 2025-02-16 03:34:28,940] [0] Ed: 137600, train_loss: 1.40253, acc: 0.42900
[INFO 2025-02-16 03:34:28,940] [0] Ed: 137600, train_loss: 1.40253, acc: 0.42900


INFO:root:[0] Ed: 140800, train_loss: 1.40144, acc: 0.42951


[INFO 2025-02-16 03:34:34,499] [0] Ed: 140800, train_loss: 1.40144, acc: 0.42951
[INFO 2025-02-16 03:34:34,499] [0] Ed: 140800, train_loss: 1.40144, acc: 0.42951


INFO:root:[0] Ed: 144000, train_loss: 1.40049, acc: 0.42955


[INFO 2025-02-16 03:34:40,053] [0] Ed: 144000, train_loss: 1.40049, acc: 0.42955
[INFO 2025-02-16 03:34:40,053] [0] Ed: 144000, train_loss: 1.40049, acc: 0.42955


INFO:root:[0] Ed: 147200, train_loss: 1.39913, acc: 0.43024


[INFO 2025-02-16 03:34:45,605] [0] Ed: 147200, train_loss: 1.39913, acc: 0.43024
[INFO 2025-02-16 03:34:45,605] [0] Ed: 147200, train_loss: 1.39913, acc: 0.43024


INFO:root:[0] Ed: 150400, train_loss: 1.39807, acc: 0.43062


[INFO 2025-02-16 03:34:51,154] [0] Ed: 150400, train_loss: 1.39807, acc: 0.43062
[INFO 2025-02-16 03:34:51,154] [0] Ed: 150400, train_loss: 1.39807, acc: 0.43062


INFO:root:[0] Ed: 153600, train_loss: 1.39680, acc: 0.43106


[INFO 2025-02-16 03:34:56,703] [0] Ed: 153600, train_loss: 1.39680, acc: 0.43106
[INFO 2025-02-16 03:34:56,703] [0] Ed: 153600, train_loss: 1.39680, acc: 0.43106


INFO:root:[0] Ed: 156800, train_loss: 1.39587, acc: 0.43149


[INFO 2025-02-16 03:35:02,249] [0] Ed: 156800, train_loss: 1.39587, acc: 0.43149
[INFO 2025-02-16 03:35:02,249] [0] Ed: 156800, train_loss: 1.39587, acc: 0.43149


INFO:root:[0] Ed: 160000, train_loss: 1.39473, acc: 0.43200


[INFO 2025-02-16 03:35:07,789] [0] Ed: 160000, train_loss: 1.39473, acc: 0.43200
[INFO 2025-02-16 03:35:07,789] [0] Ed: 160000, train_loss: 1.39473, acc: 0.43200


INFO:root:[0] Ed: 163200, train_loss: 1.39399, acc: 0.43244


[INFO 2025-02-16 03:35:13,333] [0] Ed: 163200, train_loss: 1.39399, acc: 0.43244
[INFO 2025-02-16 03:35:13,333] [0] Ed: 163200, train_loss: 1.39399, acc: 0.43244


INFO:root:[0] Ed: 166400, train_loss: 1.39319, acc: 0.43288


[INFO 2025-02-16 03:35:18,879] [0] Ed: 166400, train_loss: 1.39319, acc: 0.43288
[INFO 2025-02-16 03:35:18,879] [0] Ed: 166400, train_loss: 1.39319, acc: 0.43288


INFO:root:[0] Ed: 169600, train_loss: 1.39251, acc: 0.43339


[INFO 2025-02-16 03:35:24,422] [0] Ed: 169600, train_loss: 1.39251, acc: 0.43339
[INFO 2025-02-16 03:35:24,422] [0] Ed: 169600, train_loss: 1.39251, acc: 0.43339


INFO:root:[0] Ed: 172800, train_loss: 1.39168, acc: 0.43388


[INFO 2025-02-16 03:35:29,969] [0] Ed: 172800, train_loss: 1.39168, acc: 0.43388
[INFO 2025-02-16 03:35:29,969] [0] Ed: 172800, train_loss: 1.39168, acc: 0.43388


INFO:root:[0] Ed: 176000, train_loss: 1.39078, acc: 0.43445


[INFO 2025-02-16 03:35:35,518] [0] Ed: 176000, train_loss: 1.39078, acc: 0.43445
[INFO 2025-02-16 03:35:35,518] [0] Ed: 176000, train_loss: 1.39078, acc: 0.43445


INFO:root:[0] Ed: 179200, train_loss: 1.38999, acc: 0.43480


[INFO 2025-02-16 03:35:41,068] [0] Ed: 179200, train_loss: 1.38999, acc: 0.43480
[INFO 2025-02-16 03:35:41,068] [0] Ed: 179200, train_loss: 1.38999, acc: 0.43480


INFO:root:[0] Ed: 182400, train_loss: 1.38938, acc: 0.43518


[INFO 2025-02-16 03:35:46,610] [0] Ed: 182400, train_loss: 1.38938, acc: 0.43518
[INFO 2025-02-16 03:35:46,610] [0] Ed: 182400, train_loss: 1.38938, acc: 0.43518


INFO:root:[0] Ed: 185600, train_loss: 1.38852, acc: 0.43554


[INFO 2025-02-16 03:35:52,160] [0] Ed: 185600, train_loss: 1.38852, acc: 0.43554
[INFO 2025-02-16 03:35:52,160] [0] Ed: 185600, train_loss: 1.38852, acc: 0.43554


INFO:root:[0] Ed: 188800, train_loss: 1.38783, acc: 0.43579


[INFO 2025-02-16 03:35:57,707] [0] Ed: 188800, train_loss: 1.38783, acc: 0.43579
[INFO 2025-02-16 03:35:57,707] [0] Ed: 188800, train_loss: 1.38783, acc: 0.43579


INFO:root:[0] Ed: 192000, train_loss: 1.38741, acc: 0.43595


[INFO 2025-02-16 03:36:03,246] [0] Ed: 192000, train_loss: 1.38741, acc: 0.43595
[INFO 2025-02-16 03:36:03,246] [0] Ed: 192000, train_loss: 1.38741, acc: 0.43595


INFO:root:[0] Ed: 195200, train_loss: 1.38659, acc: 0.43626


[INFO 2025-02-16 03:36:08,791] [0] Ed: 195200, train_loss: 1.38659, acc: 0.43626
[INFO 2025-02-16 03:36:08,791] [0] Ed: 195200, train_loss: 1.38659, acc: 0.43626


INFO:root:[0] Ed: 198400, train_loss: 1.38608, acc: 0.43661


[INFO 2025-02-16 03:36:14,335] [0] Ed: 198400, train_loss: 1.38608, acc: 0.43661
[INFO 2025-02-16 03:36:14,335] [0] Ed: 198400, train_loss: 1.38608, acc: 0.43661


INFO:root:[0] Ed: 201600, train_loss: 1.38531, acc: 0.43701


[INFO 2025-02-16 03:36:19,882] [0] Ed: 201600, train_loss: 1.38531, acc: 0.43701
[INFO 2025-02-16 03:36:19,882] [0] Ed: 201600, train_loss: 1.38531, acc: 0.43701


INFO:root:[0] Ed: 204800, train_loss: 1.38493, acc: 0.43723


[INFO 2025-02-16 03:36:25,424] [0] Ed: 204800, train_loss: 1.38493, acc: 0.43723
[INFO 2025-02-16 03:36:25,424] [0] Ed: 204800, train_loss: 1.38493, acc: 0.43723


INFO:root:[0] Ed: 208000, train_loss: 1.38442, acc: 0.43750


[INFO 2025-02-16 03:36:30,968] [0] Ed: 208000, train_loss: 1.38442, acc: 0.43750
[INFO 2025-02-16 03:36:30,968] [0] Ed: 208000, train_loss: 1.38442, acc: 0.43750


INFO:root:[0] Ed: 211200, train_loss: 1.38393, acc: 0.43763


[INFO 2025-02-16 03:36:36,510] [0] Ed: 211200, train_loss: 1.38393, acc: 0.43763
[INFO 2025-02-16 03:36:36,510] [0] Ed: 211200, train_loss: 1.38393, acc: 0.43763


INFO:root:[0] Ed: 214400, train_loss: 1.38338, acc: 0.43793


[INFO 2025-02-16 03:36:42,060] [0] Ed: 214400, train_loss: 1.38338, acc: 0.43793
[INFO 2025-02-16 03:36:42,060] [0] Ed: 214400, train_loss: 1.38338, acc: 0.43793


INFO:root:[0] Ed: 217600, train_loss: 1.38292, acc: 0.43809


[INFO 2025-02-16 03:36:47,603] [0] Ed: 217600, train_loss: 1.38292, acc: 0.43809
[INFO 2025-02-16 03:36:47,603] [0] Ed: 217600, train_loss: 1.38292, acc: 0.43809


INFO:root:[0] Ed: 220800, train_loss: 1.38218, acc: 0.43840


[INFO 2025-02-16 03:36:53,145] [0] Ed: 220800, train_loss: 1.38218, acc: 0.43840
[INFO 2025-02-16 03:36:53,145] [0] Ed: 220800, train_loss: 1.38218, acc: 0.43840


INFO:root:[0] Ed: 224000, train_loss: 1.38144, acc: 0.43873


[INFO 2025-02-16 03:36:58,689] [0] Ed: 224000, train_loss: 1.38144, acc: 0.43873
[INFO 2025-02-16 03:36:58,689] [0] Ed: 224000, train_loss: 1.38144, acc: 0.43873


INFO:root:[0] Ed: 227200, train_loss: 1.38070, acc: 0.43923


[INFO 2025-02-16 03:37:04,233] [0] Ed: 227200, train_loss: 1.38070, acc: 0.43923
[INFO 2025-02-16 03:37:04,233] [0] Ed: 227200, train_loss: 1.38070, acc: 0.43923


INFO:root:[0] Ed: 230400, train_loss: 1.38004, acc: 0.43943


[INFO 2025-02-16 03:37:09,782] [0] Ed: 230400, train_loss: 1.38004, acc: 0.43943
[INFO 2025-02-16 03:37:09,782] [0] Ed: 230400, train_loss: 1.38004, acc: 0.43943


INFO:root:[0] Ed: 233600, train_loss: 1.37901, acc: 0.43997


[INFO 2025-02-16 03:37:15,328] [0] Ed: 233600, train_loss: 1.37901, acc: 0.43997
[INFO 2025-02-16 03:37:15,328] [0] Ed: 233600, train_loss: 1.37901, acc: 0.43997


INFO:root:Training finish.


[INFO 2025-02-16 03:37:19,944] Training finish.
[INFO 2025-02-16 03:37:19,944] Training finish.


INFO:root:Model saved to /content/model/epoch-1.pt.


[INFO 2025-02-16 03:37:20,072] Model saved to /content/model/epoch-1.pt.
[INFO 2025-02-16 03:37:20,072] Model saved to /content/model/epoch-1.pt.


INFO:root:[0] Ed: 0, train_loss: inf, acc: inf


[INFO 2025-02-16 03:37:20,142] [0] Ed: 0, train_loss: inf, acc: inf
[INFO 2025-02-16 03:37:20,142] [0] Ed: 0, train_loss: inf, acc: inf


INFO:root:[0] Ed: 3200, train_loss: 1.36175, acc: 0.45969


[INFO 2025-02-16 03:37:25,690] [0] Ed: 3200, train_loss: 1.36175, acc: 0.45969
[INFO 2025-02-16 03:37:25,690] [0] Ed: 3200, train_loss: 1.36175, acc: 0.45969


INFO:root:[0] Ed: 6400, train_loss: 1.34318, acc: 0.46047


[INFO 2025-02-16 03:37:31,247] [0] Ed: 6400, train_loss: 1.34318, acc: 0.46047
[INFO 2025-02-16 03:37:31,247] [0] Ed: 6400, train_loss: 1.34318, acc: 0.46047


INFO:root:[0] Ed: 9600, train_loss: 1.33769, acc: 0.46104


[INFO 2025-02-16 03:37:36,797] [0] Ed: 9600, train_loss: 1.33769, acc: 0.46104
[INFO 2025-02-16 03:37:36,797] [0] Ed: 9600, train_loss: 1.33769, acc: 0.46104


INFO:root:[0] Ed: 12800, train_loss: 1.33106, acc: 0.46531


[INFO 2025-02-16 03:37:42,351] [0] Ed: 12800, train_loss: 1.33106, acc: 0.46531
[INFO 2025-02-16 03:37:42,351] [0] Ed: 12800, train_loss: 1.33106, acc: 0.46531


INFO:root:[0] Ed: 16000, train_loss: 1.33406, acc: 0.46225


[INFO 2025-02-16 03:37:47,905] [0] Ed: 16000, train_loss: 1.33406, acc: 0.46225
[INFO 2025-02-16 03:37:47,905] [0] Ed: 16000, train_loss: 1.33406, acc: 0.46225


INFO:root:[0] Ed: 19200, train_loss: 1.33395, acc: 0.46068


[INFO 2025-02-16 03:37:53,460] [0] Ed: 19200, train_loss: 1.33395, acc: 0.46068
[INFO 2025-02-16 03:37:53,460] [0] Ed: 19200, train_loss: 1.33395, acc: 0.46068


INFO:root:[0] Ed: 22400, train_loss: 1.33248, acc: 0.46138


[INFO 2025-02-16 03:37:59,016] [0] Ed: 22400, train_loss: 1.33248, acc: 0.46138
[INFO 2025-02-16 03:37:59,016] [0] Ed: 22400, train_loss: 1.33248, acc: 0.46138


INFO:root:[0] Ed: 25600, train_loss: 1.33211, acc: 0.46203


[INFO 2025-02-16 03:38:04,574] [0] Ed: 25600, train_loss: 1.33211, acc: 0.46203
[INFO 2025-02-16 03:38:04,574] [0] Ed: 25600, train_loss: 1.33211, acc: 0.46203


INFO:root:[0] Ed: 28800, train_loss: 1.33232, acc: 0.46194


[INFO 2025-02-16 03:38:10,122] [0] Ed: 28800, train_loss: 1.33232, acc: 0.46194
[INFO 2025-02-16 03:38:10,122] [0] Ed: 28800, train_loss: 1.33232, acc: 0.46194


INFO:root:[0] Ed: 32000, train_loss: 1.33233, acc: 0.46150


[INFO 2025-02-16 03:38:15,683] [0] Ed: 32000, train_loss: 1.33233, acc: 0.46150
[INFO 2025-02-16 03:38:15,683] [0] Ed: 32000, train_loss: 1.33233, acc: 0.46150


INFO:root:[0] Ed: 35200, train_loss: 1.33361, acc: 0.46060


[INFO 2025-02-16 03:38:21,243] [0] Ed: 35200, train_loss: 1.33361, acc: 0.46060
[INFO 2025-02-16 03:38:21,243] [0] Ed: 35200, train_loss: 1.33361, acc: 0.46060


INFO:root:[0] Ed: 38400, train_loss: 1.33304, acc: 0.46063


[INFO 2025-02-16 03:38:26,797] [0] Ed: 38400, train_loss: 1.33304, acc: 0.46063
[INFO 2025-02-16 03:38:26,797] [0] Ed: 38400, train_loss: 1.33304, acc: 0.46063


INFO:root:[0] Ed: 41600, train_loss: 1.33040, acc: 0.46238


[INFO 2025-02-16 03:38:32,353] [0] Ed: 41600, train_loss: 1.33040, acc: 0.46238
[INFO 2025-02-16 03:38:32,353] [0] Ed: 41600, train_loss: 1.33040, acc: 0.46238


INFO:root:[0] Ed: 44800, train_loss: 1.32968, acc: 0.46306


[INFO 2025-02-16 03:38:37,912] [0] Ed: 44800, train_loss: 1.32968, acc: 0.46306
[INFO 2025-02-16 03:38:37,912] [0] Ed: 44800, train_loss: 1.32968, acc: 0.46306


INFO:root:[0] Ed: 48000, train_loss: 1.32990, acc: 0.46319


[INFO 2025-02-16 03:38:43,465] [0] Ed: 48000, train_loss: 1.32990, acc: 0.46319
[INFO 2025-02-16 03:38:43,465] [0] Ed: 48000, train_loss: 1.32990, acc: 0.46319


INFO:root:[0] Ed: 51200, train_loss: 1.32895, acc: 0.46355


[INFO 2025-02-16 03:38:49,022] [0] Ed: 51200, train_loss: 1.32895, acc: 0.46355
[INFO 2025-02-16 03:38:49,022] [0] Ed: 51200, train_loss: 1.32895, acc: 0.46355


INFO:root:[0] Ed: 54400, train_loss: 1.32819, acc: 0.46426


[INFO 2025-02-16 03:38:54,575] [0] Ed: 54400, train_loss: 1.32819, acc: 0.46426
[INFO 2025-02-16 03:38:54,575] [0] Ed: 54400, train_loss: 1.32819, acc: 0.46426


INFO:root:[0] Ed: 57600, train_loss: 1.32766, acc: 0.46443


[INFO 2025-02-16 03:39:00,124] [0] Ed: 57600, train_loss: 1.32766, acc: 0.46443
[INFO 2025-02-16 03:39:00,124] [0] Ed: 57600, train_loss: 1.32766, acc: 0.46443


INFO:root:[0] Ed: 60800, train_loss: 1.32749, acc: 0.46428


[INFO 2025-02-16 03:39:05,669] [0] Ed: 60800, train_loss: 1.32749, acc: 0.46428
[INFO 2025-02-16 03:39:05,669] [0] Ed: 60800, train_loss: 1.32749, acc: 0.46428


INFO:root:[0] Ed: 64000, train_loss: 1.32653, acc: 0.46480


[INFO 2025-02-16 03:39:11,214] [0] Ed: 64000, train_loss: 1.32653, acc: 0.46480
[INFO 2025-02-16 03:39:11,214] [0] Ed: 64000, train_loss: 1.32653, acc: 0.46480


INFO:root:[0] Ed: 67200, train_loss: 1.32592, acc: 0.46585


[INFO 2025-02-16 03:39:16,763] [0] Ed: 67200, train_loss: 1.32592, acc: 0.46585
[INFO 2025-02-16 03:39:16,763] [0] Ed: 67200, train_loss: 1.32592, acc: 0.46585


INFO:root:[0] Ed: 70400, train_loss: 1.32495, acc: 0.46639


[INFO 2025-02-16 03:39:22,299] [0] Ed: 70400, train_loss: 1.32495, acc: 0.46639
[INFO 2025-02-16 03:39:22,299] [0] Ed: 70400, train_loss: 1.32495, acc: 0.46639


INFO:root:[0] Ed: 73600, train_loss: 1.32428, acc: 0.46606


[INFO 2025-02-16 03:39:27,845] [0] Ed: 73600, train_loss: 1.32428, acc: 0.46606
[INFO 2025-02-16 03:39:27,845] [0] Ed: 73600, train_loss: 1.32428, acc: 0.46606


INFO:root:[0] Ed: 76800, train_loss: 1.32319, acc: 0.46664


[INFO 2025-02-16 03:39:33,390] [0] Ed: 76800, train_loss: 1.32319, acc: 0.46664
[INFO 2025-02-16 03:39:33,390] [0] Ed: 76800, train_loss: 1.32319, acc: 0.46664


INFO:root:[0] Ed: 80000, train_loss: 1.32326, acc: 0.46685


[INFO 2025-02-16 03:39:38,933] [0] Ed: 80000, train_loss: 1.32326, acc: 0.46685
[INFO 2025-02-16 03:39:38,933] [0] Ed: 80000, train_loss: 1.32326, acc: 0.46685


INFO:root:[0] Ed: 83200, train_loss: 1.32217, acc: 0.46775


[INFO 2025-02-16 03:39:44,475] [0] Ed: 83200, train_loss: 1.32217, acc: 0.46775
[INFO 2025-02-16 03:39:44,475] [0] Ed: 83200, train_loss: 1.32217, acc: 0.46775


INFO:root:[0] Ed: 86400, train_loss: 1.32177, acc: 0.46811


[INFO 2025-02-16 03:39:50,018] [0] Ed: 86400, train_loss: 1.32177, acc: 0.46811
[INFO 2025-02-16 03:39:50,018] [0] Ed: 86400, train_loss: 1.32177, acc: 0.46811


INFO:root:[0] Ed: 89600, train_loss: 1.32093, acc: 0.46844


[INFO 2025-02-16 03:39:55,559] [0] Ed: 89600, train_loss: 1.32093, acc: 0.46844
[INFO 2025-02-16 03:39:55,559] [0] Ed: 89600, train_loss: 1.32093, acc: 0.46844


INFO:root:[0] Ed: 92800, train_loss: 1.32120, acc: 0.46797


[INFO 2025-02-16 03:40:01,103] [0] Ed: 92800, train_loss: 1.32120, acc: 0.46797
[INFO 2025-02-16 03:40:01,103] [0] Ed: 92800, train_loss: 1.32120, acc: 0.46797


INFO:root:[0] Ed: 96000, train_loss: 1.32096, acc: 0.46792


[INFO 2025-02-16 03:40:06,659] [0] Ed: 96000, train_loss: 1.32096, acc: 0.46792
[INFO 2025-02-16 03:40:06,659] [0] Ed: 96000, train_loss: 1.32096, acc: 0.46792


INFO:root:[0] Ed: 99200, train_loss: 1.32108, acc: 0.46788


[INFO 2025-02-16 03:40:12,206] [0] Ed: 99200, train_loss: 1.32108, acc: 0.46788
[INFO 2025-02-16 03:40:12,206] [0] Ed: 99200, train_loss: 1.32108, acc: 0.46788


INFO:root:[0] Ed: 102400, train_loss: 1.32021, acc: 0.46812


[INFO 2025-02-16 03:40:17,755] [0] Ed: 102400, train_loss: 1.32021, acc: 0.46812
[INFO 2025-02-16 03:40:17,755] [0] Ed: 102400, train_loss: 1.32021, acc: 0.46812


INFO:root:[0] Ed: 105600, train_loss: 1.31915, acc: 0.46868


[INFO 2025-02-16 03:40:23,306] [0] Ed: 105600, train_loss: 1.31915, acc: 0.46868
[INFO 2025-02-16 03:40:23,306] [0] Ed: 105600, train_loss: 1.31915, acc: 0.46868


INFO:root:[0] Ed: 108800, train_loss: 1.31914, acc: 0.46866


[INFO 2025-02-16 03:40:28,852] [0] Ed: 108800, train_loss: 1.31914, acc: 0.46866
[INFO 2025-02-16 03:40:28,852] [0] Ed: 108800, train_loss: 1.31914, acc: 0.46866


INFO:root:[0] Ed: 112000, train_loss: 1.31907, acc: 0.46854


[INFO 2025-02-16 03:40:34,394] [0] Ed: 112000, train_loss: 1.31907, acc: 0.46854
[INFO 2025-02-16 03:40:34,394] [0] Ed: 112000, train_loss: 1.31907, acc: 0.46854


INFO:root:[0] Ed: 115200, train_loss: 1.31872, acc: 0.46880


[INFO 2025-02-16 03:40:39,939] [0] Ed: 115200, train_loss: 1.31872, acc: 0.46880
[INFO 2025-02-16 03:40:39,939] [0] Ed: 115200, train_loss: 1.31872, acc: 0.46880


INFO:root:[0] Ed: 118400, train_loss: 1.31808, acc: 0.46924


[INFO 2025-02-16 03:40:45,490] [0] Ed: 118400, train_loss: 1.31808, acc: 0.46924
[INFO 2025-02-16 03:40:45,490] [0] Ed: 118400, train_loss: 1.31808, acc: 0.46924


INFO:root:[0] Ed: 121600, train_loss: 1.31786, acc: 0.46920


[INFO 2025-02-16 03:40:51,046] [0] Ed: 121600, train_loss: 1.31786, acc: 0.46920
[INFO 2025-02-16 03:40:51,046] [0] Ed: 121600, train_loss: 1.31786, acc: 0.46920


INFO:root:[0] Ed: 124800, train_loss: 1.31751, acc: 0.46918


[INFO 2025-02-16 03:40:56,598] [0] Ed: 124800, train_loss: 1.31751, acc: 0.46918
[INFO 2025-02-16 03:40:56,598] [0] Ed: 124800, train_loss: 1.31751, acc: 0.46918


INFO:root:[0] Ed: 128000, train_loss: 1.31768, acc: 0.46917


[INFO 2025-02-16 03:41:02,145] [0] Ed: 128000, train_loss: 1.31768, acc: 0.46917
[INFO 2025-02-16 03:41:02,145] [0] Ed: 128000, train_loss: 1.31768, acc: 0.46917


INFO:root:[0] Ed: 131200, train_loss: 1.31721, acc: 0.46959


[INFO 2025-02-16 03:41:07,702] [0] Ed: 131200, train_loss: 1.31721, acc: 0.46959
[INFO 2025-02-16 03:41:07,702] [0] Ed: 131200, train_loss: 1.31721, acc: 0.46959


INFO:root:[0] Ed: 134400, train_loss: 1.31714, acc: 0.46949


[INFO 2025-02-16 03:41:13,248] [0] Ed: 134400, train_loss: 1.31714, acc: 0.46949
[INFO 2025-02-16 03:41:13,248] [0] Ed: 134400, train_loss: 1.31714, acc: 0.46949


INFO:root:[0] Ed: 137600, train_loss: 1.31737, acc: 0.46945


[INFO 2025-02-16 03:41:18,806] [0] Ed: 137600, train_loss: 1.31737, acc: 0.46945
[INFO 2025-02-16 03:41:18,806] [0] Ed: 137600, train_loss: 1.31737, acc: 0.46945


INFO:root:[0] Ed: 140800, train_loss: 1.31711, acc: 0.46979


[INFO 2025-02-16 03:41:24,354] [0] Ed: 140800, train_loss: 1.31711, acc: 0.46979
[INFO 2025-02-16 03:41:24,354] [0] Ed: 140800, train_loss: 1.31711, acc: 0.46979


INFO:root:[0] Ed: 144000, train_loss: 1.31697, acc: 0.46938


[INFO 2025-02-16 03:41:29,901] [0] Ed: 144000, train_loss: 1.31697, acc: 0.46938
[INFO 2025-02-16 03:41:29,901] [0] Ed: 144000, train_loss: 1.31697, acc: 0.46938


INFO:root:[0] Ed: 147200, train_loss: 1.31652, acc: 0.46955


[INFO 2025-02-16 03:41:35,446] [0] Ed: 147200, train_loss: 1.31652, acc: 0.46955
[INFO 2025-02-16 03:41:35,446] [0] Ed: 147200, train_loss: 1.31652, acc: 0.46955


INFO:root:[0] Ed: 150400, train_loss: 1.31610, acc: 0.46971


[INFO 2025-02-16 03:41:40,992] [0] Ed: 150400, train_loss: 1.31610, acc: 0.46971
[INFO 2025-02-16 03:41:40,992] [0] Ed: 150400, train_loss: 1.31610, acc: 0.46971


INFO:root:[0] Ed: 153600, train_loss: 1.31544, acc: 0.46985


[INFO 2025-02-16 03:41:46,535] [0] Ed: 153600, train_loss: 1.31544, acc: 0.46985
[INFO 2025-02-16 03:41:46,535] [0] Ed: 153600, train_loss: 1.31544, acc: 0.46985


INFO:root:[0] Ed: 156800, train_loss: 1.31504, acc: 0.46996


[INFO 2025-02-16 03:41:52,078] [0] Ed: 156800, train_loss: 1.31504, acc: 0.46996
[INFO 2025-02-16 03:41:52,078] [0] Ed: 156800, train_loss: 1.31504, acc: 0.46996


INFO:root:[0] Ed: 160000, train_loss: 1.31456, acc: 0.47021


[INFO 2025-02-16 03:41:57,618] [0] Ed: 160000, train_loss: 1.31456, acc: 0.47021
[INFO 2025-02-16 03:41:57,618] [0] Ed: 160000, train_loss: 1.31456, acc: 0.47021


INFO:root:[0] Ed: 163200, train_loss: 1.31430, acc: 0.47041


[INFO 2025-02-16 03:42:03,158] [0] Ed: 163200, train_loss: 1.31430, acc: 0.47041
[INFO 2025-02-16 03:42:03,158] [0] Ed: 163200, train_loss: 1.31430, acc: 0.47041


INFO:root:[0] Ed: 166400, train_loss: 1.31426, acc: 0.47040


[INFO 2025-02-16 03:42:08,704] [0] Ed: 166400, train_loss: 1.31426, acc: 0.47040
[INFO 2025-02-16 03:42:08,704] [0] Ed: 166400, train_loss: 1.31426, acc: 0.47040


INFO:root:[0] Ed: 169600, train_loss: 1.31423, acc: 0.47069


[INFO 2025-02-16 03:42:14,251] [0] Ed: 169600, train_loss: 1.31423, acc: 0.47069
[INFO 2025-02-16 03:42:14,251] [0] Ed: 169600, train_loss: 1.31423, acc: 0.47069


INFO:root:[0] Ed: 172800, train_loss: 1.31386, acc: 0.47079


[INFO 2025-02-16 03:42:19,798] [0] Ed: 172800, train_loss: 1.31386, acc: 0.47079
[INFO 2025-02-16 03:42:19,798] [0] Ed: 172800, train_loss: 1.31386, acc: 0.47079


INFO:root:[0] Ed: 176000, train_loss: 1.31359, acc: 0.47108


[INFO 2025-02-16 03:42:25,344] [0] Ed: 176000, train_loss: 1.31359, acc: 0.47108
[INFO 2025-02-16 03:42:25,344] [0] Ed: 176000, train_loss: 1.31359, acc: 0.47108


INFO:root:[0] Ed: 179200, train_loss: 1.31333, acc: 0.47112


[INFO 2025-02-16 03:42:30,892] [0] Ed: 179200, train_loss: 1.31333, acc: 0.47112
[INFO 2025-02-16 03:42:30,892] [0] Ed: 179200, train_loss: 1.31333, acc: 0.47112


INFO:root:[0] Ed: 182400, train_loss: 1.31322, acc: 0.47134


[INFO 2025-02-16 03:42:36,436] [0] Ed: 182400, train_loss: 1.31322, acc: 0.47134
[INFO 2025-02-16 03:42:36,436] [0] Ed: 182400, train_loss: 1.31322, acc: 0.47134


INFO:root:[0] Ed: 185600, train_loss: 1.31279, acc: 0.47157


[INFO 2025-02-16 03:42:41,993] [0] Ed: 185600, train_loss: 1.31279, acc: 0.47157
[INFO 2025-02-16 03:42:41,993] [0] Ed: 185600, train_loss: 1.31279, acc: 0.47157


INFO:root:[0] Ed: 188800, train_loss: 1.31272, acc: 0.47172


[INFO 2025-02-16 03:42:47,540] [0] Ed: 188800, train_loss: 1.31272, acc: 0.47172
[INFO 2025-02-16 03:42:47,540] [0] Ed: 188800, train_loss: 1.31272, acc: 0.47172


INFO:root:[0] Ed: 192000, train_loss: 1.31283, acc: 0.47152


[INFO 2025-02-16 03:42:53,086] [0] Ed: 192000, train_loss: 1.31283, acc: 0.47152
[INFO 2025-02-16 03:42:53,086] [0] Ed: 192000, train_loss: 1.31283, acc: 0.47152


INFO:root:[0] Ed: 195200, train_loss: 1.31258, acc: 0.47169


[INFO 2025-02-16 03:42:58,635] [0] Ed: 195200, train_loss: 1.31258, acc: 0.47169
[INFO 2025-02-16 03:42:58,635] [0] Ed: 195200, train_loss: 1.31258, acc: 0.47169


INFO:root:[0] Ed: 198400, train_loss: 1.31255, acc: 0.47182


[INFO 2025-02-16 03:43:04,182] [0] Ed: 198400, train_loss: 1.31255, acc: 0.47182
[INFO 2025-02-16 03:43:04,182] [0] Ed: 198400, train_loss: 1.31255, acc: 0.47182


INFO:root:[0] Ed: 201600, train_loss: 1.31221, acc: 0.47201


[INFO 2025-02-16 03:43:09,731] [0] Ed: 201600, train_loss: 1.31221, acc: 0.47201
[INFO 2025-02-16 03:43:09,731] [0] Ed: 201600, train_loss: 1.31221, acc: 0.47201


INFO:root:[0] Ed: 204800, train_loss: 1.31238, acc: 0.47169


[INFO 2025-02-16 03:43:15,278] [0] Ed: 204800, train_loss: 1.31238, acc: 0.47169
[INFO 2025-02-16 03:43:15,278] [0] Ed: 204800, train_loss: 1.31238, acc: 0.47169


INFO:root:[0] Ed: 208000, train_loss: 1.31241, acc: 0.47167


[INFO 2025-02-16 03:43:20,827] [0] Ed: 208000, train_loss: 1.31241, acc: 0.47167
[INFO 2025-02-16 03:43:20,827] [0] Ed: 208000, train_loss: 1.31241, acc: 0.47167


INFO:root:[0] Ed: 211200, train_loss: 1.31235, acc: 0.47168


[INFO 2025-02-16 03:43:26,380] [0] Ed: 211200, train_loss: 1.31235, acc: 0.47168
[INFO 2025-02-16 03:43:26,380] [0] Ed: 211200, train_loss: 1.31235, acc: 0.47168


INFO:root:[0] Ed: 214400, train_loss: 1.31222, acc: 0.47188


[INFO 2025-02-16 03:43:31,934] [0] Ed: 214400, train_loss: 1.31222, acc: 0.47188
[INFO 2025-02-16 03:43:31,934] [0] Ed: 214400, train_loss: 1.31222, acc: 0.47188


INFO:root:[0] Ed: 217600, train_loss: 1.31209, acc: 0.47186


[INFO 2025-02-16 03:43:37,485] [0] Ed: 217600, train_loss: 1.31209, acc: 0.47186
[INFO 2025-02-16 03:43:37,485] [0] Ed: 217600, train_loss: 1.31209, acc: 0.47186


INFO:root:[0] Ed: 220800, train_loss: 1.31179, acc: 0.47188


[INFO 2025-02-16 03:43:43,031] [0] Ed: 220800, train_loss: 1.31179, acc: 0.47188
[INFO 2025-02-16 03:43:43,031] [0] Ed: 220800, train_loss: 1.31179, acc: 0.47188


INFO:root:[0] Ed: 224000, train_loss: 1.31141, acc: 0.47206


[INFO 2025-02-16 03:43:48,583] [0] Ed: 224000, train_loss: 1.31141, acc: 0.47206
[INFO 2025-02-16 03:43:48,583] [0] Ed: 224000, train_loss: 1.31141, acc: 0.47206


INFO:root:[0] Ed: 227200, train_loss: 1.31104, acc: 0.47221


[INFO 2025-02-16 03:43:54,127] [0] Ed: 227200, train_loss: 1.31104, acc: 0.47221
[INFO 2025-02-16 03:43:54,127] [0] Ed: 227200, train_loss: 1.31104, acc: 0.47221


INFO:root:[0] Ed: 230400, train_loss: 1.31072, acc: 0.47236


[INFO 2025-02-16 03:43:59,671] [0] Ed: 230400, train_loss: 1.31072, acc: 0.47236
[INFO 2025-02-16 03:43:59,671] [0] Ed: 230400, train_loss: 1.31072, acc: 0.47236


INFO:root:[0] Ed: 233600, train_loss: 1.30998, acc: 0.47266


[INFO 2025-02-16 03:44:05,219] [0] Ed: 233600, train_loss: 1.30998, acc: 0.47266
[INFO 2025-02-16 03:44:05,219] [0] Ed: 233600, train_loss: 1.30998, acc: 0.47266


INFO:root:Training finish.


[INFO 2025-02-16 03:44:09,833] Training finish.
[INFO 2025-02-16 03:44:09,833] Training finish.


INFO:root:Model saved to /content/model/epoch-2.pt.


[INFO 2025-02-16 03:44:09,957] Model saved to /content/model/epoch-2.pt.
[INFO 2025-02-16 03:44:09,957] Model saved to /content/model/epoch-2.pt.


INFO:root:[0] Ed: 0, train_loss: inf, acc: inf


[INFO 2025-02-16 03:44:10,028] [0] Ed: 0, train_loss: inf, acc: inf
[INFO 2025-02-16 03:44:10,028] [0] Ed: 0, train_loss: inf, acc: inf


INFO:root:[0] Ed: 3200, train_loss: 1.32502, acc: 0.47812


[INFO 2025-02-16 03:44:15,579] [0] Ed: 3200, train_loss: 1.32502, acc: 0.47812
[INFO 2025-02-16 03:44:15,579] [0] Ed: 3200, train_loss: 1.32502, acc: 0.47812


INFO:root:[0] Ed: 6400, train_loss: 1.30326, acc: 0.48047


[INFO 2025-02-16 03:44:21,132] [0] Ed: 6400, train_loss: 1.30326, acc: 0.48047
[INFO 2025-02-16 03:44:21,132] [0] Ed: 6400, train_loss: 1.30326, acc: 0.48047


INFO:root:[0] Ed: 9600, train_loss: 1.30096, acc: 0.48229


[INFO 2025-02-16 03:44:26,686] [0] Ed: 9600, train_loss: 1.30096, acc: 0.48229
[INFO 2025-02-16 03:44:26,686] [0] Ed: 9600, train_loss: 1.30096, acc: 0.48229


INFO:root:[0] Ed: 12800, train_loss: 1.29474, acc: 0.48328


[INFO 2025-02-16 03:44:32,238] [0] Ed: 12800, train_loss: 1.29474, acc: 0.48328
[INFO 2025-02-16 03:44:32,238] [0] Ed: 12800, train_loss: 1.29474, acc: 0.48328


INFO:root:[0] Ed: 16000, train_loss: 1.29895, acc: 0.47913


[INFO 2025-02-16 03:44:37,797] [0] Ed: 16000, train_loss: 1.29895, acc: 0.47913
[INFO 2025-02-16 03:44:37,797] [0] Ed: 16000, train_loss: 1.29895, acc: 0.47913


INFO:root:[0] Ed: 19200, train_loss: 1.29798, acc: 0.47953


[INFO 2025-02-16 03:44:43,354] [0] Ed: 19200, train_loss: 1.29798, acc: 0.47953
[INFO 2025-02-16 03:44:43,354] [0] Ed: 19200, train_loss: 1.29798, acc: 0.47953


INFO:root:[0] Ed: 22400, train_loss: 1.29628, acc: 0.47915


[INFO 2025-02-16 03:44:48,913] [0] Ed: 22400, train_loss: 1.29628, acc: 0.47915
[INFO 2025-02-16 03:44:48,913] [0] Ed: 22400, train_loss: 1.29628, acc: 0.47915


INFO:root:[0] Ed: 25600, train_loss: 1.29637, acc: 0.47953


[INFO 2025-02-16 03:44:54,468] [0] Ed: 25600, train_loss: 1.29637, acc: 0.47953
[INFO 2025-02-16 03:44:54,468] [0] Ed: 25600, train_loss: 1.29637, acc: 0.47953


INFO:root:[0] Ed: 28800, train_loss: 1.29611, acc: 0.48014


[INFO 2025-02-16 03:45:00,020] [0] Ed: 28800, train_loss: 1.29611, acc: 0.48014
[INFO 2025-02-16 03:45:00,020] [0] Ed: 28800, train_loss: 1.29611, acc: 0.48014


INFO:root:[0] Ed: 32000, train_loss: 1.29636, acc: 0.47888


[INFO 2025-02-16 03:45:05,579] [0] Ed: 32000, train_loss: 1.29636, acc: 0.47888
[INFO 2025-02-16 03:45:05,579] [0] Ed: 32000, train_loss: 1.29636, acc: 0.47888


INFO:root:[0] Ed: 35200, train_loss: 1.29815, acc: 0.47776


[INFO 2025-02-16 03:45:11,128] [0] Ed: 35200, train_loss: 1.29815, acc: 0.47776
[INFO 2025-02-16 03:45:11,128] [0] Ed: 35200, train_loss: 1.29815, acc: 0.47776


INFO:root:[0] Ed: 38400, train_loss: 1.29709, acc: 0.47771


[INFO 2025-02-16 03:45:16,678] [0] Ed: 38400, train_loss: 1.29709, acc: 0.47771
[INFO 2025-02-16 03:45:16,678] [0] Ed: 38400, train_loss: 1.29709, acc: 0.47771


INFO:root:[0] Ed: 41600, train_loss: 1.29414, acc: 0.47938


[INFO 2025-02-16 03:45:22,223] [0] Ed: 41600, train_loss: 1.29414, acc: 0.47938
[INFO 2025-02-16 03:45:22,223] [0] Ed: 41600, train_loss: 1.29414, acc: 0.47938


INFO:root:[0] Ed: 44800, train_loss: 1.29351, acc: 0.47942


[INFO 2025-02-16 03:45:27,777] [0] Ed: 44800, train_loss: 1.29351, acc: 0.47942
[INFO 2025-02-16 03:45:27,777] [0] Ed: 44800, train_loss: 1.29351, acc: 0.47942


INFO:root:[0] Ed: 48000, train_loss: 1.29379, acc: 0.47940


[INFO 2025-02-16 03:45:33,320] [0] Ed: 48000, train_loss: 1.29379, acc: 0.47940
[INFO 2025-02-16 03:45:33,320] [0] Ed: 48000, train_loss: 1.29379, acc: 0.47940


INFO:root:[0] Ed: 51200, train_loss: 1.29267, acc: 0.47943


[INFO 2025-02-16 03:45:38,862] [0] Ed: 51200, train_loss: 1.29267, acc: 0.47943
[INFO 2025-02-16 03:45:38,862] [0] Ed: 51200, train_loss: 1.29267, acc: 0.47943


INFO:root:[0] Ed: 54400, train_loss: 1.29221, acc: 0.47993


[INFO 2025-02-16 03:45:44,410] [0] Ed: 54400, train_loss: 1.29221, acc: 0.47993
[INFO 2025-02-16 03:45:44,410] [0] Ed: 54400, train_loss: 1.29221, acc: 0.47993


INFO:root:[0] Ed: 57600, train_loss: 1.29157, acc: 0.48010


[INFO 2025-02-16 03:45:49,957] [0] Ed: 57600, train_loss: 1.29157, acc: 0.48010
[INFO 2025-02-16 03:45:49,957] [0] Ed: 57600, train_loss: 1.29157, acc: 0.48010


INFO:root:[0] Ed: 60800, train_loss: 1.29156, acc: 0.48002


[INFO 2025-02-16 03:45:55,494] [0] Ed: 60800, train_loss: 1.29156, acc: 0.48002
[INFO 2025-02-16 03:45:55,494] [0] Ed: 60800, train_loss: 1.29156, acc: 0.48002


INFO:root:[0] Ed: 64000, train_loss: 1.29058, acc: 0.48039


[INFO 2025-02-16 03:46:01,043] [0] Ed: 64000, train_loss: 1.29058, acc: 0.48039
[INFO 2025-02-16 03:46:01,043] [0] Ed: 64000, train_loss: 1.29058, acc: 0.48039


INFO:root:[0] Ed: 67200, train_loss: 1.28987, acc: 0.48122


[INFO 2025-02-16 03:46:06,589] [0] Ed: 67200, train_loss: 1.28987, acc: 0.48122
[INFO 2025-02-16 03:46:06,589] [0] Ed: 67200, train_loss: 1.28987, acc: 0.48122


INFO:root:[0] Ed: 70400, train_loss: 1.28907, acc: 0.48118


[INFO 2025-02-16 03:46:12,133] [0] Ed: 70400, train_loss: 1.28907, acc: 0.48118
[INFO 2025-02-16 03:46:12,133] [0] Ed: 70400, train_loss: 1.28907, acc: 0.48118


INFO:root:[0] Ed: 73600, train_loss: 1.28862, acc: 0.48088


[INFO 2025-02-16 03:46:17,675] [0] Ed: 73600, train_loss: 1.28862, acc: 0.48088
[INFO 2025-02-16 03:46:17,675] [0] Ed: 73600, train_loss: 1.28862, acc: 0.48088


INFO:root:[0] Ed: 76800, train_loss: 1.28745, acc: 0.48164


[INFO 2025-02-16 03:46:23,216] [0] Ed: 76800, train_loss: 1.28745, acc: 0.48164
[INFO 2025-02-16 03:46:23,216] [0] Ed: 76800, train_loss: 1.28745, acc: 0.48164


INFO:root:[0] Ed: 80000, train_loss: 1.28772, acc: 0.48146


[INFO 2025-02-16 03:46:28,767] [0] Ed: 80000, train_loss: 1.28772, acc: 0.48146
[INFO 2025-02-16 03:46:28,767] [0] Ed: 80000, train_loss: 1.28772, acc: 0.48146


INFO:root:[0] Ed: 83200, train_loss: 1.28679, acc: 0.48236


[INFO 2025-02-16 03:46:34,315] [0] Ed: 83200, train_loss: 1.28679, acc: 0.48236
[INFO 2025-02-16 03:46:34,315] [0] Ed: 83200, train_loss: 1.28679, acc: 0.48236


INFO:root:[0] Ed: 86400, train_loss: 1.28642, acc: 0.48281


[INFO 2025-02-16 03:46:39,865] [0] Ed: 86400, train_loss: 1.28642, acc: 0.48281
[INFO 2025-02-16 03:46:39,865] [0] Ed: 86400, train_loss: 1.28642, acc: 0.48281


INFO:root:[0] Ed: 89600, train_loss: 1.28520, acc: 0.48345


[INFO 2025-02-16 03:46:45,408] [0] Ed: 89600, train_loss: 1.28520, acc: 0.48345
[INFO 2025-02-16 03:46:45,408] [0] Ed: 89600, train_loss: 1.28520, acc: 0.48345


INFO:root:[0] Ed: 92800, train_loss: 1.28582, acc: 0.48280


[INFO 2025-02-16 03:46:50,954] [0] Ed: 92800, train_loss: 1.28582, acc: 0.48280
[INFO 2025-02-16 03:46:50,954] [0] Ed: 92800, train_loss: 1.28582, acc: 0.48280


INFO:root:[0] Ed: 96000, train_loss: 1.28559, acc: 0.48291


[INFO 2025-02-16 03:46:56,500] [0] Ed: 96000, train_loss: 1.28559, acc: 0.48291
[INFO 2025-02-16 03:46:56,500] [0] Ed: 96000, train_loss: 1.28559, acc: 0.48291


INFO:root:[0] Ed: 99200, train_loss: 1.28572, acc: 0.48284


[INFO 2025-02-16 03:47:02,046] [0] Ed: 99200, train_loss: 1.28572, acc: 0.48284
[INFO 2025-02-16 03:47:02,046] [0] Ed: 99200, train_loss: 1.28572, acc: 0.48284


INFO:root:[0] Ed: 102400, train_loss: 1.28472, acc: 0.48318


[INFO 2025-02-16 03:47:07,594] [0] Ed: 102400, train_loss: 1.28472, acc: 0.48318
[INFO 2025-02-16 03:47:07,594] [0] Ed: 102400, train_loss: 1.28472, acc: 0.48318


INFO:root:[0] Ed: 105600, train_loss: 1.28338, acc: 0.48379


[INFO 2025-02-16 03:47:13,142] [0] Ed: 105600, train_loss: 1.28338, acc: 0.48379
[INFO 2025-02-16 03:47:13,142] [0] Ed: 105600, train_loss: 1.28338, acc: 0.48379


INFO:root:[0] Ed: 108800, train_loss: 1.28340, acc: 0.48376


[INFO 2025-02-16 03:47:18,685] [0] Ed: 108800, train_loss: 1.28340, acc: 0.48376
[INFO 2025-02-16 03:47:18,685] [0] Ed: 108800, train_loss: 1.28340, acc: 0.48376


INFO:root:[0] Ed: 112000, train_loss: 1.28360, acc: 0.48377


[INFO 2025-02-16 03:47:24,224] [0] Ed: 112000, train_loss: 1.28360, acc: 0.48377
[INFO 2025-02-16 03:47:24,224] [0] Ed: 112000, train_loss: 1.28360, acc: 0.48377


INFO:root:[0] Ed: 115200, train_loss: 1.28334, acc: 0.48408


[INFO 2025-02-16 03:47:29,768] [0] Ed: 115200, train_loss: 1.28334, acc: 0.48408
[INFO 2025-02-16 03:47:29,768] [0] Ed: 115200, train_loss: 1.28334, acc: 0.48408


INFO:root:[0] Ed: 118400, train_loss: 1.28245, acc: 0.48440


[INFO 2025-02-16 03:47:35,307] [0] Ed: 118400, train_loss: 1.28245, acc: 0.48440
[INFO 2025-02-16 03:47:35,307] [0] Ed: 118400, train_loss: 1.28245, acc: 0.48440


INFO:root:[0] Ed: 121600, train_loss: 1.28231, acc: 0.48436


[INFO 2025-02-16 03:47:40,854] [0] Ed: 121600, train_loss: 1.28231, acc: 0.48436
[INFO 2025-02-16 03:47:40,854] [0] Ed: 121600, train_loss: 1.28231, acc: 0.48436


INFO:root:[0] Ed: 124800, train_loss: 1.28207, acc: 0.48442


[INFO 2025-02-16 03:47:46,396] [0] Ed: 124800, train_loss: 1.28207, acc: 0.48442
[INFO 2025-02-16 03:47:46,396] [0] Ed: 124800, train_loss: 1.28207, acc: 0.48442


INFO:root:[0] Ed: 128000, train_loss: 1.28235, acc: 0.48440


[INFO 2025-02-16 03:47:51,936] [0] Ed: 128000, train_loss: 1.28235, acc: 0.48440
[INFO 2025-02-16 03:47:51,936] [0] Ed: 128000, train_loss: 1.28235, acc: 0.48440


INFO:root:[0] Ed: 131200, train_loss: 1.28186, acc: 0.48466


[INFO 2025-02-16 03:47:57,493] [0] Ed: 131200, train_loss: 1.28186, acc: 0.48466
[INFO 2025-02-16 03:47:57,493] [0] Ed: 131200, train_loss: 1.28186, acc: 0.48466


INFO:root:[0] Ed: 134400, train_loss: 1.28193, acc: 0.48447


[INFO 2025-02-16 03:48:03,036] [0] Ed: 134400, train_loss: 1.28193, acc: 0.48447
[INFO 2025-02-16 03:48:03,036] [0] Ed: 134400, train_loss: 1.28193, acc: 0.48447


INFO:root:[0] Ed: 137600, train_loss: 1.28217, acc: 0.48443


[INFO 2025-02-16 03:48:08,592] [0] Ed: 137600, train_loss: 1.28217, acc: 0.48443
[INFO 2025-02-16 03:48:08,592] [0] Ed: 137600, train_loss: 1.28217, acc: 0.48443


INFO:root:[0] Ed: 140800, train_loss: 1.28185, acc: 0.48460


[INFO 2025-02-16 03:48:14,133] [0] Ed: 140800, train_loss: 1.28185, acc: 0.48460
[INFO 2025-02-16 03:48:14,133] [0] Ed: 140800, train_loss: 1.28185, acc: 0.48460


INFO:root:[0] Ed: 144000, train_loss: 1.28182, acc: 0.48420


[INFO 2025-02-16 03:48:19,683] [0] Ed: 144000, train_loss: 1.28182, acc: 0.48420
[INFO 2025-02-16 03:48:19,683] [0] Ed: 144000, train_loss: 1.28182, acc: 0.48420


INFO:root:[0] Ed: 147200, train_loss: 1.28136, acc: 0.48439


[INFO 2025-02-16 03:48:25,230] [0] Ed: 147200, train_loss: 1.28136, acc: 0.48439
[INFO 2025-02-16 03:48:25,230] [0] Ed: 147200, train_loss: 1.28136, acc: 0.48439


INFO:root:[0] Ed: 150400, train_loss: 1.28100, acc: 0.48459


[INFO 2025-02-16 03:48:30,779] [0] Ed: 150400, train_loss: 1.28100, acc: 0.48459
[INFO 2025-02-16 03:48:30,779] [0] Ed: 150400, train_loss: 1.28100, acc: 0.48459


INFO:root:[0] Ed: 153600, train_loss: 1.28040, acc: 0.48482


[INFO 2025-02-16 03:48:36,332] [0] Ed: 153600, train_loss: 1.28040, acc: 0.48482
[INFO 2025-02-16 03:48:36,332] [0] Ed: 153600, train_loss: 1.28040, acc: 0.48482


INFO:root:[0] Ed: 156800, train_loss: 1.28002, acc: 0.48492


[INFO 2025-02-16 03:48:41,879] [0] Ed: 156800, train_loss: 1.28002, acc: 0.48492
[INFO 2025-02-16 03:48:41,879] [0] Ed: 156800, train_loss: 1.28002, acc: 0.48492


INFO:root:[0] Ed: 160000, train_loss: 1.27961, acc: 0.48530


[INFO 2025-02-16 03:48:47,431] [0] Ed: 160000, train_loss: 1.27961, acc: 0.48530
[INFO 2025-02-16 03:48:47,431] [0] Ed: 160000, train_loss: 1.27961, acc: 0.48530


INFO:root:[0] Ed: 163200, train_loss: 1.27937, acc: 0.48550


[INFO 2025-02-16 03:48:52,980] [0] Ed: 163200, train_loss: 1.27937, acc: 0.48550
[INFO 2025-02-16 03:48:52,980] [0] Ed: 163200, train_loss: 1.27937, acc: 0.48550


INFO:root:[0] Ed: 166400, train_loss: 1.27948, acc: 0.48550


[INFO 2025-02-16 03:48:58,524] [0] Ed: 166400, train_loss: 1.27948, acc: 0.48550
[INFO 2025-02-16 03:48:58,524] [0] Ed: 166400, train_loss: 1.27948, acc: 0.48550


INFO:root:[0] Ed: 169600, train_loss: 1.27950, acc: 0.48575


[INFO 2025-02-16 03:49:04,076] [0] Ed: 169600, train_loss: 1.27950, acc: 0.48575
[INFO 2025-02-16 03:49:04,076] [0] Ed: 169600, train_loss: 1.27950, acc: 0.48575


INFO:root:[0] Ed: 172800, train_loss: 1.27928, acc: 0.48582


[INFO 2025-02-16 03:49:09,624] [0] Ed: 172800, train_loss: 1.27928, acc: 0.48582
[INFO 2025-02-16 03:49:09,624] [0] Ed: 172800, train_loss: 1.27928, acc: 0.48582


INFO:root:[0] Ed: 176000, train_loss: 1.27917, acc: 0.48599


[INFO 2025-02-16 03:49:15,170] [0] Ed: 176000, train_loss: 1.27917, acc: 0.48599
[INFO 2025-02-16 03:49:15,170] [0] Ed: 176000, train_loss: 1.27917, acc: 0.48599


INFO:root:[0] Ed: 179200, train_loss: 1.27903, acc: 0.48606


[INFO 2025-02-16 03:49:20,721] [0] Ed: 179200, train_loss: 1.27903, acc: 0.48606
[INFO 2025-02-16 03:49:20,721] [0] Ed: 179200, train_loss: 1.27903, acc: 0.48606


INFO:root:[0] Ed: 182400, train_loss: 1.27906, acc: 0.48600


[INFO 2025-02-16 03:49:26,278] [0] Ed: 182400, train_loss: 1.27906, acc: 0.48600
[INFO 2025-02-16 03:49:26,278] [0] Ed: 182400, train_loss: 1.27906, acc: 0.48600


INFO:root:[0] Ed: 185600, train_loss: 1.27869, acc: 0.48609


[INFO 2025-02-16 03:49:31,835] [0] Ed: 185600, train_loss: 1.27869, acc: 0.48609
[INFO 2025-02-16 03:49:31,835] [0] Ed: 185600, train_loss: 1.27869, acc: 0.48609


INFO:root:[0] Ed: 188800, train_loss: 1.27861, acc: 0.48642


[INFO 2025-02-16 03:49:37,387] [0] Ed: 188800, train_loss: 1.27861, acc: 0.48642
[INFO 2025-02-16 03:49:37,387] [0] Ed: 188800, train_loss: 1.27861, acc: 0.48642


INFO:root:[0] Ed: 192000, train_loss: 1.27876, acc: 0.48629


[INFO 2025-02-16 03:49:42,943] [0] Ed: 192000, train_loss: 1.27876, acc: 0.48629
[INFO 2025-02-16 03:49:42,943] [0] Ed: 192000, train_loss: 1.27876, acc: 0.48629


INFO:root:[0] Ed: 195200, train_loss: 1.27852, acc: 0.48636


[INFO 2025-02-16 03:49:48,498] [0] Ed: 195200, train_loss: 1.27852, acc: 0.48636
[INFO 2025-02-16 03:49:48,498] [0] Ed: 195200, train_loss: 1.27852, acc: 0.48636


INFO:root:[0] Ed: 198400, train_loss: 1.27866, acc: 0.48647


[INFO 2025-02-16 03:49:54,064] [0] Ed: 198400, train_loss: 1.27866, acc: 0.48647
[INFO 2025-02-16 03:49:54,064] [0] Ed: 198400, train_loss: 1.27866, acc: 0.48647


INFO:root:[0] Ed: 201600, train_loss: 1.27836, acc: 0.48662


[INFO 2025-02-16 03:49:59,622] [0] Ed: 201600, train_loss: 1.27836, acc: 0.48662
[INFO 2025-02-16 03:49:59,622] [0] Ed: 201600, train_loss: 1.27836, acc: 0.48662


INFO:root:[0] Ed: 204800, train_loss: 1.27852, acc: 0.48636


[INFO 2025-02-16 03:50:05,182] [0] Ed: 204800, train_loss: 1.27852, acc: 0.48636
[INFO 2025-02-16 03:50:05,182] [0] Ed: 204800, train_loss: 1.27852, acc: 0.48636


INFO:root:[0] Ed: 208000, train_loss: 1.27859, acc: 0.48639


[INFO 2025-02-16 03:50:10,740] [0] Ed: 208000, train_loss: 1.27859, acc: 0.48639
[INFO 2025-02-16 03:50:10,740] [0] Ed: 208000, train_loss: 1.27859, acc: 0.48639


INFO:root:[0] Ed: 211200, train_loss: 1.27853, acc: 0.48639


[INFO 2025-02-16 03:50:16,294] [0] Ed: 211200, train_loss: 1.27853, acc: 0.48639
[INFO 2025-02-16 03:50:16,294] [0] Ed: 211200, train_loss: 1.27853, acc: 0.48639


INFO:root:[0] Ed: 214400, train_loss: 1.27843, acc: 0.48654


[INFO 2025-02-16 03:50:21,852] [0] Ed: 214400, train_loss: 1.27843, acc: 0.48654
[INFO 2025-02-16 03:50:21,852] [0] Ed: 214400, train_loss: 1.27843, acc: 0.48654


INFO:root:[0] Ed: 217600, train_loss: 1.27840, acc: 0.48646


[INFO 2025-02-16 03:50:27,405] [0] Ed: 217600, train_loss: 1.27840, acc: 0.48646
[INFO 2025-02-16 03:50:27,405] [0] Ed: 217600, train_loss: 1.27840, acc: 0.48646


INFO:root:[0] Ed: 220800, train_loss: 1.27814, acc: 0.48661


[INFO 2025-02-16 03:50:32,960] [0] Ed: 220800, train_loss: 1.27814, acc: 0.48661
[INFO 2025-02-16 03:50:32,960] [0] Ed: 220800, train_loss: 1.27814, acc: 0.48661


INFO:root:[0] Ed: 224000, train_loss: 1.27774, acc: 0.48675


[INFO 2025-02-16 03:50:38,508] [0] Ed: 224000, train_loss: 1.27774, acc: 0.48675
[INFO 2025-02-16 03:50:38,508] [0] Ed: 224000, train_loss: 1.27774, acc: 0.48675


INFO:root:[0] Ed: 227200, train_loss: 1.27740, acc: 0.48685


[INFO 2025-02-16 03:50:44,054] [0] Ed: 227200, train_loss: 1.27740, acc: 0.48685
[INFO 2025-02-16 03:50:44,054] [0] Ed: 227200, train_loss: 1.27740, acc: 0.48685


INFO:root:[0] Ed: 230400, train_loss: 1.27713, acc: 0.48699


[INFO 2025-02-16 03:50:49,598] [0] Ed: 230400, train_loss: 1.27713, acc: 0.48699
[INFO 2025-02-16 03:50:49,598] [0] Ed: 230400, train_loss: 1.27713, acc: 0.48699


INFO:root:[0] Ed: 233600, train_loss: 1.27642, acc: 0.48726


[INFO 2025-02-16 03:50:55,147] [0] Ed: 233600, train_loss: 1.27642, acc: 0.48726
[INFO 2025-02-16 03:50:55,147] [0] Ed: 233600, train_loss: 1.27642, acc: 0.48726


INFO:root:Training finish.


[INFO 2025-02-16 03:50:59,757] Training finish.
[INFO 2025-02-16 03:50:59,757] Training finish.


INFO:root:Model saved to /content/model/epoch-3.pt.


[INFO 2025-02-16 03:50:59,881] Model saved to /content/model/epoch-3.pt.
[INFO 2025-02-16 03:50:59,881] Model saved to /content/model/epoch-3.pt.


INFO:root:[0] Ed: 0, train_loss: inf, acc: inf


[INFO 2025-02-16 03:50:59,953] [0] Ed: 0, train_loss: inf, acc: inf
[INFO 2025-02-16 03:50:59,953] [0] Ed: 0, train_loss: inf, acc: inf


INFO:root:[0] Ed: 3200, train_loss: 1.28960, acc: 0.50281


[INFO 2025-02-16 03:51:05,493] [0] Ed: 3200, train_loss: 1.28960, acc: 0.50281
[INFO 2025-02-16 03:51:05,493] [0] Ed: 3200, train_loss: 1.28960, acc: 0.50281


INFO:root:[0] Ed: 6400, train_loss: 1.27167, acc: 0.49641


[INFO 2025-02-16 03:51:11,033] [0] Ed: 6400, train_loss: 1.27167, acc: 0.49641
[INFO 2025-02-16 03:51:11,033] [0] Ed: 6400, train_loss: 1.27167, acc: 0.49641


INFO:root:[0] Ed: 9600, train_loss: 1.26913, acc: 0.49792


[INFO 2025-02-16 03:51:16,576] [0] Ed: 9600, train_loss: 1.26913, acc: 0.49792
[INFO 2025-02-16 03:51:16,576] [0] Ed: 9600, train_loss: 1.26913, acc: 0.49792


INFO:root:[0] Ed: 12800, train_loss: 1.26251, acc: 0.49828


[INFO 2025-02-16 03:51:22,120] [0] Ed: 12800, train_loss: 1.26251, acc: 0.49828
[INFO 2025-02-16 03:51:22,120] [0] Ed: 12800, train_loss: 1.26251, acc: 0.49828


INFO:root:[0] Ed: 16000, train_loss: 1.26768, acc: 0.49525


[INFO 2025-02-16 03:51:27,668] [0] Ed: 16000, train_loss: 1.26768, acc: 0.49525
[INFO 2025-02-16 03:51:27,668] [0] Ed: 16000, train_loss: 1.26768, acc: 0.49525


INFO:root:[0] Ed: 19200, train_loss: 1.26869, acc: 0.49464


[INFO 2025-02-16 03:51:33,213] [0] Ed: 19200, train_loss: 1.26869, acc: 0.49464
[INFO 2025-02-16 03:51:33,213] [0] Ed: 19200, train_loss: 1.26869, acc: 0.49464


INFO:root:[0] Ed: 22400, train_loss: 1.26736, acc: 0.49482


[INFO 2025-02-16 03:51:38,758] [0] Ed: 22400, train_loss: 1.26736, acc: 0.49482
[INFO 2025-02-16 03:51:38,758] [0] Ed: 22400, train_loss: 1.26736, acc: 0.49482


INFO:root:[0] Ed: 25600, train_loss: 1.26666, acc: 0.49555


[INFO 2025-02-16 03:51:44,305] [0] Ed: 25600, train_loss: 1.26666, acc: 0.49555
[INFO 2025-02-16 03:51:44,305] [0] Ed: 25600, train_loss: 1.26666, acc: 0.49555


INFO:root:[0] Ed: 28800, train_loss: 1.26633, acc: 0.49573


[INFO 2025-02-16 03:51:49,849] [0] Ed: 28800, train_loss: 1.26633, acc: 0.49573
[INFO 2025-02-16 03:51:49,849] [0] Ed: 28800, train_loss: 1.26633, acc: 0.49573


INFO:root:[0] Ed: 32000, train_loss: 1.26614, acc: 0.49488


[INFO 2025-02-16 03:51:55,394] [0] Ed: 32000, train_loss: 1.26614, acc: 0.49488
[INFO 2025-02-16 03:51:55,394] [0] Ed: 32000, train_loss: 1.26614, acc: 0.49488


INFO:root:[0] Ed: 35200, train_loss: 1.26857, acc: 0.49355


[INFO 2025-02-16 03:52:00,939] [0] Ed: 35200, train_loss: 1.26857, acc: 0.49355
[INFO 2025-02-16 03:52:00,939] [0] Ed: 35200, train_loss: 1.26857, acc: 0.49355


INFO:root:[0] Ed: 38400, train_loss: 1.26747, acc: 0.49339


[INFO 2025-02-16 03:52:06,484] [0] Ed: 38400, train_loss: 1.26747, acc: 0.49339
[INFO 2025-02-16 03:52:06,484] [0] Ed: 38400, train_loss: 1.26747, acc: 0.49339


INFO:root:[0] Ed: 41600, train_loss: 1.26408, acc: 0.49486


[INFO 2025-02-16 03:52:12,023] [0] Ed: 41600, train_loss: 1.26408, acc: 0.49486
[INFO 2025-02-16 03:52:12,023] [0] Ed: 41600, train_loss: 1.26408, acc: 0.49486


INFO:root:[0] Ed: 44800, train_loss: 1.26337, acc: 0.49531


[INFO 2025-02-16 03:52:17,568] [0] Ed: 44800, train_loss: 1.26337, acc: 0.49531
[INFO 2025-02-16 03:52:17,568] [0] Ed: 44800, train_loss: 1.26337, acc: 0.49531


INFO:root:[0] Ed: 48000, train_loss: 1.26353, acc: 0.49481


[INFO 2025-02-16 03:52:23,108] [0] Ed: 48000, train_loss: 1.26353, acc: 0.49481
[INFO 2025-02-16 03:52:23,108] [0] Ed: 48000, train_loss: 1.26353, acc: 0.49481


INFO:root:[0] Ed: 51200, train_loss: 1.26266, acc: 0.49498


[INFO 2025-02-16 03:52:28,657] [0] Ed: 51200, train_loss: 1.26266, acc: 0.49498
[INFO 2025-02-16 03:52:28,657] [0] Ed: 51200, train_loss: 1.26266, acc: 0.49498


INFO:root:[0] Ed: 54400, train_loss: 1.26194, acc: 0.49566


[INFO 2025-02-16 03:52:34,209] [0] Ed: 54400, train_loss: 1.26194, acc: 0.49566
[INFO 2025-02-16 03:52:34,209] [0] Ed: 54400, train_loss: 1.26194, acc: 0.49566


INFO:root:[0] Ed: 57600, train_loss: 1.26145, acc: 0.49599


[INFO 2025-02-16 03:52:39,765] [0] Ed: 57600, train_loss: 1.26145, acc: 0.49599
[INFO 2025-02-16 03:52:39,765] [0] Ed: 57600, train_loss: 1.26145, acc: 0.49599


INFO:root:[0] Ed: 60800, train_loss: 1.26151, acc: 0.49589


[INFO 2025-02-16 03:52:45,316] [0] Ed: 60800, train_loss: 1.26151, acc: 0.49589
[INFO 2025-02-16 03:52:45,316] [0] Ed: 60800, train_loss: 1.26151, acc: 0.49589


INFO:root:[0] Ed: 64000, train_loss: 1.26056, acc: 0.49639


[INFO 2025-02-16 03:52:50,878] [0] Ed: 64000, train_loss: 1.26056, acc: 0.49639
[INFO 2025-02-16 03:52:50,878] [0] Ed: 64000, train_loss: 1.26056, acc: 0.49639


INFO:root:[0] Ed: 67200, train_loss: 1.25990, acc: 0.49713


[INFO 2025-02-16 03:52:56,437] [0] Ed: 67200, train_loss: 1.25990, acc: 0.49713
[INFO 2025-02-16 03:52:56,437] [0] Ed: 67200, train_loss: 1.25990, acc: 0.49713


INFO:root:[0] Ed: 70400, train_loss: 1.25910, acc: 0.49732


[INFO 2025-02-16 03:53:01,991] [0] Ed: 70400, train_loss: 1.25910, acc: 0.49732
[INFO 2025-02-16 03:53:01,991] [0] Ed: 70400, train_loss: 1.25910, acc: 0.49732


INFO:root:[0] Ed: 73600, train_loss: 1.25885, acc: 0.49651


[INFO 2025-02-16 03:53:07,544] [0] Ed: 73600, train_loss: 1.25885, acc: 0.49651
[INFO 2025-02-16 03:53:07,544] [0] Ed: 73600, train_loss: 1.25885, acc: 0.49651


INFO:root:[0] Ed: 76800, train_loss: 1.25780, acc: 0.49724


[INFO 2025-02-16 03:53:13,089] [0] Ed: 76800, train_loss: 1.25780, acc: 0.49724
[INFO 2025-02-16 03:53:13,089] [0] Ed: 76800, train_loss: 1.25780, acc: 0.49724


INFO:root:[0] Ed: 80000, train_loss: 1.25802, acc: 0.49719


[INFO 2025-02-16 03:53:18,640] [0] Ed: 80000, train_loss: 1.25802, acc: 0.49719
[INFO 2025-02-16 03:53:18,640] [0] Ed: 80000, train_loss: 1.25802, acc: 0.49719


INFO:root:[0] Ed: 83200, train_loss: 1.25705, acc: 0.49790


[INFO 2025-02-16 03:53:24,185] [0] Ed: 83200, train_loss: 1.25705, acc: 0.49790
[INFO 2025-02-16 03:53:24,185] [0] Ed: 83200, train_loss: 1.25705, acc: 0.49790


INFO:root:[0] Ed: 86400, train_loss: 1.25660, acc: 0.49850


[INFO 2025-02-16 03:53:29,734] [0] Ed: 86400, train_loss: 1.25660, acc: 0.49850
[INFO 2025-02-16 03:53:29,734] [0] Ed: 86400, train_loss: 1.25660, acc: 0.49850


INFO:root:[0] Ed: 89600, train_loss: 1.25558, acc: 0.49901


[INFO 2025-02-16 03:53:35,282] [0] Ed: 89600, train_loss: 1.25558, acc: 0.49901
[INFO 2025-02-16 03:53:35,282] [0] Ed: 89600, train_loss: 1.25558, acc: 0.49901


INFO:root:[0] Ed: 92800, train_loss: 1.25638, acc: 0.49829


[INFO 2025-02-16 03:53:40,829] [0] Ed: 92800, train_loss: 1.25638, acc: 0.49829
[INFO 2025-02-16 03:53:40,829] [0] Ed: 92800, train_loss: 1.25638, acc: 0.49829


INFO:root:[0] Ed: 96000, train_loss: 1.25614, acc: 0.49812


[INFO 2025-02-16 03:53:46,376] [0] Ed: 96000, train_loss: 1.25614, acc: 0.49812
[INFO 2025-02-16 03:53:46,376] [0] Ed: 96000, train_loss: 1.25614, acc: 0.49812


INFO:root:[0] Ed: 99200, train_loss: 1.25621, acc: 0.49792


[INFO 2025-02-16 03:53:51,924] [0] Ed: 99200, train_loss: 1.25621, acc: 0.49792
[INFO 2025-02-16 03:53:51,924] [0] Ed: 99200, train_loss: 1.25621, acc: 0.49792


INFO:root:[0] Ed: 102400, train_loss: 1.25521, acc: 0.49831


[INFO 2025-02-16 03:53:57,468] [0] Ed: 102400, train_loss: 1.25521, acc: 0.49831
[INFO 2025-02-16 03:53:57,468] [0] Ed: 102400, train_loss: 1.25521, acc: 0.49831


INFO:root:[0] Ed: 105600, train_loss: 1.25382, acc: 0.49891


[INFO 2025-02-16 03:54:03,017] [0] Ed: 105600, train_loss: 1.25382, acc: 0.49891
[INFO 2025-02-16 03:54:03,017] [0] Ed: 105600, train_loss: 1.25382, acc: 0.49891


INFO:root:[0] Ed: 108800, train_loss: 1.25389, acc: 0.49877


[INFO 2025-02-16 03:54:08,563] [0] Ed: 108800, train_loss: 1.25389, acc: 0.49877
[INFO 2025-02-16 03:54:08,563] [0] Ed: 108800, train_loss: 1.25389, acc: 0.49877


INFO:root:[0] Ed: 112000, train_loss: 1.25425, acc: 0.49876


[INFO 2025-02-16 03:54:14,104] [0] Ed: 112000, train_loss: 1.25425, acc: 0.49876
[INFO 2025-02-16 03:54:14,104] [0] Ed: 112000, train_loss: 1.25425, acc: 0.49876


INFO:root:[0] Ed: 115200, train_loss: 1.25403, acc: 0.49901


[INFO 2025-02-16 03:54:19,652] [0] Ed: 115200, train_loss: 1.25403, acc: 0.49901
[INFO 2025-02-16 03:54:19,652] [0] Ed: 115200, train_loss: 1.25403, acc: 0.49901


INFO:root:[0] Ed: 118400, train_loss: 1.25301, acc: 0.49948


[INFO 2025-02-16 03:54:25,198] [0] Ed: 118400, train_loss: 1.25301, acc: 0.49948
[INFO 2025-02-16 03:54:25,198] [0] Ed: 118400, train_loss: 1.25301, acc: 0.49948


INFO:root:[0] Ed: 121600, train_loss: 1.25273, acc: 0.49946


[INFO 2025-02-16 03:54:30,747] [0] Ed: 121600, train_loss: 1.25273, acc: 0.49946
[INFO 2025-02-16 03:54:30,747] [0] Ed: 121600, train_loss: 1.25273, acc: 0.49946


INFO:root:[0] Ed: 124800, train_loss: 1.25258, acc: 0.49948


[INFO 2025-02-16 03:54:36,294] [0] Ed: 124800, train_loss: 1.25258, acc: 0.49948
[INFO 2025-02-16 03:54:36,294] [0] Ed: 124800, train_loss: 1.25258, acc: 0.49948


INFO:root:[0] Ed: 128000, train_loss: 1.25298, acc: 0.49926


[INFO 2025-02-16 03:54:41,845] [0] Ed: 128000, train_loss: 1.25298, acc: 0.49926
[INFO 2025-02-16 03:54:41,845] [0] Ed: 128000, train_loss: 1.25298, acc: 0.49926


INFO:root:[0] Ed: 131200, train_loss: 1.25248, acc: 0.49940


[INFO 2025-02-16 03:54:47,407] [0] Ed: 131200, train_loss: 1.25248, acc: 0.49940
[INFO 2025-02-16 03:54:47,407] [0] Ed: 131200, train_loss: 1.25248, acc: 0.49940


INFO:root:[0] Ed: 134400, train_loss: 1.25254, acc: 0.49910


[INFO 2025-02-16 03:54:52,967] [0] Ed: 134400, train_loss: 1.25254, acc: 0.49910
[INFO 2025-02-16 03:54:52,967] [0] Ed: 134400, train_loss: 1.25254, acc: 0.49910


INFO:root:[0] Ed: 137600, train_loss: 1.25276, acc: 0.49914


[INFO 2025-02-16 03:54:58,531] [0] Ed: 137600, train_loss: 1.25276, acc: 0.49914
[INFO 2025-02-16 03:54:58,531] [0] Ed: 137600, train_loss: 1.25276, acc: 0.49914


INFO:root:[0] Ed: 140800, train_loss: 1.25247, acc: 0.49937


[INFO 2025-02-16 03:55:04,085] [0] Ed: 140800, train_loss: 1.25247, acc: 0.49937
[INFO 2025-02-16 03:55:04,085] [0] Ed: 140800, train_loss: 1.25247, acc: 0.49937


INFO:root:[0] Ed: 144000, train_loss: 1.25249, acc: 0.49895


[INFO 2025-02-16 03:55:09,636] [0] Ed: 144000, train_loss: 1.25249, acc: 0.49895
[INFO 2025-02-16 03:55:09,636] [0] Ed: 144000, train_loss: 1.25249, acc: 0.49895


INFO:root:[0] Ed: 147200, train_loss: 1.25210, acc: 0.49909


[INFO 2025-02-16 03:55:15,194] [0] Ed: 147200, train_loss: 1.25210, acc: 0.49909
[INFO 2025-02-16 03:55:15,194] [0] Ed: 147200, train_loss: 1.25210, acc: 0.49909


INFO:root:[0] Ed: 150400, train_loss: 1.25167, acc: 0.49926


[INFO 2025-02-16 03:55:20,752] [0] Ed: 150400, train_loss: 1.25167, acc: 0.49926
[INFO 2025-02-16 03:55:20,752] [0] Ed: 150400, train_loss: 1.25167, acc: 0.49926


INFO:root:[0] Ed: 153600, train_loss: 1.25100, acc: 0.49943


[INFO 2025-02-16 03:55:26,310] [0] Ed: 153600, train_loss: 1.25100, acc: 0.49943
[INFO 2025-02-16 03:55:26,310] [0] Ed: 153600, train_loss: 1.25100, acc: 0.49943


INFO:root:[0] Ed: 156800, train_loss: 1.25056, acc: 0.49968


[INFO 2025-02-16 03:55:31,863] [0] Ed: 156800, train_loss: 1.25056, acc: 0.49968
[INFO 2025-02-16 03:55:31,863] [0] Ed: 156800, train_loss: 1.25056, acc: 0.49968


INFO:root:[0] Ed: 160000, train_loss: 1.25017, acc: 0.49993


[INFO 2025-02-16 03:55:37,414] [0] Ed: 160000, train_loss: 1.25017, acc: 0.49993
[INFO 2025-02-16 03:55:37,414] [0] Ed: 160000, train_loss: 1.25017, acc: 0.49993


INFO:root:[0] Ed: 163200, train_loss: 1.24999, acc: 0.49999


[INFO 2025-02-16 03:55:42,964] [0] Ed: 163200, train_loss: 1.24999, acc: 0.49999
[INFO 2025-02-16 03:55:42,964] [0] Ed: 163200, train_loss: 1.24999, acc: 0.49999


INFO:root:[0] Ed: 166400, train_loss: 1.25012, acc: 0.49986


[INFO 2025-02-16 03:55:48,517] [0] Ed: 166400, train_loss: 1.25012, acc: 0.49986
[INFO 2025-02-16 03:55:48,517] [0] Ed: 166400, train_loss: 1.25012, acc: 0.49986


INFO:root:[0] Ed: 169600, train_loss: 1.25021, acc: 0.49993


[INFO 2025-02-16 03:55:54,066] [0] Ed: 169600, train_loss: 1.25021, acc: 0.49993
[INFO 2025-02-16 03:55:54,066] [0] Ed: 169600, train_loss: 1.25021, acc: 0.49993


INFO:root:[0] Ed: 172800, train_loss: 1.25000, acc: 0.50003


[INFO 2025-02-16 03:55:59,616] [0] Ed: 172800, train_loss: 1.25000, acc: 0.50003
[INFO 2025-02-16 03:55:59,616] [0] Ed: 172800, train_loss: 1.25000, acc: 0.50003


INFO:root:[0] Ed: 176000, train_loss: 1.24983, acc: 0.50024


[INFO 2025-02-16 03:56:05,165] [0] Ed: 176000, train_loss: 1.24983, acc: 0.50024
[INFO 2025-02-16 03:56:05,165] [0] Ed: 176000, train_loss: 1.24983, acc: 0.50024


INFO:root:[0] Ed: 179200, train_loss: 1.24973, acc: 0.50035


[INFO 2025-02-16 03:56:10,716] [0] Ed: 179200, train_loss: 1.24973, acc: 0.50035
[INFO 2025-02-16 03:56:10,716] [0] Ed: 179200, train_loss: 1.24973, acc: 0.50035


INFO:root:[0] Ed: 182400, train_loss: 1.24985, acc: 0.50032


[INFO 2025-02-16 03:56:16,265] [0] Ed: 182400, train_loss: 1.24985, acc: 0.50032
[INFO 2025-02-16 03:56:16,265] [0] Ed: 182400, train_loss: 1.24985, acc: 0.50032


INFO:root:[0] Ed: 185600, train_loss: 1.24951, acc: 0.50042


[INFO 2025-02-16 03:56:21,813] [0] Ed: 185600, train_loss: 1.24951, acc: 0.50042
[INFO 2025-02-16 03:56:21,813] [0] Ed: 185600, train_loss: 1.24951, acc: 0.50042


INFO:root:[0] Ed: 188800, train_loss: 1.24951, acc: 0.50064


[INFO 2025-02-16 03:56:27,361] [0] Ed: 188800, train_loss: 1.24951, acc: 0.50064
[INFO 2025-02-16 03:56:27,361] [0] Ed: 188800, train_loss: 1.24951, acc: 0.50064


INFO:root:[0] Ed: 192000, train_loss: 1.24970, acc: 0.50044


[INFO 2025-02-16 03:56:32,899] [0] Ed: 192000, train_loss: 1.24970, acc: 0.50044
[INFO 2025-02-16 03:56:32,899] [0] Ed: 192000, train_loss: 1.24970, acc: 0.50044


INFO:root:[0] Ed: 195200, train_loss: 1.24948, acc: 0.50055


[INFO 2025-02-16 03:56:38,444] [0] Ed: 195200, train_loss: 1.24948, acc: 0.50055
[INFO 2025-02-16 03:56:38,444] [0] Ed: 195200, train_loss: 1.24948, acc: 0.50055


INFO:root:[0] Ed: 198400, train_loss: 1.24965, acc: 0.50064


[INFO 2025-02-16 03:56:43,991] [0] Ed: 198400, train_loss: 1.24965, acc: 0.50064
[INFO 2025-02-16 03:56:43,991] [0] Ed: 198400, train_loss: 1.24965, acc: 0.50064


INFO:root:[0] Ed: 201600, train_loss: 1.24938, acc: 0.50074


[INFO 2025-02-16 03:56:49,530] [0] Ed: 201600, train_loss: 1.24938, acc: 0.50074
[INFO 2025-02-16 03:56:49,530] [0] Ed: 201600, train_loss: 1.24938, acc: 0.50074


INFO:root:[0] Ed: 204800, train_loss: 1.24955, acc: 0.50051


[INFO 2025-02-16 03:56:55,067] [0] Ed: 204800, train_loss: 1.24955, acc: 0.50051
[INFO 2025-02-16 03:56:55,067] [0] Ed: 204800, train_loss: 1.24955, acc: 0.50051


INFO:root:[0] Ed: 208000, train_loss: 1.24974, acc: 0.50041


[INFO 2025-02-16 03:57:00,604] [0] Ed: 208000, train_loss: 1.24974, acc: 0.50041
[INFO 2025-02-16 03:57:00,604] [0] Ed: 208000, train_loss: 1.24974, acc: 0.50041


INFO:root:[0] Ed: 211200, train_loss: 1.24964, acc: 0.50037


[INFO 2025-02-16 03:57:06,147] [0] Ed: 211200, train_loss: 1.24964, acc: 0.50037
[INFO 2025-02-16 03:57:06,147] [0] Ed: 211200, train_loss: 1.24964, acc: 0.50037


INFO:root:[0] Ed: 214400, train_loss: 1.24955, acc: 0.50054


[INFO 2025-02-16 03:57:11,703] [0] Ed: 214400, train_loss: 1.24955, acc: 0.50054
[INFO 2025-02-16 03:57:11,703] [0] Ed: 214400, train_loss: 1.24955, acc: 0.50054


INFO:root:[0] Ed: 217600, train_loss: 1.24956, acc: 0.50039


[INFO 2025-02-16 03:57:17,256] [0] Ed: 217600, train_loss: 1.24956, acc: 0.50039
[INFO 2025-02-16 03:57:17,256] [0] Ed: 217600, train_loss: 1.24956, acc: 0.50039


INFO:root:[0] Ed: 220800, train_loss: 1.24939, acc: 0.50052


[INFO 2025-02-16 03:57:22,809] [0] Ed: 220800, train_loss: 1.24939, acc: 0.50052
[INFO 2025-02-16 03:57:22,809] [0] Ed: 220800, train_loss: 1.24939, acc: 0.50052


INFO:root:[0] Ed: 224000, train_loss: 1.24899, acc: 0.50062


[INFO 2025-02-16 03:57:28,362] [0] Ed: 224000, train_loss: 1.24899, acc: 0.50062
[INFO 2025-02-16 03:57:28,362] [0] Ed: 224000, train_loss: 1.24899, acc: 0.50062


INFO:root:[0] Ed: 227200, train_loss: 1.24858, acc: 0.50087


[INFO 2025-02-16 03:57:33,917] [0] Ed: 227200, train_loss: 1.24858, acc: 0.50087
[INFO 2025-02-16 03:57:33,917] [0] Ed: 227200, train_loss: 1.24858, acc: 0.50087


INFO:root:[0] Ed: 230400, train_loss: 1.24834, acc: 0.50110


[INFO 2025-02-16 03:57:39,469] [0] Ed: 230400, train_loss: 1.24834, acc: 0.50110
[INFO 2025-02-16 03:57:39,469] [0] Ed: 230400, train_loss: 1.24834, acc: 0.50110


INFO:root:[0] Ed: 233600, train_loss: 1.24771, acc: 0.50137


[INFO 2025-02-16 03:57:45,018] [0] Ed: 233600, train_loss: 1.24771, acc: 0.50137
[INFO 2025-02-16 03:57:45,018] [0] Ed: 233600, train_loss: 1.24771, acc: 0.50137


INFO:root:Training finish.


[INFO 2025-02-16 03:57:49,637] Training finish.
[INFO 2025-02-16 03:57:49,637] Training finish.


INFO:root:Model saved to /content/model/epoch-4.pt.


[INFO 2025-02-16 03:57:49,759] Model saved to /content/model/epoch-4.pt.
[INFO 2025-02-16 03:57:49,759] Model saved to /content/model/epoch-4.pt.


INFO:root:[0] Ed: 0, train_loss: inf, acc: inf


[INFO 2025-02-16 03:57:49,829] [0] Ed: 0, train_loss: inf, acc: inf
[INFO 2025-02-16 03:57:49,829] [0] Ed: 0, train_loss: inf, acc: inf


INFO:root:[0] Ed: 3200, train_loss: 1.26065, acc: 0.51250


[INFO 2025-02-16 03:57:55,382] [0] Ed: 3200, train_loss: 1.26065, acc: 0.51250
[INFO 2025-02-16 03:57:55,382] [0] Ed: 3200, train_loss: 1.26065, acc: 0.51250


INFO:root:[0] Ed: 6400, train_loss: 1.24195, acc: 0.50891


[INFO 2025-02-16 03:58:00,932] [0] Ed: 6400, train_loss: 1.24195, acc: 0.50891
[INFO 2025-02-16 03:58:00,932] [0] Ed: 6400, train_loss: 1.24195, acc: 0.50891


INFO:root:[0] Ed: 9600, train_loss: 1.24265, acc: 0.50854


[INFO 2025-02-16 03:58:06,481] [0] Ed: 9600, train_loss: 1.24265, acc: 0.50854
[INFO 2025-02-16 03:58:06,481] [0] Ed: 9600, train_loss: 1.24265, acc: 0.50854


INFO:root:[0] Ed: 12800, train_loss: 1.23631, acc: 0.51125


[INFO 2025-02-16 03:58:12,034] [0] Ed: 12800, train_loss: 1.23631, acc: 0.51125
[INFO 2025-02-16 03:58:12,034] [0] Ed: 12800, train_loss: 1.23631, acc: 0.51125


INFO:root:[0] Ed: 16000, train_loss: 1.24335, acc: 0.50713


[INFO 2025-02-16 03:58:17,585] [0] Ed: 16000, train_loss: 1.24335, acc: 0.50713
[INFO 2025-02-16 03:58:17,585] [0] Ed: 16000, train_loss: 1.24335, acc: 0.50713


INFO:root:[0] Ed: 19200, train_loss: 1.24535, acc: 0.50510


[INFO 2025-02-16 03:58:23,130] [0] Ed: 19200, train_loss: 1.24535, acc: 0.50510
[INFO 2025-02-16 03:58:23,130] [0] Ed: 19200, train_loss: 1.24535, acc: 0.50510


INFO:root:[0] Ed: 22400, train_loss: 1.24458, acc: 0.50451


[INFO 2025-02-16 03:58:28,682] [0] Ed: 22400, train_loss: 1.24458, acc: 0.50451
[INFO 2025-02-16 03:58:28,682] [0] Ed: 22400, train_loss: 1.24458, acc: 0.50451


INFO:root:[0] Ed: 25600, train_loss: 1.24367, acc: 0.50504


[INFO 2025-02-16 03:58:34,230] [0] Ed: 25600, train_loss: 1.24367, acc: 0.50504
[INFO 2025-02-16 03:58:34,230] [0] Ed: 25600, train_loss: 1.24367, acc: 0.50504


INFO:root:[0] Ed: 28800, train_loss: 1.24259, acc: 0.50531


[INFO 2025-02-16 03:58:39,783] [0] Ed: 28800, train_loss: 1.24259, acc: 0.50531
[INFO 2025-02-16 03:58:39,783] [0] Ed: 28800, train_loss: 1.24259, acc: 0.50531


INFO:root:[0] Ed: 32000, train_loss: 1.24314, acc: 0.50513


[INFO 2025-02-16 03:58:45,339] [0] Ed: 32000, train_loss: 1.24314, acc: 0.50513
[INFO 2025-02-16 03:58:45,339] [0] Ed: 32000, train_loss: 1.24314, acc: 0.50513


INFO:root:[0] Ed: 35200, train_loss: 1.24457, acc: 0.50514


[INFO 2025-02-16 03:58:50,888] [0] Ed: 35200, train_loss: 1.24457, acc: 0.50514
[INFO 2025-02-16 03:58:50,888] [0] Ed: 35200, train_loss: 1.24457, acc: 0.50514


INFO:root:[0] Ed: 38400, train_loss: 1.24311, acc: 0.50513


[INFO 2025-02-16 03:58:56,440] [0] Ed: 38400, train_loss: 1.24311, acc: 0.50513
[INFO 2025-02-16 03:58:56,440] [0] Ed: 38400, train_loss: 1.24311, acc: 0.50513


INFO:root:[0] Ed: 41600, train_loss: 1.23966, acc: 0.50683


[INFO 2025-02-16 03:59:01,986] [0] Ed: 41600, train_loss: 1.23966, acc: 0.50683
[INFO 2025-02-16 03:59:01,986] [0] Ed: 41600, train_loss: 1.23966, acc: 0.50683


INFO:root:[0] Ed: 44800, train_loss: 1.23899, acc: 0.50661


[INFO 2025-02-16 03:59:07,540] [0] Ed: 44800, train_loss: 1.23899, acc: 0.50661
[INFO 2025-02-16 03:59:07,540] [0] Ed: 44800, train_loss: 1.23899, acc: 0.50661


INFO:root:[0] Ed: 48000, train_loss: 1.23913, acc: 0.50658


[INFO 2025-02-16 03:59:13,085] [0] Ed: 48000, train_loss: 1.23913, acc: 0.50658
[INFO 2025-02-16 03:59:13,085] [0] Ed: 48000, train_loss: 1.23913, acc: 0.50658


INFO:root:[0] Ed: 51200, train_loss: 1.23801, acc: 0.50695


[INFO 2025-02-16 03:59:18,635] [0] Ed: 51200, train_loss: 1.23801, acc: 0.50695
[INFO 2025-02-16 03:59:18,635] [0] Ed: 51200, train_loss: 1.23801, acc: 0.50695


INFO:root:[0] Ed: 54400, train_loss: 1.23724, acc: 0.50743


[INFO 2025-02-16 03:59:24,197] [0] Ed: 54400, train_loss: 1.23724, acc: 0.50743
[INFO 2025-02-16 03:59:24,197] [0] Ed: 54400, train_loss: 1.23724, acc: 0.50743


INFO:root:[0] Ed: 57600, train_loss: 1.23663, acc: 0.50776


[INFO 2025-02-16 03:59:29,758] [0] Ed: 57600, train_loss: 1.23663, acc: 0.50776
[INFO 2025-02-16 03:59:29,758] [0] Ed: 57600, train_loss: 1.23663, acc: 0.50776


INFO:root:[0] Ed: 60800, train_loss: 1.23683, acc: 0.50819


[INFO 2025-02-16 03:59:35,312] [0] Ed: 60800, train_loss: 1.23683, acc: 0.50819
[INFO 2025-02-16 03:59:35,312] [0] Ed: 60800, train_loss: 1.23683, acc: 0.50819


INFO:root:[0] Ed: 64000, train_loss: 1.23549, acc: 0.50863


[INFO 2025-02-16 03:59:40,869] [0] Ed: 64000, train_loss: 1.23549, acc: 0.50863
[INFO 2025-02-16 03:59:40,869] [0] Ed: 64000, train_loss: 1.23549, acc: 0.50863


INFO:root:[0] Ed: 67200, train_loss: 1.23463, acc: 0.50918


[INFO 2025-02-16 03:59:46,428] [0] Ed: 67200, train_loss: 1.23463, acc: 0.50918
[INFO 2025-02-16 03:59:46,428] [0] Ed: 67200, train_loss: 1.23463, acc: 0.50918


INFO:root:[0] Ed: 70400, train_loss: 1.23371, acc: 0.50936


[INFO 2025-02-16 03:59:51,984] [0] Ed: 70400, train_loss: 1.23371, acc: 0.50936
[INFO 2025-02-16 03:59:51,984] [0] Ed: 70400, train_loss: 1.23371, acc: 0.50936


INFO:root:[0] Ed: 73600, train_loss: 1.23385, acc: 0.50879


[INFO 2025-02-16 03:59:57,539] [0] Ed: 73600, train_loss: 1.23385, acc: 0.50879
[INFO 2025-02-16 03:59:57,539] [0] Ed: 73600, train_loss: 1.23385, acc: 0.50879


INFO:root:[0] Ed: 76800, train_loss: 1.23291, acc: 0.50913


[INFO 2025-02-16 04:00:03,090] [0] Ed: 76800, train_loss: 1.23291, acc: 0.50913
[INFO 2025-02-16 04:00:03,090] [0] Ed: 76800, train_loss: 1.23291, acc: 0.50913


INFO:root:[0] Ed: 80000, train_loss: 1.23327, acc: 0.50890


[INFO 2025-02-16 04:00:08,636] [0] Ed: 80000, train_loss: 1.23327, acc: 0.50890
[INFO 2025-02-16 04:00:08,636] [0] Ed: 80000, train_loss: 1.23327, acc: 0.50890


INFO:root:[0] Ed: 83200, train_loss: 1.23239, acc: 0.50937


[INFO 2025-02-16 04:00:14,180] [0] Ed: 83200, train_loss: 1.23239, acc: 0.50937
[INFO 2025-02-16 04:00:14,180] [0] Ed: 83200, train_loss: 1.23239, acc: 0.50937


INFO:root:[0] Ed: 86400, train_loss: 1.23181, acc: 0.51015


[INFO 2025-02-16 04:00:19,730] [0] Ed: 86400, train_loss: 1.23181, acc: 0.51015
[INFO 2025-02-16 04:00:19,730] [0] Ed: 86400, train_loss: 1.23181, acc: 0.51015


INFO:root:[0] Ed: 89600, train_loss: 1.23055, acc: 0.51096


[INFO 2025-02-16 04:00:25,279] [0] Ed: 89600, train_loss: 1.23055, acc: 0.51096
[INFO 2025-02-16 04:00:25,279] [0] Ed: 89600, train_loss: 1.23055, acc: 0.51096


INFO:root:[0] Ed: 92800, train_loss: 1.23147, acc: 0.51014


[INFO 2025-02-16 04:00:30,813] [0] Ed: 92800, train_loss: 1.23147, acc: 0.51014
[INFO 2025-02-16 04:00:30,813] [0] Ed: 92800, train_loss: 1.23147, acc: 0.51014


INFO:root:[0] Ed: 96000, train_loss: 1.23107, acc: 0.50996


[INFO 2025-02-16 04:00:36,351] [0] Ed: 96000, train_loss: 1.23107, acc: 0.50996
[INFO 2025-02-16 04:00:36,351] [0] Ed: 96000, train_loss: 1.23107, acc: 0.50996


INFO:root:[0] Ed: 99200, train_loss: 1.23111, acc: 0.50980


[INFO 2025-02-16 04:00:41,890] [0] Ed: 99200, train_loss: 1.23111, acc: 0.50980
[INFO 2025-02-16 04:00:41,890] [0] Ed: 99200, train_loss: 1.23111, acc: 0.50980


INFO:root:[0] Ed: 102400, train_loss: 1.22984, acc: 0.51030


[INFO 2025-02-16 04:00:47,433] [0] Ed: 102400, train_loss: 1.22984, acc: 0.51030
[INFO 2025-02-16 04:00:47,433] [0] Ed: 102400, train_loss: 1.22984, acc: 0.51030


INFO:root:[0] Ed: 105600, train_loss: 1.22836, acc: 0.51086


[INFO 2025-02-16 04:00:52,981] [0] Ed: 105600, train_loss: 1.22836, acc: 0.51086
[INFO 2025-02-16 04:00:52,981] [0] Ed: 105600, train_loss: 1.22836, acc: 0.51086


INFO:root:[0] Ed: 108800, train_loss: 1.22847, acc: 0.51087


[INFO 2025-02-16 04:00:58,530] [0] Ed: 108800, train_loss: 1.22847, acc: 0.51087
[INFO 2025-02-16 04:00:58,530] [0] Ed: 108800, train_loss: 1.22847, acc: 0.51087


INFO:root:[0] Ed: 112000, train_loss: 1.22894, acc: 0.51073


[INFO 2025-02-16 04:01:04,078] [0] Ed: 112000, train_loss: 1.22894, acc: 0.51073
[INFO 2025-02-16 04:01:04,078] [0] Ed: 112000, train_loss: 1.22894, acc: 0.51073


INFO:root:[0] Ed: 115200, train_loss: 1.22860, acc: 0.51108


[INFO 2025-02-16 04:01:09,628] [0] Ed: 115200, train_loss: 1.22860, acc: 0.51108
[INFO 2025-02-16 04:01:09,628] [0] Ed: 115200, train_loss: 1.22860, acc: 0.51108


INFO:root:[0] Ed: 118400, train_loss: 1.22746, acc: 0.51133


[INFO 2025-02-16 04:01:15,191] [0] Ed: 118400, train_loss: 1.22746, acc: 0.51133
[INFO 2025-02-16 04:01:15,191] [0] Ed: 118400, train_loss: 1.22746, acc: 0.51133


INFO:root:[0] Ed: 121600, train_loss: 1.22723, acc: 0.51150


[INFO 2025-02-16 04:01:20,752] [0] Ed: 121600, train_loss: 1.22723, acc: 0.51150
[INFO 2025-02-16 04:01:20,752] [0] Ed: 121600, train_loss: 1.22723, acc: 0.51150


INFO:root:[0] Ed: 124800, train_loss: 1.22677, acc: 0.51163


[INFO 2025-02-16 04:01:26,306] [0] Ed: 124800, train_loss: 1.22677, acc: 0.51163
[INFO 2025-02-16 04:01:26,306] [0] Ed: 124800, train_loss: 1.22677, acc: 0.51163


INFO:root:[0] Ed: 128000, train_loss: 1.22730, acc: 0.51155


[INFO 2025-02-16 04:01:31,863] [0] Ed: 128000, train_loss: 1.22730, acc: 0.51155
[INFO 2025-02-16 04:01:31,863] [0] Ed: 128000, train_loss: 1.22730, acc: 0.51155


INFO:root:[0] Ed: 131200, train_loss: 1.22692, acc: 0.51175


[INFO 2025-02-16 04:01:37,429] [0] Ed: 131200, train_loss: 1.22692, acc: 0.51175
[INFO 2025-02-16 04:01:37,429] [0] Ed: 131200, train_loss: 1.22692, acc: 0.51175


INFO:root:[0] Ed: 134400, train_loss: 1.22687, acc: 0.51157


[INFO 2025-02-16 04:01:42,989] [0] Ed: 134400, train_loss: 1.22687, acc: 0.51157
[INFO 2025-02-16 04:01:42,989] [0] Ed: 134400, train_loss: 1.22687, acc: 0.51157


INFO:root:[0] Ed: 137600, train_loss: 1.22710, acc: 0.51156


[INFO 2025-02-16 04:01:48,553] [0] Ed: 137600, train_loss: 1.22710, acc: 0.51156
[INFO 2025-02-16 04:01:48,553] [0] Ed: 137600, train_loss: 1.22710, acc: 0.51156


INFO:root:[0] Ed: 140800, train_loss: 1.22675, acc: 0.51180


[INFO 2025-02-16 04:01:54,102] [0] Ed: 140800, train_loss: 1.22675, acc: 0.51180
[INFO 2025-02-16 04:01:54,102] [0] Ed: 140800, train_loss: 1.22675, acc: 0.51180


INFO:root:[0] Ed: 144000, train_loss: 1.22678, acc: 0.51153


[INFO 2025-02-16 04:01:59,661] [0] Ed: 144000, train_loss: 1.22678, acc: 0.51153
[INFO 2025-02-16 04:01:59,661] [0] Ed: 144000, train_loss: 1.22678, acc: 0.51153


INFO:root:[0] Ed: 147200, train_loss: 1.22634, acc: 0.51162


[INFO 2025-02-16 04:02:05,213] [0] Ed: 147200, train_loss: 1.22634, acc: 0.51162
[INFO 2025-02-16 04:02:05,213] [0] Ed: 147200, train_loss: 1.22634, acc: 0.51162


INFO:root:[0] Ed: 150400, train_loss: 1.22589, acc: 0.51193


[INFO 2025-02-16 04:02:10,761] [0] Ed: 150400, train_loss: 1.22589, acc: 0.51193
[INFO 2025-02-16 04:02:10,761] [0] Ed: 150400, train_loss: 1.22589, acc: 0.51193


INFO:root:[0] Ed: 153600, train_loss: 1.22531, acc: 0.51214


[INFO 2025-02-16 04:02:16,316] [0] Ed: 153600, train_loss: 1.22531, acc: 0.51214
[INFO 2025-02-16 04:02:16,316] [0] Ed: 153600, train_loss: 1.22531, acc: 0.51214


INFO:root:[0] Ed: 156800, train_loss: 1.22475, acc: 0.51242


[INFO 2025-02-16 04:02:21,862] [0] Ed: 156800, train_loss: 1.22475, acc: 0.51242
[INFO 2025-02-16 04:02:21,862] [0] Ed: 156800, train_loss: 1.22475, acc: 0.51242


INFO:root:[0] Ed: 160000, train_loss: 1.22446, acc: 0.51273


[INFO 2025-02-16 04:02:27,413] [0] Ed: 160000, train_loss: 1.22446, acc: 0.51273
[INFO 2025-02-16 04:02:27,413] [0] Ed: 160000, train_loss: 1.22446, acc: 0.51273


INFO:root:[0] Ed: 163200, train_loss: 1.22414, acc: 0.51288


[INFO 2025-02-16 04:02:32,964] [0] Ed: 163200, train_loss: 1.22414, acc: 0.51288
[INFO 2025-02-16 04:02:32,964] [0] Ed: 163200, train_loss: 1.22414, acc: 0.51288


INFO:root:[0] Ed: 166400, train_loss: 1.22423, acc: 0.51284


[INFO 2025-02-16 04:02:38,512] [0] Ed: 166400, train_loss: 1.22423, acc: 0.51284
[INFO 2025-02-16 04:02:38,512] [0] Ed: 166400, train_loss: 1.22423, acc: 0.51284


INFO:root:[0] Ed: 169600, train_loss: 1.22424, acc: 0.51298


[INFO 2025-02-16 04:02:44,062] [0] Ed: 169600, train_loss: 1.22424, acc: 0.51298
[INFO 2025-02-16 04:02:44,062] [0] Ed: 169600, train_loss: 1.22424, acc: 0.51298


INFO:root:[0] Ed: 172800, train_loss: 1.22402, acc: 0.51313


[INFO 2025-02-16 04:02:49,608] [0] Ed: 172800, train_loss: 1.22402, acc: 0.51313
[INFO 2025-02-16 04:02:49,608] [0] Ed: 172800, train_loss: 1.22402, acc: 0.51313


INFO:root:[0] Ed: 176000, train_loss: 1.22376, acc: 0.51320


[INFO 2025-02-16 04:02:55,148] [0] Ed: 176000, train_loss: 1.22376, acc: 0.51320
[INFO 2025-02-16 04:02:55,148] [0] Ed: 176000, train_loss: 1.22376, acc: 0.51320


INFO:root:[0] Ed: 179200, train_loss: 1.22374, acc: 0.51329


[INFO 2025-02-16 04:03:00,695] [0] Ed: 179200, train_loss: 1.22374, acc: 0.51329
[INFO 2025-02-16 04:03:00,695] [0] Ed: 179200, train_loss: 1.22374, acc: 0.51329


INFO:root:[0] Ed: 182400, train_loss: 1.22385, acc: 0.51329


[INFO 2025-02-16 04:03:06,243] [0] Ed: 182400, train_loss: 1.22385, acc: 0.51329
[INFO 2025-02-16 04:03:06,243] [0] Ed: 182400, train_loss: 1.22385, acc: 0.51329


INFO:root:[0] Ed: 185600, train_loss: 1.22361, acc: 0.51339


[INFO 2025-02-16 04:03:11,790] [0] Ed: 185600, train_loss: 1.22361, acc: 0.51339
[INFO 2025-02-16 04:03:11,790] [0] Ed: 185600, train_loss: 1.22361, acc: 0.51339


INFO:root:[0] Ed: 188800, train_loss: 1.22360, acc: 0.51350


[INFO 2025-02-16 04:03:17,334] [0] Ed: 188800, train_loss: 1.22360, acc: 0.51350
[INFO 2025-02-16 04:03:17,334] [0] Ed: 188800, train_loss: 1.22360, acc: 0.51350


INFO:root:[0] Ed: 192000, train_loss: 1.22386, acc: 0.51335


[INFO 2025-02-16 04:03:22,890] [0] Ed: 192000, train_loss: 1.22386, acc: 0.51335
[INFO 2025-02-16 04:03:22,890] [0] Ed: 192000, train_loss: 1.22386, acc: 0.51335


INFO:root:[0] Ed: 195200, train_loss: 1.22363, acc: 0.51352


[INFO 2025-02-16 04:03:28,442] [0] Ed: 195200, train_loss: 1.22363, acc: 0.51352
[INFO 2025-02-16 04:03:28,442] [0] Ed: 195200, train_loss: 1.22363, acc: 0.51352


INFO:root:[0] Ed: 198400, train_loss: 1.22379, acc: 0.51368


[INFO 2025-02-16 04:03:33,991] [0] Ed: 198400, train_loss: 1.22379, acc: 0.51368
[INFO 2025-02-16 04:03:33,991] [0] Ed: 198400, train_loss: 1.22379, acc: 0.51368


INFO:root:[0] Ed: 201600, train_loss: 1.22336, acc: 0.51383


[INFO 2025-02-16 04:03:39,540] [0] Ed: 201600, train_loss: 1.22336, acc: 0.51383
[INFO 2025-02-16 04:03:39,540] [0] Ed: 201600, train_loss: 1.22336, acc: 0.51383


INFO:root:[0] Ed: 204800, train_loss: 1.22353, acc: 0.51366


[INFO 2025-02-16 04:03:45,091] [0] Ed: 204800, train_loss: 1.22353, acc: 0.51366
[INFO 2025-02-16 04:03:45,091] [0] Ed: 204800, train_loss: 1.22353, acc: 0.51366


INFO:root:[0] Ed: 208000, train_loss: 1.22373, acc: 0.51373


[INFO 2025-02-16 04:03:50,644] [0] Ed: 208000, train_loss: 1.22373, acc: 0.51373
[INFO 2025-02-16 04:03:50,644] [0] Ed: 208000, train_loss: 1.22373, acc: 0.51373


INFO:root:[0] Ed: 211200, train_loss: 1.22368, acc: 0.51378


[INFO 2025-02-16 04:03:56,202] [0] Ed: 211200, train_loss: 1.22368, acc: 0.51378
[INFO 2025-02-16 04:03:56,202] [0] Ed: 211200, train_loss: 1.22368, acc: 0.51378


INFO:root:[0] Ed: 214400, train_loss: 1.22366, acc: 0.51379


[INFO 2025-02-16 04:04:01,765] [0] Ed: 214400, train_loss: 1.22366, acc: 0.51379
[INFO 2025-02-16 04:04:01,765] [0] Ed: 214400, train_loss: 1.22366, acc: 0.51379


INFO:root:[0] Ed: 217600, train_loss: 1.22366, acc: 0.51368


[INFO 2025-02-16 04:04:07,324] [0] Ed: 217600, train_loss: 1.22366, acc: 0.51368
[INFO 2025-02-16 04:04:07,324] [0] Ed: 217600, train_loss: 1.22366, acc: 0.51368


INFO:root:[0] Ed: 220800, train_loss: 1.22344, acc: 0.51375


[INFO 2025-02-16 04:04:12,883] [0] Ed: 220800, train_loss: 1.22344, acc: 0.51375
[INFO 2025-02-16 04:04:12,883] [0] Ed: 220800, train_loss: 1.22344, acc: 0.51375


INFO:root:[0] Ed: 224000, train_loss: 1.22293, acc: 0.51390


[INFO 2025-02-16 04:04:18,440] [0] Ed: 224000, train_loss: 1.22293, acc: 0.51390
[INFO 2025-02-16 04:04:18,440] [0] Ed: 224000, train_loss: 1.22293, acc: 0.51390


INFO:root:[0] Ed: 227200, train_loss: 1.22260, acc: 0.51404


[INFO 2025-02-16 04:04:23,999] [0] Ed: 227200, train_loss: 1.22260, acc: 0.51404
[INFO 2025-02-16 04:04:23,999] [0] Ed: 227200, train_loss: 1.22260, acc: 0.51404


INFO:root:[0] Ed: 230400, train_loss: 1.22235, acc: 0.51417


[INFO 2025-02-16 04:04:29,553] [0] Ed: 230400, train_loss: 1.22235, acc: 0.51417
[INFO 2025-02-16 04:04:29,553] [0] Ed: 230400, train_loss: 1.22235, acc: 0.51417


INFO:root:[0] Ed: 233600, train_loss: 1.22172, acc: 0.51449


[INFO 2025-02-16 04:04:35,107] [0] Ed: 233600, train_loss: 1.22172, acc: 0.51449
[INFO 2025-02-16 04:04:35,107] [0] Ed: 233600, train_loss: 1.22172, acc: 0.51449


INFO:root:Training finish.


[INFO 2025-02-16 04:04:39,725] Training finish.
[INFO 2025-02-16 04:04:39,725] Training finish.


INFO:root:Model saved to /content/model/epoch-5.pt.


[INFO 2025-02-16 04:04:39,847] Model saved to /content/model/epoch-5.pt.
[INFO 2025-02-16 04:04:39,847] Model saved to /content/model/epoch-5.pt.


In [None]:
def test(rank, args):
    is_distributed = False

    torch.cuda.set_device(rank)

    if args.load_ckpt_name is not None:
        ckpt_path = get_checkpoint(args.model_dir, args.load_ckpt_name)

    assert ckpt_path is not None, 'No checkpoint found.'
    checkpoint = torch.load(ckpt_path, map_location='cpu')

    subcategory_dict = checkpoint['subcategory_dict']
    category_dict = checkpoint['category_dict']
    word_dict = checkpoint['word_dict']

    dummy_embedding_matrix = np.zeros((len(word_dict) + 1, args.word_embedding_dim))
    model = Model(args, dummy_embedding_matrix, len(category_dict), len(subcategory_dict))
    model.load_state_dict(checkpoint['model_state_dict'])
    logging.info(f"Model loaded from {ckpt_path}")

    if args.enable_gpu:
        model.cuda(rank)

    model.eval()
    torch.set_grad_enabled(False)

    news, news_index, category_dict, subcategory_dict, word_dict = read_news(
        os.path.join(args.train_data_dir, 'news.tsv'), args.test_abstract_dir, args, mode='train')
    news_title, news_category, news_subcategory, news_abstract = get_doc_input(
        news, news_index, category_dict, subcategory_dict, word_dict, args)
    news_combined = np.concatenate([x for x in [news_title, news_category, news_subcategory, news_abstract] if x is not None], axis=-1)

    news_dataset = NewsDataset(news_combined)
    news_dataloader = DataLoader(news_dataset,
                                 batch_size=args.batch_size,
                                 num_workers=4)

    news_scoring = []
    with torch.no_grad():
        for input_ids in tqdm(news_dataloader):
            input_ids = input_ids.cuda(rank)
            news_vec = model.news_encoder(input_ids)
            news_vec = news_vec.to(torch.device("cpu")).detach().numpy()
            news_scoring.extend(news_vec)

    news_scoring = np.array(news_scoring)
    logging.info("news scoring num: {}".format(news_scoring.shape[0]))

    if rank == 0:
        doc_sim = 0
        for _ in tqdm(range(1000000)):
            i = random.randrange(1, len(news_scoring))
            j = random.randrange(1, len(news_scoring))
            if i != j:
                doc_sim += np.dot(news_scoring[i], news_scoring[j]) / (np.linalg.norm(news_scoring[i]) * np.linalg.norm(news_scoring[j]))
        logging.info(f'News doc-sim: {doc_sim / 1000000}')

    data_file_path = os.path.join(args.test_data_dir, f'behaviors_{rank}.tsv')

    def collate_fn(tuple_list):
        log_vecs = torch.FloatTensor([x[0] for x in tuple_list])
        log_mask = torch.FloatTensor([x[1] for x in tuple_list])
        news_vecs = [x[2] for x in tuple_list]
        labels = [x[3] for x in tuple_list]
        return (log_vecs, log_mask, news_vecs, labels)

    dataset = DatasetTest(data_file_path, news_index, news_scoring, args)
    dataloader = DataLoader(dataset, batch_size=args.batch_size, collate_fn=collate_fn)

    AUC = []
    MRR = []
    nDCG5 = []
    nDCG10 = []

    def print_metrics(rank, cnt, x):
        logging.info("[{}] {} samples: {}".format(rank, cnt, '\t'.join(["{:0.2f}".format(i * 100) for i in x])))

    def get_mean(arr):
        return [np.array(i).mean() for i in arr]

    def get_sum(arr):
        return [np.array(i).sum() for i in arr]

    local_sample_num = 0

    for cnt, (log_vecs, log_mask, news_vecs, labels) in enumerate(dataloader):
        local_sample_num += log_vecs.shape[0]

        if args.enable_gpu:
            log_vecs = log_vecs.cuda(rank, non_blocking=True)
            log_mask = log_mask.cuda(rank, non_blocking=True)

        user_vecs = model.user_encoder(log_vecs, log_mask).to(torch.device("cpu")).detach().numpy()

        for user_vec, news_vec, label in zip(user_vecs, news_vecs, labels):
            if label.mean() == 0 or label.mean() == 1:
                continue

            score = np.dot(news_vec, user_vec)

            auc = roc_auc_score(label, score)
            mrr = mrr_score(label, score)
            ndcg5 = ndcg_score(label, score, k=5)
            ndcg10 = ndcg_score(label, score, k=10)

            AUC.append(auc)
            MRR.append(mrr)
            nDCG5.append(ndcg5)
            nDCG10.append(ndcg10)

        if cnt % args.log_steps == 0:
            print_metrics(rank, local_sample_num, get_mean([AUC, MRR, nDCG5, nDCG10]))

    logging.info('[{}] local_sample_num: {}'.format(rank, local_sample_num))
    if is_distributed:
        local_sample_num = torch.tensor(local_sample_num).cuda(rank)
        dist.reduce(local_sample_num, dst=0, op=dist.ReduceOp.SUM)
        local_metrics_sum = torch.FloatTensor(get_sum([AUC, MRR, nDCG5, nDCG10])).cuda(rank)
        dist.reduce(local_metrics_sum, dst=0, op=dist.ReduceOp.SUM)
        if rank == 0:
            print_metrics('*', local_sample_num, local_metrics_sum / local_sample_num)
    else:
        print_metrics('*', local_sample_num, get_mean([AUC, MRR, nDCG5, nDCG10]))


In [None]:
args.mode = 'test'
args.user_log_mask=True
args.batch_size=128
args.load_ckpt_name= 'epoch-5.pt'
args.prepare=True
if 'test' in args.mode:
        if args.prepare:
            logging.info('Preparing testing data...')
            total_sample_num = prepare_testing_data(args.test_data_dir, args.nGPU)
        else:
            total_sample_num = 0
            for i in range(args.nGPU):
                data_file_path = os.path.join(args.test_data_dir, f'behaviors_{i}.tsv')
                if not os.path.exists(data_file_path):
                    logging.error(f'Splited testing data {data_file_path} for GPU {i} does not exist. Please set the parameter --prepare as True and rerun the code.')
                    exit()
                result = subprocess.getoutput(f'wc -l {data_file_path}')
                total_sample_num += int(result.split(' ')[0])
            logging.info('Skip testing data preparation.')
        logging.info(f'{total_sample_num} testing samples in total.')

        test(0, args)

INFO:root:Preparing testing data...


[INFO 2025-02-16 04:04:39,964] Preparing testing data...
[INFO 2025-02-16 04:04:39,964] Preparing testing data...


73152it [00:01, 55099.19it/s]
INFO:root:Writing files...


[INFO 2025-02-16 04:04:41,514] Writing files...
[INFO 2025-02-16 04:04:41,514] Writing files...


INFO:root:73152 testing samples in total.


[INFO 2025-02-16 04:04:42,929] 73152 testing samples in total.
[INFO 2025-02-16 04:04:42,929] 73152 testing samples in total.


  checkpoint = torch.load(ckpt_path, map_location='cpu')
INFO:root:Model loaded from /content/model/epoch-5.pt


[INFO 2025-02-16 04:04:42,967] Model loaded from /content/model/epoch-5.pt
[INFO 2025-02-16 04:04:42,967] Model loaded from /content/model/epoch-5.pt


51282it [00:04, 11935.00it/s]
100%|██████████| 51282/51282 [00:00<00:00, 184251.70it/s]
100%|██████████| 401/401 [00:01<00:00, 313.91it/s]
INFO:root:news scoring num: 51283


[INFO 2025-02-16 04:04:49,207] news scoring num: 51283
[INFO 2025-02-16 04:04:49,207] news scoring num: 51283


100%|██████████| 1000000/1000000 [00:09<00:00, 107058.99it/s]
INFO:root:News doc-sim: 0.12817811946745758


[INFO 2025-02-16 04:04:58,555] News doc-sim: 0.12817811946745758
[INFO 2025-02-16 04:04:58,555] News doc-sim: 0.12817811946745758


  log_vecs = torch.FloatTensor([x[0] for x in tuple_list])
INFO:root:[0] 128 samples: 61.40	28.85	29.55	38.26


[INFO 2025-02-16 04:04:59,230] [0] 128 samples: 61.40	28.85	29.55	38.26
[INFO 2025-02-16 04:04:59,230] [0] 128 samples: 61.40	28.85	29.55	38.26


INFO:root:[0] 12928 samples: 63.58	30.21	32.97	39.13


[INFO 2025-02-16 04:06:10,877] [0] 12928 samples: 63.58	30.21	32.97	39.13
[INFO 2025-02-16 04:06:10,877] [0] 12928 samples: 63.58	30.21	32.97	39.13


INFO:root:[0] 25728 samples: 63.57	30.42	33.22	39.27


[INFO 2025-02-16 04:07:22,000] [0] 25728 samples: 63.57	30.42	33.22	39.27
[INFO 2025-02-16 04:07:22,000] [0] 25728 samples: 63.57	30.42	33.22	39.27


INFO:root:[0] 38528 samples: 63.51	30.30	33.09	39.20


[INFO 2025-02-16 04:08:33,108] [0] 38528 samples: 63.51	30.30	33.09	39.20
[INFO 2025-02-16 04:08:33,108] [0] 38528 samples: 63.51	30.30	33.09	39.20


INFO:root:[0] 51328 samples: 63.38	30.15	32.86	39.02


[INFO 2025-02-16 04:09:44,650] [0] 51328 samples: 63.38	30.15	32.86	39.02
[INFO 2025-02-16 04:09:44,650] [0] 51328 samples: 63.38	30.15	32.86	39.02


INFO:root:[0] 64128 samples: 63.37	30.16	32.88	39.06


[INFO 2025-02-16 04:10:55,804] [0] 64128 samples: 63.37	30.16	32.88	39.06
[INFO 2025-02-16 04:10:55,804] [0] 64128 samples: 63.37	30.16	32.88	39.06


INFO:root:[0] local_sample_num: 73152


[INFO 2025-02-16 04:11:46,083] [0] local_sample_num: 73152
[INFO 2025-02-16 04:11:46,083] [0] local_sample_num: 73152


INFO:root:[*] 73152 samples: 63.36	30.19	32.90	39.09


[INFO 2025-02-16 04:11:46,115] [*] 73152 samples: 63.36	30.19	32.90	39.09
[INFO 2025-02-16 04:11:46,115] [*] 73152 samples: 63.36	30.19	32.90	39.09
