In [None]:
# Colab only: mount Google Drive at /content/drive. The default paths in the
# Args configuration cell point below /content/drive/MyDrive, so this cell has
# to run before anything reads those locations.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import logging
import os
import random
from collections import Counter
from pathlib import Path

import numpy as np
import pandas as pd
import torch
import torch.distributed as dist
import torch.optim as optim
from nltk.tokenize import word_tokenize
from torch.utils.data import DataLoader
from tqdm import tqdm



In [None]:
from dataclasses import dataclass
from typing import Optional

@dataclass
class Args:
    """Notebook-wide run configuration (replaces a command-line parser).

    Every setting is a real dataclass field, so each one can be overridden
    through the constructor, e.g. ``Args(batch_size=64, num_filters=128)``,
    and shows up in ``repr(Args())`` and ``dataclasses.fields(Args)``.

    The seven settings at the bottom used to be un-annotated class attributes,
    which ``@dataclass`` silently ignores. They are appended after the
    previously existing fields so the positional order of those is unchanged.
    """

    nGPU: int = 1
    seed: int = 0
    prepare: bool = True
    mode: str = "train"

    # Data / model locations (Google Drive and Colab-local paths).
    train_data_dir: str = "/content/drive/MyDrive/Colab Notebooks/NewsRecommendation/data/MINDsmall_train"
    test_data_dir: str = "/content/drive/MyDrive/Colab Notebooks/NewsRecommendation/data/MINDsmall_dev"
    # NOTE(review): train and test abstracts currently point at the same file.
    # Alternative kept from the original cell:
    # "/content/drive/MyDrive/Colab Notebooks/NewsRecommendation/data/train_gen_abs.json"
    train_abstract_dir: str = '/content/genAbs0.json'
    # Alternative kept from the original cell:
    # "/content/drive/MyDrive/Colab Notebooks/NewsRecommendation/data/Dev_gen_abs.json"
    test_abstract_dir: str = '/content/genAbs0.json'
    model_dir: str = '/content/drive/MyDrive/Colab Notebooks/NewsRecommendation/LSTUR model'

    batch_size: int = 32
    npratio: int = 4
    enable_gpu: bool = True
    filter_num: int = 3
    log_steps: int = 100
    epochs: int = 5
    lr: float = 0.0003
    num_words_title: int = 20
    num_words_abstract: int = 50
    user_log_length: int = 50
    word_embedding_dim: int = 300
    glove_embedding_path: str = '/content/drive/MyDrive/Colab Notebooks/NewsRecommendation/data/glove.840B.300d.txt'
    freeze_embedding: bool = False
    news_dim: int = 400
    news_query_vector_dim: int = 200
    user_query_vector_dim: int = 200
    num_attention_heads: int = 15
    user_log_mask: bool = False
    save_steps: int = 10000
    start_epoch: int = 0
    load_ckpt_name: Optional[str] = None
    use_category: bool = True
    use_subcategory: bool = True
    use_abstract: bool = True
    use_custom_abstract: bool = True
    category_emb_dim: int = 100

    # Previously bare class attributes (not dataclass fields); now annotated.
    dropout_probability: float = 0.2
    num_filters: int = 300
    window_size: int = 3
    long_short_term_method: str = 'ini'
    masking_probability: float = 0.5
    num_users: int = 1 + 50000
    num_categories: int = 1 + 274

def parse_args():
    """Build the run configuration: an ``Args`` instance with all defaults."""
    default_config = Args()
    return default_config


**Dataset.py**

In [None]:
                                                                                                                                                                                                                                                                                                              from torch.utils.data import IterableDataset, Dataset
                                                                                                                                                                                                                                                                                                              import numpy as np
                                                                                                                                                                                                                                                                                                              import random
                                                                                                                                                                                                                                                                                                              import json


                                                                                                                                                                                                                                                                                                              class DatasetTrain(IterableDataset):
                                                                                                                                                                                                                                                                                                                  def __init__(self, filename, news_index, news_combined, user2id_path, args):
                                                                                                                                                                                                                                                                                                                      super(DatasetTrain).__init__()
                                                                                                                                                                                                                                                                                                                      self.filename = filename
                                                                                                                                                                                                                                                                                                                      self.news_index = news_index  # Maps news ID -> index
                                                                                                                                                                                                                                                                                                                      self.news_combined = news_combined  # News feature matrix
                                                                                                                                                                                                                                                                                                                      self.args = args

                                                                                                                                                                                                                                                                                                                      # Load user2id mapping from file
                                                                                                                                                                                                                                                                                                                      with open(user2id_path, "r") as f:
                                                                                                                                                                                                                                                                                                                          self.user2id = json.load(f)

                                                                                                                                                                                                                                                                                                                  def trans_to_nindex(self, nids):
                                                                                                                                                                                                                                                                                                                      return [self.news_index[i] if i in self.news_index else 0 for i in nids]

                                                                                                                                                                                                                                                                                                                  def pad_to_fix_len(self, x, fix_length, padding_front=True, padding_value=0):
                                                                                                                                                                                                                                                                                                                      if padding_front:
                                                                                                                                                                                                                                                                                                                          pad_x = [padding_value] * (fix_length - len(x)) + x[-fix_length:]
                                                                                                                                                                                                                                                                                                                          mask = [0] * (fix_length - len(x)) + [1] * min(fix_length, len(x))
                                                                                                                                                                                                                                                                                                                      else:
                                                                                                                                                                                                                                                                                                                          pad_x = x[-fix_length:] + [padding_value] * (fix_length - len(x))
                                                                                                                                                                                                                                                                                                                          mask = [1] * min(fix_length, len(x)) + [0] * (fix_length - len(x))
                                                                                                                                                                                                                                                                                                                      return pad_x, np.array(mask, dtype='float32')

                                                                                                                                                                                                                                                                                                                  def line_mapper(self, line):
                                                                                                                                                                                                                                                                                                                      line = line.strip().split("\t")

                                                                                                                                                                                                                                                                                                                      user_id = line[0]  # First column is user ID
                                                                                                                                                                                                                                                                                                                      click_docs = line[3].split()  # Clicked news IDs
                                                                                                                                                                                                                                                                                                                      sess_pos = line[4].split()  # Positive candidate news IDs
                                                                                                                                                                                                                                                                                                                      sess_neg = line[5].split()  # Negative candidate news IDs

                                                                                                                                                                                                                                                                                                                      # Convert user ID to index, default to "unk" if missing
                                                                                                                                                                                                                                                                                                                      user_index = self.user2id.get(user_id, self.user2id["unk"])

                                                                                                                                                                                                                                                                                                                      # Convert clicked news IDs to indices print(f"user shape: {user.shape}")

                                                                                                                                                                                                                                                                                                                      clicked_news = self.trans_to_nindex(click_docs)
                                                                                                                                                                                                                                                                                                                      clicked_news_length = len(clicked_news)


                                                                                                                                                                                                                                                                                                                      # Pad clicked news to fixed length
                                                                                                                                                                                                                                                                                                                      clicked_docs, _ = self.pad_to_fix_len(clicked_news, self.args.user_log_length)
                                                                                                                                                                                                                                                                                                                      clicked_news_feature = self.news_combined[clicked_docs]

                                                                                                                                                                                                                                                                                                                      # Sample candidate news (positive + negative)
                                                                                                                                                                                                                                                                                                                      pos = self.trans_to_nindex(sess_pos)
                                                                                                                                                                                                                                                                                                                      neg = self.trans_to_nindex(sess_neg)
                                                                                                                                                                                                                                                                                                                      label = random.randint(0, self.args.npratio)
                                                                                                                                                                                                                                                                                                                      sample_news = neg[:label] + pos + neg[label:]
                                                                                                                                                                                                                                                                                                                      # sample_news, _ = self.pad_to_fix_len(self.trans_to_nindex(sample_news), self.args.user_log_length)

                                                                                                                                                                                                                                                                                                                      candidate_news_feature = self.news_combined[sample_news]

                                                                                                                                                                                                                                                                                                                      return user_index, clicked_news_length, candidate_news_feature, clicked_news_feature, label

                                                                                                                                                                                                                                                                                                                  def __iter__(self):
                                                                                                                                                                                                                                                                                                                      file_iter = open(self.filename)
                                                                                                                                                                                                                                                                                                                      return map(self.line_mapper, file_iter)


                                                                                                                                                                                                                                                                                                              class DatasetTest(DatasetTrain):
                                                                                                                                                                                                                                                                                                                  def __init__(self, filename, news_index, news_scoring, user2id_path, args):
                                                                                                                                                                                                                                                                                                                      super(DatasetTrain).__init__()
                                                                                                                                                                                                                                                                                                                      self.filename = filename
                                                                                                                                                                                                                                                                                                                      self.news_index = news_index
                                                                                                                                                                                                                                                                                                                      self.news_scoring = news_scoring
                                                                                                                                                                                                                                                                                                                      self.args = args

                                                                                                                                                                                                                                                                                                                      # Load user2id mapping from file
                                                                                                                                                                                                                                                                                                                      with open(user2id_path, "r") as f:
                                                                                                                                                                                                                                                                                                                          self.user2id = json.load(f)

                                                                                                                                                                                                                                                                                                                  def line_mapper(self, line):
                                                                                                                                                                                                                                                                                                                      line = line.strip().split("\t")

                                                                                                                                                                                                                                                                                                                      user_id = line[0]
                                                                                                                                                                                                                                                                                                                      click_docs = line[3].split()

                                                                                                                                                                                                                                                                                                                      user_index = self.user2id.get(user_id, self.user2id["unk"])

                                                                                                                                                                                                                                                                                                                      clicked_news = self.trans_to_nindex(click_docs)
                                                                                                                                                                                                                                                                                                                      clicked_news_length = len(clicked_news)

                                                                                                                                                                                                                                                                                                                      clicked_docs, _ = self.pad_to_fix_len(clicked_news, self.args.user_log_length)
                                                                                                                                                                                                                                                                                                                      clicked_news_indices = self.trans_to_nindex(click_docs)  # Convert news IDs to indices
                                                                                                                                                                                                                                                                                                                      clicked_news_feature = self.news_scoring[clicked_docs]



                                                                                                                                                                                                                                                                                                                      candidate_news = self.trans_to_nindex([i.split('-')[0] for i in line[4].split()])
                                                                                                                                                                                                                                                                                                                      label = np.array([int(i.split('-')[1]) for i in line[4].split()])
                                                                                                                                                                                                                                                                                                                      candidate_news_feature  = self.news_scoring[candidate_news]

                                                                                                                                                                                                                                                                                                                      return user_index, clicked_news_length, candidate_news_feature, clicked_news_feature, label
                                                                                                                                                                                                                                                                                                                  def __iter__(self):
                                                                                                                                                                                                                                                                                                                      file_iter = open(self.filename)
                                                                                                                                                                                                                                                                                                                      return map(self.line_mapper, file_iter)


                                                                                                                                                                                                                                                                                                              class NewsDataset(Dataset):
                                                                                                                                                                                                                                                                                                                  def __init__(self, data):
                                                                                                                                                                                                                                                                                                                      self.data = data

                                                                                                                                                                                                                                                                                                                  def __getitem__(self, idx):
                                                                                                                                                                                                                                                                                                                      return self.data[idx]

                                                                                                                                                                                                                                                                                                                  def __len__(self):
                                                                                                                                                                                                                                                                                                                      return self.data.shape[0]


**Metric.py**

In [None]:
from sklearn.metrics import roc_auc_score
import numpy as np


def dcg_score(y_true, y_score, k=10):
    """Discounted cumulative gain of the ``k`` highest-scored items.

    Items are ranked by descending ``y_score``; each of the top ``k`` items
    contributes ``(2**label - 1) / log2(rank + 1)`` with ``rank`` starting at 1.
    """
    ranking = np.argsort(y_score)[::-1]
    top_labels = np.take(y_true, ranking[:k])
    positions = np.arange(len(top_labels))
    per_item = (2**top_labels - 1) / np.log2(positions + 2)
    return np.sum(per_item)


def ndcg_score(y_true, y_score, k=10):
    """Normalised DCG at ``k``: DCG of the predicted ranking over the ideal DCG.

    Args:
        y_true: relevance labels, one per candidate.
        y_score: predicted scores aligned with ``y_true``.
        k: cut-off rank passed to ``dcg_score``.

    Returns:
        ``dcg(y_true, y_score) / dcg(y_true, y_true)``, or ``0.0`` when the
        ideal DCG is zero (no relevant candidate), instead of a NaN produced
        by dividing zero by zero.
    """
    best = dcg_score(y_true, y_true, k)
    if best == 0:
        # No positive label among the candidates: the ratio is undefined.
        return 0.0
    return dcg_score(y_true, y_score, k) / best


def mrr_score(y_true, y_score):
    """Mean reciprocal rank of the relevant candidates.

    Args:
        y_true: relevance labels, one per candidate.
        y_score: predicted scores aligned with ``y_true``.

    Returns:
        Sum of ``label / rank`` over the score-sorted candidates divided by the
        sum of labels, or ``0.0`` when the labels sum to zero (no relevant
        candidate), instead of a NaN produced by dividing zero by zero.
    """
    order = np.argsort(y_score)[::-1]
    ranked_labels = np.take(y_true, order)
    total_relevant = np.sum(ranked_labels)
    if total_relevant == 0:
        # Nothing to rank: avoid 0 / 0.
        return 0.0
    reciprocal_ranks = ranked_labels / np.arange(1, len(ranked_labels) + 1)
    return np.sum(reciprocal_ranks) / total_relevant


def ctr_score(y_true, y_score, k=1):
    """Mean label of the ``k`` highest-scored candidates.

    Args:
        y_true: click labels, one per candidate.
        y_score: predicted scores aligned with ``y_true``.
        k: number of top-ranked candidates to average over.

    Returns:
        ``np.mean`` of the labels found at the top-``k`` positions.
    """
    top_k = np.argsort(y_score)[::-1][:k]
    return np.mean(np.take(y_true, top_k))

def acc(y_true, y_hat):
    """Fraction of rows whose arg-max prediction equals the target index.

    Args:
        y_true: tensor of target indices; its first dimension is the sample count.
        y_hat: tensor of scores; the arg-max is taken over the last dimension.

    Returns:
        A float tensor holding ``hits / y_true.shape[0]``.
    """
    predicted = torch.argmax(y_hat, dim=-1)
    num_correct = torch.sum(y_true == predicted)
    return num_correct.data.float() * 1.0 / y_true.shape[0]



**Utils.py**

In [None]:
import logging
import argparse
import sys

def setuplogger():
    """Send INFO-level (and above) root-logger records to stdout.

    The handler is tagged with a fixed name so that calling this function
    again (e.g. re-running the notebook cell) does not attach a second copy
    and print every message twice.
    """
    handler_name = "setuplogger-stdout"
    root = logging.getLogger()
    root.setLevel(logging.INFO)

    # Already installed by an earlier call: nothing more to do.
    if any(existing.get_name() == handler_name for existing in root.handlers):
        return

    handler = logging.StreamHandler(sys.stdout)
    handler.set_name(handler_name)
    handler.setLevel(logging.INFO)
    handler.setFormatter(logging.Formatter("[%(levelname)s %(asctime)s] %(message)s"))
    root.addHandler(handler)


def dump_args(args):
    """Log every public attribute of ``args`` as ``args[name]=value`` at INFO level.

    Attributes whose name starts with an underscore are skipped; the order is
    the one returned by ``dir(args)``.
    """
    public_names = (name for name in dir(args) if not name.startswith("_"))
    for name in public_names:
        logging.info(f"args[{name}]={getattr(args, name)}")

def load_matrix(embedding_file_path, word_dict, word_embedding_dim):
    """Build an embedding matrix for ``word_dict`` from a GloVe-style text file.

    Each line of the file is ``<token> <v1> ... <vN>`` separated by whitespace.
    The last ``word_embedding_dim`` fields are taken as the vector and
    everything before them as the token, so tokens that themselves contain
    spaces no longer make ``float()`` fail. Blank or too-short lines are
    skipped.

    Args:
        embedding_file_path: path of the embedding file, or ``None`` to get an
            all-zero matrix.
        word_dict: mapping word -> row index (index 0 is left as zeros).
        word_embedding_dim: number of vector components per line.

    Returns:
        ``(embedding_matrix, have_word)`` where ``embedding_matrix`` has shape
        ``(len(word_dict) + 1, word_embedding_dim)`` and ``have_word`` lists the
        dictionary words found in the file, in file order.

    Note:
        A file whose vectors are longer than ``word_embedding_dim`` yields
        tokens that never match ``word_dict``; an empty ``have_word`` is the
        symptom of such a dimension mismatch.
    """
    embedding_matrix = np.zeros(shape=(len(word_dict) + 1, word_embedding_dim))
    have_word = []
    if embedding_file_path is None:
        return embedding_matrix, have_word

    with open(embedding_file_path, 'rb') as f:
        for raw_line in f:
            fields = raw_line.split()
            # Need at least one token field in front of the vector components.
            if len(fields) <= word_embedding_dim:
                continue
            word = b' '.join(fields[:-word_embedding_dim]).decode()
            if word not in word_dict:
                continue
            vector = [float(x) for x in fields[-word_embedding_dim:]]
            embedding_matrix[word_dict[word]] = np.array(vector)
            have_word.append(word)
    return embedding_matrix, have_word


def get_checkpoint(directory, ckpt_name):
    """Return the path ``directory/ckpt_name`` if it exists, otherwise ``None``."""
    candidate = os.path.join(directory, ckpt_name)
    return candidate if os.path.exists(candidate) else None


**preprocess.py**

In [None]:
from collections import Counter
from tqdm import tqdm
import numpy as np
from nltk.tokenize import word_tokenize
import json


def update_dict(dict, key, value=None):
    """Insert ``key`` into ``dict`` unless it is already present.

    When ``value`` is ``None`` the key receives the next 1-based index
    (``len(dict) + 1``); otherwise it receives ``value``. Existing keys are
    never overwritten.
    """
    if key in dict:
        return
    dict[key] = len(dict) + 1 if value is None else value


def read_custom_abstract(news_file, custom_abstract_dict):
    """Parse a tab-separated news file, substituting custom abstracts where given.

    Every line must hold exactly eight tab-separated fields:
    doc id, category, subcategory, title, abstract, url, title entities and
    abstract entities. Titles and abstracts are split on single spaces.

    Args:
        news_file: path of the news file (read as UTF-8).
        custom_abstract_dict: mapping doc id -> replacement abstract text.

    Returns:
        ``(news, news_index, category_dict, subcategory_dict, word_cnt)`` where
        ``news`` maps doc id -> ``[title_words, category, subcategory,
        abstract_words]``, the two category dictionaries and ``news_index``
        assign 1-based ids in order of appearance, and ``word_cnt`` counts the
        occurrences of every title and abstract word.
    """
    news, news_index = {}, {}
    category_dict, subcategory_dict = {}, {}
    word_cnt = {}

    with open(news_file, 'r', encoding='utf-8') as f:
        for line in f:
            fields = line.strip('\n').split('\t')
            doc_id, category, subcategory, title, abstract, url, entity_title, entity_abstract = fields
            # Prefer the custom abstract when one exists for this document.
            abstract = custom_abstract_dict.get(doc_id, abstract)

            title_words = title.split(' ')
            abstract_words = abstract.split(' ')
            news[doc_id] = [title_words, category, subcategory, abstract_words]
            news_index[doc_id] = len(news_index) + 1

            for word in title_words + abstract_words:
                word_cnt[word] = word_cnt.get(word, 0) + 1

            category_dict.setdefault(category, len(category_dict) + 1)
            subcategory_dict.setdefault(subcategory, len(subcategory_dict) + 1)

    return news, news_index, category_dict, subcategory_dict, word_cnt

def read_news(news_path, abstract_path, args, mode='train'):
    """Read the news file and, in train mode, build the vocabularies.

    Each line of ``news_path`` must contain eight tab-separated fields (doc id,
    category, subcategory, title, abstract, url and two ignored fields).
    Titles are lower-cased and tokenised with ``word_tokenize``; the abstract
    is stored as-is (raw text from the file, or the entry from the custom
    abstract JSON when one exists for the doc id).

    Args:
        news_path: path of the tab-separated news file (UTF-8).
        abstract_path: path of a JSON object mapping doc id -> abstract; only
            read when ``args.use_custom_abstract`` is truthy.
        args: configuration object. Reads ``use_custom_abstract`` (optional,
            treated as ``False`` when missing), and in train mode
            ``use_category``, ``use_subcategory`` and ``filter_num``.
        mode: ``'train'`` or ``'test'``.

    Returns:
        train: ``(news, news_index, category_dict, subcategory_dict, word_dict)``
        test: ``(news, news_index)``
        ``news`` maps doc id -> ``[title_tokens, category, subcategory,
        abstract]``; all index dictionaries are 1-based. ``word_dict`` only
        contains title tokens seen more than ``args.filter_num`` times.

    Raises:
        AssertionError: if ``mode`` is neither ``'train'`` nor ``'test'``
            (raised before any file is read).
    """
    if mode not in ('train', 'test'):
        # Explicit raise so the check also works under ``python -O``.
        raise AssertionError('Wrong mode!')

    news = {}
    category_dict = {}
    subcategory_dict = {}
    news_index = {}
    word_cnt = Counter()

    # Always a dict, so the lookup below is valid when custom abstracts are off
    # (the previous local name ``abs`` fell back to the builtin function).
    custom_abstracts = {}
    if getattr(args, 'use_custom_abstract', False):
        with open(abstract_path, 'r', encoding='utf-8') as f:
            custom_abstracts = json.load(f)

    with open(news_path, 'r', encoding='utf-8') as f:
        for line in tqdm(f):
            splited = line.strip('\n').split('\t')
            doc_id, category, subcategory, title, abstract, url, _, _ = splited
            update_dict(news_index, doc_id)

            title = word_tokenize(title.lower(), language='english', preserve_line=True)
            abstract = custom_abstracts.get(doc_id, abstract)

            update_dict(news, doc_id, [title, category, subcategory, abstract])
            if mode == 'train':
                if args.use_category:
                    update_dict(category_dict, category)
                if args.use_subcategory:
                    update_dict(subcategory_dict, subcategory)
                word_cnt.update(title)

    if mode == 'test':
        return news, news_index

    # Keep words seen more than filter_num times; ids start at 1 (0 = padding/unknown).
    frequent_words = [k for k, v in word_cnt.items() if v > args.filter_num]
    word_dict = {word: idx for idx, word in enumerate(frequent_words, start=1)}
    return news, news_index, category_dict, subcategory_dict, word_dict


def get_doc_input(news, news_index, category_dict, subcategory_dict, word_dict, args):
    """Turn parsed news into fixed-size integer feature arrays.

    Row 0 of every array is left as zeros; each document is written to the row
    given by ``news_index``. Unknown words, categories and subcategories are
    encoded as 0, and titles/abstracts are truncated or zero-padded to
    ``args.num_words_title`` / ``args.num_words_abstract``.

    An abstract supplied as a raw string is lower-cased and tokenised with
    ``word_tokenize`` (the same treatment ``read_news`` applies to titles)
    before the vocabulary lookup. Indexing the string directly would look up
    single characters instead of words.

    Args:
        news: mapping doc id -> ``[title_tokens, category, subcategory, abstract]``.
        news_index: mapping doc id -> row index.
        category_dict / subcategory_dict / word_dict: label/word -> integer id.
        args: reads ``num_words_title``, ``num_words_abstract``,
            ``use_category``, ``use_subcategory`` and ``use_abstract``.

    Returns:
        ``(news_title, news_category, news_subcategory, news_abstract)`` as
        int32 arrays with ``len(news) + 1`` rows; a disabled feature is ``None``.
    """
    news_num = len(news) + 1
    news_title = np.zeros((news_num, args.num_words_title), dtype='int32')
    news_category = np.zeros((news_num, 1), dtype='int32') if args.use_category else None
    news_subcategory = np.zeros((news_num, 1), dtype='int32') if args.use_subcategory else None
    news_abstract = np.zeros((news_num, args.num_words_abstract), dtype='int32') if args.use_abstract else None

    for key in tqdm(news):
        title, category, subcategory, abstract = news[key]
        doc_index = news_index[key]

        for position in range(min(args.num_words_title, len(title))):
            token = title[position]
            if token in word_dict:
                news_title[doc_index, position] = word_dict[token]

        if args.use_category:
            news_category[doc_index, 0] = category_dict.get(category, 0)
        if args.use_subcategory:
            news_subcategory[doc_index, 0] = subcategory_dict.get(subcategory, 0)
        if args.use_abstract:
            if isinstance(abstract, str):
                # Raw text: tokenise so that positions refer to words, not characters.
                abstract = word_tokenize(abstract.lower(), language='english', preserve_line=True)
            for position in range(min(args.num_words_abstract, len(abstract))):
                token = abstract[position]
                if token in word_dict:
                    news_abstract[doc_index, position] = word_dict[token]

    return news_title, news_category, news_subcategory, news_abstract

**prepare_data.py**

In [None]:
import os
from tqdm import tqdm
import random
import logging


def get_sample(all_elements, num_sample):
    """Draw ``num_sample`` items from ``all_elements`` with ``random.sample``.

    When more items are requested than are available, the list is first
    repeated enough times to cover the request, so the result may then contain
    duplicates.
    """
    pool = all_elements
    if num_sample > len(all_elements):
        repeats = num_sample // len(all_elements) + 1
        pool = all_elements * repeats
    return random.sample(pool, num_sample)


def prepare_training_data(train_data_dir, nGPU, npratio, seed):
    """Expand ``behaviors.tsv`` into shuffled positive/negative training samples.

    Every impression line (impression id, user id, time, history, impressions)
    is split into clicked (label ``1``) and non-clicked (label ``0``) news.
    Impressions lacking either kind are dropped. Each clicked item yields one
    output line carrying ``npratio`` sampled negatives. The lines are shuffled
    and dealt round-robin into ``nGPU`` files named
    ``behaviors_np{npratio}_{i}.tsv`` inside ``train_data_dir``.

    Args:
        train_data_dir: directory holding ``behaviors.tsv``; outputs go here too.
        nGPU: number of output shards.
        npratio: negatives sampled per positive.
        seed: value passed to ``random.seed`` before sampling and shuffling.

    Returns:
        Total number of generated samples.
    """
    random.seed(seed)
    samples = []

    source_path = os.path.join(train_data_dir, 'behaviors.tsv')
    with open(source_path, 'r', encoding='utf-8') as source:
        for raw in tqdm(source):
            iid, uid, time, history, imp = raw.strip().split('\t')
            clicked, skipped = [], []
            for news_ID, label in (item.split('-') for item in imp.split(' ')):
                if label == '1':
                    clicked.append(news_ID)
                elif label == '0':
                    skipped.append(news_ID)
            # A usable impression needs at least one click and one non-click.
            if not clicked or not skipped:
                continue
            shared_fields = [iid, uid, time, history]
            for pos_id in clicked:
                negatives = ' '.join(get_sample(skipped, npratio))
                samples.append('\t'.join(shared_fields + [pos_id, negatives]) + '\n')

    random.shuffle(samples)

    shards = [[] for _ in range(nGPU)]
    for position, sample in enumerate(samples):
        shards[position % nGPU].append(sample)

    logging.info('Writing files...')
    for shard_id, shard in enumerate(shards):
        target_path = os.path.join(train_data_dir, f'behaviors_np{npratio}_{shard_id}.tsv')
        with open(target_path, 'w') as target:
            target.writelines(shard)

    return len(samples)


def prepare_testing_data(test_data_dir, nGPU):
    """Deal the lines of ``behaviors.tsv`` round-robin into ``nGPU`` shard files.

    Shard ``i`` is written to ``behaviors_{i}.tsv`` inside ``test_data_dir`` and
    receives every line whose 0-based position is congruent to ``i`` modulo
    ``nGPU``; lines are copied unchanged.

    Returns:
        Total number of lines distributed.
    """
    shards = [[] for _ in range(nGPU)]

    source_path = os.path.join(test_data_dir, 'behaviors.tsv')
    with open(source_path, 'r', encoding='utf-8') as source:
        for position, raw in enumerate(tqdm(source)):
            shards[position % nGPU].append(raw)

    logging.info('Writing files...')
    for shard_id, shard in enumerate(shards):
        target_path = os.path.join(test_data_dir, f'behaviors_{shard_id}.tsv')
        with open(target_path, 'w') as target:
            target.writelines(shard)

    return sum(len(shard) for shard in shards)


In [None]:
def _save_checkpoint(ckpt_path, model, is_distributed, category_dict, subcategory_dict, word_dict):
    """Save the model weights together with the three vocabularies."""
    state_dict = model.state_dict()
    if is_distributed:
        # Drop the first dotted component of every key (the wrapper prefix
        # added by DistributedDataParallel) so the weights load into a bare model.
        state_dict = {'.'.join(k.split('.')[1:]): v for k, v in state_dict.items()}
    torch.save(
        {
            'model_state_dict': state_dict,
            'category_dict': category_dict,
            'subcategory_dict': subcategory_dict,
            'word_dict': word_dict,
        }, ckpt_path)
    logging.info(f"Model saved to {ckpt_path}.")


def train(rank, args):
    """Train ``Model`` on the pre-split behaviour file of worker ``rank``.

    Steps: read the training news and build feature arrays, initialise the word
    embedding matrix from the GloVe file, optionally restore a checkpoint, then
    run ``args.epochs - args.start_epoch`` epochs of Adam updates. Rank 0 saves
    a checkpoint every ``args.save_steps`` batches and at the end of each epoch.

    Args:
        rank: worker index; selects the CUDA device and the input shard
            ``behaviors_np{npratio}_{rank}.tsv``.
        args: run configuration (data/model directories, batch size, learning
            rate, logging and saving intervals, feature switches).

    Raises:
        FileNotFoundError: if ``args.load_ckpt_name`` is set but the checkpoint
            file does not exist in ``args.model_dir``.
    """
    is_distributed = False
    # Only touch CUDA when the run is configured for it.
    if args.enable_gpu:
        torch.cuda.set_device(rank)

    news, news_index, category_dict, subcategory_dict, word_dict = read_news(
        os.path.join(args.train_data_dir, 'news.tsv'), args.train_abstract_dir, args, mode='train')

    news_title, news_category, news_subcategory, news_abstract = get_doc_input(
        news, news_index, category_dict, subcategory_dict, word_dict, args)
    # Disabled features come back as None and are left out of the feature matrix.
    enabled_features = [x for x in [news_title, news_category, news_subcategory, news_abstract] if x is not None]
    news_combined = np.concatenate(enabled_features, axis=-1)

    if rank == 0:
        logging.info('Initializing word embedding matrix...')

    embedding_matrix, have_word = load_matrix(args.glove_embedding_path,
                                              word_dict,
                                              args.word_embedding_dim)
    if rank == 0:
        logging.info(f'Word dict length: {len(word_dict)}')
        logging.info(f'Have words: {len(have_word)}')
        # max(..., 1) keeps the log line from dividing by zero on an empty vocabulary.
        logging.info(f'Missing rate: {(len(word_dict) - len(have_word)) / max(len(word_dict), 1)}')

    model = Model(args, embedding_matrix, len(category_dict), len(subcategory_dict))

    if args.load_ckpt_name is not None:
        ckpt_path = get_checkpoint(args.model_dir, args.load_ckpt_name)
        if ckpt_path is None:
            raise FileNotFoundError(
                f"Checkpoint {args.load_ckpt_name!r} not found in {args.model_dir!r}.")
        checkpoint = torch.load(ckpt_path, map_location='cpu')
        model.load_state_dict(checkpoint['model_state_dict'])
        logging.info(f"Model loaded from {ckpt_path}.")

    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    if args.enable_gpu:
        model = model.cuda(rank)

    if is_distributed:
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[rank])

    data_file_path = os.path.join(args.train_data_dir, f'behaviors_np{args.npratio}_{rank}.tsv')

    dataset = DatasetTrain(data_file_path, news_index, news_combined, args)
    dataloader = DataLoader(dataset, batch_size=args.batch_size)

    logging.info('Training...')
    for ep in range(args.start_epoch, args.epochs):
        loss = 0.0
        accuracy = 0.0
        for cnt, (log_ids, log_mask, input_ids, targets) in enumerate(dataloader):
            if args.enable_gpu:
                log_ids = log_ids.cuda(rank, non_blocking=True)
                log_mask = log_mask.cuda(rank, non_blocking=True)
                input_ids = input_ids.cuda(rank, non_blocking=True)
                targets = targets.cuda(rank, non_blocking=True)

            bz_loss, y_hat = model(log_ids, log_mask, input_ids, targets)
            loss += bz_loss.detach().float()
            accuracy += acc(targets, y_hat)
            optimizer.zero_grad()
            bz_loss.backward()
            optimizer.step()

            if cnt % args.log_steps == 0:
                # cnt is 0-based, so cnt + 1 batches are accumulated at this point;
                # dividing by cnt itself gave inf/nan on the very first step.
                batches_seen = cnt + 1
                logging.info(
                    '[{}] Ed: {}, train_loss: {:.5f}, acc: {:.5f}'.format(
                        rank, cnt * args.batch_size, loss / batches_seen, accuracy / batches_seen)
                )

            if rank == 0 and cnt != 0 and cnt % args.save_steps == 0:
                _save_checkpoint(
                    os.path.join(args.model_dir, f'epoch-{ep+1}-{cnt}.pt'),
                    model, is_distributed, category_dict, subcategory_dict, word_dict)

        if rank == 0:
            _save_checkpoint(
                os.path.join(args.model_dir, f'epoch-{ep+1}.pt'),
                model, is_distributed, category_dict, subcategory_dict, word_dict)

    logging.info('Training finish.')



In [None]:

    # Session setup: logging, configuration and the Python RNG seed.
    import subprocess
    # Attach the stdout handler so INFO records are printed with a timestamp.
    setuplogger()
    # NOTE(review): parse_args is not defined in the visible part of this notebook;
    # presumably it builds the run configuration object — confirm where it comes from.
    args = parse_args()
    # Log every public attribute of the configuration.
    dump_args(args)
    # Seeds Python's `random` module only.
    random.seed(args.seed)





INFO:root:args[batch_size]=32


[INFO 2025-03-03 08:59:55,655] args[batch_size]=32


INFO:root:args[category_emb_dim]=100


[INFO 2025-03-03 08:59:55,657] args[category_emb_dim]=100


INFO:root:args[dropout_probability]=0.2


[INFO 2025-03-03 08:59:55,658] args[dropout_probability]=0.2


INFO:root:args[enable_gpu]=True


[INFO 2025-03-03 08:59:55,660] args[enable_gpu]=True


INFO:root:args[epochs]=5


[INFO 2025-03-03 08:59:55,662] args[epochs]=5


INFO:root:args[filter_num]=3


[INFO 2025-03-03 08:59:55,663] args[filter_num]=3


INFO:root:args[freeze_embedding]=False


[INFO 2025-03-03 08:59:55,664] args[freeze_embedding]=False


INFO:root:args[glove_embedding_path]=/content/drive/MyDrive/Colab Notebooks/NewsRecommendation/data/glove.840B.300d.txt


[INFO 2025-03-03 08:59:55,665] args[glove_embedding_path]=/content/drive/MyDrive/Colab Notebooks/NewsRecommendation/data/glove.840B.300d.txt


INFO:root:args[load_ckpt_name]=None


[INFO 2025-03-03 08:59:55,669] args[load_ckpt_name]=None


INFO:root:args[log_steps]=100


[INFO 2025-03-03 08:59:55,670] args[log_steps]=100


INFO:root:args[long_short_term_method]=ini


[INFO 2025-03-03 08:59:55,671] args[long_short_term_method]=ini


INFO:root:args[lr]=0.0003


[INFO 2025-03-03 08:59:55,672] args[lr]=0.0003


INFO:root:args[masking_probability]=0.5


[INFO 2025-03-03 08:59:55,673] args[masking_probability]=0.5


INFO:root:args[mode]=train


[INFO 2025-03-03 08:59:55,673] args[mode]=train


INFO:root:args[model_dir]=/content/drive/MyDrive/Colab Notebooks/NewsRecommendation/LSTUR model


[INFO 2025-03-03 08:59:55,675] args[model_dir]=/content/drive/MyDrive/Colab Notebooks/NewsRecommendation/LSTUR model


INFO:root:args[nGPU]=1


[INFO 2025-03-03 08:59:55,677] args[nGPU]=1


INFO:root:args[news_dim]=400


[INFO 2025-03-03 08:59:55,678] args[news_dim]=400


INFO:root:args[news_query_vector_dim]=200


[INFO 2025-03-03 08:59:55,678] args[news_query_vector_dim]=200


INFO:root:args[npratio]=4


[INFO 2025-03-03 08:59:55,680] args[npratio]=4


INFO:root:args[num_attention_heads]=15


[INFO 2025-03-03 08:59:55,682] args[num_attention_heads]=15


INFO:root:args[num_categories]=275


[INFO 2025-03-03 08:59:55,683] args[num_categories]=275


INFO:root:args[num_filters]=300


[INFO 2025-03-03 08:59:55,683] args[num_filters]=300


INFO:root:args[num_users]=50001


[INFO 2025-03-03 08:59:55,686] args[num_users]=50001


INFO:root:args[num_words_abstract]=50


[INFO 2025-03-03 08:59:55,687] args[num_words_abstract]=50


INFO:root:args[num_words_title]=20


[INFO 2025-03-03 08:59:55,687] args[num_words_title]=20


INFO:root:args[prepare]=True


[INFO 2025-03-03 08:59:55,690] args[prepare]=True


INFO:root:args[save_steps]=10000


[INFO 2025-03-03 08:59:55,690] args[save_steps]=10000


INFO:root:args[seed]=0


[INFO 2025-03-03 08:59:55,691] args[seed]=0


INFO:root:args[start_epoch]=0


[INFO 2025-03-03 08:59:55,692] args[start_epoch]=0


INFO:root:args[test_abstract_dir]=/content/genAbs0.json


[INFO 2025-03-03 08:59:55,694] args[test_abstract_dir]=/content/genAbs0.json


INFO:root:args[test_data_dir]=/content/drive/MyDrive/Colab Notebooks/NewsRecommendation/data/MINDsmall_dev


[INFO 2025-03-03 08:59:55,695] args[test_data_dir]=/content/drive/MyDrive/Colab Notebooks/NewsRecommendation/data/MINDsmall_dev


INFO:root:args[train_abstract_dir]=/content/genAbs0.json


[INFO 2025-03-03 08:59:55,695] args[train_abstract_dir]=/content/genAbs0.json


INFO:root:args[train_data_dir]=/content/drive/MyDrive/Colab Notebooks/NewsRecommendation/data/MINDsmall_train


[INFO 2025-03-03 08:59:55,697] args[train_data_dir]=/content/drive/MyDrive/Colab Notebooks/NewsRecommendation/data/MINDsmall_train


INFO:root:args[use_abstract]=True


[INFO 2025-03-03 08:59:55,698] args[use_abstract]=True


INFO:root:args[use_category]=True


[INFO 2025-03-03 08:59:55,700] args[use_category]=True


INFO:root:args[use_custom_abstract]=True


[INFO 2025-03-03 08:59:55,701] args[use_custom_abstract]=True


INFO:root:args[use_subcategory]=True


[INFO 2025-03-03 08:59:55,702] args[use_subcategory]=True


INFO:root:args[user_log_length]=50


[INFO 2025-03-03 08:59:55,704] args[user_log_length]=50


INFO:root:args[user_log_mask]=False


[INFO 2025-03-03 08:59:55,705] args[user_log_mask]=False


INFO:root:args[user_query_vector_dim]=200


[INFO 2025-03-03 08:59:55,708] args[user_query_vector_dim]=200


INFO:root:args[window_size]=3


[INFO 2025-03-03 08:59:55,708] args[window_size]=3


INFO:root:args[word_embedding_dim]=300


[INFO 2025-03-03 08:59:55,711] args[word_embedding_dim]=300


# **NRMS**

In [None]:
import torch
import torch.nn as nn
class DotProductClickPredictor(torch.nn.Module):
    """Scores every candidate news vector by its dot product with the user vector."""

    def __init__(self):
        super().__init__()

    def forward(self, candidate_news_vector, user_vector):
        """
        Args:
            candidate_news_vector: batch_size, candidate_size, X
            user_vector: batch_size, X
        Returns:
            (shape): batch_size, candidate_size
        """
        # batch_size, X, 1
        user_column = user_vector.unsqueeze(dim=-1)
        # batch_size, candidate_size, 1
        scores = torch.bmm(candidate_news_vector, user_column)
        # batch_size, candidate_size
        return scores.squeeze(dim=-1)
class AdditiveAttention(torch.nn.Module):
    """
    A general additive attention module.
    Originally for NAML.

    Each candidate vector is projected to ``query_vector_dim``, passed through
    ``tanh`` and scored against a learned query vector; the softmax of those
    scores over the candidate axis weights the sum of the candidate vectors.
    """
    def __init__(self,
                 query_vector_dim,
                 candidate_vector_dim,
                 writer=None,
                 tag=None,
                 names=None):
        """
        Args:
            query_vector_dim: size of the projection and of the query vector.
            candidate_vector_dim: size of each input candidate vector.
            writer: optional object with an ``add_scalars`` method used to log
                the mean attention weights every 10th forward call.
            tag: tag passed to ``writer.add_scalars``.
            names: one label per candidate; required when ``writer`` is given.
        """
        super().__init__()
        self.linear = nn.Linear(candidate_vector_dim, query_vector_dim)
        self.attention_query_vector = nn.Parameter(
            torch.empty(query_vector_dim).uniform_(-0.1, 0.1))
        # For tensorboard
        self.writer = writer
        self.tag = tag
        self.names = names
        self.local_step = 1

    def forward(self, candidate_vector):
        """
        Args:
            candidate_vector: batch_size, candidate_size, candidate_vector_dim
        Returns:
            (shape) batch_size, candidate_vector_dim
        """
        # batch_size, candidate_size, query_vector_dim
        temp = torch.tanh(self.linear(candidate_vector))
        # batch_size, candidate_size
        # torch.softmax is used instead of F.softmax so the class only needs the
        # names imported in its own cell (torch, nn), not an import from a later cell.
        candidate_weights = torch.softmax(
            torch.matmul(temp, self.attention_query_vector), dim=1)
        if self.writer is not None:
            assert candidate_weights.size(1) == len(self.names)
            if self.local_step % 10 == 0:
                self.writer.add_scalars(
                    self.tag, {
                        x: y
                        for x, y in zip(self.names,
                                        candidate_weights.mean(dim=0))
                    }, self.local_step)
            self.local_step += 1
        # batch_size, candidate_vector_dim
        target = torch.bmm(candidate_weights.unsqueeze(dim=1),
                           candidate_vector).squeeze(dim=1)
        return target

class ScaledDotProductAttention(nn.Module):
    """Dot-product attention with an explicit exp/normalise step and optional mask.

    NOTE(review): the raw scaled scores are exponentiated without subtracting
    the row maximum, so very large scores can overflow.
    """

    def __init__(self, d_k):
        super().__init__()
        self.d_k = d_k

    def forward(self, Q, K, V, attn_mask=None):
        """
        Args:
            Q, K, V: tensors whose last two axes are (positions, features).
            attn_mask: optional tensor multiplied into the exponentiated scores;
                positions where it is zero receive no attention weight.
        Returns:
            (context, attn): the attention-weighted values and the weights.
        """
        scale = np.sqrt(self.d_k)
        weights = torch.exp(torch.matmul(Q, K.transpose(-1, -2)) / scale)
        if attn_mask is not None:
            weights = weights * attn_mask
        # The small constant keeps fully masked rows from dividing by zero.
        normaliser = torch.sum(weights, dim=-1, keepdim=True) + 1e-8
        attn = weights / normaliser
        return torch.matmul(attn, V), attn



class MultiHeadSelfAttention(nn.Module):
    """Multi-head attention with learned Q/K/V projections and an optional length mask."""

    def __init__(self, d_model, num_attention_heads):
        """
        Args:
            d_model: size of the input and output feature axis; must be
                divisible by ``num_attention_heads``.
            num_attention_heads: number of heads the projections are split into.
        """
        super().__init__()
        self.d_model = d_model
        self.num_attention_heads = num_attention_heads
        assert d_model % num_attention_heads == 0
        self.d_k = d_model // num_attention_heads
        self.d_v = d_model // num_attention_heads

        self.W_Q = nn.Linear(d_model, d_model)
        self.W_K = nn.Linear(d_model, d_model)
        self.W_V = nn.Linear(d_model, d_model)
        # Parameter-free helper, built once instead of on every forward call.
        self.attention = ScaledDotProductAttention(self.d_k)

        self._initialize_weights()

    def _initialize_weights(self):
        """Xavier-initialise the weight matrix of every linear projection."""
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight, gain=1)

    def forward(self, Q, K=None, V=None, length=None):
        """
        Args:
            Q: batch_size, seq_len, d_model
            K, V: same layout as Q; each defaults to Q (self-attention).
            length: optional batch_size tensor; key positions at or beyond
                ``length[b]`` receive no attention for sample ``b``.
        Returns:
            (shape) batch_size, seq_len, d_model
        """
        if K is None:
            K = Q
        if V is None:
            V = Q
        batch_size = Q.size(0)

        q_s = self.W_Q(Q).view(batch_size, -1, self.num_attention_heads,
                               self.d_k).transpose(1, 2)
        k_s = self.W_K(K).view(batch_size, -1, self.num_attention_heads,
                               self.d_k).transpose(1, 2)
        v_s = self.W_V(V).view(batch_size, -1, self.num_attention_heads,
                               self.d_v).transpose(1, 2)

        if length is not None:
            maxlen = Q.size(1)
            # Build the mask on Q's device; a CPU arange cannot be compared or
            # multiplied with CUDA tensors.
            positions = torch.arange(maxlen, device=Q.device)
            # batch_size, maxlen: True where the key position is within the length
            valid_keys = positions.unsqueeze(0) < length.to(Q.device).view(-1, 1)
            # batch_size, heads, maxlen (queries), maxlen (keys)
            attn_mask = valid_keys[:, None, None, :].expand(
                batch_size, self.num_attention_heads, maxlen, maxlen)
        else:
            attn_mask = None

        context, attn = self.attention(q_s, k_s, v_s, attn_mask)
        context = context.transpose(1, 2).contiguous().view(
            batch_size, -1, self.num_attention_heads * self.d_v)
        return context

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.utils.rnn import pack_padded_sequence

# Module-level default device: the first CUDA GPU when CUDA is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

class NewsEncoder(torch.nn.Module):
    """Encodes a news item from its title words and, optionally, category ids.

    The title goes through word embedding, a convolution over word windows and
    additive attention; the category and subcategory ids are looked up in one
    shared embedding table. The enabled parts are concatenated.

    NOTE(review): categories and subcategories index the same embedding table;
    confirm that the two id spaces handed to this encoder do not collide.
    """

    def __init__(self, config, word_embedding):
        """
        Args:
            config: reads num_categories, num_filters, window_size (odd, >= 1),
                word_embedding_dim, news_query_vector_dim, num_words_title,
                dropout_probability, use_category and use_subcategory.
            word_embedding: embedding module applied to the title word ids.
        """
        super().__init__()
        self.config = config
        self.word_embedding = word_embedding
        self.category_embedding = nn.Embedding(config.num_categories,
                                               config.num_filters,
                                               padding_idx=0)
        assert config.window_size >= 1 and config.window_size % 2 == 1

        # Padding of (window_size - 1) / 2 keeps one output per title position.
        self.title_CNN = nn.Conv2d(
            1,
            config.num_filters,
            (config.window_size, config.word_embedding_dim),
            padding=((config.window_size - 1) // 2, 0))
        self.title_attention = AdditiveAttention(config.news_query_vector_dim,
                                                 config.num_filters)

    def forward(self, news):
        """
        Args:
            news: batch_size, feature_len integer tensor. The last axis holds
                num_words_title title word ids, followed by the category id
                (if use_category) and the subcategory id (if use_subcategory);
                any further columns are ignored.
        Returns:
            (shape) batch_size, num_filters * (1 + number of enabled category features)
        """
        # Follow the module's own parameters rather than a global device, so the
        # encoder also works when the model stays on the CPU or on another GPU.
        news = news.to(self.category_embedding.weight.device)

        # extract title, cat, subcat
        start = self.config.num_words_title
        title = torch.narrow(news, -1, 0, start).long()
        title_vector = F.dropout(self.word_embedding(title),
                                 p=self.config.dropout_probability,
                                 training=self.training)
        # batch_size, num_filters, num_words_title
        convoluted_title_vector = self.title_CNN(title_vector.unsqueeze(dim=1)).squeeze(dim=3)
        activated_title_vector = F.dropout(F.relu(convoluted_title_vector),
                                           p=self.config.dropout_probability,
                                           training=self.training)
        # batch_size, num_filters
        weighted_title_vector = self.title_attention(activated_title_vector.transpose(1, 2))

        # Collect only the enabled parts; a disabled feature previously left its
        # vector undefined and the concatenation raised NameError.
        parts = [weighted_title_vector]
        if self.config.use_category:
            category = torch.narrow(news, -1, start, 1).squeeze(dim=-1).long()
            parts.append(self.category_embedding(category))
            start += 1
        if self.config.use_subcategory:
            subcategory = torch.narrow(news, -1, start, 1).squeeze(dim=-1).long()
            parts.append(self.category_embedding(subcategory))
            start += 1

        return torch.cat(parts, dim=1)

class UserEncoder(torch.nn.Module):
    """GRU over the clicked-news vectors, combined with a per-user embedding.

    With ``long_short_term_method == 'ini'`` the user embedding initialises the
    GRU hidden state; otherwise the final hidden state is concatenated with it.
    """

    def __init__(self, config):
        super().__init__()
        self.config = config
        assert int(config.num_filters * 1.5) == config.num_filters * 1.5
        input_size = config.num_filters * 3
        if config.long_short_term_method == 'ini':
            hidden_size = config.num_filters * 3
        else:
            hidden_size = int(config.num_filters * 1.5)
        self.gru = nn.GRU(input_size, hidden_size)

    def forward(self, user, clicked_news_length, clicked_news_vector):
        """
        Args:
            user:
                ini: batch_size, num_filters * 3
                con: batch_size, num_filters * 1.5
            clicked_news_length: batch_size,
            clicked_news_vector: batch_size, num_clicked_news_a_user, num_filters * 3
        Returns:
            (shape) batch_size, num_filters * 3
        """
        # Lengths are moved to the CPU as int64, capped at the padded history
        # size, and empty histories are treated as length 1.
        lengths = clicked_news_length.cpu().type(torch.int64)
        lengths = torch.clamp(lengths, max=clicked_news_vector.shape[1])
        lengths = torch.where(lengths == 0, torch.ones_like(lengths), lengths)

        packed_clicked_news_vector = pack_padded_sequence(
            clicked_news_vector,
            lengths,
            batch_first=True,
            enforce_sorted=False)

        if self.config.long_short_term_method == 'ini':
            # 1, batch_size, num_filters * 3
            _, last_hidden = self.gru(packed_clicked_news_vector,
                                      user.unsqueeze(dim=0))
            return last_hidden.squeeze(dim=0)

        _, last_hidden = self.gru(packed_clicked_news_vector)
        return torch.cat((last_hidden.squeeze(dim=0), user), dim=1)

class LSTUR(torch.nn.Module):
    """
    LSTUR network.
    Input 1 + K candidate news and a list of user clicked news, produce the click scores.
    """
    def __init__(self, config, embedding_matrix):
        """
        Args:
            config: reads freeze_embedding, num_filters, num_users and
                long_short_term_method, plus everything the news and user
                encoders need.
            embedding_matrix: numpy array of pretrained word vectors.

        # ini
        user embedding: num_filters * 3
        news encoder: num_filters * 3
        GRU:
        input: num_filters * 3
        hidden: num_filters * 3

        # con
        user embedding: num_filters * 1.5
        news encoder: num_filters * 3
        GRU:
        input: num_filters * 3
        hidden: num_filters * 1.5
        """
        super().__init__()
        self.config = config
        word_embedding = torch.from_numpy(embedding_matrix).float()
        # Read the flag from the config handed to the constructor instead of a
        # global ``args`` object that may not exist where the model is built.
        pretrained_word_embedding = nn.Embedding.from_pretrained(word_embedding,
                                                      freeze=config.freeze_embedding,
                                                      padding_idx=0)
        self.news_encoder = NewsEncoder(config, pretrained_word_embedding)
        self.user_encoder = UserEncoder(config)
        self.click_predictor = DotProductClickPredictor()
        assert int(config.num_filters * 1.5) == config.num_filters * 1.5
        self.user_embedding = nn.Embedding(
            config.num_users,
            config.num_filters * 3 if config.long_short_term_method == 'ini'
            else int(config.num_filters * 1.5),
            padding_idx=0)
        self.loss_fn = nn.CrossEntropyLoss()

    def forward(self, user, clicked_news_length, candidate_news, clicked_news, label):
        """
        Args:
            user: batch_size, user indices
            clicked_news_length: batch_size,
            candidate_news: iterated along its first axis; every element is
                passed to the news encoder on its own
                (presumably batch_size, 1 + K, feature_len — TODO confirm)
            clicked_news: handled the same way
                (presumably batch_size, num_clicked_news_a_user, feature_len — TODO confirm)
            label: targets handed to the cross-entropy loss together with the scores
        Returns:
            loss: cross-entropy loss of the click scores
            click_probability: batch_size, 1 + K (unnormalised scores)
        """
        # batch_size, 1 + K, num_filters * 3
        candidate_news_vector = torch.stack(
            [self.news_encoder(x) for x in candidate_news])
        # ini: batch_size, num_filters * 3
        # con: batch_size, num_filters * 1.5
        # TODO what if not drop
        # The user ids follow the embedding table's device rather than a global one.
        user = F.dropout2d(self.user_embedding(
            user.to(self.user_embedding.weight.device)).unsqueeze(dim=0),
                           p=self.config.masking_probability,
                           training=self.training).squeeze(dim=0)
        # batch_size, num_clicked_news_a_user, num_filters * 3
        clicked_news_vector = torch.stack(
            [self.news_encoder(x) for x in clicked_news])
        # batch_size, num_filters * 3
        user_vector = self.user_encoder(user, clicked_news_length,
                                        clicked_news_vector)
        # batch_size, 1 + K
        click_probability = self.click_predictor(candidate_news_vector,
                                                 user_vector)
        loss = self.loss_fn(click_probability, label)
        return loss, click_probability

    def get_news_vector(self, news):
        """Encode ``news`` with the news encoder."""
        # batch_size, num_filters * 3
        return self.news_encoder(news)

    def get_user_vector(self, user, clicked_news_length, clicked_news_vector):
        """
        Args:
            user: batch_size
            clicked_news_length: batch_size
            clicked_news_vector: batch_size, num_clicked_news_a_user, num_filters * 3
        Returns:
            (shape) batch_size, num_filters * 3
        """
        # ini: batch_size, num_filters * 3
        # con: batch_size, num_filters * 1.5
        user = self.user_embedding(user.to(self.user_embedding.weight.device))
        # batch_size, num_filters * 3
        return self.user_encoder(user, clicked_news_length,
                                 clicked_news_vector)

    def get_prediction(self, news_vector, user_vector):
        """
        Args:
            news_vector: candidate_size, X
            user_vector: X
        Returns:
            click_probability: candidate_size
        """
        # candidate_size
        return self.click_predictor(
            news_vector.unsqueeze(dim=0),
            user_vector.unsqueeze(dim=0)).squeeze(dim=0)

In [None]:
# Select the training branch of the cells below, overriding the configured mode.
args.mode = 'train'

In [None]:
# Build the per-GPU training shards, or count the samples of existing shards.
if 'train' in args.mode:
    if args.prepare:
        logging.info('Preparing training data...')
        total_sample_num = prepare_training_data(args.train_data_dir, args.nGPU, args.npratio, args.seed)
    else:
        total_sample_num = 0
        for i in range(args.nGPU):
            data_file_path = os.path.join(args.train_data_dir, f'behaviors_np{args.npratio}_{i}.tsv')
            print(data_file_path)
            if not os.path.exists(data_file_path):
                logging.error(f'Splited training data {data_file_path} for GPU {i} does not exist. Please set the parameter --prepare as True and rerun the code.')
                # Raise instead of exit(): exit() would shut down the notebook kernel.
                raise FileNotFoundError(data_file_path)
            # Count lines in Python: an unquoted `wc -l {path}` breaks on paths that
            # contain spaces, which the configured Drive directories do.
            with open(data_file_path, 'rb') as shard_file:
                total_sample_num += sum(1 for _ in shard_file)
        logging.info('Skip training data preparation.')
    logging.info(f'{total_sample_num} training samples, {total_sample_num // args.batch_size // args.nGPU} batches in total.')



INFO:root:Preparing training data...


[INFO 2025-02-27 14:26:46,636] Preparing training data...


156965it [00:03, 45324.45it/s]
INFO:root:Writing files...


[INFO 2025-02-27 14:26:51,185] Writing files...


INFO:root:236344 training samples, 7385 batches in total.


[INFO 2025-02-27 14:26:54,035] 236344 training samples, 7385 batches in total.


In [None]:
# Single-process notebook run: worker index 0, which also enables the log lines below.
rank = 0
# Parse the training news and build the category/subcategory/word vocabularies.
news, news_index, category_dict, subcategory_dict, word_dict = read_news(
		os.path.join(args.train_data_dir, 'news.tsv'), args.train_abstract_dir, args, mode='train')

# Convert every news item into fixed-size integer id arrays.
news_title, news_category, news_subcategory, news_abstract = get_doc_input(
    news, news_index, category_dict, subcategory_dict, word_dict, args)
# Only title, category and subcategory are concatenated here; news_abstract is
# computed above but left out of the feature matrix.
news_combined = np.concatenate([x for x in [news_title, news_category, news_subcategory] if x is not None], axis=-1)

if rank == 0:
    logging.info('Initializing word embedding matrix...')

# Fill the embedding matrix from the GloVe file; have_word lists the vocabulary
# words for which a pretrained vector was found.
embedding_matrix, have_word = load_matrix(args.glove_embedding_path,
                                                word_dict,
                                                args.word_embedding_dim)
if rank == 0:
    logging.info(f'Word dict length: {len(word_dict)}')
    logging.info(f'Have words: {len(have_word)}')
    # Share of vocabulary words without a pretrained vector.
    logging.info(f'Missing rate: {(len(word_dict) - len(have_word)) / len(word_dict)}')

51282it [00:05, 10057.30it/s]
100%|██████████| 51282/51282 [00:00<00:00, 191133.35it/s]
INFO:root:Initializing word embedding matrix...


[INFO 2025-02-27 14:28:00,897] Initializing word embedding matrix...


INFO:root:Word dict length: 12519


[INFO 2025-02-27 14:29:32,785] Word dict length: 12519


INFO:root:Have words: 11960


[INFO 2025-02-27 14:29:32,786] Have words: 11960


INFO:root:Missing rate: 0.0446521287642783


[INFO 2025-02-27 14:29:32,787] Missing rate: 0.0446521287642783


In [None]:
import torch.optim as optim


def _save_checkpoint(ckpt_path, model, is_distributed,
                     category_dict, subcategory_dict, word_dict):
    """Write the model weights and the vocabulary dictionaries to ``ckpt_path``.

    Args:
        ckpt_path: Destination file for ``torch.save``.
        model: The (possibly DistributedDataParallel-wrapped) model to save.
        is_distributed: When True, the first dotted component of every
            state-dict key (the wrapper prefix) is stripped so the weights
            can be loaded into an unwrapped model.
        category_dict, subcategory_dict, word_dict: Lookup dictionaries stored
            next to the weights.
    """
    state_dict = model.state_dict()
    if is_distributed:
        state_dict = {'.'.join(k.split('.')[1:]): v for k, v in state_dict.items()}
    torch.save(
        {
            'model_state_dict': state_dict,
            'category_dict': category_dict,
            'subcategory_dict': subcategory_dict,
            'word_dict': word_dict,
        }, ckpt_path)
    logging.info(f"Model saved to {ckpt_path}.")


model = LSTUR(args, embedding_matrix)
is_distributed = False

# Resuming from a checkpoint is currently disabled. The original logic is kept
# here for reference (get_checkpoint is not visible in this part of the file):
#
# if args.load_ckpt_name is not None:
#     ckpt_path = get_checkpoint(args.model_dir, args.load_ckpt_name)
#     checkpoint = torch.load(ckpt_path, map_location='cpu')
#     model.load_state_dict(checkpoint['model_state_dict'])
#     logging.info(f"Model loaded from {ckpt_path}.")

# Move the model to its device before building the optimizer so the optimizer
# is constructed over the parameters that are actually trained.
if args.enable_gpu:
    model = model.cuda(rank)

optimizer = optim.Adam(model.parameters(), lr=args.lr)

if is_distributed:
    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[rank])

data_file_path = os.path.join(args.train_data_dir, f'behaviors_np{args.npratio}_{rank}.tsv')

dataset = DatasetTrain(data_file_path, news_index, news_combined, "/content/user2id.json", args)

dataloader = DataLoader(dataset, batch_size=args.batch_size)

# Make sure the checkpoint directory exists before the first torch.save.
if rank == 0:
    os.makedirs(args.model_dir, exist_ok=True)

logging.info('Training...')
for ep in range(args.start_epoch, args.epochs):
    # Running sums over the batches of this epoch; reset every epoch.
    loss = 0.0
    accuracy = 0.0
    for cnt, (user_index, clicked_news_length, candidate_news_feature, clicked_news_feature, label) in enumerate(dataloader):
        if args.enable_gpu:
            user_index = user_index.cuda(rank, non_blocking=True)
            clicked_news_length = clicked_news_length.cuda(rank, non_blocking=True)
            candidate_news_feature = candidate_news_feature.cuda(rank, non_blocking=True)
            clicked_news_feature = clicked_news_feature.cuda(rank, non_blocking=True)
            label = label.cuda(rank, non_blocking=True)

        bz_loss, y_hat = model(user_index, clicked_news_length, candidate_news_feature, clicked_news_feature, label)
        loss += bz_loss.data.float()
        accuracy += acc(label, y_hat)
        optimizer.zero_grad()
        bz_loss.backward()
        optimizer.step()

        if cnt % args.log_steps == 0:
            # cnt is zero-based, so cnt + 1 batches have been accumulated.
            # Dividing by cnt reported inf on the first step of every epoch
            # and slightly overstated every later average.
            seen_batches = cnt + 1
            logging.info(
                '[{}] Ed: {}, train_loss: {:.5f}, acc: {:.5f}'.format(
                    rank, cnt * args.batch_size, loss.data / seen_batches, accuracy / seen_batches)
            )

        # Periodic mid-epoch checkpoint, written by the main process only.
        if rank == 0 and cnt != 0 and cnt % args.save_steps == 0:
            _save_checkpoint(
                os.path.join(args.model_dir, f'epoch-{ep+1}-{cnt}.pt'),
                model, is_distributed, category_dict, subcategory_dict, word_dict)

    # End-of-epoch checkpoint, written by the main process only.
    if rank == 0:
        _save_checkpoint(
            os.path.join(args.model_dir, f'epoch-{ep+1}.pt'),
            model, is_distributed, category_dict, subcategory_dict, word_dict)

# Logged once, after the last epoch (it used to fire after every epoch).
logging.info('Training finish.')



INFO:root:Training...


[INFO 2025-02-27 14:29:37,832] Training...


INFO:root:[0] Ed: 0, train_loss: inf, acc: inf


[INFO 2025-02-27 14:29:39,471] [0] Ed: 0, train_loss: inf, acc: inf


INFO:root:[0] Ed: 3200, train_loss: 6.71425, acc: 0.25031


[INFO 2025-02-27 14:29:50,504] [0] Ed: 3200, train_loss: 6.71425, acc: 0.25031


INFO:root:[0] Ed: 6400, train_loss: 6.29955, acc: 0.25187


[INFO 2025-02-27 14:30:01,543] [0] Ed: 6400, train_loss: 6.29955, acc: 0.25187


INFO:root:[0] Ed: 9600, train_loss: 6.05799, acc: 0.25740


[INFO 2025-02-27 14:30:12,565] [0] Ed: 9600, train_loss: 6.05799, acc: 0.25740


INFO:root:[0] Ed: 12800, train_loss: 5.78803, acc: 0.25992


[INFO 2025-02-27 14:30:23,608] [0] Ed: 12800, train_loss: 5.78803, acc: 0.25992


INFO:root:[0] Ed: 16000, train_loss: 5.57674, acc: 0.26269


[INFO 2025-02-27 14:30:34,623] [0] Ed: 16000, train_loss: 5.57674, acc: 0.26269


INFO:root:[0] Ed: 19200, train_loss: 5.32837, acc: 0.26630


[INFO 2025-02-27 14:30:45,683] [0] Ed: 19200, train_loss: 5.32837, acc: 0.26630


INFO:root:[0] Ed: 22400, train_loss: 5.08878, acc: 0.26942


[INFO 2025-02-27 14:30:56,727] [0] Ed: 22400, train_loss: 5.08878, acc: 0.26942


INFO:root:[0] Ed: 25600, train_loss: 4.87446, acc: 0.27246


[INFO 2025-02-27 14:31:07,771] [0] Ed: 25600, train_loss: 4.87446, acc: 0.27246


INFO:root:[0] Ed: 28800, train_loss: 4.67855, acc: 0.27451


[INFO 2025-02-27 14:31:18,795] [0] Ed: 28800, train_loss: 4.67855, acc: 0.27451


INFO:root:[0] Ed: 32000, train_loss: 4.51674, acc: 0.27697


[INFO 2025-02-27 14:31:29,728] [0] Ed: 32000, train_loss: 4.51674, acc: 0.27697


INFO:root:[0] Ed: 35200, train_loss: 4.37696, acc: 0.27878


[INFO 2025-02-27 14:31:40,674] [0] Ed: 35200, train_loss: 4.37696, acc: 0.27878


INFO:root:[0] Ed: 38400, train_loss: 4.25099, acc: 0.28128


[INFO 2025-02-27 14:31:51,616] [0] Ed: 38400, train_loss: 4.25099, acc: 0.28128


INFO:root:[0] Ed: 41600, train_loss: 4.14697, acc: 0.28212


[INFO 2025-02-27 14:32:02,577] [0] Ed: 41600, train_loss: 4.14697, acc: 0.28212


INFO:root:[0] Ed: 44800, train_loss: 4.05125, acc: 0.28366


[INFO 2025-02-27 14:32:13,522] [0] Ed: 44800, train_loss: 4.05125, acc: 0.28366


INFO:root:[0] Ed: 48000, train_loss: 3.96082, acc: 0.28527


[INFO 2025-02-27 14:32:24,472] [0] Ed: 48000, train_loss: 3.96082, acc: 0.28527


INFO:root:[0] Ed: 51200, train_loss: 3.87234, acc: 0.28762


[INFO 2025-02-27 14:32:35,426] [0] Ed: 51200, train_loss: 3.87234, acc: 0.28762


INFO:root:[0] Ed: 54400, train_loss: 3.80022, acc: 0.28991


[INFO 2025-02-27 14:32:46,679] [0] Ed: 54400, train_loss: 3.80022, acc: 0.28991


INFO:root:[0] Ed: 57600, train_loss: 3.73102, acc: 0.29193


[INFO 2025-02-27 14:32:57,669] [0] Ed: 57600, train_loss: 3.73102, acc: 0.29193


INFO:root:[0] Ed: 60800, train_loss: 3.67012, acc: 0.29225


[INFO 2025-02-27 14:33:08,571] [0] Ed: 60800, train_loss: 3.67012, acc: 0.29225


INFO:root:[0] Ed: 64000, train_loss: 3.60761, acc: 0.29369


[INFO 2025-02-27 14:33:19,445] [0] Ed: 64000, train_loss: 3.60761, acc: 0.29369


INFO:root:[0] Ed: 67200, train_loss: 3.54786, acc: 0.29586


[INFO 2025-02-27 14:33:30,360] [0] Ed: 67200, train_loss: 3.54786, acc: 0.29586


INFO:root:[0] Ed: 70400, train_loss: 3.49425, acc: 0.29697


[INFO 2025-02-27 14:33:41,274] [0] Ed: 70400, train_loss: 3.49425, acc: 0.29697


INFO:root:[0] Ed: 73600, train_loss: 3.44590, acc: 0.29810


[INFO 2025-02-27 14:33:52,207] [0] Ed: 73600, train_loss: 3.44590, acc: 0.29810


INFO:root:[0] Ed: 76800, train_loss: 3.39785, acc: 0.29954


[INFO 2025-02-27 14:34:03,069] [0] Ed: 76800, train_loss: 3.39785, acc: 0.29954


INFO:root:[0] Ed: 80000, train_loss: 3.35868, acc: 0.30034


[INFO 2025-02-27 14:34:13,983] [0] Ed: 80000, train_loss: 3.35868, acc: 0.30034


INFO:root:[0] Ed: 83200, train_loss: 3.31536, acc: 0.30166


[INFO 2025-02-27 14:34:24,947] [0] Ed: 83200, train_loss: 3.31536, acc: 0.30166


INFO:root:[0] Ed: 86400, train_loss: 3.27755, acc: 0.30352


[INFO 2025-02-27 14:34:35,818] [0] Ed: 86400, train_loss: 3.27755, acc: 0.30352


INFO:root:[0] Ed: 89600, train_loss: 3.24799, acc: 0.30463


[INFO 2025-02-27 14:34:46,728] [0] Ed: 89600, train_loss: 3.24799, acc: 0.30463


INFO:root:[0] Ed: 92800, train_loss: 3.21677, acc: 0.30568


[INFO 2025-02-27 14:34:57,628] [0] Ed: 92800, train_loss: 3.21677, acc: 0.30568


INFO:root:[0] Ed: 96000, train_loss: 3.19067, acc: 0.30623


[INFO 2025-02-27 14:35:08,569] [0] Ed: 96000, train_loss: 3.19067, acc: 0.30623


INFO:root:[0] Ed: 99200, train_loss: 3.16494, acc: 0.30711


[INFO 2025-02-27 14:35:19,486] [0] Ed: 99200, train_loss: 3.16494, acc: 0.30711


INFO:root:[0] Ed: 102400, train_loss: 3.13631, acc: 0.30851


[INFO 2025-02-27 14:35:30,418] [0] Ed: 102400, train_loss: 3.13631, acc: 0.30851


INFO:root:[0] Ed: 105600, train_loss: 3.11069, acc: 0.30987


[INFO 2025-02-27 14:35:41,313] [0] Ed: 105600, train_loss: 3.11069, acc: 0.30987


INFO:root:[0] Ed: 108800, train_loss: 3.08862, acc: 0.31028


[INFO 2025-02-27 14:35:52,311] [0] Ed: 108800, train_loss: 3.08862, acc: 0.31028


INFO:root:[0] Ed: 112000, train_loss: 3.06599, acc: 0.31134


[INFO 2025-02-27 14:36:03,234] [0] Ed: 112000, train_loss: 3.06599, acc: 0.31134


INFO:root:[0] Ed: 115200, train_loss: 3.04373, acc: 0.31219


[INFO 2025-02-27 14:36:14,136] [0] Ed: 115200, train_loss: 3.04373, acc: 0.31219


INFO:root:[0] Ed: 118400, train_loss: 3.02331, acc: 0.31316


[INFO 2025-02-27 14:36:25,100] [0] Ed: 118400, train_loss: 3.02331, acc: 0.31316


INFO:root:[0] Ed: 121600, train_loss: 3.00473, acc: 0.31403


[INFO 2025-02-27 14:36:36,049] [0] Ed: 121600, train_loss: 3.00473, acc: 0.31403


INFO:root:[0] Ed: 124800, train_loss: 2.98616, acc: 0.31512


[INFO 2025-02-27 14:36:47,174] [0] Ed: 124800, train_loss: 2.98616, acc: 0.31512


INFO:root:[0] Ed: 128000, train_loss: 2.96940, acc: 0.31563


[INFO 2025-02-27 14:36:58,102] [0] Ed: 128000, train_loss: 2.96940, acc: 0.31563


INFO:root:[0] Ed: 131200, train_loss: 2.95239, acc: 0.31636


[INFO 2025-02-27 14:37:09,036] [0] Ed: 131200, train_loss: 2.95239, acc: 0.31636


INFO:root:[0] Ed: 134400, train_loss: 2.93699, acc: 0.31700


[INFO 2025-02-27 14:37:19,942] [0] Ed: 134400, train_loss: 2.93699, acc: 0.31700


INFO:root:[0] Ed: 137600, train_loss: 2.92243, acc: 0.31799


[INFO 2025-02-27 14:37:30,890] [0] Ed: 137600, train_loss: 2.92243, acc: 0.31799


INFO:root:[0] Ed: 140800, train_loss: 2.90642, acc: 0.31881


[INFO 2025-02-27 14:37:41,790] [0] Ed: 140800, train_loss: 2.90642, acc: 0.31881


INFO:root:[0] Ed: 144000, train_loss: 2.89044, acc: 0.31955


[INFO 2025-02-27 14:37:52,710] [0] Ed: 144000, train_loss: 2.89044, acc: 0.31955


INFO:root:[0] Ed: 147200, train_loss: 2.87661, acc: 0.32024


[INFO 2025-02-27 14:38:03,600] [0] Ed: 147200, train_loss: 2.87661, acc: 0.32024


INFO:root:[0] Ed: 150400, train_loss: 2.86494, acc: 0.32083


[INFO 2025-02-27 14:38:14,547] [0] Ed: 150400, train_loss: 2.86494, acc: 0.32083


INFO:root:[0] Ed: 153600, train_loss: 2.85229, acc: 0.32128


[INFO 2025-02-27 14:38:25,441] [0] Ed: 153600, train_loss: 2.85229, acc: 0.32128


INFO:root:[0] Ed: 156800, train_loss: 2.83856, acc: 0.32191


[INFO 2025-02-27 14:38:36,336] [0] Ed: 156800, train_loss: 2.83856, acc: 0.32191


INFO:root:[0] Ed: 160000, train_loss: 2.82611, acc: 0.32245


[INFO 2025-02-27 14:38:47,278] [0] Ed: 160000, train_loss: 2.82611, acc: 0.32245


INFO:root:[0] Ed: 163200, train_loss: 2.81388, acc: 0.32305


[INFO 2025-02-27 14:38:58,227] [0] Ed: 163200, train_loss: 2.81388, acc: 0.32305


INFO:root:[0] Ed: 166400, train_loss: 2.80476, acc: 0.32342


[INFO 2025-02-27 14:39:09,116] [0] Ed: 166400, train_loss: 2.80476, acc: 0.32342


INFO:root:[0] Ed: 169600, train_loss: 2.79504, acc: 0.32386


[INFO 2025-02-27 14:39:20,052] [0] Ed: 169600, train_loss: 2.79504, acc: 0.32386


INFO:root:[0] Ed: 172800, train_loss: 2.78534, acc: 0.32428


[INFO 2025-02-27 14:39:31,011] [0] Ed: 172800, train_loss: 2.78534, acc: 0.32428


INFO:root:[0] Ed: 176000, train_loss: 2.77449, acc: 0.32500


[INFO 2025-02-27 14:39:41,876] [0] Ed: 176000, train_loss: 2.77449, acc: 0.32500


INFO:root:[0] Ed: 179200, train_loss: 2.76404, acc: 0.32551


[INFO 2025-02-27 14:39:52,879] [0] Ed: 179200, train_loss: 2.76404, acc: 0.32551


INFO:root:[0] Ed: 182400, train_loss: 2.75301, acc: 0.32634


[INFO 2025-02-27 14:40:03,807] [0] Ed: 182400, train_loss: 2.75301, acc: 0.32634


INFO:root:[0] Ed: 185600, train_loss: 2.74231, acc: 0.32697


[INFO 2025-02-27 14:40:14,718] [0] Ed: 185600, train_loss: 2.74231, acc: 0.32697


INFO:root:[0] Ed: 188800, train_loss: 2.73118, acc: 0.32781


[INFO 2025-02-27 14:40:25,718] [0] Ed: 188800, train_loss: 2.73118, acc: 0.32781


INFO:root:[0] Ed: 192000, train_loss: 2.72218, acc: 0.32809


[INFO 2025-02-27 14:40:36,706] [0] Ed: 192000, train_loss: 2.72218, acc: 0.32809


INFO:root:[0] Ed: 195200, train_loss: 2.71246, acc: 0.32852


[INFO 2025-02-27 14:40:47,931] [0] Ed: 195200, train_loss: 2.71246, acc: 0.32852


INFO:root:[0] Ed: 198400, train_loss: 2.70352, acc: 0.32882


[INFO 2025-02-27 14:40:58,914] [0] Ed: 198400, train_loss: 2.70352, acc: 0.32882


INFO:root:[0] Ed: 201600, train_loss: 2.69425, acc: 0.32921


[INFO 2025-02-27 14:41:09,947] [0] Ed: 201600, train_loss: 2.69425, acc: 0.32921


INFO:root:[0] Ed: 204800, train_loss: 2.68600, acc: 0.32970


[INFO 2025-02-27 14:41:20,929] [0] Ed: 204800, train_loss: 2.68600, acc: 0.32970


INFO:root:[0] Ed: 208000, train_loss: 2.67658, acc: 0.33013


[INFO 2025-02-27 14:41:31,924] [0] Ed: 208000, train_loss: 2.67658, acc: 0.33013


INFO:root:[0] Ed: 211200, train_loss: 2.66772, acc: 0.33040


[INFO 2025-02-27 14:41:42,851] [0] Ed: 211200, train_loss: 2.66772, acc: 0.33040


INFO:root:[0] Ed: 214400, train_loss: 2.65952, acc: 0.33089


[INFO 2025-02-27 14:41:53,818] [0] Ed: 214400, train_loss: 2.65952, acc: 0.33089


INFO:root:[0] Ed: 217600, train_loss: 2.65110, acc: 0.33124


[INFO 2025-02-27 14:42:04,784] [0] Ed: 217600, train_loss: 2.65110, acc: 0.33124


INFO:root:[0] Ed: 220800, train_loss: 2.64297, acc: 0.33180


[INFO 2025-02-27 14:42:15,712] [0] Ed: 220800, train_loss: 2.64297, acc: 0.33180


INFO:root:[0] Ed: 224000, train_loss: 2.63518, acc: 0.33220


[INFO 2025-02-27 14:42:26,603] [0] Ed: 224000, train_loss: 2.63518, acc: 0.33220


INFO:root:[0] Ed: 227200, train_loss: 2.62725, acc: 0.33262


[INFO 2025-02-27 14:42:37,558] [0] Ed: 227200, train_loss: 2.62725, acc: 0.33262


INFO:root:[0] Ed: 230400, train_loss: 2.61891, acc: 0.33309


[INFO 2025-02-27 14:42:48,423] [0] Ed: 230400, train_loss: 2.61891, acc: 0.33309


INFO:root:[0] Ed: 233600, train_loss: 2.61138, acc: 0.33369


[INFO 2025-02-27 14:42:59,301] [0] Ed: 233600, train_loss: 2.61138, acc: 0.33369


INFO:root:Training finish.


[INFO 2025-02-27 14:43:08,518] Training finish.


INFO:root:Model saved to /content/model/epoch-1.pt.


[INFO 2025-02-27 14:43:08,934] Model saved to /content/model/epoch-1.pt.


INFO:root:[0] Ed: 0, train_loss: inf, acc: inf


[INFO 2025-02-27 14:43:09,073] [0] Ed: 0, train_loss: inf, acc: inf


INFO:root:[0] Ed: 3200, train_loss: 2.00506, acc: 0.35781


[INFO 2025-02-27 14:43:19,982] [0] Ed: 3200, train_loss: 2.00506, acc: 0.35781


INFO:root:[0] Ed: 6400, train_loss: 1.98687, acc: 0.36141


[INFO 2025-02-27 14:43:30,940] [0] Ed: 6400, train_loss: 1.98687, acc: 0.36141


INFO:root:[0] Ed: 9600, train_loss: 2.01743, acc: 0.36021


[INFO 2025-02-27 14:43:41,844] [0] Ed: 9600, train_loss: 2.01743, acc: 0.36021


INFO:root:[0] Ed: 12800, train_loss: 2.01405, acc: 0.35875


[INFO 2025-02-27 14:43:52,777] [0] Ed: 12800, train_loss: 2.01405, acc: 0.35875


INFO:root:[0] Ed: 16000, train_loss: 2.01834, acc: 0.35963


[INFO 2025-02-27 14:44:03,703] [0] Ed: 16000, train_loss: 2.01834, acc: 0.35963


INFO:root:[0] Ed: 19200, train_loss: 2.00968, acc: 0.36182


[INFO 2025-02-27 14:44:14,676] [0] Ed: 19200, train_loss: 2.00968, acc: 0.36182


INFO:root:[0] Ed: 22400, train_loss: 1.99527, acc: 0.36330


[INFO 2025-02-27 14:44:25,691] [0] Ed: 22400, train_loss: 1.99527, acc: 0.36330


INFO:root:[0] Ed: 25600, train_loss: 1.99016, acc: 0.36250


[INFO 2025-02-27 14:44:36,656] [0] Ed: 25600, train_loss: 1.99016, acc: 0.36250


INFO:root:[0] Ed: 28800, train_loss: 1.98183, acc: 0.36326


[INFO 2025-02-27 14:44:47,627] [0] Ed: 28800, train_loss: 1.98183, acc: 0.36326


INFO:root:[0] Ed: 32000, train_loss: 1.98063, acc: 0.36366


[INFO 2025-02-27 14:44:58,788] [0] Ed: 32000, train_loss: 1.98063, acc: 0.36366


INFO:root:[0] Ed: 35200, train_loss: 1.97932, acc: 0.36264


[INFO 2025-02-27 14:45:09,710] [0] Ed: 35200, train_loss: 1.97932, acc: 0.36264


INFO:root:[0] Ed: 38400, train_loss: 1.97394, acc: 0.36276


[INFO 2025-02-27 14:45:20,646] [0] Ed: 38400, train_loss: 1.97394, acc: 0.36276


INFO:root:[0] Ed: 41600, train_loss: 1.97310, acc: 0.36329


[INFO 2025-02-27 14:45:31,584] [0] Ed: 41600, train_loss: 1.97310, acc: 0.36329


INFO:root:[0] Ed: 44800, train_loss: 1.97163, acc: 0.36431


[INFO 2025-02-27 14:45:42,512] [0] Ed: 44800, train_loss: 1.97163, acc: 0.36431


INFO:root:[0] Ed: 48000, train_loss: 1.96785, acc: 0.36427


[INFO 2025-02-27 14:45:53,462] [0] Ed: 48000, train_loss: 1.96785, acc: 0.36427


INFO:root:[0] Ed: 51200, train_loss: 1.96170, acc: 0.36445


[INFO 2025-02-27 14:46:04,412] [0] Ed: 51200, train_loss: 1.96170, acc: 0.36445


INFO:root:[0] Ed: 54400, train_loss: 1.95567, acc: 0.36599


[INFO 2025-02-27 14:46:15,376] [0] Ed: 54400, train_loss: 1.95567, acc: 0.36599


INFO:root:[0] Ed: 57600, train_loss: 1.95053, acc: 0.36712


[INFO 2025-02-27 14:46:26,289] [0] Ed: 57600, train_loss: 1.95053, acc: 0.36712


INFO:root:[0] Ed: 60800, train_loss: 1.94690, acc: 0.36727


[INFO 2025-02-27 14:46:37,235] [0] Ed: 60800, train_loss: 1.94690, acc: 0.36727


INFO:root:[0] Ed: 64000, train_loss: 1.94007, acc: 0.36777


[INFO 2025-02-27 14:46:48,173] [0] Ed: 64000, train_loss: 1.94007, acc: 0.36777


INFO:root:[0] Ed: 67200, train_loss: 1.93623, acc: 0.36908


[INFO 2025-02-27 14:46:59,162] [0] Ed: 67200, train_loss: 1.93623, acc: 0.36908


INFO:root:[0] Ed: 70400, train_loss: 1.93232, acc: 0.36905


[INFO 2025-02-27 14:47:10,067] [0] Ed: 70400, train_loss: 1.93232, acc: 0.36905


INFO:root:[0] Ed: 73600, train_loss: 1.92779, acc: 0.36931


[INFO 2025-02-27 14:47:20,930] [0] Ed: 73600, train_loss: 1.92779, acc: 0.36931


INFO:root:[0] Ed: 76800, train_loss: 1.92254, acc: 0.37007


[INFO 2025-02-27 14:47:31,832] [0] Ed: 76800, train_loss: 1.92254, acc: 0.37007


INFO:root:[0] Ed: 80000, train_loss: 1.91663, acc: 0.37032


[INFO 2025-02-27 14:47:42,751] [0] Ed: 80000, train_loss: 1.91663, acc: 0.37032


INFO:root:[0] Ed: 83200, train_loss: 1.90980, acc: 0.37050


[INFO 2025-02-27 14:47:53,739] [0] Ed: 83200, train_loss: 1.90980, acc: 0.37050


INFO:root:[0] Ed: 86400, train_loss: 1.90587, acc: 0.37112


[INFO 2025-02-27 14:48:04,678] [0] Ed: 86400, train_loss: 1.90587, acc: 0.37112


INFO:root:[0] Ed: 89600, train_loss: 1.90167, acc: 0.37135


[INFO 2025-02-27 14:48:15,609] [0] Ed: 89600, train_loss: 1.90167, acc: 0.37135


INFO:root:[0] Ed: 92800, train_loss: 1.89481, acc: 0.37163


[INFO 2025-02-27 14:48:26,517] [0] Ed: 92800, train_loss: 1.89481, acc: 0.37163


INFO:root:[0] Ed: 96000, train_loss: 1.89140, acc: 0.37192


[INFO 2025-02-27 14:48:37,455] [0] Ed: 96000, train_loss: 1.89140, acc: 0.37192


INFO:root:[0] Ed: 99200, train_loss: 1.88571, acc: 0.37178


[INFO 2025-02-27 14:48:48,415] [0] Ed: 99200, train_loss: 1.88571, acc: 0.37178


INFO:root:[0] Ed: 102400, train_loss: 1.88048, acc: 0.37215


[INFO 2025-02-27 14:48:59,646] [0] Ed: 102400, train_loss: 1.88048, acc: 0.37215


INFO:root:[0] Ed: 105600, train_loss: 1.87507, acc: 0.37260


[INFO 2025-02-27 14:49:10,654] [0] Ed: 105600, train_loss: 1.87507, acc: 0.37260


INFO:root:[0] Ed: 108800, train_loss: 1.86953, acc: 0.37255


[INFO 2025-02-27 14:49:21,651] [0] Ed: 108800, train_loss: 1.86953, acc: 0.37255


INFO:root:[0] Ed: 112000, train_loss: 1.86339, acc: 0.37305


[INFO 2025-02-27 14:49:32,626] [0] Ed: 112000, train_loss: 1.86339, acc: 0.37305


INFO:root:[0] Ed: 115200, train_loss: 1.85801, acc: 0.37333


[INFO 2025-02-27 14:49:43,483] [0] Ed: 115200, train_loss: 1.85801, acc: 0.37333


INFO:root:[0] Ed: 118400, train_loss: 1.85249, acc: 0.37385


[INFO 2025-02-27 14:49:54,451] [0] Ed: 118400, train_loss: 1.85249, acc: 0.37385


INFO:root:[0] Ed: 121600, train_loss: 1.84770, acc: 0.37419


[INFO 2025-02-27 14:50:05,425] [0] Ed: 121600, train_loss: 1.84770, acc: 0.37419


INFO:root:[0] Ed: 124800, train_loss: 1.84263, acc: 0.37468


[INFO 2025-02-27 14:50:16,390] [0] Ed: 124800, train_loss: 1.84263, acc: 0.37468


INFO:root:[0] Ed: 128000, train_loss: 1.83806, acc: 0.37471


[INFO 2025-02-27 14:50:27,304] [0] Ed: 128000, train_loss: 1.83806, acc: 0.37471


INFO:root:[0] Ed: 131200, train_loss: 1.83337, acc: 0.37504


[INFO 2025-02-27 14:50:38,241] [0] Ed: 131200, train_loss: 1.83337, acc: 0.37504


INFO:root:[0] Ed: 134400, train_loss: 1.82842, acc: 0.37533


[INFO 2025-02-27 14:50:49,203] [0] Ed: 134400, train_loss: 1.82842, acc: 0.37533


INFO:root:[0] Ed: 137600, train_loss: 1.82337, acc: 0.37600


[INFO 2025-02-27 14:51:00,163] [0] Ed: 137600, train_loss: 1.82337, acc: 0.37600


INFO:root:[0] Ed: 140800, train_loss: 1.81879, acc: 0.37633


[INFO 2025-02-27 14:51:11,043] [0] Ed: 140800, train_loss: 1.81879, acc: 0.37633


INFO:root:[0] Ed: 144000, train_loss: 1.81345, acc: 0.37677


[INFO 2025-02-27 14:51:21,921] [0] Ed: 144000, train_loss: 1.81345, acc: 0.37677


INFO:root:[0] Ed: 147200, train_loss: 1.80902, acc: 0.37713


[INFO 2025-02-27 14:51:32,813] [0] Ed: 147200, train_loss: 1.80902, acc: 0.37713


INFO:root:[0] Ed: 150400, train_loss: 1.80461, acc: 0.37733


[INFO 2025-02-27 14:51:43,778] [0] Ed: 150400, train_loss: 1.80461, acc: 0.37733


INFO:root:[0] Ed: 153600, train_loss: 1.79960, acc: 0.37783


[INFO 2025-02-27 14:51:54,713] [0] Ed: 153600, train_loss: 1.79960, acc: 0.37783


INFO:root:[0] Ed: 156800, train_loss: 1.79551, acc: 0.37821


[INFO 2025-02-27 14:52:05,696] [0] Ed: 156800, train_loss: 1.79551, acc: 0.37821


INFO:root:[0] Ed: 160000, train_loss: 1.79094, acc: 0.37874


[INFO 2025-02-27 14:52:16,661] [0] Ed: 160000, train_loss: 1.79094, acc: 0.37874


INFO:root:[0] Ed: 163200, train_loss: 1.78748, acc: 0.37884


[INFO 2025-02-27 14:52:27,638] [0] Ed: 163200, train_loss: 1.78748, acc: 0.37884


INFO:root:[0] Ed: 166400, train_loss: 1.78396, acc: 0.37879


[INFO 2025-02-27 14:52:38,587] [0] Ed: 166400, train_loss: 1.78396, acc: 0.37879


INFO:root:[0] Ed: 169600, train_loss: 1.78031, acc: 0.37886


[INFO 2025-02-27 14:52:49,526] [0] Ed: 169600, train_loss: 1.78031, acc: 0.37886


INFO:root:[0] Ed: 172800, train_loss: 1.77652, acc: 0.37907


[INFO 2025-02-27 14:53:00,740] [0] Ed: 172800, train_loss: 1.77652, acc: 0.37907


INFO:root:[0] Ed: 176000, train_loss: 1.77277, acc: 0.37949


[INFO 2025-02-27 14:53:11,660] [0] Ed: 176000, train_loss: 1.77277, acc: 0.37949


INFO:root:[0] Ed: 179200, train_loss: 1.76826, acc: 0.37985


[INFO 2025-02-27 14:53:22,600] [0] Ed: 179200, train_loss: 1.76826, acc: 0.37985


INFO:root:[0] Ed: 182400, train_loss: 1.76517, acc: 0.37999


[INFO 2025-02-27 14:53:33,553] [0] Ed: 182400, train_loss: 1.76517, acc: 0.37999


INFO:root:[0] Ed: 185600, train_loss: 1.76100, acc: 0.38044


[INFO 2025-02-27 14:53:44,524] [0] Ed: 185600, train_loss: 1.76100, acc: 0.38044


INFO:root:[0] Ed: 188800, train_loss: 1.75722, acc: 0.38074


[INFO 2025-02-27 14:53:55,508] [0] Ed: 188800, train_loss: 1.75722, acc: 0.38074


INFO:root:[0] Ed: 192000, train_loss: 1.75367, acc: 0.38075


[INFO 2025-02-27 14:54:06,477] [0] Ed: 192000, train_loss: 1.75367, acc: 0.38075


INFO:root:[0] Ed: 195200, train_loss: 1.75079, acc: 0.38082


[INFO 2025-02-27 14:54:17,434] [0] Ed: 195200, train_loss: 1.75079, acc: 0.38082


INFO:root:[0] Ed: 198400, train_loss: 1.74738, acc: 0.38086


[INFO 2025-02-27 14:54:28,384] [0] Ed: 198400, train_loss: 1.74738, acc: 0.38086


INFO:root:[0] Ed: 201600, train_loss: 1.74396, acc: 0.38105


[INFO 2025-02-27 14:54:39,372] [0] Ed: 201600, train_loss: 1.74396, acc: 0.38105


INFO:root:[0] Ed: 204800, train_loss: 1.74099, acc: 0.38116


[INFO 2025-02-27 14:54:50,299] [0] Ed: 204800, train_loss: 1.74099, acc: 0.38116


INFO:root:[0] Ed: 208000, train_loss: 1.73854, acc: 0.38126


[INFO 2025-02-27 14:55:01,254] [0] Ed: 208000, train_loss: 1.73854, acc: 0.38126


INFO:root:[0] Ed: 211200, train_loss: 1.73577, acc: 0.38123


[INFO 2025-02-27 14:55:12,226] [0] Ed: 211200, train_loss: 1.73577, acc: 0.38123


INFO:root:[0] Ed: 214400, train_loss: 1.73342, acc: 0.38132


[INFO 2025-02-27 14:55:23,133] [0] Ed: 214400, train_loss: 1.73342, acc: 0.38132


INFO:root:[0] Ed: 217600, train_loss: 1.73101, acc: 0.38140


[INFO 2025-02-27 14:55:34,085] [0] Ed: 217600, train_loss: 1.73101, acc: 0.38140


INFO:root:[0] Ed: 220800, train_loss: 1.72807, acc: 0.38176


[INFO 2025-02-27 14:55:45,101] [0] Ed: 220800, train_loss: 1.72807, acc: 0.38176


INFO:root:[0] Ed: 224000, train_loss: 1.72526, acc: 0.38194


[INFO 2025-02-27 14:55:56,064] [0] Ed: 224000, train_loss: 1.72526, acc: 0.38194


INFO:root:[0] Ed: 227200, train_loss: 1.72222, acc: 0.38215


[INFO 2025-02-27 14:56:07,029] [0] Ed: 227200, train_loss: 1.72222, acc: 0.38215


INFO:root:[0] Ed: 230400, train_loss: 1.71890, acc: 0.38251


[INFO 2025-02-27 14:56:17,973] [0] Ed: 230400, train_loss: 1.71890, acc: 0.38251


INFO:root:[0] Ed: 233600, train_loss: 1.71566, acc: 0.38289


[INFO 2025-02-27 14:56:28,896] [0] Ed: 233600, train_loss: 1.71566, acc: 0.38289


INFO:root:Training finish.


[INFO 2025-02-27 14:56:38,133] Training finish.


INFO:root:Model saved to /content/model/epoch-2.pt.


[INFO 2025-02-27 14:56:38,532] Model saved to /content/model/epoch-2.pt.


INFO:root:[0] Ed: 0, train_loss: inf, acc: inf


[INFO 2025-02-27 14:56:38,671] [0] Ed: 0, train_loss: inf, acc: inf


INFO:root:[0] Ed: 3200, train_loss: 1.53367, acc: 0.39375


[INFO 2025-02-27 14:56:49,564] [0] Ed: 3200, train_loss: 1.53367, acc: 0.39375


INFO:root:[0] Ed: 6400, train_loss: 1.52000, acc: 0.39828


[INFO 2025-02-27 14:57:00,770] [0] Ed: 6400, train_loss: 1.52000, acc: 0.39828


INFO:root:[0] Ed: 9600, train_loss: 1.51527, acc: 0.39427


[INFO 2025-02-27 14:57:11,670] [0] Ed: 9600, train_loss: 1.51527, acc: 0.39427


INFO:root:[0] Ed: 12800, train_loss: 1.51738, acc: 0.39602


[INFO 2025-02-27 14:57:22,545] [0] Ed: 12800, train_loss: 1.51738, acc: 0.39602


INFO:root:[0] Ed: 16000, train_loss: 1.51924, acc: 0.39331


[INFO 2025-02-27 14:57:33,482] [0] Ed: 16000, train_loss: 1.51924, acc: 0.39331


INFO:root:[0] Ed: 19200, train_loss: 1.51827, acc: 0.39552


[INFO 2025-02-27 14:57:44,387] [0] Ed: 19200, train_loss: 1.51827, acc: 0.39552


INFO:root:[0] Ed: 22400, train_loss: 1.51442, acc: 0.39585


[INFO 2025-02-27 14:57:55,288] [0] Ed: 22400, train_loss: 1.51442, acc: 0.39585


INFO:root:[0] Ed: 25600, train_loss: 1.51499, acc: 0.39562


[INFO 2025-02-27 14:58:06,217] [0] Ed: 25600, train_loss: 1.51499, acc: 0.39562


INFO:root:[0] Ed: 28800, train_loss: 1.51133, acc: 0.39535


[INFO 2025-02-27 14:58:17,167] [0] Ed: 28800, train_loss: 1.51133, acc: 0.39535


INFO:root:[0] Ed: 32000, train_loss: 1.51018, acc: 0.39484


[INFO 2025-02-27 14:58:28,093] [0] Ed: 32000, train_loss: 1.51018, acc: 0.39484


INFO:root:[0] Ed: 35200, train_loss: 1.51252, acc: 0.39378


[INFO 2025-02-27 14:58:39,023] [0] Ed: 35200, train_loss: 1.51252, acc: 0.39378


INFO:root:[0] Ed: 38400, train_loss: 1.51034, acc: 0.39372


[INFO 2025-02-27 14:58:49,921] [0] Ed: 38400, train_loss: 1.51034, acc: 0.39372


INFO:root:[0] Ed: 41600, train_loss: 1.50873, acc: 0.39526


[INFO 2025-02-27 14:59:00,850] [0] Ed: 41600, train_loss: 1.50873, acc: 0.39526


INFO:root:[0] Ed: 44800, train_loss: 1.50942, acc: 0.39531


[INFO 2025-02-27 14:59:11,762] [0] Ed: 44800, train_loss: 1.50942, acc: 0.39531


INFO:root:[0] Ed: 48000, train_loss: 1.51042, acc: 0.39500


[INFO 2025-02-27 14:59:22,654] [0] Ed: 48000, train_loss: 1.51042, acc: 0.39500


INFO:root:[0] Ed: 51200, train_loss: 1.50979, acc: 0.39529


[INFO 2025-02-27 14:59:33,556] [0] Ed: 51200, train_loss: 1.50979, acc: 0.39529


INFO:root:[0] Ed: 54400, train_loss: 1.50785, acc: 0.39706


[INFO 2025-02-27 14:59:44,481] [0] Ed: 54400, train_loss: 1.50785, acc: 0.39706


INFO:root:[0] Ed: 57600, train_loss: 1.50760, acc: 0.39771


[INFO 2025-02-27 14:59:55,375] [0] Ed: 57600, train_loss: 1.50760, acc: 0.39771


INFO:root:[0] Ed: 60800, train_loss: 1.50697, acc: 0.39758


[INFO 2025-02-27 15:00:06,278] [0] Ed: 60800, train_loss: 1.50697, acc: 0.39758


INFO:root:[0] Ed: 64000, train_loss: 1.50555, acc: 0.39794


[INFO 2025-02-27 15:00:17,212] [0] Ed: 64000, train_loss: 1.50555, acc: 0.39794


INFO:root:[0] Ed: 67200, train_loss: 1.50537, acc: 0.39915


[INFO 2025-02-27 15:00:28,112] [0] Ed: 67200, train_loss: 1.50537, acc: 0.39915


INFO:root:[0] Ed: 70400, train_loss: 1.50551, acc: 0.39879


[INFO 2025-02-27 15:00:39,004] [0] Ed: 70400, train_loss: 1.50551, acc: 0.39879


INFO:root:[0] Ed: 73600, train_loss: 1.50620, acc: 0.39883


[INFO 2025-02-27 15:00:49,903] [0] Ed: 73600, train_loss: 1.50620, acc: 0.39883


INFO:root:[0] Ed: 76800, train_loss: 1.50583, acc: 0.39887


[INFO 2025-02-27 15:01:00,834] [0] Ed: 76800, train_loss: 1.50583, acc: 0.39887


INFO:root:[0] Ed: 80000, train_loss: 1.50511, acc: 0.39936


[INFO 2025-02-27 15:01:12,010] [0] Ed: 80000, train_loss: 1.50511, acc: 0.39936


INFO:root:[0] Ed: 83200, train_loss: 1.50468, acc: 0.39936


[INFO 2025-02-27 15:01:22,954] [0] Ed: 83200, train_loss: 1.50468, acc: 0.39936


INFO:root:[0] Ed: 86400, train_loss: 1.50407, acc: 0.40000


[INFO 2025-02-27 15:01:33,881] [0] Ed: 86400, train_loss: 1.50407, acc: 0.40000


INFO:root:[0] Ed: 89600, train_loss: 1.50310, acc: 0.40049


[INFO 2025-02-27 15:01:44,751] [0] Ed: 89600, train_loss: 1.50310, acc: 0.40049


INFO:root:[0] Ed: 92800, train_loss: 1.50280, acc: 0.40047


[INFO 2025-02-27 15:01:55,688] [0] Ed: 92800, train_loss: 1.50280, acc: 0.40047


INFO:root:[0] Ed: 96000, train_loss: 1.50280, acc: 0.40039


[INFO 2025-02-27 15:02:06,591] [0] Ed: 96000, train_loss: 1.50280, acc: 0.40039


INFO:root:[0] Ed: 99200, train_loss: 1.50326, acc: 0.40037


[INFO 2025-02-27 15:02:17,527] [0] Ed: 99200, train_loss: 1.50326, acc: 0.40037


INFO:root:[0] Ed: 102400, train_loss: 1.50279, acc: 0.40041


[INFO 2025-02-27 15:02:28,453] [0] Ed: 102400, train_loss: 1.50279, acc: 0.40041


INFO:root:[0] Ed: 105600, train_loss: 1.50270, acc: 0.40076


[INFO 2025-02-27 15:02:39,341] [0] Ed: 105600, train_loss: 1.50270, acc: 0.40076


INFO:root:[0] Ed: 108800, train_loss: 1.50286, acc: 0.40102


[INFO 2025-02-27 15:02:50,248] [0] Ed: 108800, train_loss: 1.50286, acc: 0.40102


INFO:root:[0] Ed: 112000, train_loss: 1.50241, acc: 0.40127


[INFO 2025-02-27 15:03:01,117] [0] Ed: 112000, train_loss: 1.50241, acc: 0.40127


INFO:root:[0] Ed: 115200, train_loss: 1.50323, acc: 0.40142


[INFO 2025-02-27 15:03:11,981] [0] Ed: 115200, train_loss: 1.50323, acc: 0.40142


INFO:root:[0] Ed: 118400, train_loss: 1.50295, acc: 0.40162


[INFO 2025-02-27 15:03:22,883] [0] Ed: 118400, train_loss: 1.50295, acc: 0.40162


INFO:root:[0] Ed: 121600, train_loss: 1.50306, acc: 0.40167


[INFO 2025-02-27 15:03:33,791] [0] Ed: 121600, train_loss: 1.50306, acc: 0.40167


INFO:root:[0] Ed: 124800, train_loss: 1.50252, acc: 0.40204


[INFO 2025-02-27 15:03:44,701] [0] Ed: 124800, train_loss: 1.50252, acc: 0.40204


INFO:root:[0] Ed: 128000, train_loss: 1.50260, acc: 0.40215


[INFO 2025-02-27 15:03:55,612] [0] Ed: 128000, train_loss: 1.50260, acc: 0.40215


INFO:root:[0] Ed: 131200, train_loss: 1.50229, acc: 0.40238


[INFO 2025-02-27 15:04:06,541] [0] Ed: 131200, train_loss: 1.50229, acc: 0.40238


INFO:root:[0] Ed: 134400, train_loss: 1.50207, acc: 0.40228


[INFO 2025-02-27 15:04:17,454] [0] Ed: 134400, train_loss: 1.50207, acc: 0.40228


INFO:root:[0] Ed: 137600, train_loss: 1.50181, acc: 0.40258


[INFO 2025-02-27 15:04:28,400] [0] Ed: 137600, train_loss: 1.50181, acc: 0.40258


INFO:root:[0] Ed: 140800, train_loss: 1.50137, acc: 0.40266


[INFO 2025-02-27 15:04:39,303] [0] Ed: 140800, train_loss: 1.50137, acc: 0.40266


INFO:root:[0] Ed: 144000, train_loss: 1.50146, acc: 0.40264


[INFO 2025-02-27 15:04:50,265] [0] Ed: 144000, train_loss: 1.50146, acc: 0.40264


INFO:root:[0] Ed: 147200, train_loss: 1.50071, acc: 0.40281


[INFO 2025-02-27 15:05:01,169] [0] Ed: 147200, train_loss: 1.50071, acc: 0.40281


INFO:root:[0] Ed: 150400, train_loss: 1.50001, acc: 0.40259


[INFO 2025-02-27 15:05:12,052] [0] Ed: 150400, train_loss: 1.50001, acc: 0.40259


INFO:root:[0] Ed: 153600, train_loss: 1.50011, acc: 0.40236


[INFO 2025-02-27 15:05:23,226] [0] Ed: 153600, train_loss: 1.50011, acc: 0.40236


INFO:root:[0] Ed: 156800, train_loss: 1.49922, acc: 0.40281


[INFO 2025-02-27 15:05:34,185] [0] Ed: 156800, train_loss: 1.49922, acc: 0.40281


INFO:root:[0] Ed: 160000, train_loss: 1.49855, acc: 0.40306


[INFO 2025-02-27 15:05:45,057] [0] Ed: 160000, train_loss: 1.49855, acc: 0.40306


INFO:root:[0] Ed: 163200, train_loss: 1.49803, acc: 0.40316


[INFO 2025-02-27 15:05:55,932] [0] Ed: 163200, train_loss: 1.49803, acc: 0.40316


INFO:root:[0] Ed: 166400, train_loss: 1.49764, acc: 0.40318


[INFO 2025-02-27 15:06:06,879] [0] Ed: 166400, train_loss: 1.49764, acc: 0.40318


INFO:root:[0] Ed: 169600, train_loss: 1.49802, acc: 0.40295


[INFO 2025-02-27 15:06:17,833] [0] Ed: 169600, train_loss: 1.49802, acc: 0.40295


INFO:root:[0] Ed: 172800, train_loss: 1.49734, acc: 0.40308


[INFO 2025-02-27 15:06:28,784] [0] Ed: 172800, train_loss: 1.49734, acc: 0.40308


INFO:root:[0] Ed: 176000, train_loss: 1.49685, acc: 0.40331


[INFO 2025-02-27 15:06:39,720] [0] Ed: 176000, train_loss: 1.49685, acc: 0.40331


INFO:root:[0] Ed: 179200, train_loss: 1.49601, acc: 0.40352


[INFO 2025-02-27 15:06:50,602] [0] Ed: 179200, train_loss: 1.49601, acc: 0.40352


INFO:root:[0] Ed: 182400, train_loss: 1.49609, acc: 0.40362


[INFO 2025-02-27 15:07:01,501] [0] Ed: 182400, train_loss: 1.49609, acc: 0.40362


INFO:root:[0] Ed: 185600, train_loss: 1.49513, acc: 0.40384


[INFO 2025-02-27 15:07:12,342] [0] Ed: 185600, train_loss: 1.49513, acc: 0.40384


INFO:root:[0] Ed: 188800, train_loss: 1.49472, acc: 0.40406


[INFO 2025-02-27 15:07:23,191] [0] Ed: 188800, train_loss: 1.49472, acc: 0.40406


INFO:root:[0] Ed: 192000, train_loss: 1.49425, acc: 0.40385


[INFO 2025-02-27 15:07:34,056] [0] Ed: 192000, train_loss: 1.49425, acc: 0.40385


INFO:root:[0] Ed: 195200, train_loss: 1.49407, acc: 0.40384


[INFO 2025-02-27 15:07:44,926] [0] Ed: 195200, train_loss: 1.49407, acc: 0.40384


INFO:root:[0] Ed: 198400, train_loss: 1.49392, acc: 0.40395


[INFO 2025-02-27 15:07:55,817] [0] Ed: 198400, train_loss: 1.49392, acc: 0.40395


INFO:root:[0] Ed: 201600, train_loss: 1.49331, acc: 0.40399


[INFO 2025-02-27 15:08:06,748] [0] Ed: 201600, train_loss: 1.49331, acc: 0.40399


INFO:root:[0] Ed: 204800, train_loss: 1.49333, acc: 0.40399


[INFO 2025-02-27 15:08:17,654] [0] Ed: 204800, train_loss: 1.49333, acc: 0.40399


INFO:root:[0] Ed: 208000, train_loss: 1.49316, acc: 0.40413


[INFO 2025-02-27 15:08:28,569] [0] Ed: 208000, train_loss: 1.49316, acc: 0.40413


INFO:root:[0] Ed: 211200, train_loss: 1.49285, acc: 0.40411


[INFO 2025-02-27 15:08:39,625] [0] Ed: 211200, train_loss: 1.49285, acc: 0.40411


INFO:root:[0] Ed: 214400, train_loss: 1.49241, acc: 0.40415


[INFO 2025-02-27 15:08:50,685] [0] Ed: 214400, train_loss: 1.49241, acc: 0.40415


INFO:root:[0] Ed: 217600, train_loss: 1.49179, acc: 0.40411


[INFO 2025-02-27 15:09:01,705] [0] Ed: 217600, train_loss: 1.49179, acc: 0.40411


INFO:root:[0] Ed: 220800, train_loss: 1.49113, acc: 0.40428


[INFO 2025-02-27 15:09:12,776] [0] Ed: 220800, train_loss: 1.49113, acc: 0.40428


INFO:root:[0] Ed: 224000, train_loss: 1.49052, acc: 0.40454


[INFO 2025-02-27 15:09:24,088] [0] Ed: 224000, train_loss: 1.49052, acc: 0.40454


INFO:root:[0] Ed: 227200, train_loss: 1.48997, acc: 0.40481


[INFO 2025-02-27 15:09:35,212] [0] Ed: 227200, train_loss: 1.48997, acc: 0.40481


INFO:root:[0] Ed: 230400, train_loss: 1.48933, acc: 0.40503


[INFO 2025-02-27 15:09:46,316] [0] Ed: 230400, train_loss: 1.48933, acc: 0.40503


INFO:root:[0] Ed: 233600, train_loss: 1.48859, acc: 0.40533


[INFO 2025-02-27 15:09:57,397] [0] Ed: 233600, train_loss: 1.48859, acc: 0.40533


INFO:root:Training finish.


[INFO 2025-02-27 15:10:06,828] Training finish.


INFO:root:Model saved to /content/model/epoch-3.pt.


[INFO 2025-02-27 15:10:07,227] Model saved to /content/model/epoch-3.pt.


INFO:root:[0] Ed: 0, train_loss: inf, acc: inf


[INFO 2025-02-27 15:10:07,368] [0] Ed: 0, train_loss: inf, acc: inf


INFO:root:[0] Ed: 3200, train_loss: 1.47047, acc: 0.40219


[INFO 2025-02-27 15:10:18,378] [0] Ed: 3200, train_loss: 1.47047, acc: 0.40219


INFO:root:[0] Ed: 6400, train_loss: 1.44993, acc: 0.40750


[INFO 2025-02-27 15:10:29,419] [0] Ed: 6400, train_loss: 1.44993, acc: 0.40750


INFO:root:[0] Ed: 9600, train_loss: 1.45369, acc: 0.40531


[INFO 2025-02-27 15:10:40,483] [0] Ed: 9600, train_loss: 1.45369, acc: 0.40531


INFO:root:[0] Ed: 12800, train_loss: 1.44761, acc: 0.40766


[INFO 2025-02-27 15:10:51,508] [0] Ed: 12800, train_loss: 1.44761, acc: 0.40766


INFO:root:[0] Ed: 16000, train_loss: 1.45247, acc: 0.40744


[INFO 2025-02-27 15:11:02,582] [0] Ed: 16000, train_loss: 1.45247, acc: 0.40744


INFO:root:[0] Ed: 19200, train_loss: 1.45447, acc: 0.40776


[INFO 2025-02-27 15:11:13,588] [0] Ed: 19200, train_loss: 1.45447, acc: 0.40776


INFO:root:[0] Ed: 22400, train_loss: 1.45554, acc: 0.41049


[INFO 2025-02-27 15:11:24,506] [0] Ed: 22400, train_loss: 1.45554, acc: 0.41049


INFO:root:[0] Ed: 25600, train_loss: 1.45707, acc: 0.41016


[INFO 2025-02-27 15:11:35,447] [0] Ed: 25600, train_loss: 1.45707, acc: 0.41016


INFO:root:[0] Ed: 28800, train_loss: 1.45605, acc: 0.41003


[INFO 2025-02-27 15:11:46,344] [0] Ed: 28800, train_loss: 1.45605, acc: 0.41003


INFO:root:[0] Ed: 32000, train_loss: 1.45611, acc: 0.40966


[INFO 2025-02-27 15:11:57,249] [0] Ed: 32000, train_loss: 1.45611, acc: 0.40966


INFO:root:[0] Ed: 35200, train_loss: 1.45890, acc: 0.40813


[INFO 2025-02-27 15:12:08,153] [0] Ed: 35200, train_loss: 1.45890, acc: 0.40813


INFO:root:[0] Ed: 38400, train_loss: 1.45864, acc: 0.40859


[INFO 2025-02-27 15:12:19,096] [0] Ed: 38400, train_loss: 1.45864, acc: 0.40859


INFO:root:[0] Ed: 41600, train_loss: 1.45889, acc: 0.40954


[INFO 2025-02-27 15:12:30,073] [0] Ed: 41600, train_loss: 1.45889, acc: 0.40954


INFO:root:[0] Ed: 44800, train_loss: 1.46125, acc: 0.40940


[INFO 2025-02-27 15:12:41,039] [0] Ed: 44800, train_loss: 1.46125, acc: 0.40940


INFO:root:[0] Ed: 48000, train_loss: 1.46310, acc: 0.40840


[INFO 2025-02-27 15:12:51,903] [0] Ed: 48000, train_loss: 1.46310, acc: 0.40840


INFO:root:[0] Ed: 51200, train_loss: 1.46198, acc: 0.40809


[INFO 2025-02-27 15:13:02,776] [0] Ed: 51200, train_loss: 1.46198, acc: 0.40809


INFO:root:[0] Ed: 54400, train_loss: 1.46039, acc: 0.40936


[INFO 2025-02-27 15:13:13,625] [0] Ed: 54400, train_loss: 1.46039, acc: 0.40936


INFO:root:[0] Ed: 57600, train_loss: 1.46029, acc: 0.41010


[INFO 2025-02-27 15:13:24,518] [0] Ed: 57600, train_loss: 1.46029, acc: 0.41010


INFO:root:[0] Ed: 60800, train_loss: 1.46057, acc: 0.41028


[INFO 2025-02-27 15:13:35,656] [0] Ed: 60800, train_loss: 1.46057, acc: 0.41028


INFO:root:[0] Ed: 64000, train_loss: 1.45989, acc: 0.41014


[INFO 2025-02-27 15:13:46,586] [0] Ed: 64000, train_loss: 1.45989, acc: 0.41014


INFO:root:[0] Ed: 67200, train_loss: 1.45861, acc: 0.41104


[INFO 2025-02-27 15:13:57,486] [0] Ed: 67200, train_loss: 1.45861, acc: 0.41104


INFO:root:[0] Ed: 70400, train_loss: 1.45870, acc: 0.41104


[INFO 2025-02-27 15:14:08,398] [0] Ed: 70400, train_loss: 1.45870, acc: 0.41104


INFO:root:[0] Ed: 73600, train_loss: 1.45779, acc: 0.41121


[INFO 2025-02-27 15:14:19,267] [0] Ed: 73600, train_loss: 1.45779, acc: 0.41121


INFO:root:[0] Ed: 76800, train_loss: 1.45707, acc: 0.41178


[INFO 2025-02-27 15:14:30,176] [0] Ed: 76800, train_loss: 1.45707, acc: 0.41178


INFO:root:[0] Ed: 80000, train_loss: 1.45648, acc: 0.41200


[INFO 2025-02-27 15:14:41,163] [0] Ed: 80000, train_loss: 1.45648, acc: 0.41200


INFO:root:[0] Ed: 83200, train_loss: 1.45531, acc: 0.41281


[INFO 2025-02-27 15:14:52,206] [0] Ed: 83200, train_loss: 1.45531, acc: 0.41281


INFO:root:[0] Ed: 86400, train_loss: 1.45356, acc: 0.41373


[INFO 2025-02-27 15:15:03,231] [0] Ed: 86400, train_loss: 1.45356, acc: 0.41373


INFO:root:[0] Ed: 89600, train_loss: 1.45340, acc: 0.41379


[INFO 2025-02-27 15:15:14,256] [0] Ed: 89600, train_loss: 1.45340, acc: 0.41379


INFO:root:[0] Ed: 92800, train_loss: 1.45370, acc: 0.41373


[INFO 2025-02-27 15:15:25,242] [0] Ed: 92800, train_loss: 1.45370, acc: 0.41373


INFO:root:[0] Ed: 96000, train_loss: 1.45442, acc: 0.41341


[INFO 2025-02-27 15:15:36,256] [0] Ed: 96000, train_loss: 1.45442, acc: 0.41341


INFO:root:[0] Ed: 99200, train_loss: 1.45408, acc: 0.41331


[INFO 2025-02-27 15:15:47,315] [0] Ed: 99200, train_loss: 1.45408, acc: 0.41331


INFO:root:[0] Ed: 102400, train_loss: 1.45387, acc: 0.41346


[INFO 2025-02-27 15:15:58,370] [0] Ed: 102400, train_loss: 1.45387, acc: 0.41346


INFO:root:[0] Ed: 105600, train_loss: 1.45304, acc: 0.41386


[INFO 2025-02-27 15:16:09,388] [0] Ed: 105600, train_loss: 1.45304, acc: 0.41386


INFO:root:[0] Ed: 108800, train_loss: 1.45262, acc: 0.41409


[INFO 2025-02-27 15:16:20,457] [0] Ed: 108800, train_loss: 1.45262, acc: 0.41409


INFO:root:[0] Ed: 112000, train_loss: 1.45202, acc: 0.41401


[INFO 2025-02-27 15:16:31,506] [0] Ed: 112000, train_loss: 1.45202, acc: 0.41401


INFO:root:[0] Ed: 115200, train_loss: 1.45197, acc: 0.41418


[INFO 2025-02-27 15:16:42,564] [0] Ed: 115200, train_loss: 1.45197, acc: 0.41418


INFO:root:[0] Ed: 118400, train_loss: 1.45069, acc: 0.41450


[INFO 2025-02-27 15:16:53,584] [0] Ed: 118400, train_loss: 1.45069, acc: 0.41450


INFO:root:[0] Ed: 121600, train_loss: 1.45056, acc: 0.41441


[INFO 2025-02-27 15:17:04,661] [0] Ed: 121600, train_loss: 1.45056, acc: 0.41441


INFO:root:[0] Ed: 124800, train_loss: 1.45047, acc: 0.41489


[INFO 2025-02-27 15:17:15,600] [0] Ed: 124800, train_loss: 1.45047, acc: 0.41489


INFO:root:[0] Ed: 128000, train_loss: 1.45052, acc: 0.41512


[INFO 2025-02-27 15:17:26,527] [0] Ed: 128000, train_loss: 1.45052, acc: 0.41512


INFO:root:[0] Ed: 131200, train_loss: 1.45120, acc: 0.41502


[INFO 2025-02-27 15:17:37,497] [0] Ed: 131200, train_loss: 1.45120, acc: 0.41502


INFO:root:[0] Ed: 134400, train_loss: 1.45140, acc: 0.41474


[INFO 2025-02-27 15:17:48,718] [0] Ed: 134400, train_loss: 1.45140, acc: 0.41474


INFO:root:[0] Ed: 137600, train_loss: 1.45102, acc: 0.41499


[INFO 2025-02-27 15:17:59,649] [0] Ed: 137600, train_loss: 1.45102, acc: 0.41499


INFO:root:[0] Ed: 140800, train_loss: 1.45034, acc: 0.41529


[INFO 2025-02-27 15:18:10,570] [0] Ed: 140800, train_loss: 1.45034, acc: 0.41529


INFO:root:[0] Ed: 144000, train_loss: 1.45015, acc: 0.41499


[INFO 2025-02-27 15:18:21,508] [0] Ed: 144000, train_loss: 1.45015, acc: 0.41499


INFO:root:[0] Ed: 147200, train_loss: 1.44986, acc: 0.41522


[INFO 2025-02-27 15:18:32,440] [0] Ed: 147200, train_loss: 1.44986, acc: 0.41522


INFO:root:[0] Ed: 150400, train_loss: 1.44934, acc: 0.41518


[INFO 2025-02-27 15:18:43,398] [0] Ed: 150400, train_loss: 1.44934, acc: 0.41518


INFO:root:[0] Ed: 153600, train_loss: 1.44905, acc: 0.41512


[INFO 2025-02-27 15:18:54,312] [0] Ed: 153600, train_loss: 1.44905, acc: 0.41512


INFO:root:[0] Ed: 156800, train_loss: 1.44865, acc: 0.41520


[INFO 2025-02-27 15:19:05,245] [0] Ed: 156800, train_loss: 1.44865, acc: 0.41520


INFO:root:[0] Ed: 160000, train_loss: 1.44812, acc: 0.41526


[INFO 2025-02-27 15:19:16,159] [0] Ed: 160000, train_loss: 1.44812, acc: 0.41526


INFO:root:[0] Ed: 163200, train_loss: 1.44792, acc: 0.41539


[INFO 2025-02-27 15:19:27,069] [0] Ed: 163200, train_loss: 1.44792, acc: 0.41539


INFO:root:[0] Ed: 166400, train_loss: 1.44784, acc: 0.41528


[INFO 2025-02-27 15:19:37,994] [0] Ed: 166400, train_loss: 1.44784, acc: 0.41528


INFO:root:[0] Ed: 169600, train_loss: 1.44794, acc: 0.41521


[INFO 2025-02-27 15:19:48,941] [0] Ed: 169600, train_loss: 1.44794, acc: 0.41521


INFO:root:[0] Ed: 172800, train_loss: 1.44750, acc: 0.41531


[INFO 2025-02-27 15:19:59,822] [0] Ed: 172800, train_loss: 1.44750, acc: 0.41531


INFO:root:[0] Ed: 176000, train_loss: 1.44739, acc: 0.41539


[INFO 2025-02-27 15:20:10,669] [0] Ed: 176000, train_loss: 1.44739, acc: 0.41539


INFO:root:[0] Ed: 179200, train_loss: 1.44691, acc: 0.41550


[INFO 2025-02-27 15:20:21,516] [0] Ed: 179200, train_loss: 1.44691, acc: 0.41550


INFO:root:[0] Ed: 182400, train_loss: 1.44702, acc: 0.41559


[INFO 2025-02-27 15:20:32,443] [0] Ed: 182400, train_loss: 1.44702, acc: 0.41559


INFO:root:[0] Ed: 185600, train_loss: 1.44659, acc: 0.41575


[INFO 2025-02-27 15:20:43,347] [0] Ed: 185600, train_loss: 1.44659, acc: 0.41575


INFO:root:[0] Ed: 188800, train_loss: 1.44642, acc: 0.41594


[INFO 2025-02-27 15:20:54,207] [0] Ed: 188800, train_loss: 1.44642, acc: 0.41594


INFO:root:[0] Ed: 192000, train_loss: 1.44618, acc: 0.41591


[INFO 2025-02-27 15:21:05,115] [0] Ed: 192000, train_loss: 1.44618, acc: 0.41591


INFO:root:[0] Ed: 195200, train_loss: 1.44572, acc: 0.41603


[INFO 2025-02-27 15:21:15,998] [0] Ed: 195200, train_loss: 1.44572, acc: 0.41603


INFO:root:[0] Ed: 198400, train_loss: 1.44581, acc: 0.41587


[INFO 2025-02-27 15:21:26,882] [0] Ed: 198400, train_loss: 1.44581, acc: 0.41587


INFO:root:[0] Ed: 201600, train_loss: 1.44546, acc: 0.41585


[INFO 2025-02-27 15:21:37,776] [0] Ed: 201600, train_loss: 1.44546, acc: 0.41585


INFO:root:[0] Ed: 204800, train_loss: 1.44555, acc: 0.41585


[INFO 2025-02-27 15:21:48,708] [0] Ed: 204800, train_loss: 1.44555, acc: 0.41585


INFO:root:[0] Ed: 208000, train_loss: 1.44598, acc: 0.41585


[INFO 2025-02-27 15:21:59,859] [0] Ed: 208000, train_loss: 1.44598, acc: 0.41585


INFO:root:[0] Ed: 211200, train_loss: 1.44617, acc: 0.41565


[INFO 2025-02-27 15:22:10,747] [0] Ed: 211200, train_loss: 1.44617, acc: 0.41565


INFO:root:[0] Ed: 214400, train_loss: 1.44592, acc: 0.41562


[INFO 2025-02-27 15:22:21,670] [0] Ed: 214400, train_loss: 1.44592, acc: 0.41562


INFO:root:[0] Ed: 217600, train_loss: 1.44594, acc: 0.41553


[INFO 2025-02-27 15:22:32,589] [0] Ed: 217600, train_loss: 1.44594, acc: 0.41553


INFO:root:[0] Ed: 220800, train_loss: 1.44553, acc: 0.41567


[INFO 2025-02-27 15:22:43,444] [0] Ed: 220800, train_loss: 1.44553, acc: 0.41567


INFO:root:[0] Ed: 224000, train_loss: 1.44551, acc: 0.41578


[INFO 2025-02-27 15:22:54,370] [0] Ed: 224000, train_loss: 1.44551, acc: 0.41578


INFO:root:[0] Ed: 227200, train_loss: 1.44519, acc: 0.41582


[INFO 2025-02-27 15:23:05,354] [0] Ed: 227200, train_loss: 1.44519, acc: 0.41582


INFO:root:[0] Ed: 230400, train_loss: 1.44496, acc: 0.41594


[INFO 2025-02-27 15:23:16,218] [0] Ed: 230400, train_loss: 1.44496, acc: 0.41594


INFO:root:[0] Ed: 233600, train_loss: 1.44486, acc: 0.41613


[INFO 2025-02-27 15:23:27,073] [0] Ed: 233600, train_loss: 1.44486, acc: 0.41613


INFO:root:Training finish.


[INFO 2025-02-27 15:23:36,363] Training finish.


INFO:root:Model saved to /content/model/epoch-4.pt.


[INFO 2025-02-27 15:23:36,760] Model saved to /content/model/epoch-4.pt.


INFO:root:[0] Ed: 0, train_loss: inf, acc: inf


[INFO 2025-02-27 15:23:36,900] [0] Ed: 0, train_loss: inf, acc: inf


INFO:root:[0] Ed: 3200, train_loss: 1.46296, acc: 0.41250


[INFO 2025-02-27 15:23:47,841] [0] Ed: 3200, train_loss: 1.46296, acc: 0.41250


INFO:root:[0] Ed: 6400, train_loss: 1.44296, acc: 0.41328


[INFO 2025-02-27 15:23:58,782] [0] Ed: 6400, train_loss: 1.44296, acc: 0.41328


INFO:root:[0] Ed: 9600, train_loss: 1.44591, acc: 0.41438


[INFO 2025-02-27 15:24:09,691] [0] Ed: 9600, train_loss: 1.44591, acc: 0.41438


INFO:root:[0] Ed: 12800, train_loss: 1.44059, acc: 0.41750


[INFO 2025-02-27 15:24:20,623] [0] Ed: 12800, train_loss: 1.44059, acc: 0.41750


INFO:root:[0] Ed: 16000, train_loss: 1.44511, acc: 0.41500


[INFO 2025-02-27 15:24:31,561] [0] Ed: 16000, train_loss: 1.44511, acc: 0.41500


INFO:root:[0] Ed: 19200, train_loss: 1.44415, acc: 0.41547


[INFO 2025-02-27 15:24:42,469] [0] Ed: 19200, train_loss: 1.44415, acc: 0.41547


INFO:root:[0] Ed: 22400, train_loss: 1.44294, acc: 0.41759


[INFO 2025-02-27 15:24:53,403] [0] Ed: 22400, train_loss: 1.44294, acc: 0.41759


INFO:root:[0] Ed: 25600, train_loss: 1.44363, acc: 0.41672


[INFO 2025-02-27 15:25:04,309] [0] Ed: 25600, train_loss: 1.44363, acc: 0.41672


INFO:root:[0] Ed: 28800, train_loss: 1.44210, acc: 0.41733


[INFO 2025-02-27 15:25:15,214] [0] Ed: 28800, train_loss: 1.44210, acc: 0.41733


INFO:root:[0] Ed: 32000, train_loss: 1.44094, acc: 0.41769


[INFO 2025-02-27 15:25:26,115] [0] Ed: 32000, train_loss: 1.44094, acc: 0.41769


INFO:root:[0] Ed: 35200, train_loss: 1.44351, acc: 0.41685


[INFO 2025-02-27 15:25:37,068] [0] Ed: 35200, train_loss: 1.44351, acc: 0.41685


INFO:root:[0] Ed: 38400, train_loss: 1.44100, acc: 0.41706


[INFO 2025-02-27 15:25:48,051] [0] Ed: 38400, train_loss: 1.44100, acc: 0.41706


INFO:root:[0] Ed: 41600, train_loss: 1.44245, acc: 0.41760


[INFO 2025-02-27 15:25:59,173] [0] Ed: 41600, train_loss: 1.44245, acc: 0.41760


INFO:root:[0] Ed: 44800, train_loss: 1.44135, acc: 0.41781


[INFO 2025-02-27 15:26:10,075] [0] Ed: 44800, train_loss: 1.44135, acc: 0.41781


INFO:root:[0] Ed: 48000, train_loss: 1.44242, acc: 0.41723


[INFO 2025-02-27 15:26:20,987] [0] Ed: 48000, train_loss: 1.44242, acc: 0.41723


INFO:root:[0] Ed: 51200, train_loss: 1.44181, acc: 0.41699


[INFO 2025-02-27 15:26:31,944] [0] Ed: 51200, train_loss: 1.44181, acc: 0.41699


INFO:root:[0] Ed: 54400, train_loss: 1.43982, acc: 0.41778


[INFO 2025-02-27 15:26:42,841] [0] Ed: 54400, train_loss: 1.43982, acc: 0.41778


INFO:root:[0] Ed: 57600, train_loss: 1.44051, acc: 0.41819


[INFO 2025-02-27 15:26:53,796] [0] Ed: 57600, train_loss: 1.44051, acc: 0.41819


INFO:root:[0] Ed: 60800, train_loss: 1.43973, acc: 0.41826


[INFO 2025-02-27 15:27:04,648] [0] Ed: 60800, train_loss: 1.43973, acc: 0.41826


INFO:root:[0] Ed: 64000, train_loss: 1.43947, acc: 0.41802


[INFO 2025-02-27 15:27:15,546] [0] Ed: 64000, train_loss: 1.43947, acc: 0.41802


INFO:root:[0] Ed: 67200, train_loss: 1.43979, acc: 0.41856


[INFO 2025-02-27 15:27:26,485] [0] Ed: 67200, train_loss: 1.43979, acc: 0.41856


INFO:root:[0] Ed: 70400, train_loss: 1.44018, acc: 0.41841


[INFO 2025-02-27 15:27:37,436] [0] Ed: 70400, train_loss: 1.44018, acc: 0.41841


INFO:root:[0] Ed: 73600, train_loss: 1.44018, acc: 0.41827


[INFO 2025-02-27 15:27:48,394] [0] Ed: 73600, train_loss: 1.44018, acc: 0.41827


INFO:root:[0] Ed: 76800, train_loss: 1.43983, acc: 0.41862


[INFO 2025-02-27 15:27:59,287] [0] Ed: 76800, train_loss: 1.43983, acc: 0.41862


INFO:root:[0] Ed: 80000, train_loss: 1.43951, acc: 0.41911


[INFO 2025-02-27 15:28:10,235] [0] Ed: 80000, train_loss: 1.43951, acc: 0.41911


INFO:root:[0] Ed: 83200, train_loss: 1.43935, acc: 0.41950


[INFO 2025-02-27 15:28:21,141] [0] Ed: 83200, train_loss: 1.43935, acc: 0.41950


INFO:root:[0] Ed: 86400, train_loss: 1.43849, acc: 0.42007


[INFO 2025-02-27 15:28:32,168] [0] Ed: 86400, train_loss: 1.43849, acc: 0.42007


INFO:root:[0] Ed: 89600, train_loss: 1.43790, acc: 0.42056


[INFO 2025-02-27 15:28:43,189] [0] Ed: 89600, train_loss: 1.43790, acc: 0.42056


INFO:root:[0] Ed: 92800, train_loss: 1.43732, acc: 0.42087


[INFO 2025-02-27 15:28:54,102] [0] Ed: 92800, train_loss: 1.43732, acc: 0.42087


INFO:root:[0] Ed: 96000, train_loss: 1.43834, acc: 0.42083


[INFO 2025-02-27 15:29:05,002] [0] Ed: 96000, train_loss: 1.43834, acc: 0.42083


INFO:root:[0] Ed: 99200, train_loss: 1.43823, acc: 0.42076


[INFO 2025-02-27 15:29:15,937] [0] Ed: 99200, train_loss: 1.43823, acc: 0.42076


INFO:root:[0] Ed: 102400, train_loss: 1.43838, acc: 0.42060


[INFO 2025-02-27 15:29:26,869] [0] Ed: 102400, train_loss: 1.43838, acc: 0.42060


INFO:root:[0] Ed: 105600, train_loss: 1.43797, acc: 0.42111


[INFO 2025-02-27 15:29:37,777] [0] Ed: 105600, train_loss: 1.43797, acc: 0.42111


INFO:root:[0] Ed: 108800, train_loss: 1.43850, acc: 0.42085


[INFO 2025-02-27 15:29:48,734] [0] Ed: 108800, train_loss: 1.43850, acc: 0.42085


INFO:root:[0] Ed: 112000, train_loss: 1.43830, acc: 0.42071


[INFO 2025-02-27 15:29:59,646] [0] Ed: 112000, train_loss: 1.43830, acc: 0.42071


INFO:root:[0] Ed: 115200, train_loss: 1.43813, acc: 0.42078


[INFO 2025-02-27 15:30:10,838] [0] Ed: 115200, train_loss: 1.43813, acc: 0.42078


INFO:root:[0] Ed: 118400, train_loss: 1.43694, acc: 0.42102


[INFO 2025-02-27 15:30:21,725] [0] Ed: 118400, train_loss: 1.43694, acc: 0.42102


INFO:root:[0] Ed: 121600, train_loss: 1.43701, acc: 0.42090


[INFO 2025-02-27 15:30:32,647] [0] Ed: 121600, train_loss: 1.43701, acc: 0.42090


INFO:root:[0] Ed: 124800, train_loss: 1.43668, acc: 0.42098


[INFO 2025-02-27 15:30:43,548] [0] Ed: 124800, train_loss: 1.43668, acc: 0.42098


INFO:root:[0] Ed: 128000, train_loss: 1.43712, acc: 0.42092


[INFO 2025-02-27 15:30:54,455] [0] Ed: 128000, train_loss: 1.43712, acc: 0.42092


INFO:root:[0] Ed: 131200, train_loss: 1.43704, acc: 0.42083


[INFO 2025-02-27 15:31:05,360] [0] Ed: 131200, train_loss: 1.43704, acc: 0.42083


INFO:root:[0] Ed: 134400, train_loss: 1.43644, acc: 0.42075


[INFO 2025-02-27 15:31:16,286] [0] Ed: 134400, train_loss: 1.43644, acc: 0.42075


INFO:root:[0] Ed: 137600, train_loss: 1.43642, acc: 0.42104


[INFO 2025-02-27 15:31:27,195] [0] Ed: 137600, train_loss: 1.43642, acc: 0.42104


INFO:root:[0] Ed: 140800, train_loss: 1.43600, acc: 0.42143


[INFO 2025-02-27 15:31:38,141] [0] Ed: 140800, train_loss: 1.43600, acc: 0.42143


INFO:root:[0] Ed: 144000, train_loss: 1.43596, acc: 0.42143


[INFO 2025-02-27 15:31:49,029] [0] Ed: 144000, train_loss: 1.43596, acc: 0.42143


INFO:root:[0] Ed: 147200, train_loss: 1.43585, acc: 0.42173


[INFO 2025-02-27 15:31:59,901] [0] Ed: 147200, train_loss: 1.43585, acc: 0.42173


INFO:root:[0] Ed: 150400, train_loss: 1.43523, acc: 0.42169


[INFO 2025-02-27 15:32:10,807] [0] Ed: 150400, train_loss: 1.43523, acc: 0.42169


INFO:root:[0] Ed: 153600, train_loss: 1.43504, acc: 0.42169


[INFO 2025-02-27 15:32:21,768] [0] Ed: 153600, train_loss: 1.43504, acc: 0.42169


INFO:root:[0] Ed: 156800, train_loss: 1.43485, acc: 0.42177


[INFO 2025-02-27 15:32:32,729] [0] Ed: 156800, train_loss: 1.43485, acc: 0.42177


INFO:root:[0] Ed: 160000, train_loss: 1.43464, acc: 0.42184


[INFO 2025-02-27 15:32:43,703] [0] Ed: 160000, train_loss: 1.43464, acc: 0.42184


INFO:root:[0] Ed: 163200, train_loss: 1.43453, acc: 0.42195


[INFO 2025-02-27 15:32:54,678] [0] Ed: 163200, train_loss: 1.43453, acc: 0.42195


INFO:root:[0] Ed: 166400, train_loss: 1.43452, acc: 0.42185


[INFO 2025-02-27 15:33:05,627] [0] Ed: 166400, train_loss: 1.43452, acc: 0.42185


INFO:root:[0] Ed: 169600, train_loss: 1.43488, acc: 0.42179


[INFO 2025-02-27 15:33:16,524] [0] Ed: 169600, train_loss: 1.43488, acc: 0.42179


INFO:root:[0] Ed: 172800, train_loss: 1.43456, acc: 0.42178


[INFO 2025-02-27 15:33:27,488] [0] Ed: 172800, train_loss: 1.43456, acc: 0.42178


INFO:root:[0] Ed: 176000, train_loss: 1.43420, acc: 0.42194


[INFO 2025-02-27 15:33:38,470] [0] Ed: 176000, train_loss: 1.43420, acc: 0.42194


INFO:root:[0] Ed: 179200, train_loss: 1.43326, acc: 0.42215


[INFO 2025-02-27 15:33:49,462] [0] Ed: 179200, train_loss: 1.43326, acc: 0.42215


INFO:root:[0] Ed: 182400, train_loss: 1.43336, acc: 0.42224


[INFO 2025-02-27 15:34:00,422] [0] Ed: 182400, train_loss: 1.43336, acc: 0.42224


INFO:root:[0] Ed: 185600, train_loss: 1.43314, acc: 0.42238


[INFO 2025-02-27 15:34:11,594] [0] Ed: 185600, train_loss: 1.43314, acc: 0.42238


INFO:root:[0] Ed: 188800, train_loss: 1.43325, acc: 0.42242


[INFO 2025-02-27 15:34:22,513] [0] Ed: 188800, train_loss: 1.43325, acc: 0.42242


INFO:root:[0] Ed: 192000, train_loss: 1.43357, acc: 0.42236


[INFO 2025-02-27 15:34:33,440] [0] Ed: 192000, train_loss: 1.43357, acc: 0.42236


INFO:root:[0] Ed: 195200, train_loss: 1.43346, acc: 0.42243


[INFO 2025-02-27 15:34:44,413] [0] Ed: 195200, train_loss: 1.43346, acc: 0.42243


INFO:root:[0] Ed: 198400, train_loss: 1.43361, acc: 0.42245


[INFO 2025-02-27 15:34:55,279] [0] Ed: 198400, train_loss: 1.43361, acc: 0.42245


INFO:root:[0] Ed: 201600, train_loss: 1.43355, acc: 0.42250


[INFO 2025-02-27 15:35:06,168] [0] Ed: 201600, train_loss: 1.43355, acc: 0.42250


INFO:root:[0] Ed: 204800, train_loss: 1.43364, acc: 0.42245


[INFO 2025-02-27 15:35:17,080] [0] Ed: 204800, train_loss: 1.43364, acc: 0.42245


INFO:root:[0] Ed: 208000, train_loss: 1.43398, acc: 0.42253


[INFO 2025-02-27 15:35:27,992] [0] Ed: 208000, train_loss: 1.43398, acc: 0.42253


INFO:root:[0] Ed: 211200, train_loss: 1.43410, acc: 0.42233


[INFO 2025-02-27 15:35:38,894] [0] Ed: 211200, train_loss: 1.43410, acc: 0.42233


INFO:root:[0] Ed: 214400, train_loss: 1.43511, acc: 0.42217


[INFO 2025-02-27 15:35:49,830] [0] Ed: 214400, train_loss: 1.43511, acc: 0.42217


INFO:root:[0] Ed: 217600, train_loss: 1.43521, acc: 0.42209


[INFO 2025-02-27 15:36:00,759] [0] Ed: 217600, train_loss: 1.43521, acc: 0.42209


INFO:root:[0] Ed: 220800, train_loss: 1.43498, acc: 0.42220


[INFO 2025-02-27 15:36:11,697] [0] Ed: 220800, train_loss: 1.43498, acc: 0.42220


INFO:root:[0] Ed: 224000, train_loss: 1.43483, acc: 0.42233


[INFO 2025-02-27 15:36:22,601] [0] Ed: 224000, train_loss: 1.43483, acc: 0.42233


INFO:root:[0] Ed: 227200, train_loss: 1.43445, acc: 0.42238


[INFO 2025-02-27 15:36:33,550] [0] Ed: 227200, train_loss: 1.43445, acc: 0.42238


INFO:root:[0] Ed: 230400, train_loss: 1.43370, acc: 0.42259


[INFO 2025-02-27 15:36:44,449] [0] Ed: 230400, train_loss: 1.43370, acc: 0.42259


INFO:root:[0] Ed: 233600, train_loss: 1.43334, acc: 0.42274


[INFO 2025-02-27 15:36:55,375] [0] Ed: 233600, train_loss: 1.43334, acc: 0.42274


INFO:root:Training finish.


[INFO 2025-02-27 15:37:04,635] Training finish.


INFO:root:Model saved to /content/model/epoch-5.pt.


[INFO 2025-02-27 15:37:05,030] Model saved to /content/model/epoch-5.pt.


In [None]:
# Switch the shared `args` object to evaluation settings and make sure the
# per-GPU behaviour shards (behaviors_<i>.tsv) exist before testing.
args.mode = 'test'
args.user_log_mask = True
args.batch_size = 128
args.load_ckpt_name = 'epoch-5.pt'
args.prepare = True

if 'test' in args.mode:
    if args.prepare:
        logging.info('Preparing testing data...')
        total_sample_num = prepare_testing_data(args.test_data_dir, args.nGPU)
    else:
        # Shards are expected to exist already: just count their samples.
        total_sample_num = 0
        for i in range(args.nGPU):
            data_file_path = os.path.join(args.test_data_dir, f'behaviors_{i}.tsv')
            if not os.path.exists(data_file_path):
                message = f'Splited testing data {data_file_path} for GPU {i} does not exist. Please set the parameter --prepare as True and rerun the code.'
                logging.error(message)
                # Raise instead of exit(): exit() tears down the notebook
                # kernel, an exception just stops this cell.
                raise FileNotFoundError(message)
            # Count lines in Python rather than shelling out to `wc -l`:
            # the data directory contains a space ("Colab Notebooks"), which
            # broke the unquoted shell command, and no extra import is needed.
            # (A final line without a trailing newline is counted too.)
            with open(data_file_path, 'rb') as shard:
                total_sample_num += sum(1 for _ in shard)
        logging.info('Skip testing data preparation.')
    logging.info(f'{total_sample_num} testing samples in total.')



INFO:root:Preparing testing data...


[INFO 2025-03-03 09:00:03,938] Preparing testing data...


73152it [00:01, 42465.48it/s]
INFO:root:Writing files...


[INFO 2025-03-03 09:00:06,477] Writing files...


INFO:root:73152 testing samples in total.


[INFO 2025-03-03 09:00:08,292] 73152 testing samples in total.


# Test


In [None]:
  # Restore the trained LSTUR model from the checkpoint, encode every test
  # news article once, and log a random-pair cosine similarity as a sanity
  # check on the news vectors.
  rank = 0
  is_distributed = False

  if args.enable_gpu:
      torch.cuda.set_device(rank)

  ckpt_path = get_checkpoint(args.model_dir, args.load_ckpt_name)
  assert ckpt_path is not None, 'No checkpoint found.'
  checkpoint = torch.load(ckpt_path, map_location='cpu')

  # Vocabularies saved with the checkpoint; the embedding table below is
  # sized from `word_dict`, so these are the ids the weights were trained on.
  category_dict = checkpoint['category_dict']
  subcategory_dict = checkpoint['subcategory_dict']
  word_dict = checkpoint['word_dict']

  # The matrix only fixes the embedding shape; load_state_dict() then
  # restores the saved parameters.
  dummy_embedding_matrix = np.zeros((len(word_dict) + 1, args.word_embedding_dim))
  model = LSTUR(args, dummy_embedding_matrix)
  model.load_state_dict(checkpoint['model_state_dict'])
  logging.info(f"Model loaded from {ckpt_path}")

  if args.enable_gpu:
      model.cuda(rank)

  model.eval()
  # Kept for backward compatibility: later cells may rely on autograd being
  # globally disabled after this cell has run.
  torch.set_grad_enabled(False)

  # read_news(mode='train') also returns vocabularies rebuilt from the file it
  # reads. They are discarded on purpose: overwriting the checkpoint
  # dictionaries with ones built from the test news would make token ids
  # disagree with the trained embedding rows.
  # NOTE(review): assumes get_doc_input tolerates words/categories missing
  # from the checkpoint vocabularies -- confirm against its definition.
  news, news_index, _, _, _ = read_news(
      os.path.join(args.test_data_dir, 'news.tsv'), args.test_abstract_dir, args, mode='train')

  news_title, news_category, news_subcategory, news_abstract = get_doc_input(
      news, news_index, category_dict, subcategory_dict, word_dict, args)

  # NOTE(review): news_abstract is not concatenated here even though
  # args.use_abstract exists -- confirm this matches the training input layout.
  news_combined = np.concatenate(
      [x for x in [news_title, news_category, news_subcategory] if x is not None], axis=-1)

  news_dataset = NewsDataset(news_combined)
  news_dataloader = DataLoader(news_dataset,
                               batch_size=args.batch_size,
                               num_workers=4)

  news_scoring = []
  with torch.no_grad():
      for input_ids in tqdm(news_dataloader):
          # Only move the batch to the GPU when the model lives there.
          if args.enable_gpu:
              input_ids = input_ids.cuda(rank)
          news_vec = model.news_encoder(input_ids)
          news_vec = news_vec.to(torch.device("cpu")).detach().numpy()
          news_scoring.extend(news_vec)

  news_scoring = np.array(news_scoring)
  logging.info("news scoring num: {}".format(news_scoring.shape[0]))

  if rank == 0:
      # Mean cosine similarity over random pairs of distinct news vectors.
      # Sampling starts at index 1, so row 0 is never drawn (presumably the
      # padding entry -- TODO confirm).
      num_trials = 1000000
      doc_sim = 0.0
      num_pairs = 0
      if len(news_scoring) > 2:
          # Row norms are loop-invariant: compute them once.
          norms = np.linalg.norm(news_scoring, axis=1)
          for _ in tqdm(range(num_trials)):
              i = random.randrange(1, len(news_scoring))
              j = random.randrange(1, len(news_scoring))
              if i != j:
                  doc_sim += np.dot(news_scoring[i], news_scoring[j]) / (norms[i] * norms[j])
                  num_pairs += 1
      # Average over the pairs actually accumulated, not over all draws
      # (draws with i == j are skipped).
      logging.info(f'News doc-sim: {doc_sim / max(num_pairs, 1)}')


  checkpoint = torch.load(ckpt_path, map_location='cpu')
INFO:root:Model loaded from /content/drive/MyDrive/Colab Notebooks/NewsRecommendation/LSTUR model/epoch-5.pt


[INFO 2025-03-03 09:04:02,942] Model loaded from /content/drive/MyDrive/Colab Notebooks/NewsRecommendation/LSTUR model/epoch-5.pt


42416it [00:03, 10638.65it/s]
100%|██████████| 42416/42416 [00:00<00:00, 113063.11it/s]
100%|██████████| 332/332 [00:02<00:00, 137.45it/s]
INFO:root:news scoring num: 42417


[INFO 2025-03-03 09:04:10,200] news scoring num: 42417


100%|██████████| 1000000/1000000 [00:09<00:00, 110477.04it/s]
INFO:root:News doc-sim: 0.48951068924601376


[INFO 2025-03-03 09:04:19,258] News doc-sim: 0.48951068924601376


In [None]:
def collate_fn(tuple_list):
    """Collate test samples into one batch.

    Each sample is indexed positionally as
    ``(user_index, clicked_news_length, candidate_news_feature,
    clicked_news_feature, label)``.

    Returns:
        A 5-tuple ``(user_index, clicked_news_length, candidate_news_feature,
        clicked_news_feature, label)`` where

        * ``user_index`` and ``clicked_news_length`` are tensors built with
          ``torch.tensor``;
        * ``clicked_news_feature`` is a float32 tensor stacking every sample's
          clicked-news features;
        * ``candidate_news_feature`` and ``label`` stay plain Python lists with
          one entry per sample (presumably because the number of candidates
          differs between samples -- TODO confirm against DatasetTest).
    """
    user_index = torch.tensor([sample[0] for sample in tuple_list])
    clicked_news_length = torch.tensor([sample[1] for sample in tuple_list])
    candidate_news_feature = [sample[2] for sample in tuple_list]
    # Stack through NumPy first: building a tensor straight from a list of
    # ndarrays is the slow path PyTorch warns about.
    clicked_news_feature = torch.tensor(
        np.asarray([sample[3] for sample in tuple_list], dtype=np.float32),
        dtype=torch.float32,
    )
    label = [sample[4] for sample in tuple_list]
    return (user_index, clicked_news_length, candidate_news_feature, clicked_news_feature, label)

# Behaviour shard for this rank, looked up inside the test data directory.
data_file_path = os.path.join(args.test_data_dir, f'behaviors_{rank}.tsv')

# Test dataset built from the shard, the news index and the news vectors
# computed earlier.
dataset = DatasetTest(
    data_file_path,
    news_index,
    news_scoring,
    '/content/user2id.json',
    args,
)
dataloader = DataLoader(
    dataset,
    collate_fn=collate_fn,
    batch_size=args.batch_size,
)

# Metric accumulators filled by the evaluation loop (one value per scored sample).
AUC = []
MRR = []
nDCG5 = []
nDCG10 = []
def print_metrics(rank, cnt, x):
    """Log the values in ``x`` as tab-separated percentages.

    Args:
        rank: Identifier printed in square brackets at the start of the line.
        cnt: Number of samples reported in the message.
        x: Iterable of numbers; each is multiplied by 100 and formatted with
            two decimals.
    """
    percentages = []
    for value in x:
        percentages.append("{:0.2f}".format(value * 100))
    joined = '\t'.join(percentages)
    logging.info("[{}] {} samples: {}".format(rank, cnt, joined))

def get_mean(arr):
    """Return a list with the mean of each sequence in ``arr``.

    Every element of ``arr`` is converted with ``np.array`` and reduced with
    ``.mean()``; the results are returned in the same order.
    """
    means = []
    for values in arr:
        means.append(np.array(values).mean())
    return means

def get_sum(arr):
    """Return a list holding the sum of each sequence in ``arr``.

    Every element of ``arr`` is converted with ``np.array`` and reduced with
    ``.sum()``; the results are returned in the same order.
    """
    totals = []
    for values in arr:
        totals.append(np.array(values).sum())
    return totals

# Evaluation loop: build one user vector per sample, score that sample's
# candidate rows by dot product, and accumulate ranking metrics.
local_sample_num = 0
logging.info("Testing...")

# Inference only: no_grad() stops autograd from recording a graph for every
# encoder call, which the loop never back-propagates through.
with torch.no_grad():
    for cnt, (user_index, clicked_news_length, candidate_news_feature, clicked_news_feature, label) in enumerate(dataloader):
        local_sample_num += clicked_news_feature.shape[0]
        if args.enable_gpu:
            # candidate_news_feature and label stay on the host as Python lists;
            # they are only consumed by NumPy / metric code below.
            user_index = user_index.cuda(rank, non_blocking=True)
            clicked_news_length = clicked_news_length.cuda(rank, non_blocking=True)
            clicked_news_feature = clicked_news_feature.cuda(rank, non_blocking=True)

        # NOTE(review): `news_scoring` is logged upstream as encoded news vectors and is
        # what DatasetTest receives; if clicked_news_feature rows are taken from it, this
        # passes already-encoded vectors through the news encoder a second time -- confirm
        # against DatasetTest.__getitem__.
        clicked_news_vector = torch.stack(
            [model.news_encoder(x) for x in clicked_news_feature])
        user_vecs = model.get_user_vector(user_index, clicked_news_length, clicked_news_vector).cpu().detach().numpy()

        # The previous version also ran model.news_encoder over every candidate list here,
        # but the result was never read: scoring has always used candidate_news_feature
        # directly. That dead forward pass is removed; reported metrics are unaffected.
        for user_vec, news_vec, sample_label in zip(user_vecs, candidate_news_feature, label):
            # Skip samples whose labels are all 0 or all 1.
            label_mean = np.mean(sample_label)
            if label_mean == 0 or label_mean == 1:
                continue

            score = np.dot(news_vec, user_vec)
            AUC.append(roc_auc_score(sample_label, score))
            MRR.append(mrr_score(sample_label, score))
            nDCG5.append(ndcg_score(sample_label, score, k=5))
            nDCG10.append(ndcg_score(sample_label, score, k=10))

        if cnt % args.log_steps == 0:
            print_metrics(rank, local_sample_num, get_mean([AUC, MRR, nDCG5, nDCG10]))

logging.info(f"Final Test Results: AUC: {np.mean(AUC):.5f}")
# Also report the other accumulated metrics, which were computed but never summarized.
logging.info(f"Final Test Results: MRR: {np.mean(MRR):.5f}, nDCG@5: {np.mean(nDCG5):.5f}, nDCG@10: {np.mean(nDCG10):.5f}")

INFO:root:Testing...


[INFO 2025-03-03 09:04:34,005] Testing...


INFO:root:[0] 128 samples: 56.19	26.62	31.12	35.99


[INFO 2025-03-03 09:04:36,455] [0] 128 samples: 56.19	26.62	31.12	35.99


INFO:root:[0] 12928 samples: 53.82	23.70	24.92	31.00


[INFO 2025-03-03 09:07:07,337] [0] 12928 samples: 53.82	23.70	24.92	31.00


INFO:root:[0] 25728 samples: 53.87	23.94	25.08	31.21


[INFO 2025-03-03 09:09:36,783] [0] 25728 samples: 53.87	23.94	25.08	31.21


INFO:root:[0] 38528 samples: 53.72	23.69	24.78	30.97


[INFO 2025-03-03 09:12:09,163] [0] 38528 samples: 53.72	23.69	24.78	30.97


INFO:root:[0] 51328 samples: 53.50	23.55	24.58	30.79


[INFO 2025-03-03 09:14:40,252] [0] 51328 samples: 53.50	23.55	24.58	30.79


INFO:root:[0] 64128 samples: 53.58	23.62	24.66	30.88


[INFO 2025-03-03 09:17:10,341] [0] 64128 samples: 53.58	23.62	24.66	30.88


INFO:root:Final Test Results: AUC: 0.53589


[INFO 2025-03-03 09:18:55,006] Final Test Results: AUC: 0.53589
