## Assignment 1. Neural Text Classification
## CS310 Natural Language Processing

**Total points**: 50

You should roughtly follow the structure of the notebook. Add additional cells if you feel needed. 

You can (and you should) re-use the code from Lab 2. 

Make sure your code is readable and well-structured.

### 0. Import Necessary Libraries

In [21]:
import json
import re

import adabound as adabound
from torch.utils.data import Dataset, DataLoader
import torch
import torch.nn as nn
from torchtext.vocab import build_vocab_from_iterator
import time
from torch.utils.data.dataset import random_split
from torchtext.data.functional import to_map_style_dataset
import jieba
import paddle
from sklearn.metrics import precision_score, recall_score, f1_score,accuracy_score
import  adabound


### 1. Data Processing

In [22]:
class myDataset(Dataset):
    def __init__(self, file_path):
        with open(file_path, 'r', encoding='utf-8') as file:
            lines = file.readlines()
        processed_data = []
        for line in lines:
            json_data = json.loads(line)
            processed_data.append(json_data)
        self.data = processed_data


    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        return self.data[index]


def basic_tokenizer(sentence):
    tokens = re.findall(r'[\u4e00-\u9fff]', sentence)
    return tokens

def improved_tokenizer(sentence):
    chinese_pattern = re.compile(r'[\u4e00-\u9fff]')
    digit_pattern = re.compile(r'\d+')
    english_pattern = re.compile(r'[a-zA-Z]+')
    # 匹配除了中英文数字空格之外的特殊字符
    punctuation_pattern = re.compile(r'[^\u4e00-\u9fff\da-zA-Z\s]')
    tokens = re.findall(r'[\u4e00-\u9fff]|\d+|[a-zA-Z]+|[^\u4e00-\u9fff\da-zA-Z\s]', sentence)
    return tokens

# Example usage
train_dataset = myDataset('train.jsonl')

train_iterator = iter(train_dataset)




def yield_tokens(data_iter):
    for item in data_iter:
        yield improved_tokenizer(item['sentence'])

count = 0
for tokens in yield_tokens(train_iterator): # Use a new iterator
    print(tokens)
    count += 1
    if count > 7:
        break



vocab = build_vocab_from_iterator(yield_tokens(train_iterator), specials=["<unk>"])
vocab.set_default_index(vocab["<unk>"])

# Print the vocabulary size
print("Vocabulary size:", len(vocab))

# Print a few random tokens and their corresponding indices
for token in ['保', '说', '，', '小']:
    print(f"Token: {token}, Index: {vocab[token]}")


text_pipeline = lambda x: vocab(improved_tokenizer(x))
label_pipeline = lambda x: int(x)


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def collate_batch(batch):
    label_list, token_ids_list, offsets = [], [], [0]
    for item in batch:
        label_list.append(label_pipeline(item['label'][0]))
        token_ids = torch.tensor(text_pipeline('sentence'), dtype=torch.int64)
        token_ids_list.append(token_ids)
        offsets.append(token_ids.size(0))

    labels = torch.tensor(label_list, dtype=torch.int64)
    offsets = torch.tensor(offsets[:-1]).cumsum(dim=0)
    token_ids = torch.cat(token_ids_list)

    return labels.to(device), token_ids.to(device), offsets.to(device)



train_dataloader = DataLoader(train_dataset, batch_size=8, shuffle=False,collate_fn=collate_batch)




# Test the dataloader
for i, (labels, token_ids, offsets) in enumerate(train_dataloader):
    print(f"batch {i} label: {labels}")
    print(f"batch {i} text: {token_ids}")
    print(f"batch {i} offsets: {offsets}")
    if i == 0:
        break

# What does offsets mean?
print('Number of tokens: ', token_ids.size(0))
print('Number of examples in one batch: ', labels.size(0))




['卖', '油', '条', '小', '刘', '说', '：', '我', '说']
['保', '姆', '小', '张', '说', '：', '干', '啥', '子', '嘛', '？']
['卖', '油', '条', '小', '刘', '说', '：', '你', '看', '你', '往', '星', '空', '看', '月', '朦', '胧', '，', '鸟', '朦', '胧']
['卖', '油', '条', '小', '刘', '说', '：', '咱', '是', '不', '是', '歇', '一', '下', '这', '双', '，', '疲', '惫', '的', '双', '腿', '？']
['卖', '油', '条', '小', '刘', '说', '：', '快', '把', '我', '累', '死', '了']
['卖', '油', '条', '小', '刘', '说', '：', '我', '说', '亲', '爱', '的', '大', '姐', '你', '贵', '姓', '啊', '？']
['保', '姆', '小', '张', '说', '：', '我', '免', '贵', '姓', '张', '我', '叫', '张', '凤', '姑']
['卖', '油', '条', '小', '刘', '说', '：', '凤', '姑']
Vocabulary size: 2820
Token: 保, Index: 74
Token: 说, Index: 1
Token: ，, Index: 6
Token: 小, Index: 23
batch 0 label: tensor([0, 0, 1, 0, 0, 1, 0, 0])
batch 0 text: tensor([0, 0, 0, 0, 0, 0, 0, 0])
batch 0 offsets: tensor([0, 1, 2, 3, 4, 5, 6, 7])
Number of tokens:  8
Number of examples in one batch:  8


### 2. Build the Model

In [23]:

class Model(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, num_classes):
        super(Model, self).__init__()

        self.embedding = nn.EmbeddingBag(vocab_size, embedding_dim, sparse=False)
        #指定两个隐藏层，每个隐藏层由nn.Linear和nn.ReLU激活函数组成。最后一层是线性层，输出num_classes个类别
        self.fc = nn.Sequential(
            nn.Linear(embedding_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, num_classes)
        )


    def forward(self, token_ids, offsets):
        embedded = self.embedding(token_ids, offsets)
        output = self.fc(embedded)
        return output


# Example usage
vocab_size = len(vocab)
embedding_dim = 256
hidden_dim = 512
num_classes = 2 #0,1

model = Model(vocab_size, embedding_dim, hidden_dim, num_classes).to(device)

model.eval()
with torch.no_grad():
    for i, (labels, token_ids, offsets) in enumerate(train_dataloader):
        output = model(token_ids, offsets)
        # print(f"batch {i} output: {output}")
        if i == 0:
            break

# Examine the output
print('output size:', output.size())
print('output:', output)



output size: torch.Size([8, 2])
output: tensor([[0.0545, 0.0791],
        [0.0545, 0.0791],
        [0.0545, 0.0791],
        [0.0545, 0.0791],
        [0.0545, 0.0791],
        [0.0545, 0.0791],
        [0.0545, 0.0791],
        [0.0545, 0.0791]])


### 3. Train and Evaluate

In [36]:

from sklearn.metrics import accuracy_score


def train(model, dataloader, optimizer, criterion, epoch: int):
    model.train()
    total_acc, total_count = 0, 0
    log_interval = 500
    start_time = time.time()

    for idx, (labels, token_ids, offsets) in enumerate(dataloader):
        optimizer.zero_grad()
        output = model(token_ids, offsets)
        try:
            loss = criterion(output, labels)
        except Exception:
            print('Error in loss calculation')
            print('output: ', output.size())
            print('labels: ', labels.size())
            # print('token_ids: ', token_ids)
            # print('offsets: ', offsets)
            raise
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
        optimizer.step()

        total_acc += (output.argmax(1) == labels).sum().item()
        total_count += labels.size(0)
        if idx % log_interval == 0 and idx > 0:
            elapsed = time.time() - start_time
            print(
                "| epoch {:3d} | {:5d}/{:5d} batches "
                "| accuracy {:8.3f}".format(
                    epoch, idx, len(dataloader), total_acc / total_count
                )
            )
            total_acc, total_count = 0, 0
            start_time = time.time()

def evaluate(model, dataloader, criterion):
    model.eval()
    total_acc, total_count = 0, 0
    y_true, y_pred = [], []

    with torch.no_grad():
        for idx, (labels, token_ids, offsets) in enumerate(dataloader):
            output = model(token_ids, offsets)
            loss = criterion(output, labels)
            total_acc += (output.argmax(1) == labels).sum().item()
            total_count += labels.size(0)

            y_true.extend(labels.cpu().numpy())
            y_pred.extend(output.argmax(1).cpu().numpy())

    #         accuracy = accuracy_score(true_labels, predictions)
    # precision, recall, f1, _ = precision_recall_fscore_support(true_labels, predictions, average='weighted', zero_division=0)

    accuracy = total_acc/total_count
    precision = precision_score(y_true, y_pred,average='weighted')
    recall = recall_score(y_true, y_pred,average='weighted')
    f1 = f1_score(y_true, y_pred,average='weighted')

    return accuracy, precision, recall, f1



# Hyperparameters
EPOCHS = 10  # epoch
LR = 1  # learning rate
BATCH_SIZE = 8  # batch size for training

criterion = torch.nn.CrossEntropyLoss()
# optimizer = torch.optim.SGD(model.parameters(), lr=LR)
# scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1.0, gamma=0.1)

optimizer = torch.optim.Adam(model.parameters(), lr=LR)
# scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10)
# scheduler=torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=0.1, steps_per_epoch=len(train_dataloader), epochs=10)
# scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1.0, gamma=0.1)

# First, obtain some output and labels
model.eval()
with torch.no_grad():
    for i, (labels, token_ids, offsets) in enumerate(train_dataloader):
        output = model(token_ids, offsets)
        # print(f"batch {i} output: {output}")
        if i == 0:
            break

loss = criterion(output, labels)
print('loss:', loss)

criterion2 = torch.nn.CrossEntropyLoss(reduction='none')
loss2 = criterion2(output, labels)
print('loss non-reduced:', loss2)
print('mean of loss non-reduced:', torch.mean(loss2))

# Manually calculate the loss
probs = torch.exp(output[0,:]) / torch.exp(output[0,:]).sum()
loss3 = -torch.log(probs[labels[0]])
print('loss manually computed:', loss3)





# Prepare train, valid, and test data
train_dataset = myDataset('train.jsonl')
test_dataset = myDataset('test.jsonl')

train_dataloader = DataLoader(
    train_dataset, batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate_batch
)

test_dataloader = DataLoader(
    test_dataset, batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate_batch
)
### Main Training Loop
scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=0.1, steps_per_epoch=len(train_dataloader), epochs=10)

# Run the training loop
total_accu = None
for epoch in range(1, EPOCHS + 1):
    epoch_start_time = time.time()
    train(model, train_dataloader, optimizer, criterion, epoch)
    accuracy, precision, recall, f1 = evaluate(model, test_dataloader, criterion)

    if total_accu is not None and total_accu > accuracy:
        scheduler.step()
    else:
        total_accu = accuracy

    print("-" * 59)
    print(
        "| end of epoch {:3d} | time: {:5.2f}s | "
        "valid accuracy {:8.3f} | precision {:8.3f} | recall {:6.3f} |f1 {:5.3f} ".format(
            epoch, time.time() - epoch_start_time, accuracy,precision,recall,f1
        )
    )
    print("-" * 59)


# Save the model
torch.save(model.state_dict(), "model.pth")
# accu_test = evaluate(model, valid_dataloader, criterion)
# print("test accuracy {:8.3f}".format(accu_test))

loss: tensor(0.5660)
loss non-reduced: tensor([0.2409, 1.5415, 0.2409, 0.2409, 1.5415, 0.2409, 0.2409, 0.2409])
mean of loss non-reduced: tensor(0.5660)
loss manually computed: tensor(0.2409)
| epoch   1 |   500/ 1585 batches | accuracy    0.709
| epoch   1 |  1000/ 1585 batches | accuracy    0.714
| epoch   1 |  1500/ 1585 batches | accuracy    0.713


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


-----------------------------------------------------------
| end of epoch   1 | time: 12.56s | valid accuracy    0.739 | precision    0.546 | recall  0.739 |f1 0.628 
-----------------------------------------------------------
| epoch   2 |   500/ 1585 batches | accuracy    0.701
| epoch   2 |  1000/ 1585 batches | accuracy    0.721
| epoch   2 |  1500/ 1585 batches | accuracy    0.709


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


-----------------------------------------------------------
| end of epoch   2 | time: 12.48s | valid accuracy    0.739 | precision    0.546 | recall  0.739 |f1 0.628 
-----------------------------------------------------------
| epoch   3 |   500/ 1585 batches | accuracy    0.721
| epoch   3 |  1000/ 1585 batches | accuracy    0.708
| epoch   3 |  1500/ 1585 batches | accuracy    0.708


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


-----------------------------------------------------------
| end of epoch   3 | time: 12.56s | valid accuracy    0.739 | precision    0.546 | recall  0.739 |f1 0.628 
-----------------------------------------------------------
| epoch   4 |   500/ 1585 batches | accuracy    0.713
| epoch   4 |  1000/ 1585 batches | accuracy    0.710
| epoch   4 |  1500/ 1585 batches | accuracy    0.714


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


-----------------------------------------------------------
| end of epoch   4 | time: 12.64s | valid accuracy    0.739 | precision    0.546 | recall  0.739 |f1 0.628 
-----------------------------------------------------------
| epoch   5 |   500/ 1585 batches | accuracy    0.720
| epoch   5 |  1000/ 1585 batches | accuracy    0.706
| epoch   5 |  1500/ 1585 batches | accuracy    0.710


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


-----------------------------------------------------------
| end of epoch   5 | time: 13.58s | valid accuracy    0.739 | precision    0.546 | recall  0.739 |f1 0.628 
-----------------------------------------------------------
| epoch   6 |   500/ 1585 batches | accuracy    0.721
| epoch   6 |  1000/ 1585 batches | accuracy    0.711
| epoch   6 |  1500/ 1585 batches | accuracy    0.706


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


-----------------------------------------------------------
| end of epoch   6 | time: 14.32s | valid accuracy    0.739 | precision    0.546 | recall  0.739 |f1 0.628 
-----------------------------------------------------------
| epoch   7 |   500/ 1585 batches | accuracy    0.709
| epoch   7 |  1000/ 1585 batches | accuracy    0.714
| epoch   7 |  1500/ 1585 batches | accuracy    0.720


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


-----------------------------------------------------------
| end of epoch   7 | time: 12.90s | valid accuracy    0.739 | precision    0.546 | recall  0.739 |f1 0.628 
-----------------------------------------------------------
| epoch   8 |   500/ 1585 batches | accuracy    0.705
| epoch   8 |  1000/ 1585 batches | accuracy    0.709
| epoch   8 |  1500/ 1585 batches | accuracy    0.719


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


-----------------------------------------------------------
| end of epoch   8 | time: 13.39s | valid accuracy    0.739 | precision    0.546 | recall  0.739 |f1 0.628 
-----------------------------------------------------------
| epoch   9 |   500/ 1585 batches | accuracy    0.720
| epoch   9 |  1000/ 1585 batches | accuracy    0.714
| epoch   9 |  1500/ 1585 batches | accuracy    0.706


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


-----------------------------------------------------------
| end of epoch   9 | time: 12.40s | valid accuracy    0.739 | precision    0.546 | recall  0.739 |f1 0.628 
-----------------------------------------------------------
| epoch  10 |   500/ 1585 batches | accuracy    0.726
| epoch  10 |  1000/ 1585 batches | accuracy    0.717
| epoch  10 |  1500/ 1585 batches | accuracy    0.694
-----------------------------------------------------------
| end of epoch  10 | time: 12.04s | valid accuracy    0.739 | precision    0.546 | recall  0.739 |f1 0.628 
-----------------------------------------------------------


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


### 4. Explore Word Segmentation

In [39]:
def jieba_tokenizer(sentence):
    tokens=[]
    seg_list = jieba.cut(sentence)
    for seg in seg_list:
        tokens.append(seg)
    return tokens

# Example usage
train_dataset = myDataset('train.jsonl')

train_iterator = iter(train_dataset)




def yield_tokens(data_iter):
    for item in data_iter:
        yield jieba_tokenizer(item['sentence'])



vocab = build_vocab_from_iterator(yield_tokens(train_iterator), specials=["<unk>"])
vocab.set_default_index(vocab["<unk>"])

# Print the vocabulary size
print("Vocabulary size:", len(vocab))

# Print a few random tokens and their corresponding indices
for token in ['保', '说', '，', '小']:
    print(f"Token: {token}, Index: {vocab[token]}")


text_pipeline = lambda x: vocab(jieba_tokenizer(x))
label_pipeline = lambda x: int(x)


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def collate_batch(batch):
    label_list, token_ids_list, offsets = [], [], [0]
    for item in batch:
        label_list.append(label_pipeline(item['label'][0]))
        token_ids = torch.tensor(text_pipeline('sentence'), dtype=torch.int64)
        token_ids_list.append(token_ids)
        offsets.append(token_ids.size(0))

    labels = torch.tensor(label_list, dtype=torch.int64)
    offsets = torch.tensor(offsets[:-1]).cumsum(dim=0)
    token_ids = torch.cat(token_ids_list)

    return labels.to(device), token_ids.to(device), offsets.to(device)



train_dataloader = DataLoader(train_dataset, batch_size=8, shuffle=False,collate_fn=collate_batch)




# Test the dataloader
for i, (labels, token_ids, offsets) in enumerate(train_dataloader):
    print(f"batch {i} label: {labels}")
    print(f"batch {i} text: {token_ids}")
    print(f"batch {i} offsets: {offsets}")
    if i == 0:
        break

# What does offsets mean?
print('Number of tokens: ', token_ids.size(0))
print('Number of examples in one batch: ', labels.size(0))

#######################################



class Model(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, num_classes):
        super(Model, self).__init__()

        self.embedding = nn.EmbeddingBag(vocab_size, embedding_dim, sparse=False)
        #指定两个隐藏层，每个隐藏层由nn.Linear和nn.ReLU激活函数组成。最后一层是线性层，输出num_classes个类别
        self.fc = nn.Sequential(
            nn.Linear(embedding_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, num_classes)
        )


    def forward(self, token_ids, offsets):
        embedded = self.embedding(token_ids, offsets)
        output = self.fc(embedded)
        return output


# Example usage
vocab_size = len(vocab)
embedding_dim = 256
hidden_dim = 512
num_classes = 2 #0,1

model_jieba = Model(vocab_size, embedding_dim, hidden_dim, num_classes).to(device)

model_jieba.eval()
with torch.no_grad():
    for i, (labels, token_ids, offsets) in enumerate(train_dataloader):
        output = model_jieba(token_ids, offsets)
        # print(f"batch {i} output: {output}")
        if i == 0:
            break

# Examine the output
print('output size:', output.size())
print('output:', output)


########################################################


# Hyperparameters
EPOCHS = 10  # epoch
LR = 1  # learning rate
BATCH_SIZE = 8  # batch size for training

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model_jieba.parameters(), lr=LR)
# scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10)
# scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=0.1, steps_per_epoch=len(train_dataloader), epochs=10)
# scheduler= torch.optim.lr_scheduler.StepLR(optimizer, 1.0, gamma=0.1)

# First, obtain some output and labels
model_jieba.eval()
with torch.no_grad():
    for i, (labels, token_ids, offsets) in enumerate(train_dataloader):
        output = model_jieba(token_ids, offsets)
        # print(f"batch {i} output: {output}")
        if i == 0:
            break

loss = criterion(output, labels)
print('loss:', loss)

criterion2 = torch.nn.CrossEntropyLoss(reduction='none')
loss2 = criterion2(output, labels)
print('loss non-reduced:', loss2)
print('mean of loss non-reduced:', torch.mean(loss2))

# Manually calculate the loss
probs = torch.exp(output[0,:]) / torch.exp(output[0,:]).sum()
loss3 = -torch.log(probs[labels[0]])
print('loss manually computed:', loss3)


# Prepare train, valid, and test data
train_dataset = myDataset('train.jsonl')
test_dataset = myDataset('test.jsonl')

train_dataloader = DataLoader(
    train_dataset, batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate_batch
)
test_dataloader = DataLoader(
    test_dataset, batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate_batch
)
### Main Training Loop

scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=0.1, steps_per_epoch=len(train_dataloader), epochs=10)

# Run the training loop
total_accu = None
for epoch in range(1, EPOCHS + 1):
    epoch_start_time = time.time()
    train(model_jieba, train_dataloader, optimizer, criterion, epoch)
    accuracy, precision, recall, f1 = evaluate(model_jieba, test_dataloader, criterion)

    if total_accu is not None and total_accu > accuracy:
        scheduler.step()
    else:
        total_accu = accuracy

    print("-" * 59)
    print(
        "| end of epoch {:3d} | time: {:5.2f}s | "
        "valid accuracy {:8.3f} | precision {:8.3f} | recall {:6.3f} |f1 {:5.3f} ".format(
            epoch, time.time() - epoch_start_time, total_accu,precision,recall,f1
        )
    )
    print("-" * 59)


# Save the model
torch.save(model_jieba.state_dict(), "model_jieba.pth")
# accu_test = evaluate(model_jieba, valid_dataloader, criterion)
# print("test accuracy {:8.3f}".format(accu_test))



Vocabulary size: 13847
Token: 保, Index: 4142
Token: 说, Index: 2
Token: ，, Index: 4
Token: 小, Index: 79
batch 0 label: tensor([0, 0, 1, 0, 0, 1, 0, 0])
batch 0 text: tensor([0, 0, 0, 0, 0, 0, 0, 0])
batch 0 offsets: tensor([0, 1, 2, 3, 4, 5, 6, 7])
Number of tokens:  8
Number of examples in one batch:  8
output size: torch.Size([8, 2])
output: tensor([[-0.0730, -0.3199],
        [-0.0730, -0.3199],
        [-0.0730, -0.3199],
        [-0.0730, -0.3199],
        [-0.0730, -0.3199],
        [-0.0730, -0.3199],
        [-0.0730, -0.3199],
        [-0.0730, -0.3199]])
loss: tensor(0.6390)
loss non-reduced: tensor([0.5773, 0.5773, 0.8242, 0.5773, 0.5773, 0.8242, 0.5773, 0.5773])
mean of loss non-reduced: tensor(0.6390)
loss manually computed: tensor(0.5773)
| epoch   1 |   500/ 1585 batches | accuracy    0.715
| epoch   1 |  1000/ 1585 batches | accuracy    0.711
| epoch   1 |  1500/ 1585 batches | accuracy    0.713


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


-----------------------------------------------------------
| end of epoch   1 | time: 12.65s | valid accuracy    0.739 | precision    0.546 | recall  0.739 |f1 0.628 
-----------------------------------------------------------
| epoch   2 |   500/ 1585 batches | accuracy    0.720
| epoch   2 |  1000/ 1585 batches | accuracy    0.710
| epoch   2 |  1500/ 1585 batches | accuracy    0.711


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


-----------------------------------------------------------
| end of epoch   2 | time: 13.47s | valid accuracy    0.739 | precision    0.546 | recall  0.739 |f1 0.628 
-----------------------------------------------------------
| epoch   3 |   500/ 1585 batches | accuracy    0.712
| epoch   3 |  1000/ 1585 batches | accuracy    0.709
| epoch   3 |  1500/ 1585 batches | accuracy    0.716


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


-----------------------------------------------------------
| end of epoch   3 | time: 14.93s | valid accuracy    0.739 | precision    0.546 | recall  0.739 |f1 0.628 
-----------------------------------------------------------
| epoch   4 |   500/ 1585 batches | accuracy    0.709
| epoch   4 |  1000/ 1585 batches | accuracy    0.709
| epoch   4 |  1500/ 1585 batches | accuracy    0.717


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


-----------------------------------------------------------
| end of epoch   4 | time: 12.46s | valid accuracy    0.739 | precision    0.546 | recall  0.739 |f1 0.628 
-----------------------------------------------------------
| epoch   5 |   500/ 1585 batches | accuracy    0.713
| epoch   5 |  1000/ 1585 batches | accuracy    0.720
| epoch   5 |  1500/ 1585 batches | accuracy    0.706


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


-----------------------------------------------------------
| end of epoch   5 | time: 12.06s | valid accuracy    0.739 | precision    0.546 | recall  0.739 |f1 0.628 
-----------------------------------------------------------
| epoch   6 |   500/ 1585 batches | accuracy    0.706
| epoch   6 |  1000/ 1585 batches | accuracy    0.704
| epoch   6 |  1500/ 1585 batches | accuracy    0.726


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


-----------------------------------------------------------
| end of epoch   6 | time: 11.99s | valid accuracy    0.739 | precision    0.546 | recall  0.739 |f1 0.628 
-----------------------------------------------------------
| epoch   7 |   500/ 1585 batches | accuracy    0.708
| epoch   7 |  1000/ 1585 batches | accuracy    0.711
| epoch   7 |  1500/ 1585 batches | accuracy    0.719


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


-----------------------------------------------------------
| end of epoch   7 | time: 12.02s | valid accuracy    0.739 | precision    0.546 | recall  0.739 |f1 0.628 
-----------------------------------------------------------
| epoch   8 |   500/ 1585 batches | accuracy    0.719
| epoch   8 |  1000/ 1585 batches | accuracy    0.703
| epoch   8 |  1500/ 1585 batches | accuracy    0.718


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


-----------------------------------------------------------
| end of epoch   8 | time: 12.04s | valid accuracy    0.739 | precision    0.546 | recall  0.739 |f1 0.628 
-----------------------------------------------------------
| epoch   9 |   500/ 1585 batches | accuracy    0.710
| epoch   9 |  1000/ 1585 batches | accuracy    0.717
| epoch   9 |  1500/ 1585 batches | accuracy    0.716


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


-----------------------------------------------------------
| end of epoch   9 | time: 11.95s | valid accuracy    0.739 | precision    0.546 | recall  0.739 |f1 0.628 
-----------------------------------------------------------
| epoch  10 |   500/ 1585 batches | accuracy    0.725
| epoch  10 |  1000/ 1585 batches | accuracy    0.705
| epoch  10 |  1500/ 1585 batches | accuracy    0.708
-----------------------------------------------------------
| end of epoch  10 | time: 12.42s | valid accuracy    0.739 | precision    0.546 | recall  0.739 |f1 0.628 
-----------------------------------------------------------


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
