In [1]:
import os
import json
import torch
import urllib
import random
import torch.nn as nn
import numpy as np
import pandas as pd

from tqdm import tqdm
from torch.utils.data import DataLoader, Dataset
from torch.nn.utils.rnn import pad_sequence
from sklearn.model_selection import train_test_split
from torchtext.data.utils import get_tokenizer
from torchtext.vocab import build_vocab_from_iterator

In [2]:
## data download
url = "https://storage.googleapis.com/download.tensorflow.org/data/sarcasm.json"
urllib.request.urlretrieve(url, 'sarcasm.json')

with open('./sarcasm.json') as f:
    datas = json.load(f)

In [3]:
df = pd.DataFrame(datas)
df.head()

Unnamed: 0,article_link,headline,is_sarcastic
0,https://www.huffingtonpost.com/entry/versace-b...,former versace store clerk sues over secret 'b...,0
1,https://www.huffingtonpost.com/entry/roseanne-...,the 'roseanne' revival catches up to our thorn...,0
2,https://local.theonion.com/mom-starting-to-fea...,mom starting to fear son's web series closest ...,1
3,https://politics.theonion.com/boehner-just-wan...,"boehner just wants wife to listen, not come up...",1
4,https://www.huffingtonpost.com/entry/jk-rowlin...,j.k. rowling wishes snape happy birthday in th...,0


In [4]:
## Tokenize example
## 문장을 token화한다.
sample = "Hello, my name is minjun kim."
tokenizer = get_tokenizer("basic_english")
tokenizer(sample)

['hello', ',', 'my', 'name', 'is', 'minjun', 'kim', '.']

In [5]:
def yield_tokens(sentences):
    for text in sentences:
        yield tokenizer(text)

vocab = build_vocab_from_iterator(yield_tokens(df['headline'].tolist()),
                                  specials=["<Unknown>"], ## 어휘에 없는 단어들을 "<Unknown>"로 대체
                                  min_freq=2,
                                  max_tokens=1000,)
vocab.set_default_index(vocab['<Unknown>']) ## 생성된 어휘에서 "<UNK>" 토큰을 기본 인덱스로 설정

str_to_idx = vocab.get_stoi()
idx_to_str = vocab.get_itos()

print(idx_to_str)
vocab(tokenizer(sample))

['<Unknown>', "'", 'to', 'of', 'the', 's', 'in', ',', 'for', 'a', 'on', '.', 'and', 'with', 'is', 'trump', 'new', 'man', 'from', 'at', 'you', 't', 'it', 'about', 'this', 'by', 'after', '?', 'be', 'that', 'how', 'out', 'he', 'as', 'up', 'not', 'what', 'can', 'are', 'your', 'his', 'who', 'just', 'has', 'will', 'more', 'all', 'one', 'into', 'report', 'i', 'why', 'have', 'area', 'woman', 'over', 'donald', 'u', 'says', 'day', 'obama', 'time', 'no', 'first', 'like', 'people', 'women', 'get', 'her', 'we', 'world', 'an', 'now', 'nation', 'house', 'life', 'off', 'clinton', 'they', 'make', 'still', 'than', 'was', 'my', 'white', 'back', 'down', 'if', 'when', 'family', 'could', 'she', 'their', 'do', 'before', 'americans', 'gop', 'most', 'way', '5', 'black', 'year', 'here', 'study', 'years', 'bill', 'should', 'would', 'him', 'president', 'best', 'so', 'america', 'police', 'only', 'watch', 'school', 'show', 'american', 'really', 'being', 'but', 'know', 'home', 'mom', 'things', 'death', 'during', 'go

[0, 7, 83, 347, 14, 0, 474, 11]

In [6]:
x_train, x_test, y_train, y_test = train_test_split(df["headline"],
                                                    df['is_sarcastic'],
                                                    stratify=df['is_sarcastic'],
                                                    test_size=0.2)

print(len(x_train[0]))
print(x_train[0])

print(len(x_train[1]))
print(x_train[1])

78
former versace store clerk sues over secret 'black code' for minority shoppers
84
the 'roseanne' revival catches up to our thorny political mood, for better and worse


In [7]:
device = torch.device("cuda" if torch.cuda.is_available() else 'cpu')
print(device)

class CustomDataset(Dataset):
    def __init__(self, texts, labels, vocab, tokenizer):
        super().__init__()
        self.texts = texts
        self.labels = labels
        self.vocab = vocab
        self.tokenizer = tokenizer

    def __len__(self):
        return len(self.labels)
    
    def __getitem__(self, idx):
        text = self.texts.iloc[idx]
        label = self.labels.iloc[idx]

        return self.vocab(self.tokenizer(text)), label
    
def collate_batch(batch, max_sequence_length):
    label_list, text_list = [], []

    for text, label in batch:
        processed_text = torch.tensor(text[:max_sequence_length], dtype=torch.int64) ## 길이를 max_sequence_length를 넘지 못하게 만든다.
        text_list.append(processed_text)
        label_list.append(label)

    label_list = torch.tensor(label_list, dtype=torch.int64)
    text_list = pad_sequence(text_list, batch_first=True, padding_value=0) ## padding을 통해 데이터의 길이를 일정하게 맞춰준다.

    return text_list.to(device), label_list.to(device)

cuda


In [8]:
train_dataset = CustomDataset(x_train, y_train, vocab=vocab, tokenizer=tokenizer)
valid_dataset = CustomDataset(x_test, y_test, vocab=vocab, tokenizer=tokenizer)

for data in train_dataset:
    print(data)
    break

([96, 568, 415, 0, 43, 45, 0, 664, 139, 229, 829], 0)


In [9]:
MAX_SEQUENCE_LEN = 100
BATCH_SIZE = 32

train_dataloader = DataLoader(train_dataset,
                              batch_size=BATCH_SIZE,
                              shuffle=True,
                              collate_fn=lambda x : collate_batch(x, MAX_SEQUENCE_LEN)) ## 최대 길이가 100

valid_dataloader = DataLoader(valid_dataset,
                              batch_size=BATCH_SIZE,
                              shuffle=False,
                              collate_fn=lambda x : collate_batch(x, MAX_SEQUENCE_LEN))

for data in train_dataloader:
    tokens, labels = data[0], data[1]
    print(tokens.shape, labels.shape)
    print(tokens[0])
    break

torch.Size([32, 22]) torch.Size([32])
tensor([ 17, 926,  66,   6,   0, 615, 107, 265, 108, 631,   0,   0,   0,   6,
        827,   0,   0,   0,   0,   0,   0,   0], device='cuda:0')


In [10]:
NUM_VOCAB = len(vocab)
print(len(vocab))

x, y = next(iter(train_dataloader))
x = x.to(device)
y = y.to(device)

1000


In [11]:
EMBEDDING_DIM = 30
embedding = nn.Embedding(len(vocab), EMBEDDING_DIM).to(device) ## len(vocab)개의 단어들을 EMBEDDING_DIM 크기의 실수 벡터로 변환하는 임베딩을 생성.
print(embedding)

embedding_out = embedding(x)
print(embedding_out.shape)
print(embedding_out[0])

Embedding(1000, 30)
torch.Size([32, 20, 30])
tensor([[ 5.6780e-01, -1.8588e-01, -1.0390e+00, -1.5141e+00,  4.4408e-01,
         -1.3901e-01, -9.3228e-01, -1.7407e+00, -1.9198e+00, -1.3429e+00,
          4.8739e-01, -1.0344e+00,  1.9785e+00, -9.5842e-02, -5.3842e-01,
         -2.1118e-03,  3.5626e-01, -1.4527e+00, -1.8026e-01,  8.5381e-01,
          8.5727e-01, -7.3970e-01, -5.4882e-01,  8.9367e-01,  5.1219e-01,
          7.1578e-01,  6.1685e-01,  1.9030e-01,  1.4914e+00, -1.3061e+00],
        [ 5.6780e-01, -1.8588e-01, -1.0390e+00, -1.5141e+00,  4.4408e-01,
         -1.3901e-01, -9.3228e-01, -1.7407e+00, -1.9198e+00, -1.3429e+00,
          4.8739e-01, -1.0344e+00,  1.9785e+00, -9.5842e-02, -5.3842e-01,
         -2.1118e-03,  3.5626e-01, -1.4527e+00, -1.8026e-01,  8.5381e-01,
          8.5727e-01, -7.3970e-01, -5.4882e-01,  8.9367e-01,  5.1219e-01,
          7.1578e-01,  6.1685e-01,  1.9030e-01,  1.4914e+00, -1.3061e+00],
        [-4.5078e-01,  3.2779e-01, -1.8165e+00, -1.4904e+00, -1.3

In [12]:
HIDDEN_SIZE = 64
NUM_LAYERS = 1
BIDIRECTIONAL = 1
SEQ_LENGTH = x.size(1)

lstm = nn.LSTM(input_size=EMBEDDING_DIM, hidden_size=HIDDEN_SIZE, batch_first=True, device=device)
lstm

LSTM(30, 64, batch_first=True)

In [13]:
h_0 = torch.zeros(NUM_LAYERS * BIDIRECTIONAL, SEQ_LENGTH, HIDDEN_SIZE).to(device)
c_0 = torch.zeros(NUM_LAYERS * BIDIRECTIONAL, SEQ_LENGTH, HIDDEN_SIZE).to(device)

In [14]:
lstm_out, (hidden, cell) = lstm(embedding_out)
print(lstm_out.shape)
print(hidden.shape, cell.shape)

torch.Size([32, 20, 64])
torch.Size([1, 32, 64]) torch.Size([1, 32, 64])


In [15]:
def EmbeddingLSTM(x, vocab_size, embedding_dim, hidden_size, bidrectional, num_layers, device):
    print(f"Input Shape : {x.shape}")
    x = x.to(device)
    
    
    embedding = nn.Embedding(vocab_size, embedding_dim, device=device)
    embedding_out = embedding(x)
    print(f"Embedded Input Shape : {embedding_out.shape}")

    lstm = nn.LSTM(input_size=embedding_dim,
                   hidden_size=hidden_size,
                   num_layers=num_layers,
                   batch_first=True,
                   device=device)
    
    bidi = 2 if bidrectional else 1
    out, (h, c) = lstm(embedding_out)
    print(f"Output Shape : {out.shape}")
    print(f"Hidden Shape : {h.shape}")
    print(f"Cell State Shape : {c.shape}")

In [16]:
EmbeddingLSTM(x, vocab_size=len(vocab), embedding_dim=30, hidden_size=64, bidrectional=False, num_layers=2, device=device)

Input Shape : torch.Size([32, 20])
Embedded Input Shape : torch.Size([32, 20, 30])
Output Shape : torch.Size([32, 20, 64])
Hidden Shape : torch.Size([2, 32, 64])
Cell State Shape : torch.Size([2, 32, 64])


In [17]:
class TextClassificationModel(nn.Module):
    def __init__(self, num_classes, vocab_size, embedding_dim, hidden_size, num_layers, bidirectional=True, drop_prob=0.1):
        super(TextClassificationModel, self).__init__()
        self.num_classes = num_classes 
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.bidirectional = 2 if bidirectional else 1
        
        self.embedding = nn.Embedding(num_embeddings=vocab_size, 
                                      embedding_dim=embedding_dim)
        
        self.lstm = nn.LSTM(input_size=embedding_dim, 
                            hidden_size=hidden_size, 
                            num_layers=num_layers, 
                            batch_first=True,
                            bidirectional=bidirectional,
                           )
        
        self.dropout = nn.Dropout(drop_prob)
        
        self.relu = nn.ReLU()
        
        self.fc = nn.Linear(hidden_size*self.bidirectional, hidden_size)
        self.output = nn.Linear(hidden_size, num_classes)
        
    def init_hidden_and_cell_state(self, batch_size, device):
        # LSTM 입력시 초기 Cell 에 대한 가중치 초기화를 진행합니다.
        # (num_layers*bidirectional, batch_size, hidden_size)
        self.hidden_and_cell = (
            torch.zeros(self.num_layers*self.bidirectional, batch_size, self.hidden_size).to(device),
            torch.zeros(self.num_layers*self.bidirectional, batch_size, self.hidden_size).to(device),
        )
        
    def forward(self, x):
        x = self.embedding(x)
        output, (h, c) = self.lstm(x, self.hidden_and_cell)
        # (batch_size, seq_length, hidden_size*bidirectional)
        # last sequence 의 (batch_size, hidden_size*bidirectional)
        h = output[:, -1, :]
        o = self.dropout(h)
        o = self.relu(self.fc(o))
        o = self.dropout(o)
        return self.output(o)

In [18]:
config = {
    'num_classes': 2, 
    'vocab_size': len(vocab),
    'embedding_dim': 16, 
    'hidden_size': 32, 
    'num_layers': 2, 
    'bidirectional': True,
}

model = TextClassificationModel(**config)
model.to(device)

TextClassificationModel(
  (embedding): Embedding(1000, 16)
  (lstm): LSTM(16, 32, num_layers=2, batch_first=True, bidirectional=True)
  (dropout): Dropout(p=0.1, inplace=False)
  (relu): ReLU()
  (fc): Linear(in_features=64, out_features=32, bias=True)
  (output): Linear(in_features=32, out_features=2, bias=True)
)

In [19]:
# loss 정의: CrossEntropyLoss
loss_fn = nn.CrossEntropyLoss()

# 옵티마이저 정의: bert.paramters()와 learning_rate 설정
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [20]:
def train(model, data_loader, loss_fn, optimizer, device):
    model.train()

    running_loss = 0
    corr = 0
    counts = 0
    prograss_bar = tqdm(data_loader, unit='batch', total=len(data_loader), mininterval=1)
    for idx, (txt, lbl) in enumerate(prograss_bar):
        txt = txt.to(device)
        lbl = lbl.to(device)
        
        optimizer.zero_grad()
        model.init_hidden_and_cell_state(len(txt), device)
        
        output = model(txt)
        
        loss = loss_fn(output, lbl)
        
        loss.backward()
        
        optimizer.step()
        
        output = output.argmax(dim=1)
        
        corr += (output == lbl).sum().item()
        counts += len(lbl)
        
        running_loss += loss.item()
        
        prograss_bar.set_description(f"training loss: {running_loss/(idx+1):.5f}, training accuracy: {corr / counts:.5f}")
    
    acc = corr / len(data_loader.dataset)

    return running_loss / len(data_loader), acc

In [21]:
def evaluate(model, data_loader, loss_fn, device):
    model.eval()
    with torch.no_grad():
        corr = 0
        running_loss = 0
        
        for txt, lbl in data_loader:
            txt = txt.to(device)
            lbl = lbl.to(device)
            model.init_hidden_and_cell_state(len(txt), device)
    
            output = model(txt)
            loss = loss_fn(output, lbl)
            output = output.argmax(dim=1)

            corr += (output == lbl).sum().item()
            running_loss += loss.item()
        
        acc = corr / len(data_loader.dataset)
        return running_loss / len(data_loader), acc

In [23]:
num_epochs = 100
model_name = 'LSTM-Text-Classification'

min_loss = np.inf
for epoch in range(num_epochs):
    train_loss, train_acc = train(model, train_dataloader, loss_fn, optimizer, device)
    val_loss, val_acc = evaluate(model, valid_dataloader, loss_fn, device)   
    
    if val_loss < min_loss:
        print(f'[INFO] val_loss has been improved from {min_loss:.5f} to {val_loss:.5f}. Saving Model!')
        min_loss = val_loss

        try:
            torch.save(model.state_dict(), f'/home/pervinco/Models/LSTM/{model_name}.pth')
        except:
            os.makedirs('/home/pervinco/Models/LSTM')
    
    print(f'epoch {epoch+1:02d}, loss: {train_loss:.5f}, acc: {train_acc:.5f}, val_loss: {val_loss:.5f}, val_accuracy: {val_acc:.5f}')

training loss: 0.51338, training accuracy: 0.74840: 100%|██████████| 668/668 [00:01<00:00, 469.95batch/s]


[INFO] val_loss has been improved from inf to 0.44937. Saving Model!
epoch 01, loss: 0.51338, acc: 0.74840, val_loss: 0.44937, val_accuracy: 0.78940


training loss: 0.44191, training accuracy: 0.79506: 100%|██████████| 668/668 [00:01<00:00, 459.89batch/s]


[INFO] val_loss has been improved from 0.44937 to 0.42500. Saving Model!
epoch 02, loss: 0.44191, acc: 0.79506, val_loss: 0.42500, val_accuracy: 0.80513


training loss: 0.40298, training accuracy: 0.81556: 100%|██████████| 668/668 [00:01<00:00, 470.75batch/s]


[INFO] val_loss has been improved from 0.42500 to 0.38990. Saving Model!
epoch 03, loss: 0.40298, acc: 0.81556, val_loss: 0.38990, val_accuracy: 0.82497


training loss: 0.37599, training accuracy: 0.83002: 100%|██████████| 668/668 [00:01<00:00, 512.71batch/s]


[INFO] val_loss has been improved from 0.38990 to 0.37807. Saving Model!
epoch 04, loss: 0.37599, acc: 0.83002, val_loss: 0.37807, val_accuracy: 0.83265


training loss: 0.35653, training accuracy: 0.84097: 100%|██████████| 668/668 [00:01<00:00, 470.63batch/s]


[INFO] val_loss has been improved from 0.37807 to 0.36324. Saving Model!
epoch 05, loss: 0.35653, acc: 0.84097, val_loss: 0.36324, val_accuracy: 0.83602


training loss: 0.33982, training accuracy: 0.84902: 100%|██████████| 668/668 [00:01<00:00, 458.80batch/s]


[INFO] val_loss has been improved from 0.36324 to 0.35874. Saving Model!
epoch 06, loss: 0.33982, acc: 0.84902, val_loss: 0.35874, val_accuracy: 0.83901


training loss: 0.32610, training accuracy: 0.85426: 100%|██████████| 668/668 [00:01<00:00, 466.29batch/s]


epoch 07, loss: 0.32610, acc: 0.85426, val_loss: 0.36131, val_accuracy: 0.84051


training loss: 0.31487, training accuracy: 0.85955: 100%|██████████| 668/668 [00:01<00:00, 463.58batch/s]


epoch 08, loss: 0.31487, acc: 0.85955, val_loss: 0.37519, val_accuracy: 0.84425


training loss: 0.30416, training accuracy: 0.86643: 100%|██████████| 668/668 [00:01<00:00, 469.38batch/s]


epoch 09, loss: 0.30416, acc: 0.86643, val_loss: 0.36255, val_accuracy: 0.84388


training loss: 0.29213, training accuracy: 0.87279: 100%|██████████| 668/668 [00:01<00:00, 474.82batch/s]


epoch 10, loss: 0.29213, acc: 0.87279, val_loss: 0.38074, val_accuracy: 0.83021


training loss: 0.28369, training accuracy: 0.87607: 100%|██████████| 668/668 [00:01<00:00, 475.16batch/s]


epoch 11, loss: 0.28369, acc: 0.87607, val_loss: 0.36825, val_accuracy: 0.84238


training loss: 0.27158, training accuracy: 0.87930: 100%|██████████| 668/668 [00:01<00:00, 494.78batch/s]


epoch 12, loss: 0.27158, acc: 0.87930, val_loss: 0.38200, val_accuracy: 0.83714


training loss: 0.26138, training accuracy: 0.88492: 100%|██████████| 668/668 [00:01<00:00, 485.01batch/s]


epoch 13, loss: 0.26138, acc: 0.88492, val_loss: 0.37929, val_accuracy: 0.83677


training loss: 0.24944, training accuracy: 0.88988: 100%|██████████| 668/668 [00:01<00:00, 525.40batch/s]


epoch 14, loss: 0.24944, acc: 0.88988, val_loss: 0.39755, val_accuracy: 0.84145


training loss: 0.23973, training accuracy: 0.89811: 100%|██████████| 668/668 [00:01<00:00, 510.28batch/s]


epoch 15, loss: 0.23973, acc: 0.89811, val_loss: 0.40683, val_accuracy: 0.83976


training loss: 0.22721, training accuracy: 0.90293: 100%|██████████| 668/668 [00:01<00:00, 473.75batch/s]


epoch 16, loss: 0.22721, acc: 0.90293, val_loss: 0.43756, val_accuracy: 0.83920


training loss: 0.21675, training accuracy: 0.90710: 100%|██████████| 668/668 [00:01<00:00, 475.81batch/s]


epoch 17, loss: 0.21675, acc: 0.90710, val_loss: 0.41442, val_accuracy: 0.83826


training loss: 0.21023, training accuracy: 0.90958: 100%|██████████| 668/668 [00:01<00:00, 486.94batch/s]


epoch 18, loss: 0.21023, acc: 0.90958, val_loss: 0.43847, val_accuracy: 0.83789


training loss: 0.20320, training accuracy: 0.91412: 100%|██████████| 668/668 [00:01<00:00, 472.70batch/s]


epoch 19, loss: 0.20320, acc: 0.91412, val_loss: 0.46468, val_accuracy: 0.83714


training loss: 0.19123, training accuracy: 0.91852: 100%|██████████| 668/668 [00:01<00:00, 475.30batch/s]


epoch 20, loss: 0.19123, acc: 0.91852, val_loss: 0.44767, val_accuracy: 0.83920


training loss: 0.18419, training accuracy: 0.92044: 100%|██████████| 668/668 [00:01<00:00, 488.35batch/s]


epoch 21, loss: 0.18419, acc: 0.92044, val_loss: 0.47779, val_accuracy: 0.83826


training loss: 0.17625, training accuracy: 0.92582: 100%|██████████| 668/668 [00:01<00:00, 489.19batch/s]


epoch 22, loss: 0.17625, acc: 0.92582, val_loss: 0.53261, val_accuracy: 0.83620


training loss: 0.16986, training accuracy: 0.92928: 100%|██████████| 668/668 [00:01<00:00, 492.80batch/s]


epoch 23, loss: 0.16986, acc: 0.92928, val_loss: 0.53854, val_accuracy: 0.82984


training loss: 0.16016, training accuracy: 0.93265: 100%|██████████| 668/668 [00:01<00:00, 485.12batch/s]


epoch 24, loss: 0.16016, acc: 0.93265, val_loss: 0.55317, val_accuracy: 0.83396


training loss: 0.15746, training accuracy: 0.93331: 100%|██████████| 668/668 [00:01<00:00, 460.16batch/s]


epoch 25, loss: 0.15746, acc: 0.93331, val_loss: 0.55691, val_accuracy: 0.83265


training loss: 0.14572, training accuracy: 0.93958: 100%|██████████| 668/668 [00:01<00:00, 471.80batch/s]


epoch 26, loss: 0.14572, acc: 0.93958, val_loss: 0.63120, val_accuracy: 0.83096


training loss: 0.14465, training accuracy: 0.93897: 100%|██████████| 668/668 [00:01<00:00, 461.38batch/s]


epoch 27, loss: 0.14465, acc: 0.93897, val_loss: 0.56265, val_accuracy: 0.83152


training loss: 0.13737, training accuracy: 0.94389: 100%|██████████| 668/668 [00:01<00:00, 457.03batch/s]


epoch 28, loss: 0.13737, acc: 0.94389, val_loss: 0.56743, val_accuracy: 0.83059


training loss: 0.13118, training accuracy: 0.94604: 100%|██████████| 668/668 [00:01<00:00, 472.26batch/s]


epoch 29, loss: 0.13118, acc: 0.94604, val_loss: 0.67143, val_accuracy: 0.82946


training loss: 0.12759, training accuracy: 0.94716: 100%|██████████| 668/668 [00:01<00:00, 484.98batch/s]


epoch 30, loss: 0.12759, acc: 0.94716, val_loss: 0.63170, val_accuracy: 0.83265


training loss: 0.12434, training accuracy: 0.94828: 100%|██████████| 668/668 [00:01<00:00, 465.68batch/s]


epoch 31, loss: 0.12434, acc: 0.94828, val_loss: 0.65236, val_accuracy: 0.83040


training loss: 0.11533, training accuracy: 0.95179: 100%|██████████| 668/668 [00:01<00:00, 464.62batch/s]


epoch 32, loss: 0.11533, acc: 0.95179, val_loss: 0.65434, val_accuracy: 0.82909


training loss: 0.11723, training accuracy: 0.95161: 100%|██████████| 668/668 [00:01<00:00, 485.63batch/s]


epoch 33, loss: 0.11723, acc: 0.95161, val_loss: 0.69722, val_accuracy: 0.82909


training loss: 0.11166, training accuracy: 0.95470: 100%|██████████| 668/668 [00:01<00:00, 481.04batch/s]


epoch 34, loss: 0.11166, acc: 0.95470, val_loss: 0.70454, val_accuracy: 0.83246


training loss: 0.10915, training accuracy: 0.95554: 100%|██████████| 668/668 [00:01<00:00, 471.51batch/s]


epoch 35, loss: 0.10915, acc: 0.95554, val_loss: 0.68944, val_accuracy: 0.83134


training loss: 0.10246, training accuracy: 0.95671: 100%|██████████| 668/668 [00:01<00:00, 473.89batch/s]


epoch 36, loss: 0.10246, acc: 0.95671, val_loss: 0.66513, val_accuracy: 0.83040


training loss: 0.10184, training accuracy: 0.95802: 100%|██████████| 668/668 [00:01<00:00, 470.88batch/s]


epoch 37, loss: 0.10184, acc: 0.95802, val_loss: 0.71646, val_accuracy: 0.83152


training loss: 0.09749, training accuracy: 0.96083: 100%|██████████| 668/668 [00:01<00:00, 475.94batch/s]


epoch 38, loss: 0.09749, acc: 0.96083, val_loss: 0.78535, val_accuracy: 0.82591


training loss: 0.09487, training accuracy: 0.96069: 100%|██████████| 668/668 [00:01<00:00, 492.23batch/s]


epoch 39, loss: 0.09487, acc: 0.96069, val_loss: 0.75646, val_accuracy: 0.82254


training loss: 0.09006, training accuracy: 0.96279: 100%|██████████| 668/668 [00:01<00:00, 472.93batch/s]


epoch 40, loss: 0.09006, acc: 0.96279, val_loss: 0.76735, val_accuracy: 0.83059


training loss: 0.09250, training accuracy: 0.96148: 100%|██████████| 668/668 [00:01<00:00, 476.72batch/s]


epoch 41, loss: 0.09250, acc: 0.96148, val_loss: 0.69540, val_accuracy: 0.82909


training loss: 0.08379, training accuracy: 0.96593: 100%|██████████| 668/668 [00:01<00:00, 485.69batch/s]


epoch 42, loss: 0.08379, acc: 0.96593, val_loss: 0.78143, val_accuracy: 0.82516


training loss: 0.09006, training accuracy: 0.96218: 100%|██████████| 668/668 [00:01<00:00, 529.26batch/s]


epoch 43, loss: 0.09006, acc: 0.96218, val_loss: 0.78868, val_accuracy: 0.82834


training loss: 0.08288, training accuracy: 0.96481: 100%|██████████| 668/668 [00:01<00:00, 480.95batch/s]


epoch 44, loss: 0.08288, acc: 0.96481, val_loss: 0.90844, val_accuracy: 0.82216


training loss: 0.07748, training accuracy: 0.96785: 100%|██████████| 668/668 [00:01<00:00, 477.06batch/s]


epoch 45, loss: 0.07748, acc: 0.96785, val_loss: 0.90688, val_accuracy: 0.82666


training loss: 0.07012, training accuracy: 0.96991: 100%|██████████| 668/668 [00:01<00:00, 490.97batch/s]


epoch 46, loss: 0.07012, acc: 0.96991, val_loss: 0.83690, val_accuracy: 0.82741


training loss: 0.07225, training accuracy: 0.97005: 100%|██████████| 668/668 [00:01<00:00, 478.95batch/s]


epoch 47, loss: 0.07225, acc: 0.97005, val_loss: 0.86883, val_accuracy: 0.82872


training loss: 0.07206, training accuracy: 0.96902: 100%|██████████| 668/668 [00:01<00:00, 494.43batch/s]


epoch 48, loss: 0.07206, acc: 0.96902, val_loss: 0.95939, val_accuracy: 0.82778


training loss: 0.07102, training accuracy: 0.97164: 100%|██████████| 668/668 [00:01<00:00, 481.76batch/s]


epoch 49, loss: 0.07102, acc: 0.97164, val_loss: 0.88788, val_accuracy: 0.82273


training loss: 0.06544, training accuracy: 0.97234: 100%|██████████| 668/668 [00:01<00:00, 495.41batch/s]


epoch 50, loss: 0.06544, acc: 0.97234, val_loss: 1.02424, val_accuracy: 0.82647


training loss: 0.06544, training accuracy: 0.97150: 100%|██████████| 668/668 [00:01<00:00, 487.16batch/s]


epoch 51, loss: 0.06544, acc: 0.97150, val_loss: 0.92731, val_accuracy: 0.82478


training loss: 0.06916, training accuracy: 0.97183: 100%|██████████| 668/668 [00:01<00:00, 480.47batch/s]


epoch 52, loss: 0.06916, acc: 0.97183, val_loss: 0.92492, val_accuracy: 0.80850


training loss: 0.06927, training accuracy: 0.97159: 100%|██████████| 668/668 [00:01<00:00, 483.60batch/s]


epoch 53, loss: 0.06927, acc: 0.97159, val_loss: 1.05229, val_accuracy: 0.82179


training loss: 0.06134, training accuracy: 0.97487: 100%|██████████| 668/668 [00:01<00:00, 476.58batch/s]


epoch 54, loss: 0.06134, acc: 0.97487, val_loss: 0.99010, val_accuracy: 0.82010


training loss: 0.05590, training accuracy: 0.97613: 100%|██████████| 668/668 [00:01<00:00, 478.44batch/s]


epoch 55, loss: 0.05590, acc: 0.97613, val_loss: 1.09195, val_accuracy: 0.82628


training loss: 0.06379, training accuracy: 0.97398: 100%|██████████| 668/668 [00:01<00:00, 481.59batch/s]


epoch 56, loss: 0.06379, acc: 0.97398, val_loss: 0.84133, val_accuracy: 0.82067


training loss: 0.05746, training accuracy: 0.97618: 100%|██████████| 668/668 [00:01<00:00, 492.50batch/s]


epoch 57, loss: 0.05746, acc: 0.97618, val_loss: 1.03810, val_accuracy: 0.82067


training loss: 0.05821, training accuracy: 0.97580: 100%|██████████| 668/668 [00:01<00:00, 465.07batch/s]


epoch 58, loss: 0.05821, acc: 0.97580, val_loss: 0.97127, val_accuracy: 0.81917


training loss: 0.05367, training accuracy: 0.97768: 100%|██████████| 668/668 [00:01<00:00, 458.53batch/s]


epoch 59, loss: 0.05367, acc: 0.97768, val_loss: 1.03248, val_accuracy: 0.82310


training loss: 0.05294, training accuracy: 0.97697: 100%|██████████| 668/668 [00:01<00:00, 466.44batch/s]


epoch 60, loss: 0.05294, acc: 0.97697, val_loss: 1.04257, val_accuracy: 0.82067


training loss: 0.05214, training accuracy: 0.97754: 100%|██████████| 668/668 [00:01<00:00, 456.02batch/s]


epoch 61, loss: 0.05214, acc: 0.97754, val_loss: 1.03970, val_accuracy: 0.82048


training loss: 0.05544, training accuracy: 0.97683: 100%|██████████| 668/668 [00:01<00:00, 472.93batch/s]


epoch 62, loss: 0.05544, acc: 0.97683, val_loss: 1.08753, val_accuracy: 0.81037


training loss: 0.05283, training accuracy: 0.97777: 100%|██████████| 668/668 [00:01<00:00, 461.67batch/s]


epoch 63, loss: 0.05283, acc: 0.97777, val_loss: 0.96844, val_accuracy: 0.82366


training loss: 0.04600, training accuracy: 0.98020: 100%|██████████| 668/668 [00:01<00:00, 453.82batch/s]


epoch 64, loss: 0.04600, acc: 0.98020, val_loss: 1.17520, val_accuracy: 0.81992


training loss: 0.05285, training accuracy: 0.97828: 100%|██████████| 668/668 [00:01<00:00, 457.98batch/s]


epoch 65, loss: 0.05285, acc: 0.97828, val_loss: 1.02435, val_accuracy: 0.82273


training loss: 0.04718, training accuracy: 0.97988: 100%|██████████| 668/668 [00:01<00:00, 472.14batch/s]


epoch 66, loss: 0.04718, acc: 0.97988, val_loss: 1.04197, val_accuracy: 0.81674


training loss: 0.04423, training accuracy: 0.98119: 100%|██████████| 668/668 [00:01<00:00, 466.53batch/s]


epoch 67, loss: 0.04423, acc: 0.98119, val_loss: 1.00192, val_accuracy: 0.81823


training loss: 0.04308, training accuracy: 0.98062: 100%|██████████| 668/668 [00:01<00:00, 468.04batch/s]


epoch 68, loss: 0.04308, acc: 0.98062, val_loss: 1.27752, val_accuracy: 0.82797


training loss: 0.04912, training accuracy: 0.98030: 100%|██████████| 668/668 [00:01<00:00, 468.81batch/s]


epoch 69, loss: 0.04912, acc: 0.98030, val_loss: 1.08411, val_accuracy: 0.82048


training loss: 0.04443, training accuracy: 0.98067: 100%|██████████| 668/668 [00:01<00:00, 488.50batch/s]


epoch 70, loss: 0.04443, acc: 0.98067, val_loss: 1.19051, val_accuracy: 0.81954


training loss: 0.04514, training accuracy: 0.98109: 100%|██████████| 668/668 [00:01<00:00, 470.89batch/s]


epoch 71, loss: 0.04514, acc: 0.98109, val_loss: 1.19403, val_accuracy: 0.82216


training loss: 0.03810, training accuracy: 0.98315: 100%|██████████| 668/668 [00:01<00:00, 476.00batch/s]


epoch 72, loss: 0.03810, acc: 0.98315, val_loss: 1.21050, val_accuracy: 0.81973


training loss: 0.04456, training accuracy: 0.98170: 100%|██████████| 668/668 [00:01<00:00, 480.10batch/s]


epoch 73, loss: 0.04456, acc: 0.98170, val_loss: 1.09296, val_accuracy: 0.82235


training loss: 0.03984, training accuracy: 0.98282: 100%|██████████| 668/668 [00:01<00:00, 492.79batch/s]


epoch 74, loss: 0.03984, acc: 0.98282, val_loss: 1.12271, val_accuracy: 0.82422


training loss: 0.04254, training accuracy: 0.98250: 100%|██████████| 668/668 [00:01<00:00, 489.03batch/s]


epoch 75, loss: 0.04254, acc: 0.98250, val_loss: 1.10979, val_accuracy: 0.82216


training loss: 0.04057, training accuracy: 0.98301: 100%|██████████| 668/668 [00:01<00:00, 479.93batch/s]


epoch 76, loss: 0.04057, acc: 0.98301, val_loss: 0.95434, val_accuracy: 0.82067


training loss: 0.03818, training accuracy: 0.98371: 100%|██████████| 668/668 [00:01<00:00, 468.21batch/s]


epoch 77, loss: 0.03818, acc: 0.98371, val_loss: 1.19322, val_accuracy: 0.82516


training loss: 0.03286, training accuracy: 0.98559: 100%|██████████| 668/668 [00:01<00:00, 465.67batch/s]


epoch 78, loss: 0.03286, acc: 0.98559, val_loss: 1.28870, val_accuracy: 0.82460


training loss: 0.03341, training accuracy: 0.98573: 100%|██████████| 668/668 [00:01<00:00, 512.48batch/s]


epoch 79, loss: 0.03341, acc: 0.98573, val_loss: 1.19445, val_accuracy: 0.82235


training loss: 0.04270, training accuracy: 0.98306: 100%|██████████| 668/668 [00:01<00:00, 502.58batch/s]


epoch 80, loss: 0.04270, acc: 0.98306, val_loss: 1.23823, val_accuracy: 0.82310


training loss: 0.04175, training accuracy: 0.98343: 100%|██████████| 668/668 [00:01<00:00, 480.33batch/s]


epoch 81, loss: 0.04175, acc: 0.98343, val_loss: 0.95974, val_accuracy: 0.82759


training loss: 0.03234, training accuracy: 0.98690: 100%|██████████| 668/668 [00:01<00:00, 479.63batch/s]


epoch 82, loss: 0.03234, acc: 0.98690, val_loss: 1.22415, val_accuracy: 0.82123


training loss: 0.03928, training accuracy: 0.98502: 100%|██████████| 668/668 [00:01<00:00, 464.33batch/s]


epoch 83, loss: 0.03928, acc: 0.98502, val_loss: 1.06197, val_accuracy: 0.82778


training loss: 0.03471, training accuracy: 0.98605: 100%|██████████| 668/668 [00:01<00:00, 457.00batch/s]


epoch 84, loss: 0.03471, acc: 0.98605, val_loss: 1.11476, val_accuracy: 0.82366


training loss: 0.03614, training accuracy: 0.98573: 100%|██████████| 668/668 [00:01<00:00, 467.54batch/s]


epoch 85, loss: 0.03614, acc: 0.98573, val_loss: 1.09008, val_accuracy: 0.82104


training loss: 0.02888, training accuracy: 0.98769: 100%|██████████| 668/668 [00:01<00:00, 489.27batch/s]


epoch 86, loss: 0.02888, acc: 0.98769, val_loss: 1.25917, val_accuracy: 0.82535


training loss: 0.03348, training accuracy: 0.98563: 100%|██████████| 668/668 [00:01<00:00, 495.60batch/s]


epoch 87, loss: 0.03348, acc: 0.98563, val_loss: 1.11567, val_accuracy: 0.81805


training loss: 0.03389, training accuracy: 0.98596: 100%|██████████| 668/668 [00:01<00:00, 528.45batch/s]


epoch 88, loss: 0.03389, acc: 0.98596, val_loss: 1.03422, val_accuracy: 0.82179


training loss: 0.03353, training accuracy: 0.98643: 100%|██████████| 668/668 [00:01<00:00, 491.28batch/s]


epoch 89, loss: 0.03353, acc: 0.98643, val_loss: 1.04326, val_accuracy: 0.81805


training loss: 0.03152, training accuracy: 0.98694: 100%|██████████| 668/668 [00:01<00:00, 465.72batch/s]


epoch 90, loss: 0.03152, acc: 0.98694, val_loss: 1.07751, val_accuracy: 0.81954


training loss: 0.02991, training accuracy: 0.98713: 100%|██████████| 668/668 [00:01<00:00, 459.10batch/s]


epoch 91, loss: 0.02991, acc: 0.98713, val_loss: 1.20988, val_accuracy: 0.82460


training loss: 0.03034, training accuracy: 0.98666: 100%|██████████| 668/668 [00:01<00:00, 454.09batch/s]


epoch 92, loss: 0.03034, acc: 0.98666, val_loss: 1.45751, val_accuracy: 0.82198


training loss: 0.02530, training accuracy: 0.98942: 100%|██████████| 668/668 [00:01<00:00, 460.97batch/s]


epoch 93, loss: 0.02530, acc: 0.98942, val_loss: 1.36401, val_accuracy: 0.82198


training loss: 0.02968, training accuracy: 0.98783: 100%|██████████| 668/668 [00:01<00:00, 466.04batch/s]


epoch 94, loss: 0.02968, acc: 0.98783, val_loss: 1.20617, val_accuracy: 0.82010


training loss: 0.03434, training accuracy: 0.98652: 100%|██████████| 668/668 [00:01<00:00, 458.78batch/s]


epoch 95, loss: 0.03434, acc: 0.98652, val_loss: 1.12470, val_accuracy: 0.82478


training loss: 0.02700, training accuracy: 0.98895: 100%|██████████| 668/668 [00:01<00:00, 455.38batch/s]


epoch 96, loss: 0.02700, acc: 0.98895, val_loss: 1.32643, val_accuracy: 0.82553


training loss: 0.02820, training accuracy: 0.98778: 100%|██████████| 668/668 [00:01<00:00, 458.92batch/s]


epoch 97, loss: 0.02820, acc: 0.98778, val_loss: 1.12741, val_accuracy: 0.82029


training loss: 0.03026, training accuracy: 0.98760: 100%|██████████| 668/668 [00:01<00:00, 472.46batch/s]


epoch 98, loss: 0.03026, acc: 0.98760, val_loss: 1.19646, val_accuracy: 0.82123


training loss: 0.02765, training accuracy: 0.98867: 100%|██████████| 668/668 [00:01<00:00, 480.57batch/s]


epoch 99, loss: 0.02765, acc: 0.98867, val_loss: 1.21770, val_accuracy: 0.82404


training loss: 0.02795, training accuracy: 0.98839: 100%|██████████| 668/668 [00:01<00:00, 452.33batch/s]


epoch 100, loss: 0.02795, acc: 0.98839, val_loss: 1.30863, val_accuracy: 0.81954
