In [2]:
# Mount Google Drive at /content/drive; the dataset and pickle paths used in
# later cells all live underneath this mount point.
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive


In [3]:
# Let's write the tokenization function 

import spacy
import string
import pickle as pkl
import torch

# Load the small English spaCy pipeline. `tokenize` below only reads the text
# of the tokens it produces.
tokenizer = spacy.load('en_core_web_sm')
# Punctuation characters; `tokenize` drops tokens whose text occurs in this string.
punctuations = string.punctuation

# lowercase and remove punctuation
def tokenize(sent):
    """Return the lowercased tokens of `sent`, leaving out punctuation tokens."""
    kept = []
    for tok in tokenizer(sent):
        # A token is skipped when its text occurs in the punctuation string.
        if tok.text in punctuations:
            continue
        kept.append(tok.text.lower())
    return kept

# Example: tokenize a single sentence and print the resulting token list
tokens = tokenize(u'Apple is looking at buying U.K. startup for $1 billion.')
print (tokens)

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

['apple', 'is', 'looking', 'at', 'buying', 'u.k.', 'startup', 'for', '1', 'billion']


In [0]:
# This is the code cell that tokenizes train/val/test datasets
# However it takes about 15-20 minutes to run it
# For convenience we have provided the preprocessed datasets
# Please see the next code cell


def tokenize_dataset(dataset):
    """
    Tokenize every sample of a tab-separated sentence-pair dataset.

    @param dataset: iterable of lines, each holding exactly three
        tab-separated fields: sentence1, sentence2 and label. A line may end
        with a newline (e.g. when iterating over an open text file). Every
        line is processed, including the first one.
    @return: tuple (sentence1_tokens_dataset, sentence2_tokens_dataset,
        label_list, all_tokens) where the first two are lists of token lists,
        label_list holds the label strings without the trailing newline, and
        all_tokens is the flat list of every token from both sentences.
    @raise ValueError: if a line does not split into exactly three fields.
    """
    sentence1_tokens_dataset = []
    sentence2_tokens_dataset = []
    # we are keeping track of all tokens in dataset
    # in order to create vocabulary later
    all_tokens = []
    label_list = []
    for sample in dataset:
        sentence1, sentence2, label = sample.split("\t")
        # endswith() is safe on an empty label, whereas indexing label[-1]
        # raises IndexError (e.g. a last line that ends with a tab).
        if label.endswith('\n'):
            label = label[:-1]
        sentence1_tokens = tokenize(sentence1)
        sentence2_tokens = tokenize(sentence2)
        sentence1_tokens_dataset.append(sentence1_tokens)
        sentence2_tokens_dataset.append(sentence2_tokens)
        label_list.append(label)
        all_tokens.extend(sentence1_tokens)
        all_tokens.extend(sentence2_tokens)

    return sentence1_tokens_dataset, sentence2_tokens_dataset, label_list, all_tokens

#val set tokens
print ("Tokenizing val data")
# Context managers close each file as soon as it has been read / written,
# instead of leaving the handles open for the rest of the session.
with open("/content/drive/My Drive/NLP_HW/HW1/snli_val.tsv","r") as val_data:
    sentence1_val_tokens_dataset, sentence2_val_tokens_dataset, snli_val_label_list, _ = tokenize_dataset(val_data)
with open("/content/drive/My Drive/NLP_HW/HW1/snli_val_sentence1_tokens.p", "wb") as val_out:
    pkl.dump(sentence1_val_tokens_dataset, val_out)
with open("/content/drive/My Drive/NLP_HW/HW1/snli_val_sentence2_tokens.p", "wb") as val_out:
    pkl.dump(sentence2_val_tokens_dataset, val_out)
with open("/content/drive/My Drive/NLP_HW/HW1/snli_val_label_list.p", "wb") as val_out:
    pkl.dump(snli_val_label_list, val_out)

#train set tokens
print ("Tokenizing train data")
# Context managers close each file as soon as it has been read / written,
# instead of leaving the handles open for the rest of the session.
with open("/content/drive/My Drive/NLP_HW/HW1/snli_train.tsv","r") as train_data:
    sentence1_train_tokens_dataset, sentence2_train_tokens_dataset, snli_train_label_list, snli_train_tokens = tokenize_dataset(train_data)
with open("/content/drive/My Drive/NLP_HW/HW1/snli_train_sentence1_tokens.p", "wb") as train_out:
    pkl.dump(sentence1_train_tokens_dataset, train_out)
with open("/content/drive/My Drive/NLP_HW/HW1/snli_train_sentence2_tokens.p", "wb") as train_out:
    pkl.dump(sentence2_train_tokens_dataset, train_out)
with open("/content/drive/My Drive/NLP_HW/HW1/snli_train_label_list.p", "wb") as train_out:
    pkl.dump(snli_train_label_list, train_out)
# Flat list of all training tokens; the vocabulary is built from it later.
with open("/content/drive/My Drive/NLP_HW/HW1/snli_train_tokens.p", "wb") as train_out:
    pkl.dump(snli_train_tokens, train_out)

In [0]:
def _load_pickle(path):
    """Load and return the single pickled object stored at `path`, closing the file afterwards."""
    # NOTE: unpickling can execute arbitrary code; only load files that come
    # from a trusted source (here: the pickles written by the cell above).
    with open(path, "rb") as pickle_file:
        return pkl.load(pickle_file)

# Preprocessed training split: token lists, label strings and the flat token list.
sentence1_train_tokens_dataset = _load_pickle("/content/drive/My Drive/NLP_HW/HW1/snli_train_sentence1_tokens.p")
sentence2_train_tokens_dataset = _load_pickle("/content/drive/My Drive/NLP_HW/HW1/snli_train_sentence2_tokens.p")
snli_train_label_list = _load_pickle("/content/drive/My Drive/NLP_HW/HW1/snli_train_label_list.p")
snli_train_tokens = _load_pickle("/content/drive/My Drive/NLP_HW/HW1/snli_train_tokens.p")

# Preprocessed validation split: token lists and label strings.
sentence1_val_tokens_dataset = _load_pickle("/content/drive/My Drive/NLP_HW/HW1/snli_val_sentence1_tokens.p")
sentence2_val_tokens_dataset = _load_pickle("/content/drive/My Drive/NLP_HW/HW1/snli_val_sentence2_tokens.p")
snli_val_label_list = _load_pickle("/content/drive/My Drive/NLP_HW/HW1/snli_val_label_list.p")

In [0]:
# Integer class id for every label string.
target_dict = {'contradiction':0, 'entailment':1, 'neutral':2}
# The first entry of each label list is skipped — presumably the header row
# of the TSV file, which is not a valid label.
train_target = [target_dict[label] for label in snli_train_label_list[1:]]
val_target = [target_dict[label] for label in snli_val_label_list[1:]]

In [0]:
# Maximum number of tokens kept per sentence: SNLIDataset truncates longer
# sentences to this length and snli_collate_func pads shorter ones up to it.
MAX_SENTENCE_LENGTH = 20

In [0]:

import numpy as np
import torch
from torch.utils.data import Dataset

class SNLIDataset(Dataset):
    """
    Class that represents a train/validation/test dataset of sentence pairs
    that's readable for PyTorch.
    Note that this class inherits torch.utils.data.Dataset
    """

    def __init__(self, sentence1_list, sentence2_list, target_list):
        """
        @param sentence1_list: list with one token-index list per first sentence
        @param sentence2_list: list with one token-index list per second sentence
        @param target_list: list with one label per sentence pair

        All three lists must have the same length.
        """
        self.sentence1_list = sentence1_list
        self.sentence2_list = sentence2_list
        self.target_list = target_list
        assert (len(self.sentence1_list) == len(self.target_list))
        # The second sentences are indexed with the same key in __getitem__,
        # so they have to line up with the targets as well.
        assert (len(self.sentence2_list) == len(self.target_list))

    def __len__(self):
        """Number of sentence pairs in the dataset."""
        return len(self.target_list)

    def __getitem__(self, key):
        """
        Triggered when you call dataset[i]

        @return: [sentence1 indices, sentence2 indices, length of sentence1,
            length of sentence2, label]. Both sentences are truncated to
            MAX_SENTENCE_LENGTH tokens and the lengths are the lengths after
            truncation.
        """
        token1_idx = self.sentence1_list[key][:MAX_SENTENCE_LENGTH]
        token2_idx = self.sentence2_list[key][:MAX_SENTENCE_LENGTH]
        label = self.target_list[key]
        return [token1_idx, token2_idx, len(token1_idx), len(token2_idx), label]



In [0]:
from collections import Counter

# reserve index 0 for pad and 1 for unk
PAD_IDX = 0
UNK_IDX = 1

def build_vocab(all_tokens, max_vocab_size=10**4):
    """
    Build the vocabulary from the most frequent tokens.

    @param all_tokens: iterable of tokens (may be empty)
    @param max_vocab_size: maximum number of distinct tokens to keep, not
        counting the two special tokens '<pad>' and '<unk>'
    @return: (token2id, id2token)
        id2token: list of tokens, where id2token[i] returns token that corresponds to token i
        token2id: dictionary where keys represent tokens and corresponding values represent indices
    """
    # Iterating over most_common() directly (instead of unpacking zip(*...))
    # also works when there are no tokens at all.
    vocab = [token for token, _ in Counter(all_tokens).most_common(max_vocab_size)]
    # Regular tokens start at index 2; the first two slots hold '<pad>' and '<unk>'.
    token2id = {token: index for index, token in enumerate(vocab, start=2)}
    id2token = ['<pad>', '<unk>'] + vocab
    token2id['<pad>'] = PAD_IDX
    token2id['<unk>'] = UNK_IDX
    return token2id, id2token

# Build the vocabulary from the training tokens only, with the default size limit.
token2id, id2token = build_vocab(snli_train_tokens)

In [9]:
# Lets check the dictionary by loading random token from it
import random

# Draw a random id, look up its token, then map that token back to its id;
# both printed lines should show the same (id, token) pair.
random_token_id = random.randrange(0, len(id2token))
random_token = id2token[random_token_id]

print ("Token id {} ; token {}".format(random_token_id, random_token))
print ("Token {}; token id {}".format(random_token, token2id[random_token]))

Token id 9845 ; token picasso
Token picasso; token id 9845


In [10]:
# convert token to id in the dataset
def token2index_dataset(tokens_data):
    """
    Map every token list in `tokens_data` to the list of its vocabulary ids.
    Tokens that are missing from `token2id` are mapped to UNK_IDX.
    """
    return [
        [token2id.get(token, UNK_IDX) for token in sentence]
        for sentence in tokens_data
    ]

# Convert the tokenized train / validation sentences to vocabulary indices.
sent1_train_indices, sent2_train_indices = (
    token2index_dataset(sentences)
    for sentences in (sentence1_train_tokens_dataset, sentence2_train_tokens_dataset)
)
sent1_val_indices, sent2_val_indices = (
    token2index_dataset(sentences)
    for sentences in (sentence1_val_tokens_dataset, sentence2_val_tokens_dataset)
)

# double checking: the index lists and the token lists should report the same sizes
for checked in (sent1_train_indices, sentence1_train_tokens_dataset,
                sent2_train_indices, sentence2_train_tokens_dataset):
    print ("Train dataset size is {}".format(len(checked)))

Train dataset size is 100001
Train dataset size is 100001
Train dataset size is 100001
Train dataset size is 100001


In [0]:
# The first entry of both index lists is dropped so that they line up with
# train_target, which was built from the label list without its first entry
# (presumably the TSV header row).
train_dataset = SNLIDataset(sent1_train_indices[1:], sent2_train_indices[1:], train_target)

In [0]:
def _pad_sentences(sentences, padded_length):
    """Right-pad every index list with zeros into one int64 array of shape (len(sentences), padded_length)."""
    # Explicit int64: an empty sentence would otherwise become a float64 array
    # and turn the whole batch into floats, which nn.Embedding cannot index with.
    padded = np.zeros((len(sentences), padded_length), dtype=np.int64)
    for row, sentence in enumerate(sentences):
        # Raises ValueError if the sentence is longer than padded_length.
        padded[row, :len(sentence)] = sentence
    return padded


def snli_collate_func(batch, max_sentence_length=None):
    """
    Customized function for DataLoader that pads every sentence of the batch
    to a fixed length so that all data have the same length.

    @param batch: list of samples, each of the form
        [sentence1 indices, sentence2 indices, length1, length2, label]
    @param max_sentence_length: length every sentence is padded to; defaults
        to the module-level MAX_SENTENCE_LENGTH. No sentence may be longer.
    @return: list [sent1, sent2, length1, length2, labels] where sent1 / sent2
        are int64 tensors of shape (batch_size, max_sentence_length) padded
        with 0, and the other three are LongTensors of shape (batch_size,).
    """
    if max_sentence_length is None:
        max_sentence_length = MAX_SENTENCE_LENGTH

    sent1_padded = _pad_sentences([datum[0] for datum in batch], max_sentence_length)
    sent2_padded = _pad_sentences([datum[1] for datum in batch], max_sentence_length)
    length_list_sent1 = [datum[2] for datum in batch]
    length_list_sent2 = [datum[3] for datum in batch]
    label_list = [datum[4] for datum in batch]

    return [torch.from_numpy(sent1_padded), torch.from_numpy(sent2_padded),
            torch.LongTensor(length_list_sent1), torch.LongTensor(length_list_sent2),
            torch.LongTensor(label_list)]

In [0]:
BATCH_SIZE = 256

# Validation pairs: the first entry of both index lists is dropped so that
# they line up with val_target.
val_dataset = SNLIDataset(sent1_val_indices[1:], sent2_val_indices[1:], val_target)

# Both loaders use the same batch size, collate function and shuffling.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    shuffle=True,
    batch_size=BATCH_SIZE,
    collate_fn=snli_collate_func,
)
val_loader = torch.utils.data.DataLoader(
    dataset=val_dataset,
    shuffle=True,
    batch_size=BATCH_SIZE,
    collate_fn=snli_collate_func,
)

In [0]:
# First import torch related libraries
import torch
import torch.nn as nn
import torch.nn.functional as F

class BagOfWords_Model(nn.Module):
    """
    BagOfWords classification model for sentence pairs.

    Each sentence is represented by the mean of its word embeddings. The two
    sentence vectors are combined according to `interaction_type` and mapped
    to 3 logits by either one linear layer or a 3-layer ReLU network.
    """
    def __init__(self, vocab_size, emb_dim, nn_ind = False, interaction_type = 'cat'):
        """
        @param vocab_size: size of the vocabulary.
        @param emb_dim: size of the word embedding
        @param nn_ind: if true, classify with three linear layers separated by
            ReLUs; otherwise with a single linear layer
        @param interaction_type: 'cat' concatenates the two sentence vectors,
            'sum' adds them; any other value multiplies them element-wise
        """
        super(BagOfWords_Model, self).__init__()
        # pay attention to padding_idx
        self.embed = nn.Embedding(vocab_size, emb_dim, padding_idx=0)
        self.nn_ind = nn_ind
        self.interaction_type = interaction_type
        # Concatenation doubles the feature size; the other interactions keep emb_dim.
        in_dim = 2 * emb_dim if self.interaction_type == 'cat' else emb_dim
        if nn_ind:
            # Each hidden layer halves the width of its input.
            self.linear1 = nn.Linear(in_dim, in_dim // 2)
            self.relu1 = nn.ReLU()
            self.linear2 = nn.Linear(in_dim // 2, in_dim // 4)
            self.relu2 = nn.ReLU()
            self.linear3 = nn.Linear(in_dim // 4, 3)
        else:
            self.linear = nn.Linear(in_dim, 3)

    def interaction_func(self, sent1, sent2):
        """Combine the two sentence vectors according to self.interaction_type."""
        if self.interaction_type == 'sum':
            out = sent1.float() + sent2.float()
        elif self.interaction_type == 'cat':
            out = torch.cat((sent1.float(),sent2.float()),dim=1)
        else:
            # every other interaction_type means element-wise product
            out = sent1.float()*sent2.float()
        return out

    def _mean_embedding(self, sent, length):
        """Average the word embeddings of every sentence in `sent` over its `length`."""
        summed = torch.sum(self.embed(sent), dim=1)
        # Clamp to 1 so that an empty sentence (length 0) yields a zero vector
        # instead of 0/0 = NaN.
        denom = length.clamp(min=1).view(length.size()[0], 1).float()
        return summed / denom

    def forward(self, sent1, sent2, length1, length2):
        """
        @param sent1, sent2: matrices of size (batch_size, max_sentence_length). Each row
            represents a sentence as token indices. Note that they are padded to have same length.
        @param length1, length2: int tensors of size (batch_size), which represent the non-trivial
            (excludes padding) length of each sentence in sent1 / sent2.
        @return: logits of size (batch_size, 3)
        """
        out1 = self._mean_embedding(sent1, length1)
        out2 = self._mean_embedding(sent2, length2)
        out = self.interaction_func(out1, out2)

        # return logits
        if self.nn_ind:
            out = self.relu1(self.linear1(out))
            out = self.relu2(self.linear2(out))
            out = self.linear3(out)
        else:
            out = self.linear(out)
        return out

In [0]:
num_epochs = 10 # number of epochs to train

# Function for testing the model
def test_model(loader, model):
    """
    Help function that measures the model's accuracy on a dataset.
    The model is switched to eval mode and is left in that mode.
    @param: loader - data loader for the dataset to test against
    @param: model - model to evaluate
    @return: accuracy in percent
    """
    n_correct, n_seen = 0, 0
    model.eval()
    with torch.no_grad():
        for batch in loader:
            # every batch holds: sentence1, sentence2, length1, length2, labels
            sent1_batch, sent2_batch, length1_batch, length2_batch, label_batch = (
                tensor.to(device) for tensor in batch
            )
            logits = model(sent1_batch, sent2_batch, length1_batch, length2_batch)
            probabilities = F.softmax(logits, dim=1).to(device)
            # index of the most probable class per sample
            predicted = torch.max(probabilities.data, 1)[1]

            n_seen += label_batch.size(0)
            n_correct += predicted.eq(label_batch.view_as(predicted)).sum().item()
    return (100 * n_correct / n_seen)

def train_model(train_loader, val_loader, model, optimizer, criterion, num_epochs):
    """
    Train `model` for `num_epochs` epochs and print progress.

    Validation accuracy is printed every 100 training steps; training loss and
    training accuracy are printed at the end of every epoch.

    @param train_loader: loader yielding (sent1, sent2, length1, length2, labels) training batches
    @param val_loader: loader with the same batch layout, used for validation
    @param model: model called as model(sent1, sent2, length1, length2)
    @param optimizer: optimizer over the model's parameters
    @param criterion: loss called as criterion(outputs, labels). The printed
        training loss is the accumulated loss divided by the number of samples,
        so it is a per-sample loss only if the criterion sums over the batch.
    @param num_epochs: number of passes over train_loader
    """
    for epoch in range(num_epochs):
        train_loss = 0
        total = 0
        # step counts the batches processed so far in this epoch, starting at 1
        for step, (sent1, sent2, length1, length2, labels) in enumerate(train_loader, start=1):
            # test_model() leaves the model in eval mode, so switch back before every update
            model.train()
            sent1_batch, sent2_batch, length1_batch, length2_batch, label_batch = sent1.to(device), sent2.to(device), length1.to(device), length2.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(sent1_batch, sent2_batch, length1_batch, length2_batch)
            loss = criterion(outputs, label_batch)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            total += label_batch.size(0)
            # validate every 100 iterations
            if step % 100 == 0:
                val_acc = test_model(val_loader, model)
                # report the step that was just completed (previously printed one too many)
                print('Epoch: [{}/{}], Step: [{}/{}], Validation Acc: {}'.format(epoch+1, num_epochs, step, len(train_loader), val_acc))
        train_acc = test_model(train_loader, model)
        print("Epoch:{}, Training Loss:{}, Training Acc: {}".format(epoch+1, train_loss/total, train_acc))

In [37]:
# Experiment: concatenated sentence vectors (default interaction), single linear layer.
emb_dim = 100
learning_rate = 0.01

model = BagOfWords_Model(vocab_size=len(id2token), emb_dim=emb_dim).to(device)
criterion = torch.nn.CrossEntropyLoss(reduction='sum')
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

train_model(train_loader, val_loader, model, optimizer, criterion, num_epochs)

Epoch: [1/10], Step: [101/391], Validation Acc: 57.3
Epoch: [1/10], Step: [201/391], Validation Acc: 62.1
Epoch: [1/10], Step: [301/391], Validation Acc: 62.3
Epoch:1, Training Loss:0.895773776550293, Training Acc: 65.149
Epoch: [2/10], Step: [101/391], Validation Acc: 61.7
Epoch: [2/10], Step: [201/391], Validation Acc: 63.9
Epoch: [2/10], Step: [301/391], Validation Acc: 63.0
Epoch:2, Training Loss:0.8010885529327393, Training Acc: 68.078
Epoch: [3/10], Step: [101/391], Validation Acc: 62.3
Epoch: [3/10], Step: [201/391], Validation Acc: 62.6
Epoch: [3/10], Step: [301/391], Validation Acc: 61.9
Epoch:3, Training Loss:0.7670704032897949, Training Acc: 69.76
Epoch: [4/10], Step: [101/391], Validation Acc: 60.8
Epoch: [4/10], Step: [201/391], Validation Acc: 63.3
Epoch: [4/10], Step: [301/391], Validation Acc: 61.6
Epoch:4, Training Loss:0.7428371154785156, Training Acc: 70.912
Epoch: [5/10], Step: [101/391], Validation Acc: 62.2
Epoch: [5/10], Step: [201/391], Validation Acc: 61.1
Epoc

In [17]:
# Experiment: summed sentence vectors, single linear layer.
emb_dim = 100
learning_rate = 0.01

model = BagOfWords_Model(vocab_size=len(id2token), emb_dim=emb_dim, interaction_type='sum').to(device)
criterion = torch.nn.CrossEntropyLoss(reduction='sum')
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

train_model(train_loader, val_loader, model, optimizer, criterion, num_epochs)

Epoch: [1/10], Step: [101/391], Validation Acc: 50.3
Epoch: [1/10], Step: [201/391], Validation Acc: 53.3
Epoch: [1/10], Step: [301/391], Validation Acc: 55.5
Epoch:1, Training Loss:0.9881934674072266, Training Acc: 58.524
Epoch: [2/10], Step: [101/391], Validation Acc: 56.7
Epoch: [2/10], Step: [201/391], Validation Acc: 56.4
Epoch: [2/10], Step: [301/391], Validation Acc: 56.5
Epoch:2, Training Loss:0.9124954850769043, Training Acc: 60.629
Epoch: [3/10], Step: [101/391], Validation Acc: 55.9
Epoch: [3/10], Step: [201/391], Validation Acc: 56.8
Epoch: [3/10], Step: [301/391], Validation Acc: 56.4
Epoch:3, Training Loss:0.8872130285644532, Training Acc: 62.189
Epoch: [4/10], Step: [101/391], Validation Acc: 57.2
Epoch: [4/10], Step: [201/391], Validation Acc: 56.7
Epoch: [4/10], Step: [301/391], Validation Acc: 58.2
Epoch:4, Training Loss:0.8726772467041015, Training Acc: 62.598
Epoch: [5/10], Step: [101/391], Validation Acc: 57.1
Epoch: [5/10], Step: [201/391], Validation Acc: 55.8
Ep

In [18]:
# Experiment: element-wise product of the sentence vectors, single linear layer.
emb_dim = 100
learning_rate = 0.01

model = BagOfWords_Model(vocab_size=len(id2token), emb_dim=emb_dim, interaction_type='mult').to(device)
criterion = torch.nn.CrossEntropyLoss(reduction='sum')
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

train_model(train_loader, val_loader, model, optimizer, criterion, num_epochs)

Epoch: [1/10], Step: [101/391], Validation Acc: 48.6
Epoch: [1/10], Step: [201/391], Validation Acc: 56.4
Epoch: [1/10], Step: [301/391], Validation Acc: 60.5
Epoch:1, Training Loss:0.9477548596191406, Training Acc: 67.863
Epoch: [2/10], Step: [101/391], Validation Acc: 62.2
Epoch: [2/10], Step: [201/391], Validation Acc: 62.6
Epoch: [2/10], Step: [301/391], Validation Acc: 63.4
Epoch:2, Training Loss:0.7266880146026611, Training Acc: 78.683
Epoch: [3/10], Step: [101/391], Validation Acc: 62.9
Epoch: [3/10], Step: [201/391], Validation Acc: 63.7
Epoch: [3/10], Step: [301/391], Validation Acc: 63.7
Epoch:3, Training Loss:0.5479757135009765, Training Acc: 86.563
Epoch: [4/10], Step: [101/391], Validation Acc: 64.0
Epoch: [4/10], Step: [201/391], Validation Acc: 65.1
Epoch: [4/10], Step: [301/391], Validation Acc: 64.6
Epoch:4, Training Loss:0.3973693204498291, Training Acc: 91.264
Epoch: [5/10], Step: [101/391], Validation Acc: 64.4
Epoch: [5/10], Step: [201/391], Validation Acc: 64.2
Ep

In [38]:
# Experiment: concatenated sentence vectors (default interaction), 3-layer ReLU network.
emb_dim = 100
learning_rate = 0.01

model1 = BagOfWords_Model(vocab_size=len(id2token), emb_dim=emb_dim, nn_ind=True).to(device)
criterion1 = torch.nn.CrossEntropyLoss(reduction='sum')
optimizer1 = torch.optim.Adam(model1.parameters(), lr=learning_rate)

train_model(train_loader, val_loader, model1, optimizer1, criterion1, num_epochs)

Epoch: [1/10], Step: [101/391], Validation Acc: 57.8
Epoch: [1/10], Step: [201/391], Validation Acc: 60.8
Epoch: [1/10], Step: [301/391], Validation Acc: 62.5
Epoch:1, Training Loss:0.8840453399658204, Training Acc: 68.085
Epoch: [2/10], Step: [101/391], Validation Acc: 64.3
Epoch: [2/10], Step: [201/391], Validation Acc: 65.5
Epoch: [2/10], Step: [301/391], Validation Acc: 63.7
Epoch:2, Training Loss:0.7363392852020264, Training Acc: 74.715
Epoch: [3/10], Step: [101/391], Validation Acc: 65.8
Epoch: [3/10], Step: [201/391], Validation Acc: 66.3
Epoch: [3/10], Step: [301/391], Validation Acc: 67.1
Epoch:3, Training Loss:0.6400900885009766, Training Acc: 79.246
Epoch: [4/10], Step: [101/391], Validation Acc: 64.4
Epoch: [4/10], Step: [201/391], Validation Acc: 66.1
Epoch: [4/10], Step: [301/391], Validation Acc: 66.7
Epoch:4, Training Loss:0.5474358529663086, Training Acc: 82.701
Epoch: [5/10], Step: [101/391], Validation Acc: 66.2
Epoch: [5/10], Step: [201/391], Validation Acc: 66.1
Ep

In [39]:
# Experiment: summed sentence vectors, 3-layer ReLU network.
emb_dim = 100
learning_rate = 0.01

model1 = BagOfWords_Model(vocab_size=len(id2token), emb_dim=emb_dim, nn_ind=True, interaction_type='sum').to(device)
criterion1 = torch.nn.CrossEntropyLoss(reduction='sum')
optimizer1 = torch.optim.Adam(model1.parameters(), lr=learning_rate)

train_model(train_loader, val_loader, model1, optimizer1, criterion1, num_epochs)

Epoch: [1/10], Step: [101/391], Validation Acc: 52.3
Epoch: [1/10], Step: [201/391], Validation Acc: 55.6
Epoch: [1/10], Step: [301/391], Validation Acc: 55.7
Epoch:1, Training Loss:0.9801254168701172, Training Acc: 60.612
Epoch: [2/10], Step: [101/391], Validation Acc: 57.3
Epoch: [2/10], Step: [201/391], Validation Acc: 56.8
Epoch: [2/10], Step: [301/391], Validation Acc: 57.2
Epoch:2, Training Loss:0.8742438984680175, Training Acc: 65.488
Epoch: [3/10], Step: [101/391], Validation Acc: 57.5
Epoch: [3/10], Step: [201/391], Validation Acc: 57.9
Epoch: [3/10], Step: [301/391], Validation Acc: 57.1
Epoch:3, Training Loss:0.800897191619873, Training Acc: 69.927
Epoch: [4/10], Step: [101/391], Validation Acc: 58.5
Epoch: [4/10], Step: [201/391], Validation Acc: 58.5
Epoch: [4/10], Step: [301/391], Validation Acc: 59.9
Epoch:4, Training Loss:0.7313453771972657, Training Acc: 73.542
Epoch: [5/10], Step: [101/391], Validation Acc: 58.3
Epoch: [5/10], Step: [201/391], Validation Acc: 58.6
Epo

In [20]:
# Experiment: element-wise product of the sentence vectors, 3-layer ReLU network.
emb_dim = 100
learning_rate = 0.01

model1 = BagOfWords_Model(vocab_size=len(id2token), emb_dim=emb_dim, nn_ind=True, interaction_type='mult').to(device)
criterion1 = torch.nn.CrossEntropyLoss(reduction='sum')
optimizer1 = torch.optim.Adam(model1.parameters(), lr=learning_rate)

train_model(train_loader, val_loader, model1, optimizer1, criterion1, num_epochs)

Epoch: [1/10], Step: [101/391], Validation Acc: 50.8
Epoch: [1/10], Step: [201/391], Validation Acc: 55.9
Epoch: [1/10], Step: [301/391], Validation Acc: 59.5
Epoch:1, Training Loss:0.9530219715881347, Training Acc: 67.455
Epoch: [2/10], Step: [101/391], Validation Acc: 63.7
Epoch: [2/10], Step: [201/391], Validation Acc: 62.6
Epoch: [2/10], Step: [301/391], Validation Acc: 63.9
Epoch:2, Training Loss:0.7300693878173828, Training Acc: 78.423
Epoch: [3/10], Step: [101/391], Validation Acc: 65.0
Epoch: [3/10], Step: [201/391], Validation Acc: 65.1
Epoch: [3/10], Step: [301/391], Validation Acc: 64.1
Epoch:3, Training Loss:0.544019750289917, Training Acc: 85.914
Epoch: [4/10], Step: [101/391], Validation Acc: 64.1
Epoch: [4/10], Step: [201/391], Validation Acc: 64.0
Epoch: [4/10], Step: [301/391], Validation Acc: 65.0
Epoch:4, Training Loss:0.38841200874328613, Training Acc: 90.157
Epoch: [5/10], Step: [101/391], Validation Acc: 63.0
Epoch: [5/10], Step: [201/391], Validation Acc: 63.8
Ep