In [1]:
# define constants
DATA_DIR = "content/UCF101"
LABEL_DIR = "content/UCF101_labels"
CACHE_DIR = "content/cache"
FRAMES_PER_CLIP = 10
IMG_SIZE = 224    # video frames would be resized to IMG_SIZE * IMG_SIZE

# Download UCF101 dataset and extract frames
References


*   https://www.kaggle.com/pevogam/starter-ucf101-with-pytorch
*   https://blog.csdn.net/HW140701/article/details/115864277



In [2]:
import os
import sys
import copy
import subprocess
import numpy as np
from prettytable import PrettyTable
from multiprocessing import Pool
from tqdm.notebook import tqdm
from sklearn.metrics import accuracy_score

# !pip install av
# import av
import cv2

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import CrossEntropyLoss
from torch.nn.utils.rnn import pad_sequence
from torch.optim import Adam
from torchvision import transforms
from torchvision import models
from torch.utils.data import Dataset, DataLoader

OS = sys.platform

if not OS == 'win32' and not os.path.exists(CACHE_DIR):
  !mkdir -p $CACHE_DIR
if not OS == 'win32' and not os.path.exists(DATA_DIR):
  !mkdir -p $DATA_DIR
  !wget --no-check-certificate --limit-rate 100M -O UCF101.rar https://www.crcv.ucf.edu/data/UCF101/UCF101.rar
  !unrar x ./UCF101.rar $DATA_DIR > /dev/null
if not OS == 'win32' and not os.path.exists(LABEL_DIR):
  !mkdir $LABEL_DIR
  !wget --no-check-certificate -O UCF101_labels.zip https://www.crcv.ucf.edu/data/UCF101/UCF101TrainTestSplits-RecognitionTask.zip
  !unzip -d $LABEL_DIR ./UCF101_labels.zip > /dev/null

# check if data is ready
if (os.path.exists(DATA_DIR+'/UCF-101/ApplyEyeMakeup') and os.path.exists(LABEL_DIR+'/ucfTrainTestlist')):
  print("ready to go")
else:
  print("Failed to download data\nPlease manually download files from\nhttps://www.crcv.ucf.edu/data/UCF101/UCF101.rar\nand\nhttps://www.crcv.ucf.edu/data/UCF101/UCF101TrainTestSplits-RecognitionTask.zip")


ready to go


In [3]:
# enable GPU
if torch.cuda.is_available():
  print("Using GPU!")
  device = torch.device("cuda")
else:
  print("Using CPU... this is going to be slow...")
  device = torch.device("cpu")

Using GPU!


In [4]:
# video loader
'''
  params:
    1. the path the video (e.g. "ApplyEyeMakepu/v_ApplyEyeMakeup_g01_c01.avi")
  return:
    sample FRAMES_PER_CLIP frames from the video (evenly distributed along the timeline),
    return a tensor (FRAMES_PER_CLIP x height x width x color_channels) that stores these frames
'''
def video_loader(filename):
  filename = DATA_DIR + '/UCF-101/' + filename
  if not os.path.exists(filename):
    raise Exception("Cannot find file " + filename)
  frames = []
  cap = cv2.VideoCapture(filename)
  while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
      break
    else:
      frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
      frame = cv2.resize(frame, (IMG_SIZE, IMG_SIZE), interpolation=cv2.INTER_AREA)
      frames.append(frame)
  cap.release()
  if len(frames) < 1:
    raise Exception("Invalid video")
  if len(frames) > FRAMES_PER_CLIP:
    ratio = len(frames) / FRAMES_PER_CLIP
    frames_ = []
    for i in range(FRAMES_PER_CLIP):
      idx = int(i * ratio)
      frames_.append(frames[idx])
    frames = frames_
  frames = np.stack(frames, axis=0)
  return torch.tensor(frames)

In [5]:
# class mapping
class_to_id = {}
id_to_class = {}
with open(LABEL_DIR+'/ucfTrainTestlist/classInd.txt', 'r') as f:
  for line in f:
    line = line.split()
    line[0] = int(line[0]) - 1
    class_to_id[line[1]] = line[0]
    id_to_class[line[0]] = line[1]

In [6]:
# define the UCF101 dataset class
class UCF101(Dataset):
  def __init__(self, _class_to_id, _subset, _video_loader, _transform=None):
    if _subset == 'train':
      train_data = []
      with open(LABEL_DIR+'/ucfTrainTestlist/trainlist01.txt', 'r') as f1:
        for i, line in enumerate(f1):
          if i % 5 == 0:
            # save that for dev set
            continue
          line = line.split()
          train_data.append((int(line[1])-1, line[0]))   # (caption, video_filename)
      self.data = train_data
      f1.close()
    elif _subset == 'test':
      test_data = []
      with open(LABEL_DIR+'/ucfTrainTestlist/testlist01.txt', 'r') as f1:
        for line in f1:
          line_ = line.split('/')
          test_data.append((_class_to_id[line_[0]], line.strip()))
      self.data = test_data
      f1.close()
    elif _subset == 'dev':
      dev_data = []
      with open(LABEL_DIR+'/ucfTrainTestlist/trainlist01.txt', 'r') as f1:
        for i, line in enumerate(f1):
          if i % 5 != 0:
            # the sample is already in training set
            continue
          line = line.split()
          dev_data.append((int(line[1])-1, line[0]))
      self.data = dev_data
      f1.close()
    else:
      raise Exception("_subset should have value 'train', 'test', or 'dev'")
    self.video_loader = _video_loader
    self.transform = _transform
    
  def __len__(self):
    return len(self.data)
    
  def __getitem__(self, idx):
    res = self.data[idx]
    enc_video = self.video_loader(res[1])
    if self.transform is not None:
      enc_video = self.transform(enc_video)
    return (res[0], enc_video)

In [7]:
def custom_collate(batch):
  captions, frames = [], []
  for caption, frame in batch:
    captions.append(caption)    # label of current video sample
    frames.append(frame)        # sampled sequence of frames from the video
  return (
    torch.tensor(captions),
    pad_sequence(frames, batch_first=True)
  )

In [8]:
def tfs(enc_video):
  enc_video = torch.permute(enc_video, [0, 3, 1, 2]).float() / 255
  transfrom = torch.nn.Sequential(
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
  )
  return enc_video

# Load CoAtNet image classifier
Model:
*   CoAtNet (Zihang Dai, et al. 2021)

References:
*   <a href="https://arxiv.org/abs/2106.04803">Research Paper</a>
*   <a href="https://github.com/chinhsuanwu/coatnet-pytorch/blob/master/coatnet.py">Code</a>

In [9]:
# download model
if not OS == 'win32' and not os.path.exists('model'):
  !mkdir -p model/coatnet
  !wget -O model/coatnet/. --no-check-certificate https://github.com/chinhsuanwu/coatnet-pytorch/raw/master/coatnet.py
from model.coatnet.coatnet import CoAtNet

# Encoder + Decoder

In [9]:
'''
  Reference:
  This model is developed based on
  https://github.com/HHTseng/video-classification
'''


class ResCNNEncoder(nn.Module):
    def __init__(self, fc_hidden1=512, fc_hidden2=512, drop_p=0.3, CNN_embed_dim=300):
        """Load the pretrained ResNet-152 and replace top fc layer."""
        super(ResCNNEncoder, self).__init__()

        self.fc_hidden1, self.fc_hidden2 = fc_hidden1, fc_hidden2
        self.drop_p = drop_p

        resnet = models.resnet152(pretrained=True)
        modules = list(resnet.children())[:-1]      # delete the last fc layer.
        self.resnet = nn.Sequential(*modules)
        self.fc1 = nn.Linear(resnet.fc.in_features, fc_hidden1)
        self.bn1 = nn.BatchNorm1d(fc_hidden1, momentum=0.01)
        self.fc2 = nn.Linear(fc_hidden1, fc_hidden2)
        self.bn2 = nn.BatchNorm1d(fc_hidden2, momentum=0.01)
        self.fc3 = nn.Linear(fc_hidden2, CNN_embed_dim)
        
    def forward(self, x_3d):
        cnn_embed_seq = []
        for t in range(x_3d.size(1)):
            # ResNet CNN
            with torch.no_grad():
                x = self.resnet(x_3d[:, t, :, :, :])  # ResNet
                x = x.view(x.size(0), -1)             # flatten output of conv

            # FC layers
            x = self.bn1(self.fc1(x))
            x = F.relu(x)
            x = self.bn2(self.fc2(x))
            x = F.relu(x)
            x = F.dropout(x, p=self.drop_p, training=self.training)
            x = self.fc3(x)

            cnn_embed_seq.append(x)

        # swap time and sample dim such that (sample dim, time dim, CNN latent dim)
        cnn_embed_seq = torch.stack(cnn_embed_seq, dim=0).transpose_(0, 1)
        # cnn_embed_seq: shape=(batch, time_step, input_size)

        return cnn_embed_seq

In [11]:
'''
  Reference:
  This model is developed based on
  https://github.com/HHTseng/video-classification
'''


class CoatCNNEncoder(nn.Module):
    def __init__(self, fc_hidden1=512, fc_hidden2=512, drop_p=0.3, CNN_embed_dim=300):
        """Load the pretrained ResNet-152 and replace top fc layer."""
        super(CoatCNNEncoder, self).__init__()

        self.fc_hidden1, self.fc_hidden2 = fc_hidden1, fc_hidden2
        self.drop_p = drop_p

        coatnet = CoAtNet((IMG_SIZE, IMG_SIZE), 3, [2,2,3,5,2], [64,96,192,384,768], num_classes=len(id_to_class.keys()))
        modules = list(coatnet.children())[:-1]      # delete the last fc layer.
        self.coatnet = nn.Sequential(*modules)
        self.fc1 = nn.Linear(coatnet.fc.in_features, fc_hidden1)
        self.bn1 = nn.BatchNorm1d(fc_hidden1, momentum=0.01)
        self.fc2 = nn.Linear(fc_hidden1, fc_hidden2)
        self.bn2 = nn.BatchNorm1d(fc_hidden2, momentum=0.01)
        self.fc3 = nn.Linear(fc_hidden2, CNN_embed_dim)
        
    def forward(self, x_3d):
        cnn_embed_seq = []
        for t in range(x_3d.size(1)):
            # ResNet CNN
            x = self.coatnet(x_3d[:, t, :, :, :])   # CoAtNet
            x = x.view(x.size(0), -1)               # flatten output of conv

            # FC layers
            x = self.bn1(self.fc1(x))
            x = F.relu(x)
            x = self.bn2(self.fc2(x))
            x = F.relu(x)
            x = F.dropout(x, p=self.drop_p, training=self.training)
            x = self.fc3(x)

            cnn_embed_seq.append(x)

        # swap time and sample dim such that (sample dim, time dim, CNN latent dim)
        cnn_embed_seq = torch.stack(cnn_embed_seq, dim=0).transpose_(0, 1)
        # cnn_embed_seq: shape=(batch, time_step, input_size)

        return cnn_embed_seq

In [10]:
class DecoderRNN(nn.Module):
    def __init__(self, CNN_embed_dim=300, h_RNN_layers=3, h_RNN=256, h_FC_dim=128, drop_p=0.3, num_classes=50):
        super(DecoderRNN, self).__init__()

        self.RNN_input_size = CNN_embed_dim
        self.h_RNN_layers = h_RNN_layers   # RNN hidden layers
        self.h_RNN = h_RNN                 # RNN hidden nodes
        self.h_FC_dim = h_FC_dim
        self.drop_p = drop_p
        self.num_classes = num_classes

        self.LSTM = nn.LSTM(
            input_size=self.RNN_input_size,
            hidden_size=self.h_RNN,        
            num_layers=h_RNN_layers,       
            batch_first=True,       # input & output will has batch size as 1s dimension. e.g. (batch, time_step, input_size)
        )

        self.fc1 = nn.Linear(self.h_RNN, self.h_FC_dim)
        self.fc2 = nn.Linear(self.h_FC_dim, self.num_classes)

    def forward(self, x_RNN):
        
        self.LSTM.flatten_parameters()
        RNN_out, (h_n, h_c) = self.LSTM(x_RNN, None)  
        """ h_n shape (n_layers, batch, hidden_size), h_c shape (n_layers, batch, hidden_size) """ 
        """ None represents zero initial hidden state. RNN_out has shape=(batch, time_step, output_size) """

        # FC layers
        x = self.fc1(RNN_out[:, -1, :])   # choose RNN_out at the last time step
        x = F.relu(x)
        x = F.dropout(x, p=self.drop_p, training=self.training)
        x = self.fc2(x)
        # batch*class num

        return x
class Filter_Module(nn.Module):
    def __init__(self, len_feature):
        super(Filter_Module, self).__init__()
        self.len_feature = len_feature
        self.conv_1 = nn.Sequential(
            nn.Conv1d(in_channels=self.len_feature, out_channels=512, kernel_size=1,
                    stride=1, padding=0),
            nn.LeakyReLU()
        )
        self.conv_2 = nn.Sequential(
            nn.Conv1d(in_channels=512, out_channels=1, kernel_size=1,
                    stride=1, padding=0),
            nn.Sigmoid()
        )

    def forward(self, x):
        # x: (B, T, F)        
        out = x.permute(0, 2, 1)
        # out: (B, F, T)
        out = self.conv_1(out)
        out = self.conv_2(out)
        out = out.permute(0, 2, 1)
        # out: (B, T, 1)
        return out


class BaS_Decoder(nn.Module):
    def __init__(self, len_feature, num_classes,RNN_hidden_layers,RNN_hidden_nodes,RNN_FC_dim,dropout_p):
        super(BaS_Decoder, self).__init__()
        self.filter_module = Filter_Module(len_feature)
        self.len_feature = len_feature
        self.num_classes = num_classes

        # self.cas_module = CAS_Module(len_feature, num_classes)
        self.decoder = DecoderRNN(CNN_embed_dim=len_feature, h_RNN_layers=RNN_hidden_layers, h_RNN=RNN_hidden_nodes, h_FC_dim=RNN_FC_dim, drop_p=dropout_p, num_classes=num_classes)
        self.softmax = nn.Softmax(dim=1)

        # self.num_segments = num_segments
        # self.k = num_segments // 8
    

    def forward(self, x):
        fore_weights = self.filter_module(x)

        x_supp = fore_weights * x

        score_base = self.decoder(x)
        score_supp = self.decoder(x_supp)

        # slicing after sorting is much faster than torch.topk (https://github.com/pytorch/pytorch/issues/22812)
        # score_base = torch.mean(torch.topk(cas_base, self.k, dim=1)[0], dim=1)
        # sorted_scores_base, _= cas_base.sort(descending=True, dim=1)
        # topk_scores_base = sorted_scores_base[:, :self.k, :]
        # score_base = torch.mean(topk_scores_base, dim=1)

        # # score_supp = torch.mean(torch.topk(cas_supp, self.k, dim=1)[0], dim=1)
        # sorted_scores_supp, _= cas_supp.sort(descending=True, dim=1)
        # topk_scores_supp = sorted_scores_supp[:, :self.k, :]
        # score_supp = torch.mean(topk_scores_supp, dim=1)

        score_base = self.softmax(score_base)
        score_supp = self.softmax(score_supp)

        return score_base, score_supp, fore_weights

loss

In [11]:
class BaS_Net_loss(nn.Module):
    def __init__(self, alpha):
        super(BaS_Net_loss, self).__init__()
        self.alpha = alpha
        self.ce_criterion = nn.BCELoss()

    def forward(self, score_base, score_supp, fore_weights, label):
        loss = {}
        # print(label.shape)

        label_base = torch.cat((label, torch.ones((label.shape[0], 1)).cuda()), dim=1)
        label_supp = torch.cat((label, torch.zeros((label.shape[0], 1)).cuda()), dim=1)
        
        label_base = label_base / torch.sum(label_base, dim=1, keepdim=True)
        label_supp = label_supp / torch.sum(label_supp, dim=1, keepdim=True)

        loss_base = self.ce_criterion(score_base, label_base)
        loss_supp = self.ce_criterion(score_supp, label_supp)
        loss_norm = torch.mean(torch.norm(fore_weights, p=1, dim=1))

        loss_total = loss_base + loss_supp + self.alpha * loss_norm

        loss["loss_base"] = loss_base
        loss["loss_supp"] = loss_supp
        loss["loss_norm"] = loss_norm
        loss["loss_total"] = loss_total

        return loss_total, loss

In [12]:
def train_bas(encoder_model, decoder_model, optimizer, criterion, train_loader, dev_loader, _device):
  encoder_model.train()
  decoder_model.train()

  losses = []
  scores = []

  with tqdm(total=len(train_loader)+len(dev_loader)) as pbar:
    for batch in train_loader:
      captions, clips = batch
      captions = captions.to(_device)
      clips = clips.to(_device)

      N = clips.shape[0]  # current batch size
      L = clips.shape[1]  # current sequence length

      if N == 1:
        continue

      optimizer.zero_grad()

      # encoded = encoder_model(clips.view(-1, clips.shape[2], clips.shape[3], clips.shape[4]))
      # outputs = decoder_model(encoded.view(N, L, -1))
      encoded = encoder_model(clips)
      score_base,score_supp,fore_weights = decoder_model(encoded)
      label=nn.functional.one_hot(captions, 101)

      cost,loss = criterion(score_base, score_supp, fore_weights, label)
      cost.backward()
      optimizer.step()
      pbar.update(1)
    
    encoder_model.eval()
    decoder_model.eval()
    with torch.no_grad():
      for batch in dev_loader:
        captions, clips = batch
        captions = captions.to(_device)
        clips = clips.to(_device)

        N = clips.shape[0]  # current batch size
        L = clips.shape[1]  # current sequence length

        if N == 1:
          continue

        # encoded = encoder_model(clips.view(-1, clips.shape[2], clips.shape[3], clips.shape[4]))
        # outputs = decoder_model(encoded.view(N, L, -1))
        encoded = encoder_model(clips)
        score_base,score_supp,fore_weights = decoder_model(encoded)
        label=nn.functional.one_hot(captions, 101)

        cost,loss = criterion(score_base, score_supp, fore_weights, label)
        losses.append(cost.item())
        preds = torch.max(score_supp, 1)[1]
        score = accuracy_score(captions.cpu().data.squeeze().numpy(), preds.cpu().data.squeeze().numpy())
        scores.append(score)
        pbar.update(1)
  
  return sum(losses) / len(losses), sum(scores) / len(scores)

In [13]:
def eval_bas(encoder_model, decoder_model, criterion, test_loader, _device):
  encoder_model.eval()
  decoder_model.eval()

  losses = []
  scores = []

  with torch.no_grad():
    for batch in tqdm(test_loader):
      captions, clips = batch
      captions = captions.to(_device)
      clips = clips.to(_device)

      N = clips.shape[0]  # current batch size
      L = clips.shape[1]  # current sequence length

      if N == 1:
        continue

      # encoded = encoder_model(clips.view(-1, clips.shape[2], clips.shape[3], clips.shape[4]))
      # outputs = decoder_model(encoded.view(N, L, -1))
      encoded = encoder_model(clips)
      score_base,score_supp,fore_weights = decoder_model(encoded)
      label=nn.functional.one_hot(captions, 101)
      cost,loss = criterion(score_base, score_supp, fore_weights, label)
      losses.append(cost.item())
      preds = torch.max(score_supp, 1)[1]
      score = accuracy_score(captions.cpu().data.squeeze().numpy(), preds.cpu().data.squeeze().numpy())
      scores.append(score)
  
  return sum(losses) / len(losses), sum(scores) / len(scores)

get encoder and decoder

In [15]:
# EncoderCNN architecture
CNN_fc_hidden1, CNN_fc_hidden2 = 1024, 768
CNN_embed_dim = 512      # latent dim extracted by 2D CNN
img_x, img_y = 256, 342  # resize video 2d frame size
dropout_p = 0.0          # dropout probability

# DecoderRNN architecture
RNN_hidden_layers = 3
RNN_hidden_nodes = 512
RNN_FC_dim = 256

image_classifier = ResCNNEncoder(fc_hidden1=CNN_fc_hidden1, fc_hidden2=CNN_fc_hidden2, drop_p=dropout_p, CNN_embed_dim=300).to(device)
# image_classifier = CoatCNNEncoder(fc_hidden1=CNN_fc_hidden1, fc_hidden2=CNN_fc_hidden2, drop_p=dropout_p, CNN_embed_dim=300).to(device)
# (self, len_feature, num_classes,RNN_hidden_layers,RNN_hidden_nodes,RNN_FC_dim,dropout_p)
video_classifier = BaS_Decoder(len_feature=300, num_classes=len(id_to_class.keys())+1, RNN_hidden_layers=RNN_hidden_layers, RNN_hidden_nodes=RNN_hidden_nodes, RNN_FC_dim=RNN_FC_dim, dropout_p=dropout_p).to(device)

In [16]:
batch_size = 32
max_epoch = 50
early_stopping = 1
learning_rate = 1e-4

train_loader = DataLoader(
    UCF101(class_to_id, 'train', video_loader, _transform=tfs),
    collate_fn=custom_collate,
    batch_size=batch_size,
    shuffle=True
)
dev_loader = DataLoader(
    UCF101(class_to_id, 'dev', video_loader, _transform=tfs),
    collate_fn=custom_collate,
    batch_size=batch_size,
    shuffle=True
)
test_loader = DataLoader(
    UCF101(class_to_id, 'test', video_loader, _transform=tfs),
    collate_fn=custom_collate,
    batch_size=batch_size,
    shuffle=False
)

train_params = list(image_classifier.parameters()) + list(video_classifier.parameters())
optimizer = Adam(train_params, lr=learning_rate)
criterion = BaS_Net_loss(1e-4)

best_loss = 99.9
best_acc = 0.0

for epoch in range(max_epoch):
  train_loss, train_acc = train_bas(image_classifier, video_classifier, optimizer, criterion, train_loader, dev_loader, device)
  if train_loss < best_loss:
    torch.save(image_classifier.state_dict(), CACHE_DIR+'/image_classifier_Bas.pt')
    torch.save(video_classifier.state_dict(), CACHE_DIR+'/video_classifier_Bas.pt')
    best_loss = train_loss
  if train_loss > best_loss * (1 + early_stopping):
    print("Training loss not improving, stop training.")
    break
  print("Epoch {}: train loss = {:.4f}, train acc = {:.4f}".format(epoch, train_loss, train_acc))

  0%|          | 0/299 [00:00<?, ?it/s]

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


Epoch 0: train loss = 0.0926, train acc = 0.0115


  0%|          | 0/299 [00:00<?, ?it/s]

Epoch 1: train loss = 0.0884, train acc = 0.0269


  0%|          | 0/299 [00:00<?, ?it/s]

Epoch 2: train loss = 0.0833, train acc = 0.0336


  0%|          | 0/299 [00:00<?, ?it/s]

Epoch 3: train loss = 0.0816, train acc = 0.0524


  0%|          | 0/299 [00:00<?, ?it/s]

Epoch 4: train loss = 0.0767, train acc = 0.0729


  0%|          | 0/299 [00:00<?, ?it/s]

Epoch 5: train loss = 0.0744, train acc = 0.0894


  0%|          | 0/299 [00:00<?, ?it/s]

Epoch 6: train loss = 0.0725, train acc = 0.1073


  0%|          | 0/299 [00:00<?, ?it/s]

Epoch 7: train loss = 0.0719, train acc = 0.1028


  0%|          | 0/299 [00:00<?, ?it/s]

Epoch 8: train loss = 0.0690, train acc = 0.1473


  0%|          | 0/299 [00:00<?, ?it/s]

Epoch 9: train loss = 0.0667, train acc = 0.2044


  0%|          | 0/299 [00:00<?, ?it/s]

Epoch 10: train loss = 0.0651, train acc = 0.2276


  0%|          | 0/299 [00:00<?, ?it/s]

Epoch 11: train loss = 0.0605, train acc = 0.2756


  0%|          | 0/299 [00:00<?, ?it/s]

Epoch 12: train loss = 0.0572, train acc = 0.2974


  0%|          | 0/299 [00:00<?, ?it/s]

Epoch 13: train loss = 0.0584, train acc = 0.2830


  0%|          | 0/299 [00:00<?, ?it/s]

Epoch 14: train loss = 0.0533, train acc = 0.3538


  0%|          | 0/299 [00:00<?, ?it/s]

Epoch 15: train loss = 0.0504, train acc = 0.4077


  0%|          | 0/299 [00:00<?, ?it/s]

Epoch 16: train loss = 0.0488, train acc = 0.4317


  0%|          | 0/299 [00:00<?, ?it/s]

Epoch 17: train loss = 0.0475, train acc = 0.4685


  0%|          | 0/299 [00:00<?, ?it/s]

Epoch 18: train loss = 0.0459, train acc = 0.5014


  0%|          | 0/299 [00:00<?, ?it/s]

Epoch 19: train loss = 0.0423, train acc = 0.5996


  0%|          | 0/299 [00:00<?, ?it/s]

Epoch 20: train loss = 0.0460, train acc = 0.5372


  0%|          | 0/299 [00:00<?, ?it/s]

Epoch 21: train loss = 0.0403, train acc = 0.5938


  0%|          | 0/299 [00:00<?, ?it/s]

Epoch 22: train loss = 0.0393, train acc = 0.6167


  0%|          | 0/299 [00:00<?, ?it/s]

Epoch 23: train loss = 0.0374, train acc = 0.6555


  0%|          | 0/299 [00:00<?, ?it/s]

Epoch 24: train loss = 0.0339, train acc = 0.7154


  0%|          | 0/299 [00:00<?, ?it/s]

Epoch 25: train loss = 0.0386, train acc = 0.6865


  0%|          | 0/299 [00:00<?, ?it/s]

Epoch 26: train loss = 0.0431, train acc = 0.6643


  0%|          | 0/299 [00:00<?, ?it/s]

Epoch 27: train loss = 0.0338, train acc = 0.7500


  0%|          | 0/299 [00:00<?, ?it/s]

Epoch 28: train loss = 0.0360, train acc = 0.7522


  0%|          | 0/299 [00:00<?, ?it/s]

Epoch 29: train loss = 0.0294, train acc = 0.8009


  0%|          | 0/299 [00:00<?, ?it/s]

Epoch 30: train loss = 0.0315, train acc = 0.7727


  0%|          | 0/299 [00:00<?, ?it/s]

Epoch 31: train loss = 0.0298, train acc = 0.7872


  0%|          | 0/299 [00:00<?, ?it/s]

Epoch 32: train loss = 0.0275, train acc = 0.8180


  0%|          | 0/299 [00:00<?, ?it/s]

Epoch 33: train loss = 0.0268, train acc = 0.8467


  0%|          | 0/299 [00:00<?, ?it/s]

Epoch 34: train loss = 0.0281, train acc = 0.8596


  0%|          | 0/299 [00:00<?, ?it/s]

Epoch 35: train loss = 0.0335, train acc = 0.8174


  0%|          | 0/299 [00:00<?, ?it/s]

Epoch 36: train loss = 0.0266, train acc = 0.8567


  0%|          | 0/299 [00:00<?, ?it/s]

Epoch 37: train loss = 0.0248, train acc = 0.8730


  0%|          | 0/299 [00:00<?, ?it/s]

Epoch 38: train loss = 0.0255, train acc = 0.8759


  0%|          | 0/299 [00:00<?, ?it/s]

Epoch 39: train loss = 0.0250, train acc = 0.8785


  0%|          | 0/299 [00:00<?, ?it/s]

Epoch 40: train loss = 0.0247, train acc = 0.8835


  0%|          | 0/299 [00:00<?, ?it/s]

Epoch 41: train loss = 0.0250, train acc = 0.8819


  0%|          | 0/299 [00:00<?, ?it/s]

Epoch 42: train loss = 0.0270, train acc = 0.8701


  0%|          | 0/299 [00:00<?, ?it/s]

Epoch 43: train loss = 0.0246, train acc = 0.8947


  0%|          | 0/299 [00:00<?, ?it/s]

Epoch 44: train loss = 0.0253, train acc = 0.8782


  0%|          | 0/299 [00:00<?, ?it/s]

Epoch 45: train loss = 0.0254, train acc = 0.8901


  0%|          | 0/299 [00:00<?, ?it/s]

Epoch 46: train loss = 0.0231, train acc = 0.8994


  0%|          | 0/299 [00:00<?, ?it/s]

Epoch 47: train loss = 0.0243, train acc = 0.8926


  0%|          | 0/299 [00:00<?, ?it/s]

Epoch 48: train loss = 0.0238, train acc = 0.9092


  0%|          | 0/299 [00:00<?, ?it/s]

Epoch 49: train loss = 0.0222, train acc = 0.9205
