In [1]:
import torch,sys,os

from tqdm import tqdm

from torch import nn

from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor
import torchvision.transforms as transforms

# Self reproduce

In [2]:
from typing import Optional,Callable
from typing import Any, Callable, List, Optional, Tuple
from torch import Tensor
import torch.nn.functional as F

class GoogLeNet_v1(nn.Module):
    def __init__(self,num_classes=1000,init_weights=False,blocks: Optional[List[Callable[..., nn.Module]]] = None, aux_logits: bool = True,dropout_aux=0.7):
        super(GoogLeNet_v1,self).__init__()

        if init_weights:
            self._initialize_weights()

        self.fc = nn.Linear(1024, num_classes)

        self.aux_logits = aux_logits
        if blocks is None:
            blocks = [BasicConv2d, Inception, InceptionAux]

        inception_block = blocks[1]
        inception_aux_block = blocks[2]

        if aux_logits:
            self.aux1 = inception_aux_block(512, num_classes, dropout=dropout_aux)
            self.aux2 = inception_aux_block(528, num_classes, dropout=dropout_aux)
        else:
            self.aux1 = None  # type: ignore[assignment]
            self.aux2 = None  # type: ignore[assignment]

        self.dropout = nn.Dropout(p=0.6)

        self.Conv = nn.Sequential(
            # 使用7x7的卷积核（滑动步长2，padding为3），64通道，输出为112x112x64，卷积后进行ReLU操作
            nn.Conv2d(kernel_size=7,stride=2,padding=3,out_channels=64,in_channels=3), 
            nn.MaxPool2d(kernel_size=3,stride=2,ceil_mode=True),
            nn.ReLU(),

            nn.Conv2d(kernel_size=3,stride=1,padding=1,out_channels=192,in_channels=64), 
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3,stride=2,ceil_mode=True),
            nn.ReLU(),
        )

        self.Inception_3a = inception_block(in_channels = 192,ch1x1= 64,ch3x3red= 96,ch3x3= 128,ch5x5red= 16,ch5x5= 32,pool_proj= 32)
        self.Inception_3b = inception_block(in_channels = 256,ch1x1= 128,ch3x3red= 128,ch3x3= 192,ch5x5red= 32,ch5x5= 96,pool_proj= 64)

        self.Inception_4a = inception_block(in_channels = 480,ch1x1= 192,ch3x3red= 96,ch3x3= 208,ch5x5red= 16,ch5x5= 48,pool_proj= 64)
        self.Inception_4b = inception_block(in_channels = 512,ch1x1= 160,ch3x3red= 112,ch3x3= 224,ch5x5red= 24,ch5x5= 64,pool_proj= 64)
        self.Inception_4c = inception_block(in_channels = 512,ch1x1= 128,ch3x3red= 128,ch3x3= 256,ch5x5red= 24,ch5x5= 64,pool_proj= 64)
        self.Inception_4d = inception_block(in_channels = 512,ch1x1= 112,ch3x3red= 144,ch3x3= 288,ch5x5red= 32,ch5x5= 64,pool_proj= 64)
        self.Inception_4e = inception_block(in_channels = 528,ch1x1= 256,ch3x3red= 160,ch3x3= 320,ch5x5red= 32,ch5x5= 128,pool_proj= 128)

        self.Inception_5a = inception_block(in_channels = 832,ch1x1= 256,ch3x3red= 160,ch3x3= 320,ch5x5red= 32,ch5x5= 128,pool_proj= 128)
        self.Inception_5b = inception_block(in_channels = 832,ch1x1= 384,ch3x3red= 192,ch3x3= 384,ch5x5red= 48,ch5x5= 128,pool_proj= 128)

    def forward(self,x):
        x= self.Conv(x)

        x= self.Inception_3a(x)
        x= self.Inception_3b(x)

        x= nn.MaxPool2d(kernel_size=3,stride=2,ceil_mode=True)(x)

        x= self.Inception_4a(x)
        aux1: Optional[Tensor] = None
        if self.aux1 is not None:
            if self.training:
                aux1 = self.aux1(x)
        x= self.Inception_4b(x)
        x= self.Inception_4c(x)
        x= self.Inception_4d(x)

        aux2: Optional[Tensor] = None
        if self.aux2 is not None:
            if self.training:
                aux2 = self.aux2(x)
        x= self.Inception_4e(x)
        
        x= nn.MaxPool2d(kernel_size=3,stride=2,ceil_mode=True)(x)

        x= self.Inception_5a(x)     
        x= self.Inception_5b(x)

        x = nn.AvgPool2d(kernel_size=7,stride=1)(x)

        x = torch.flatten(x, 1)
        x = self.dropout(x)
        y = self.fc(x)
        if self.training and self.aux_logits:   # eval model lose this layer
            return y, aux2, aux1
        return y
    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)


class Inception(nn.Module):
    def __init__(
        self,
        in_channels: int,
        ch1x1: int,
        ch3x3red: int,
        ch3x3: int,
        ch5x5red: int,
        ch5x5: int,
        pool_proj: int,
        conv_block: Optional[Callable[..., nn.Module]] = None,
    ) -> None:
        super().__init__()
        if conv_block is None:
            conv_block = BasicConv2d
        self.branch1 = conv_block(in_channels, ch1x1, kernel_size=1)

        self.branch2 = nn.Sequential(
            conv_block(in_channels, ch3x3red, kernel_size=1), 
            conv_block(ch3x3red, ch3x3, kernel_size=3, padding=1)
        )

        self.branch3 = nn.Sequential(
            conv_block(in_channels, ch5x5red, kernel_size=1),
            # Here, kernel_size=3 instead of kernel_size=5 is a known bug.
            # Please see https://github.com/pytorch/vision/issues/906 for details.
            conv_block(ch5x5red, ch5x5, kernel_size=3, padding=1),
        )

        self.branch4 = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1, ceil_mode=True),
            conv_block(in_channels, pool_proj, kernel_size=1),
        )

    def _forward(self, x: Tensor) -> List[Tensor]:
        branch1 = self.branch1(x)
        branch2 = self.branch2(x)
        branch3 = self.branch3(x)
        branch4 = self.branch4(x)

        outputs = [branch1, branch2, branch3, branch4]
        return outputs

    def forward(self, x: Tensor) -> Tensor:
        outputs = self._forward(x)
        return torch.cat(outputs, 1)

        

class BasicConv2d(nn.Module):
    def __init__(self, in_channels: int, out_channels: int, **kwargs: Any) -> None:
        super().__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, bias=False, **kwargs)
        self.bn = nn.BatchNorm2d(out_channels, eps=0.001)

    def forward(self, x: Tensor) -> Tensor:
        x = self.conv(x)
        x = self.bn(x)
        return F.relu(x, inplace=True)

class InceptionAux(nn.Module):
    def __init__(
        self,
        in_channels: int,
        num_classes: int,
        conv_block: Optional[Callable[..., nn.Module]] = None,
        dropout: float = 0.7,
    ) -> None:
        super().__init__()
        if conv_block is None:
            conv_block = BasicConv2d
        self.conv = conv_block(in_channels, 128, kernel_size=1)

        self.fc1 = nn.Linear(2048, 1024)
        self.fc2 = nn.Linear(1024, num_classes)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x: Tensor) -> Tensor:
        # aux1: N x 512 x 14 x 14, aux2: N x 528 x 14 x 14
        x = F.adaptive_avg_pool2d(x, (4, 4))
        # aux1: N x 512 x 4 x 4, aux2: N x 528 x 4 x 4
        x = self.conv(x)
        # N x 128 x 4 x 4
        x = torch.flatten(x, 1)
        # N x 2048
        x = F.relu(self.fc1(x), inplace=True)
        # N x 1024
        x = self.dropout(x)
        # N x 1024
        x = self.fc2(x)
        # N x 1000 (num_classes)

        return x

 # STL10 dataset

In [3]:
transform=transforms.Compose(
    [transforms.ToTensor(),
    transforms.Normalize((0.5,0.5,0.5,),(0.5,0.5,0.5)),
	transforms.Resize([224, 224])
    ]
)

# Download training data from open datasets.
train_set = datasets.STL10(
    root="~/data/STL10/",
    split ='train',
    download=True,
    transform=transform, # transform,
)
trainloader=torch.utils.data.DataLoader(
	train_set,
	batch_size=60,
	shuffle=True,
	pin_memory=True,
    num_workers=8
	)


# Download test data from open datasets.
test_set = datasets.STL10(
    root="~/data/STL10/",
    split ='test',
    download=True,
    transform=transform, # transform,
)
testloader=torch.utils.data.DataLoader(
	test_set,
	batch_size=60,
	shuffle=False,
    pin_memory=True,
    num_workers=8
	)

# test_data_iter=iter(testloader)
# test_image,test_label=test_data_iter.next()
test_num  = len(test_set)
train_steps = len(trainloader)

Downloading http://ai.stanford.edu/~acoates/stl10/stl10_binary.tar.gz to /root/data/STL10/stl10_binary.tar.gz


  0%|          | 0/2640397119 [00:00<?, ?it/s]

Extracting /root/data/STL10/stl10_binary.tar.gz to /root/data/STL10/


  cpuset_checked))


Files already downloaded and verified


# Transfer learning

In [4]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")

model = GoogLeNet_v1(num_classes= 10,init_weights=True).to(device)

# from torchvision import models
# pretrained_googlenet = models.googlenet(pretrained=True)
# torch.save(pretrained_googlenet.state_dict(), save_path)
# model.load_state_dict(torch.load(save_path),strict=False)

Using cuda device


# loss and optimize

In [5]:
# 定义一个损失函数
loss_fn = nn.CrossEntropyLoss()

# 定义一个优化器
# optimizer = torch.optim.Adam(model.parameters(),lr=0.005)
optimizer = torch.optim.Adam(model.parameters(),lr=0.001)
# optimizer = torch.optim.SGD(model.parameters(),lr=0.001)

epochs = 40

save_path= './GoogLeNet.pth'
best_acc = 0.0


# Train

In [6]:
for epoch in range(epochs):
        # train
        model.train()
        running_loss = 0.0
        train_bar = tqdm(trainloader, file=sys.stdout)
        for step, data in enumerate(train_bar):
            images, labels = data
            optimizer.zero_grad()

            logits, aux_logits2, aux_logits1 = model(images.to(device))
            loss0 = loss_fn(logits, labels.to(device))
            loss1 = loss_fn(aux_logits1, labels.to(device))
            loss2 = loss_fn(aux_logits2, labels.to(device))
            loss = loss0 + loss1 * 0.3 + loss2 * 0.3

            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()

            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,epochs,loss)

        # validate
        model.eval()
        acc = 0.0  # accumulate accurate number / epoch
        with torch.no_grad():
            val_bar = tqdm(testloader, file=sys.stdout) # show progress
            for val_data in val_bar:
                val_images, val_labels = val_data
                outputs = model(val_images.to(device))
                predict_y = torch.max(outputs, dim=1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()

        val_accurate = acc / test_num
        print('[epoch %d] train_loss: %.3f  val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))

        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(model.state_dict(), save_path)

print('Finished Training')

train epoch[1/40] loss:2.504: 100%|██████████| 84/84 [00:27<00:00,  3.00it/s]
100%|██████████| 134/134 [00:13<00:00,  9.97it/s]
[epoch 1] train_loss: 2.885  val_accuracy: 0.249
train epoch[2/40] loss:2.511: 100%|██████████| 84/84 [00:20<00:00,  4.11it/s]
100%|██████████| 134/134 [00:13<00:00, 10.05it/s]
[epoch 2] train_loss: 2.467  val_accuracy: 0.300
train epoch[3/40] loss:2.152: 100%|██████████| 84/84 [00:21<00:00,  3.98it/s]
100%|██████████| 134/134 [00:13<00:00, 10.18it/s]
[epoch 3] train_loss: 2.195  val_accuracy: 0.378
train epoch[4/40] loss:1.753: 100%|██████████| 84/84 [00:20<00:00,  4.06it/s]
100%|██████████| 134/134 [00:13<00:00, 10.20it/s]
[epoch 4] train_loss: 2.021  val_accuracy: 0.452
train epoch[5/40] loss:1.832: 100%|██████████| 84/84 [00:20<00:00,  4.06it/s]
100%|██████████| 134/134 [00:13<00:00, 10.20it/s]
[epoch 5] train_loss: 1.848  val_accuracy: 0.479
train epoch[6/40] loss:2.192: 100%|██████████| 84/84 [00:20<00:00,  4.03it/s]
100%|██████████| 134/134 [00:13<00:00

In [7]:
best_acc

0.727625

# Pytorch Version

In [17]:
from typing import Optional,Callable
from typing import Any, Callable, List, Optional, Tuple
from torch import Tensor
import torch.nn.functional as F

class PT_GoogLeNet_v1(nn.Module):
    def __init__(self,num_classes=1000,init_weights=False,blocks: Optional[List[Callable[..., nn.Module]]] = None, aux_logits: bool = True,dropout_aux=0.7):
        super(PT_GoogLeNet_v1,self).__init__()

        if init_weights:
            self._initialize_weights()

        self.fc = nn.Linear(1024, num_classes)

        self.aux_logits = aux_logits
        if blocks is None:
            blocks = [BasicConv2d, Inception, InceptionAux]

        inception_block = blocks[1]
        inception_aux_block = blocks[2]
        conv_block = blocks[0]

        if aux_logits:
            self.aux1 = inception_aux_block(512, num_classes, dropout=dropout_aux)
            self.aux2 = inception_aux_block(528, num_classes, dropout=dropout_aux)
        else:
            self.aux1 = None  # type: ignore[assignment]
            self.aux2 = None  # type: ignore[assignment]

        self.dropout = nn.Dropout(p=0.6)

        self.conv1 = conv_block(3, 64, kernel_size=7, stride=2, padding=3)
        self.maxpool1 = nn.MaxPool2d(3, stride=2, ceil_mode=True)
        self.conv2 = conv_block(64, 64, kernel_size=1)
        self.conv3 = conv_block(64, 192, kernel_size=3, padding=1)
        self.maxpool2 = nn.MaxPool2d(3, stride=2, ceil_mode=True)

        self.Inception3a = inception_block(in_channels = 192,ch1x1= 64,ch3x3red= 96,ch3x3= 128,ch5x5red= 16,ch5x5= 32,pool_proj= 32)
        self.Inception3b = inception_block(in_channels = 256,ch1x1= 128,ch3x3red= 128,ch3x3= 192,ch5x5red= 32,ch5x5= 96,pool_proj= 64)
        self.maxpool3 = nn.MaxPool2d(3, stride=2, ceil_mode=True)

        self.Inception4a = inception_block(in_channels = 480,ch1x1= 192,ch3x3red= 96,ch3x3= 208,ch5x5red= 16,ch5x5= 48,pool_proj= 64)
        self.Inception4b = inception_block(in_channels = 512,ch1x1= 160,ch3x3red= 112,ch3x3= 224,ch5x5red= 24,ch5x5= 64,pool_proj= 64)
        self.Inception4c = inception_block(in_channels = 512,ch1x1= 128,ch3x3red= 128,ch3x3= 256,ch5x5red= 24,ch5x5= 64,pool_proj= 64)
        self.Inception4d = inception_block(in_channels = 512,ch1x1= 112,ch3x3red= 144,ch3x3= 288,ch5x5red= 32,ch5x5= 64,pool_proj= 64)
        self.Inception4e = inception_block(in_channels = 528,ch1x1= 256,ch3x3red= 160,ch3x3= 320,ch5x5red= 32,ch5x5= 128,pool_proj= 128)
        self.maxpool4 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.Inception5a = inception_block(in_channels = 832,ch1x1= 256,ch3x3red= 160,ch3x3= 320,ch5x5red= 32,ch5x5= 128,pool_proj= 128)
        self.Inception5b = inception_block(in_channels = 832,ch1x1= 384,ch3x3red= 192,ch3x3= 384,ch5x5red= 48,ch5x5= 128,pool_proj= 128)

    def forward(self,x):

        x = self.conv1(x)
        x = self.maxpool1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.maxpool2(x)

        x= self.Inception3a(x)
        x= self.Inception3b(x)

        x= nn.MaxPool2d(kernel_size=3,stride=2,ceil_mode=True)(x)

        x= self.Inception4a(x)
        aux1: Optional[Tensor] = None
        if self.aux1 is not None:
            if self.training:
                aux1 = self.aux1(x)
        x= self.Inception4b(x)
        x= self.Inception4c(x)
        x= self.Inception4d(x)

        aux2: Optional[Tensor] = None
        if self.aux2 is not None:
            if self.training:
                aux2 = self.aux2(x)
        x= self.Inception4e(x)
        
        x= nn.MaxPool2d(kernel_size=3,stride=2,ceil_mode=True)(x)

        x= self.Inception5a(x)     
        x= self.Inception5b(x)

        x = nn.AvgPool2d(kernel_size=7,stride=1)(x)

        x = torch.flatten(x, 1)
        x = self.dropout(x)
        y = self.fc(x)
        if self.training and self.aux_logits:   # eval model lose this layer
            return y, aux2, aux1
        return y
    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)


class Inception(nn.Module):
    def __init__(
        self,
        in_channels: int,
        ch1x1: int,
        ch3x3red: int,
        ch3x3: int,
        ch5x5red: int,
        ch5x5: int,
        pool_proj: int,
        conv_block: Optional[Callable[..., nn.Module]] = None,
    ) -> None:
        super().__init__()
        if conv_block is None:
            conv_block = BasicConv2d
        self.branch1 = conv_block(in_channels, ch1x1, kernel_size=1)

        self.branch2 = nn.Sequential(
            conv_block(in_channels, ch3x3red, kernel_size=1), 
            conv_block(ch3x3red, ch3x3, kernel_size=3, padding=1)
        )

        self.branch3 = nn.Sequential(
            conv_block(in_channels, ch5x5red, kernel_size=1),
            # Here, kernel_size=3 instead of kernel_size=5 is a known bug.
            # Please see https://github.com/pytorch/vision/issues/906 for details.
            conv_block(ch5x5red, ch5x5, kernel_size=3, padding=1),
        )

        self.branch4 = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1, ceil_mode=True),
            conv_block(in_channels, pool_proj, kernel_size=1),
        )

    def _forward(self, x: Tensor) -> List[Tensor]:
        branch1 = self.branch1(x)
        branch2 = self.branch2(x)
        branch3 = self.branch3(x)
        branch4 = self.branch4(x)

        outputs = [branch1, branch2, branch3, branch4]
        return outputs

    def forward(self, x: Tensor) -> Tensor:
        outputs = self._forward(x)
        return torch.cat(outputs, 1)

class BasicConv2d(nn.Module):
    def __init__(self, in_channels: int, out_channels: int, **kwargs: Any) -> None:
        super().__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, bias=False, **kwargs)
        self.bn = nn.BatchNorm2d(out_channels, eps=0.001)

    def forward(self, x: Tensor) -> Tensor:
        x = self.conv(x)
        x = self.bn(x)
        return F.relu(x, inplace=True)

class InceptionAux(nn.Module):
    def __init__(
        self,
        in_channels: int,
        num_classes: int,
        conv_block: Optional[Callable[..., nn.Module]] = None,
        dropout: float = 0.7,
    ) -> None:
        super().__init__()
        if conv_block is None:
            conv_block = BasicConv2d
        self.conv = conv_block(in_channels, 128, kernel_size=1)

        self.fc1 = nn.Linear(2048, 1024)
        self.fc2 = nn.Linear(1024, num_classes)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x: Tensor) -> Tensor:
        # aux1: N x 512 x 14 x 14, aux2: N x 528 x 14 x 14
        x = F.adaptive_avg_pool2d(x, (4, 4))
        # aux1: N x 512 x 4 x 4, aux2: N x 528 x 4 x 4
        x = self.conv(x)
        # N x 128 x 4 x 4
        x = torch.flatten(x, 1)
        # N x 2048
        x = F.relu(self.fc1(x), inplace=True)
        # N x 1024
        x = self.dropout(x)
        # N x 1024
        x = self.fc2(x)
        # N x 1000 (num_classes)

        return x

In [18]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")


pretrained_weight = './GoogLeNet_pretrained_weight.pth'
model_transfer = PT_GoogLeNet_v1(num_classes= 10,init_weights=True).to(device)

from torchvision import models
pretrained_googlenet = models.googlenet(pretrained=True)
torch.save(pretrained_googlenet.state_dict(), pretrained_weight)

model_transfer.load_state_dict(dict([(n, p) for n, p in torch.load(pretrained_weight).items() if n not in ['fc.weight', 'fc.bias']]), strict=False)
# model_transfer.load_state_dict(torch.load(save_path),strict=False)

Using cuda device


_IncompatibleKeys(missing_keys=['fc.weight', 'fc.bias', 'aux1.conv.conv.weight', 'aux1.conv.bn.weight', 'aux1.conv.bn.bias', 'aux1.conv.bn.running_mean', 'aux1.conv.bn.running_var', 'aux1.fc1.weight', 'aux1.fc1.bias', 'aux1.fc2.weight', 'aux1.fc2.bias', 'aux2.conv.conv.weight', 'aux2.conv.bn.weight', 'aux2.conv.bn.bias', 'aux2.conv.bn.running_mean', 'aux2.conv.bn.running_var', 'aux2.fc1.weight', 'aux2.fc1.bias', 'aux2.fc2.weight', 'aux2.fc2.bias', 'Inception3a.branch1.conv.weight', 'Inception3a.branch1.bn.weight', 'Inception3a.branch1.bn.bias', 'Inception3a.branch1.bn.running_mean', 'Inception3a.branch1.bn.running_var', 'Inception3a.branch2.0.conv.weight', 'Inception3a.branch2.0.bn.weight', 'Inception3a.branch2.0.bn.bias', 'Inception3a.branch2.0.bn.running_mean', 'Inception3a.branch2.0.bn.running_var', 'Inception3a.branch2.1.conv.weight', 'Inception3a.branch2.1.bn.weight', 'Inception3a.branch2.1.bn.bias', 'Inception3a.branch2.1.bn.running_mean', 'Inception3a.branch2.1.bn.running_var', 

In [19]:
# 定义一个损失函数
loss_fn = nn.CrossEntropyLoss()

# 定义一个优化器
# optimizer = torch.optim.Adam(model.parameters(),lr=0.005)
optimizer_TL = torch.optim.Adam(model_transfer.parameters(),lr=0.001)
# optimizer = torch.optim.SGD(model.parameters(),lr=0.001)

epochs = 40

TL_save_path= './GoogLeNet_TL.pth'
best_acc = 0.0


In [20]:
for epoch in range(epochs):
        # train
        model_transfer.train()
        running_loss = 0.0
        train_bar = tqdm(trainloader, file=sys.stdout)
        for step, data in enumerate(train_bar):
            images, labels = data
            optimizer_TL.zero_grad()

            logits, aux_logits2, aux_logits1 = model_transfer(images.to(device))
            loss0 = loss_fn(logits, labels.to(device))
            loss1 = loss_fn(aux_logits1, labels.to(device))
            loss2 = loss_fn(aux_logits2, labels.to(device))
            loss = loss0 + loss1 * 0.3 + loss2 * 0.3

            loss.backward()
            optimizer_TL.step()

            # print statistics
            running_loss += loss.item()

            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,epochs,loss)

        # validate
        model_transfer.eval()
        acc = 0.0  # accumulate accurate number / epoch
        with torch.no_grad():
            val_bar = tqdm(testloader, file=sys.stdout) # show progress
            for val_data in val_bar:
                val_images, val_labels = val_data
                outputs = model_transfer(val_images.to(device))
                predict_y = torch.max(outputs, dim=1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()

        val_accurate = acc / test_num
        print('[epoch %d] train_loss: %.3f  val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))

        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(model_transfer.state_dict(), TL_save_path)

print('Finished Training')

train epoch[1/40] loss:2.236: 100%|██████████| 84/84 [00:21<00:00,  3.96it/s]
100%|██████████| 134/134 [00:12<00:00, 10.49it/s]
[epoch 1] train_loss: 2.424  val_accuracy: 0.515
train epoch[2/40] loss:2.357: 100%|██████████| 84/84 [00:21<00:00,  3.88it/s]
100%|██████████| 134/134 [00:12<00:00, 10.39it/s]
[epoch 2] train_loss: 1.837  val_accuracy: 0.554
train epoch[3/40] loss:2.073: 100%|██████████| 84/84 [00:21<00:00,  3.88it/s]
100%|██████████| 134/134 [00:12<00:00, 10.47it/s]
[epoch 3] train_loss: 1.633  val_accuracy: 0.541
train epoch[4/40] loss:1.454: 100%|██████████| 84/84 [00:21<00:00,  3.86it/s]
100%|██████████| 134/134 [00:12<00:00, 10.33it/s]
[epoch 4] train_loss: 1.432  val_accuracy: 0.517
train epoch[5/40] loss:0.902: 100%|██████████| 84/84 [00:21<00:00,  3.83it/s]
100%|██████████| 134/134 [00:13<00:00, 10.29it/s]
[epoch 5] train_loss: 1.296  val_accuracy: 0.619
train epoch[6/40] loss:1.734: 100%|██████████| 84/84 [00:21<00:00,  3.83it/s]
100%|██████████| 134/134 [00:12<00:00

In [21]:
best_acc

0.779375