In [2]:
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
from torchvision import models

import gc


https://choice37.tistory.com/27

In [3]:
print(torch.version.cuda)

11.3


In [4]:
print(torch.cuda.device_count())
print(torch.cuda.is_available())
print(torch.cuda. current_device())

1
True
0


### Dataset, DataLoader

In [5]:
from utils.dataloader_image_classification import ImageTransform,make_datapath_list,HymenopteraDataset

train_list=make_datapath_list()
val_list=make_datapath_list(phase="val")

size,mean,std=2242,(0.485,0.456,0.406),(0.229,0.224,0.225)
train_dataset=HymenopteraDataset(file_list=train_list,transform=ImageTransform(size,mean,std))
val_dataset=HymenopteraDataset(file_list=val_list,transform=ImageTransform(size,mean,std),phase="val")

batch_size=1
train_dataloader=data.DataLoader(train_dataset,batch_size=batch_size,shuffle=True)
val_dataloader=data.DataLoader(val_dataset,batch_size=batch_size)

dataloaders_dict={"train": train_dataloader,"val":val_dataloader}

./data/hymenoptera_data/train/**/*.jpg
./data/hymenoptera_data/val/**/*.jpg


### Model Structure

In [6]:
#create model instance
net=models.vgg16(pretrained=True)

#change output layer for fine tuning
net.classifier[6]=nn.Linear(in_features=4096,out_features=2)

#set train mode
net.train()

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

### Loss function 

In [7]:
criterion=nn.CrossEntropyLoss()

### Optimizer
- 모든 layer의 parameter 학습 가능하도록 작성

In [8]:
#학습할 파라미터 (with fine tuning)
params_to_update=[[] for i in range(3)]

#학습할 층의 파라미터이름 지정
update_param_name=[["features"],["classifier.0.bias","classifier.3.weight","classifier.3.bias"],["classifier.6.weight","classifier.6.bias"]]

#각 리스트에 파라미터 저장
for name,param in net.named_parameters():
    for i in range(3):
        if update_param_name[i][0] in name or name in update_param_name[i]:
            param.requires_grad=True
            params_to_update[i].append(param)
            print(f"params to update[{i+1}]에 {name} 저장")

params to update[1]에 features.0.weight 저장
params to update[1]에 features.0.bias 저장
params to update[1]에 features.2.weight 저장
params to update[1]에 features.2.bias 저장
params to update[1]에 features.5.weight 저장
params to update[1]에 features.5.bias 저장
params to update[1]에 features.7.weight 저장
params to update[1]에 features.7.bias 저장
params to update[1]에 features.10.weight 저장
params to update[1]에 features.10.bias 저장
params to update[1]에 features.12.weight 저장
params to update[1]에 features.12.bias 저장
params to update[1]에 features.14.weight 저장
params to update[1]에 features.14.bias 저장
params to update[1]에 features.17.weight 저장
params to update[1]에 features.17.bias 저장
params to update[1]에 features.19.weight 저장
params to update[1]에 features.19.bias 저장
params to update[1]에 features.21.weight 저장
params to update[1]에 features.21.bias 저장
params to update[1]에 features.24.weight 저장
params to update[1]에 features.24.bias 저장
params to update[1]에 features.26.weight 저장
params to update[1]에 features.26.bias 저장


In [9]:
lr_list=[1e-4,5e-4,1e-3]

optimizer=optim.SGD([
    {'params':params_to_update[idx],'lr':lr} for idx,lr in enumerate(lr_list)
],momentum=0.9)


### training
https://sincerechloe.tistory.com/44

In [10]:
def train_model(net,dataloaders_dict,criterion,optimizer,num_epochs):
    device=torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(device)
    net.to(device)
    torch.backends.cudnn.benchmark=True

    for epoch in range(num_epochs):
        gc.collect()
        torch.cuda.empty_cache()


        print(f"Epoch{epoch+1}/{num_epochs}")
        for phase in ["train","val"]:
            if phase=="train":
                net.train()
            else: net.eval()

            epoch_loss=0.0 #loss 합
            epoch_correct=0 #정답 수

            if epoch==0 and phase=="train":
                continue
            
            for inputs,labels in tqdm(dataloaders_dict[phase]):

                gc.collect()
                torch.cuda.empty_cache()

                inputs=inputs.to(device)
                labels=labels.to(device)

                optimizer.zero_grad()
                with torch.set_grad_enabled(phase=="train"):
                    outputs=net(inputs)
                    loss=criterion(outputs,labels)
                    _,preds=torch.max(outputs,1)
                    if phase=="train":
                        loss.backward()
                        optimizer.step()
                    epoch_loss+=loss.item()*inputs.size(0)
                    epoch_correct+=torch.sum(preds==labels.data)
            epoch_loss/=len(dataloaders_dict[phase].dataset)
            epoch_accuracy =epoch_correct.double()/len(dataloaders_dict[phase].dataset)

            print(f"[{phase}] loss:%0.2f, accuracy:%.2f\n"%(epoch_loss,epoch_accuracy))            

https://dokim.tistory.com/217

[Runtime Error] cuda out of memory. Tried to allocate ...

- https://m.blog.naver.com/PostView.nhn?blogId=readwritespeak&logNo=221969183928&categoryNo=58&proxyReferer=&proxyReferer=https:%2F%2Fwww.google.com%2F

watch -n l nvidia-smi


In [11]:
num_epochs=2
torch.cuda.empty_cache()
train_model(net,dataloaders_dict,criterion,optimizer,num_epochs)

cuda
Epoch1/2


100%|██████████| 153/153 [00:59<00:00,  2.58it/s]


[val] loss:0.70, accuracy:0.48

Epoch2/2


100%|██████████| 243/243 [03:38<00:00,  1.11it/s]


[train] loss:0.62, accuracy:0.68



100%|██████████| 153/153 [00:50<00:00,  3.00it/s]

[val] loss:0.42, accuracy:0.85






### save and load trained network

In [12]:
save_path='weights_fine_tuning.pth'
torch.save(net.state_dict(),save_path)

In [13]:
load_path='weights_fine_tuning.pth'
load_weights=torch.load(load_path)
net.load_state_dict(load_weights)

<All keys matched successfully>

In [14]:
load_weights_cpu=torch.load(load_path,map_location={'cuda':'cpu'})
net.load_state_dict(load_weights_cpu)

<All keys matched successfully>