In [1]:
from google.colab import drive
import os
drive.mount('/content/gdrive')

ModuleNotFoundError: No module named 'google.colab'

In [None]:
!unzip gdrive/MyDrive/images.zip

In [None]:
!pip install wandb

In [1]:
import cv2
import numpy as np
from matplotlib import pyplot as plt
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
import torch
import torch.nn as nn
import time
import tqdm
import random
import torch.nn.functional as F
import wandb

from torch.nn import init
from torch.nn.modules.utils import _pair
import math
from timm.models.layers import to_2tuple, trunc_normal_, DropPath
import einops

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
np.random.seed(42)
torch.manual_seed(42)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(42)

torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False



In [3]:
class DAttentionBaseline(nn.Module):

    def __init__(
        self, n_heads, n_head_channels, n_groups,
        attn_drop, proj_drop, stride, ksize
    ):

        super().__init__()
        self.n_head_channels = n_head_channels
        self.scale = self.n_head_channels ** -0.5
        self.n_heads = n_heads
        
        self.nc = n_head_channels * n_heads
        self.n_groups = n_groups
        self.n_group_channels = self.nc // self.n_groups
        self.ksize = ksize
        kk = self.ksize
        pad_size = kk // 2 if kk != stride else 0

        self.conv_offset = nn.Sequential(
            nn.Conv2d(self.n_group_channels, self.n_group_channels, kk, stride, pad_size, groups=self.n_group_channels),
            LayerNormProxy(self.n_group_channels),
            nn.GELU(),
            nn.Conv2d(self.n_group_channels, 2, 1, 1, 0, bias=False)
        )

        self.proj_q = nn.Conv2d(
            self.nc, self.nc,
            kernel_size=1, stride=1, padding=0
        )

        self.proj_k = nn.Conv2d(
            self.nc, self.nc,
            kernel_size=1, stride=1, padding=0
        )

        self.proj_v = nn.Conv2d(
            self.nc, self.nc,
            kernel_size=1, stride=1, padding=0
        )

        self.proj_out = nn.Conv2d(
            self.nc, self.nc,
            kernel_size=1, stride=1, padding=0
        )

        self.proj_drop = nn.Dropout(proj_drop, inplace=True)
        self.attn_drop = nn.Dropout(attn_drop, inplace=True)


    @torch.no_grad()
    def _get_ref_points(self, H_key, W_key, B, dtype, device):

        ref_y, ref_x = torch.meshgrid(
            torch.linspace(0.5, H_key - 0.5, H_key, dtype=dtype, device=device),
            torch.linspace(0.5, W_key - 0.5, W_key, dtype=dtype, device=device),
            indexing='ij'
        )
        ref = torch.stack((ref_y, ref_x), -1)
        ref[..., 1].div_(W_key - 1.0).mul_(2.0).sub_(1.0)
        ref[..., 0].div_(H_key - 1.0).mul_(2.0).sub_(1.0)
        ref = ref[None, ...].expand(B * self.n_groups, -1, -1, -1) # B * g H W 2

        return ref
    
    def forward(self, x):

        B, C, H, W = x.size()
        dtype, device = x.dtype, x.device
        # q
        q = self.proj_q(x)
        # offsets
        q_off = einops.rearrange(q, 'b (g c) h w -> (b g) c h w', g=self.n_groups, c=self.n_group_channels)
        offset = self.conv_offset(q_off).contiguous()  # B * g 2 Hg Wg
        
        Hk, Wk = offset.size(2), offset.size(3)
        n_sample = Hk * Wk


        offset = einops.rearrange(offset, 'b p h w -> b h w p')
        reference = self._get_ref_points(Hk, Wk, B, dtype, device)

        
        
        # 夾峙-1~1
        if True:
            pos = (offset + reference).clamp(-1., +1.)

        
        if True:
            x_sampled = F.grid_sample(
                input=x.reshape(B * self.n_groups, self.n_group_channels, H, W), 
                grid=pos[..., (1, 0)], # y, x -> x, y
                mode='bilinear', align_corners=True) # B * g, Cg, Hg, Wg
                
        # x~
        x_sampled = x_sampled.reshape(B, C, 1, n_sample)

        q = q.reshape(B * self.n_heads, self.n_head_channels, H * W)
        k = self.proj_k(x_sampled).reshape(B * self.n_heads, self.n_head_channels, n_sample)
        v = self.proj_v(x_sampled).reshape(B * self.n_heads, self.n_head_channels, n_sample)

        # att
        attn = torch.einsum('b c m, b c n -> b m n', q, k) # B * h, HW, Ns
        attn = attn.mul(self.scale)
        attn = F.softmax(attn, dim=2)
        attn = self.attn_drop(attn)
        out = torch.einsum('b m n, b c n -> b c m', attn, v)

        
        out = out.reshape(B, C, H, W)
        # Wo
        y = self.proj_drop(self.proj_out(out))

        return y



In [4]:
class TransformerMLP(nn.Module):

    def __init__(self, channels, expansion, drop):
        
        super().__init__()
        
        self.dim1 = channels
        self.dim2 = channels * expansion
        self.chunk = nn.Sequential()
        self.chunk.add_module('linear1', nn.Linear(self.dim1, self.dim2))
        self.chunk.add_module('act', nn.GELU())
        self.chunk.add_module('drop1', nn.Dropout(drop, inplace=True))
        self.chunk.add_module('linear2', nn.Linear(self.dim2, self.dim1))
        self.chunk.add_module('drop2', nn.Dropout(drop, inplace=True))
    
    def forward(self, x):

        _, _, H, W = x.size()
        x = einops.rearrange(x, 'b c h w -> b (h w) c')
        x = self.chunk(x)
        x = einops.rearrange(x, 'b (h w) c -> b c h w', h=H, w=W)
        return x

In [5]:
# class LocalAttention(nn.Module):

#     def __init__(self, dim, heads, window_size, attn_drop, proj_drop):
        
#         super().__init__()

#         window_size = to_2tuple(window_size)

#         self.proj_qkv = nn.Linear(dim, 3 * dim)
#         self.heads = heads
#         assert dim % heads == 0
#         head_dim = dim // heads
#         self.scale = head_dim ** -0.5
#         self.proj_out = nn.Linear(dim, dim)
#         self.window_size = window_size
#         self.proj_drop = nn.Dropout(proj_drop, inplace=True)
#         self.attn_drop = nn.Dropout(attn_drop, inplace=True)

#         Wh, Ww = self.window_size
#         self.relative_position_bias_table = nn.Parameter(
#             torch.zeros((2 * Wh - 1) * (2 * Ww - 1), heads)
#         )
#         trunc_normal_(self.relative_position_bias_table, std=0.01)

#         coords_h = torch.arange(self.window_size[0])
#         coords_w = torch.arange(self.window_size[1])
#         coords = torch.stack(torch.meshgrid([coords_h, coords_w], indexing='ij'))  # 2, Wh, Ww
#         coords_flatten = torch.flatten(coords, 1)  # 2, Wh*Ww
#         relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :]  # 2, Wh*Ww, Wh*Ww
#         relative_coords = relative_coords.permute(1, 2, 0).contiguous()  # Wh*Ww, Wh*Ww, 2
#         relative_coords[:, :, 0] += self.window_size[0] - 1  # shift to start from 0
#         relative_coords[:, :, 1] += self.window_size[1] - 1
#         relative_coords[:, :, 0] *= 2 * self.window_size[1] - 1
#         relative_position_index = relative_coords.sum(-1)  # Wh*Ww, Wh*Ww
#         self.register_buffer("relative_position_index", relative_position_index)

#     def forward(self, x, mask=None):

#         B, C, H, W = x.size()
#         r1, r2 = H // self.window_size[0], W // self.window_size[1]

#         x_total = einops.rearrange(x, 'b c (r1 h1) (r2 w1) -> b (r1 r2) (h1 w1) c', h1=self.window_size[0], w1=self.window_size[1]) # B x Nr x Ws x C

#         x_total = einops.rearrange(x_total, 'b m n c -> (b m) n c')

#         qkv = self.proj_qkv(x_total) # B' x N x 3C
#         q, k, v = torch.chunk(qkv, 3, dim=2)

#         q = q * self.scale
#         q, k, v = [einops.rearrange(t, 'b n (h c1) -> b h n c1', h=self.heads) for t in [q, k, v]]
#         attn = torch.einsum('b h m c, b h n c -> b h m n', q, k)

#         relative_position_bias = self.relative_position_bias_table[self.relative_position_index.view(-1)].view(
#             self.window_size[0] * self.window_size[1], self.window_size[0] * self.window_size[1], -1)  # Wh*Ww,Wh*Ww,nH
#         relative_position_bias = relative_position_bias.permute(2, 0, 1).contiguous()  # nH, Wh*Ww, Wh*Ww
#         attn_bias = relative_position_bias
#         attn = attn + attn_bias.unsqueeze(0)

#         if mask is not None:
#             # attn : (b * nW) h w w
#             # mask : nW ww ww
#             nW, ww, _ = mask.size()
#             attn = einops.rearrange(attn, '(b n) h w1 w2 -> b n h w1 w2', n=nW, h=self.heads, w1=ww, w2=ww) + mask.reshape(1, nW, 1, ww, ww)
#             attn = einops.rearrange(attn, 'b n h w1 w2 -> (b n) h w1 w2')
#         attn = self.attn_drop(attn.softmax(dim=3))

#         x = torch.einsum('b h m n, b h n c -> b h m c', attn, v)
#         x = einops.rearrange(x, 'b h n c1 -> b n (h c1)')
#         x = self.proj_drop(self.proj_out(x)) # B' x N x C
#         x = einops.rearrange(x, '(b r1 r2) (h1 w1) c -> b c (r1 h1) (r2 w1)', r1=r1, r2=r2, h1=self.window_size[0], w1=self.window_size[1]) # B x C x H x W

#         return x, None, None

In [6]:
class TransformerStage(nn.Module):

    def __init__(self, fmap_size, window_size,
                 dim_in, dim_embed, depths, stage_spec, n_groups, 
                 heads, stride,
                 attn_drop, proj_drop, expansion, drop,
                 use_dwc_mlp, ksize):

        super().__init__()
        fmap_size = to_2tuple(fmap_size)
        # depths：幾層att
        self.depths = depths
        # embedding dim要能整除head
        hc = dim_embed // heads
        assert dim_embed == heads * hc

        # 轉換成embedding dim
        self.proj = nn.Conv2d(dim_in, dim_embed, 1, 1, 0) if dim_in != dim_embed else nn.Identity()
        # 使用甚麼layer
        self.stage_spec = stage_spec

        
        self.layer_norms = nn.ModuleList(
            [LayerNormProxy(dim_embed)  for d in range(2 * depths)]
        )

        mlp_fn = TransformerMLP

        self.mlps = nn.ModuleList(
            [ 
                mlp_fn(dim_embed, expansion, drop) for _ in range(depths)
            ]
        )
        self.attns = nn.ModuleList()
        self.drop_path = nn.ModuleList()
        

        for i in range(depths):
            if stage_spec[i] == 'L':
                self.attns.append(
                    LocalAttention(dim_embed, heads, window_size, attn_drop, proj_drop)
                )
            elif stage_spec[i] == 'D':
                self.attns.append(
                    DAttentionBaseline(heads, 
                    hc, n_groups, attn_drop, proj_drop, 
                    stride, ksize)
                )
            elif stage_spec[i] == 'S':
                shift_size = math.ceil(window_size / 2)
                self.attns.append(
                    ShiftWindowAttention(dim_embed, heads, window_size, attn_drop, proj_drop, shift_size, fmap_size)
                )
            else:
                raise NotImplementedError(f'Spec: {stage_spec[i]} is not supported.')
        # self.drop_path.append(DropPath(drop_path_rate[i]) if drop_path_rate[i] > 0.0 else nn.Identity())         

    def forward(self, x):
        # 轉換成embedding dim
        x = self.proj(x)
        # transformer block
        for d in range(self.depths):
            if True:
                x0 = x
                # attention
                x = self.attns[d](self.layer_norms[2 * d](x))
                # x = self.drop_path[d](x) + x0
                x0 = x
                x = self.mlps[d](self.layer_norms[2 * d + 1](x))
                # x = self.drop_path[d](x) + x0
        return x

In [7]:
class LayerNormProxy(nn.Module):
    
    def __init__(self, dim):
        
        super().__init__()
        self.norm = nn.LayerNorm(dim)

    def forward(self, x):
        x = einops.rearrange(x, 'b c h w -> b h w c')
        x = self.norm(x)
        return einops.rearrange(x, 'b h w c -> b c h w')

In [8]:
# group要能被channel整除
class DAT(nn.Module):

    def __init__(self, img_size=128, patch_size=4, num_classes=50, expansion=4,
                 dim_stem=64, dims=[64, 128, 256, 512], depths=[1, 1, 1, 1], 
                 heads=[2, 4, 8, 16],
                 window_sizes=[4, 4, 4, 4],
                 drop_rate=0.0, attn_drop_rate=0.0,
                 strides=[8, 4, 2, 1],
                 stage_spec=[['D'], ['D'], ['D'], ['D']], 
                 groups=[1, 2, 4, 8],
                 use_dwc_mlps=[False, False, False, False],
                 ksizes=[9, 7, 5, 3],
                 **kwargs):
        
        super().__init__()
        # dim_stem = C
        self.patch_proj = nn.Sequential(
            nn.Conv2d(3, dim_stem, patch_size, patch_size, 0),
            # channel norm
            LayerNormProxy(dim_stem)
        )
        # 新spatial size
        img_size = img_size // patch_size
        

        self.stages = nn.ModuleList()
        for i in range(4):
            # 各層的dim
            dim1 = dim_stem if i == 0 else dims[i - 1] * 2
            dim2 = dims[i]
            self.stages.append(
                
                TransformerStage(
                    img_size, window_sizes[i],
                    dim1, dim2, depths[i],
                    stage_spec[i], groups[i], heads[i], strides[i],
                    
                    attn_drop_rate, drop_rate, expansion, drop_rate, use_dwc_mlps[i],
                    ksizes[i]
                )
            )
            # 每層後spatial size/=2
            img_size = img_size // 2

        self.down_projs = nn.ModuleList()
        for i in range(3):
            self.down_projs.append(
            # stride為2的醬採樣
               nn.Sequential(
                    nn.Conv2d(dims[i], dims[i + 1], 2, 2, 0, bias=False),
                    LayerNormProxy(dims[i + 1])
                )
            )
        
        self.cls_norm = LayerNormProxy(dims[-1]) 
        self.cls_head = nn.Linear(dims[-1], num_classes)

        self.reset_parameters()

    def reset_parameters(self):

        for m in self.parameters():
            if isinstance(m, (nn.Linear, nn.Conv2d)):
                nn.init.kaiming_normal_(m.weight)
                nn.init.zeros_(m.bias)


    def forward(self, x):
        # patch embedding
        x = self.patch_proj(x)
        # 4 stage
        for i in range(4):
            x = self.stages[i](x)
            # down
            if i < 3:
                x = self.down_projs[i](x)
        # normalize
        x = self.cls_norm(x)
        # pool
        x = F.adaptive_avg_pool2d(x, 1)
        # classifier
        x = torch.flatten(x, 1)
        x = self.cls_head(x)
        return x


In [9]:
class SimpleCNN(nn.Module):
    def __init__(self, num_classes=50):
        super(SimpleCNN, self).__init__()
        
        self.cnn = nn.Sequential(
            nn.Conv2d(3, 32, 3, 1, 1), # [32, 128, 128]
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [32, 64, 64]

            nn.Conv2d(32, 64, 3, 1, 1), # [64, 64, 64]
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [64, 32, 32]

            nn.Conv2d(64, 128, 3, 1, 1), # [128, 32, 32]
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [128, 16, 16]

            nn.Conv2d(128, 256, 3, 1, 1), # [256, 16, 16]
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),       # [256, 8, 8]
        )
        self.fc = nn.Sequential(
            nn.Linear(256*8*8, 1024),
            nn.ReLU(),
            nn.Linear(1024, num_classes)
        )

    def forward(self, x):
        out = self.cnn(x)
        out = out.view(out.size()[0], -1)
        out = self.fc(out)
        return out

In [34]:
class DATCNN(nn.Module):
    def __init__(self, num_classes=50):
        super(DATCNN, self).__init__()
        
        self.cnn = nn.Sequential(
            nn.Conv2d(3, 32, 3, 1, 1), # [32, 128, 128]
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [32, 64, 64]

            nn.Conv2d(32, 64, 3, 1, 1), # [64, 64, 64]
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [64, 32, 32]

            nn.Conv2d(64, 128, 3, 1, 1), # [128, 32, 32]
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [128, 16, 16]

            # nn.Conv2d(128, 256, 3, 1, 1), # [256, 16, 16]
            # nn.BatchNorm2d(256),
            # nn.ReLU(),
            # nn.MaxPool2d(2, 2, 0),       # [256, 8, 8]
        )
        n_heads = 4
        self.dat = DAttentionBaseline(n_heads=n_heads, n_head_channels=128//n_heads, n_groups=2, attn_drop=0, proj_drop=0, stride=1, ksize=3)
        self.nn2 = nn.Sequential(
            LayerNormProxy(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),
        )
        self.fc = nn.Sequential(
            nn.Linear(128*8*8, 1024),
            nn.ReLU(),
            nn.Linear(1024, num_classes)
        )

    def forward(self, x):
        out = self.cnn(x)
        out = self.dat(out)
        out = self.nn2(out)
        out = out.contiguous().view(out.size()[0], -1)
        out = self.fc(out)
        return out

In [38]:
class DATCNN(nn.Module):
    def __init__(self, num_classes=50):
        super(DATCNN, self).__init__()
        
        self.cnn = nn.Sequential(
            nn.Conv2d(3, 32, 3, 1, 1), # [32, 128, 128]
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [32, 64, 64]

            nn.Conv2d(32, 64, 3, 1, 1), # [64, 64, 64]
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [64, 32, 32]

            nn.Conv2d(64, 128, 3, 1, 1), # [128, 32, 32]
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [128, 16, 16]

            # nn.Conv2d(128, 256, 3, 1, 1), # [256, 16, 16]
            # nn.BatchNorm2d(256),
            # nn.ReLU(),
            # nn.MaxPool2d(2, 2, 0),       # [256, 8, 8]
        )
        n_heads = 4
        self.dat = DAttentionBaseline(n_heads=n_heads, n_head_channels=128//n_heads, n_groups=2, attn_drop=0, proj_drop=0, stride=1, ksize=3)
        self.norm = LayerNormProxy(128)
        self.nn2 = nn.Sequential(
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),
        )
        self.fc = nn.Sequential(
            nn.Linear(128*8*8, 1024),
            nn.ReLU(),
            nn.Linear(1024, num_classes)
        )

    def forward(self, x):
        out = self.cnn(x)
        out = self.norm(self.dat(out)) + out
        out = self.nn2(out)
        out = out.contiguous().view(out.size()[0], -1)
        out = self.fc(out)
        return out

In [45]:
class DATCNN_2(nn.Module):
    def __init__(self, num_classes=50):
        super(DATCNN_2, self).__init__()
        
        self.cnn_1 = nn.Sequential(
            nn.Conv2d(3, 64, 3, 1, 1), # [64, 128, 128]
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [64, 64, 64]
        )
        
        self.cnn_2 = nn.Sequential(
            nn.Conv2d(64, 128, 3, 1, 1), # [128, 32, 32]
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [128, 16, 16]
        )
        n_heads = 2
        self.dat_1 = nn.Sequential(
            DAttentionBaseline(n_heads=n_heads, n_head_channels=64//n_heads, n_groups=1, attn_drop=0, proj_drop=0, stride=4, ksize=7),# [64, 64, 64]
            LayerNormProxy(64),
        )
        self.down_1 = nn.Sequential(
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),# [64, 32, 32]
        )
        n_heads = 4
        self.dat_2 = nn.Sequential(
            DAttentionBaseline(n_heads=n_heads, n_head_channels=128//n_heads, n_groups=2, attn_drop=0, proj_drop=0, stride=1, ksize=3),# [128, 16, 16]
            LayerNormProxy(128)
        )
        self.down_2 = nn.Sequential(
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),# [128, 8, 8]
        )
        self.fc = nn.Sequential(
            nn.Linear(128*8*8, 1024),
            nn.ReLU(),
            nn.Linear(1024, num_classes)
        )

    def forward(self, x):
        out = self.cnn_1(x)
        out = self.down_1(self.dat_1(out)+out)
        out = self.cnn_2(out)
        out = self.down_2(self.dat_2(out)+out)
        out = out.contiguous().view(out.size()[0], -1)
        out = self.fc(out)
        return out

In [58]:
class DATCNN_3(nn.Module):
    def __init__(self, num_classes=50):
        super(DATCNN_3, self).__init__()
        
        self.cnn_1 = nn.Sequential(
            nn.Conv2d(3, 64, 3, 1, 1), # [64, 128, 128]
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [64, 64, 64]
        )
        
        self.cnn_2 = nn.Sequential(
            nn.Conv2d(64, 128, 3, 1, 1), # [128, 64, 64]
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [128, 32, 32]
        )
        n_heads = 4
        self.dat_1 = nn.Sequential(
            DAttentionBaseline(n_heads=n_heads, n_head_channels=128//n_heads, n_groups=4, attn_drop=0, proj_drop=0, stride=2, ksize=5),# [128, 32, 32]
            LayerNormProxy(128)
        )
        self.down_1 = nn.Sequential(
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),# [128, 16, 16]
        )
        n_heads = 8
        self.dat_2 = nn.Sequential(
            DAttentionBaseline(n_heads=n_heads, n_head_channels=128//n_heads, n_groups=8, attn_drop=0, proj_drop=0, stride=1, ksize=3),# [128, 16, 16]
            LayerNormProxy(128)
        )
        self.down_2 = nn.Sequential(
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),# [128, 8, 8]
        )
        
        self.fc = nn.Sequential(
            nn.Linear(128*8*8, 1024),
            nn.ReLU(),
            nn.Linear(1024, num_classes)
        )

    def forward(self, x):
        out = self.cnn_1(x)
        out = self.cnn_2(out)
        out = self.down_1(self.dat_1(out)+out)
        out = self.down_2(self.dat_2(out)+out)
        out = out.contiguous().view(out.size()[0], -1)
        out = self.fc(out)
        return out

In [10]:
def load_img(f):
    shapes = []
    f=open(f)
    lines=f.readlines()
    imgs, lab=[], []
    for i in range(len(lines)):
        fn, label = lines[i].split(' ')
        im1=cv2.imread(fn)

        if im1.shape[2] not in shapes:
            shapes.append(im1.shape[2])
        # im1=cv2.resize(im1, (img_size,img_size))
        # im1 = cv2.cvtColor(im1, cv2.COLOR_BGR2GRAY)

        # im1 = preprocessing(im1, op_list)
        # vec = np.reshape(im1, [-1])

        imgs.append(im1)
        lab.append(int(label))
    print(i)

    # imgs= np.asarray(imgs, np.uint8)
    lab= np.asarray(lab, np.uint8)
    # print(shapes)
    return imgs, lab


In [11]:
wandb.login()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33msunny2021137[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [12]:

    #############
    eval_time = 1
    num_epoch = 30
    num_classes = 50
    img_size = 144
    batch_size = 128
    lr = 0.001
    

    # BATCH_SIZE = 64
    # EPOCHS = 10
    # LEARNING_RATE = 0.01
    
    img_shape = [3,128,128]
    patch_size = [16,16]
    hidden_dim = 8
    num_heads = 3

    #############




In [13]:
    x, y = load_img('train.txt')
    vx, vy = load_img('val.txt')
    tx, ty = load_img('test.txt')

63324
449
449


In [14]:
    print("--3--")
    # training 時做 data augmentation
    train_transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize((144, 144)),  # 縮放
        transforms.RandomRotation(degrees=30),  # 旋轉
        transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),  # 平移
        transforms.RandomCrop(128),  # 隨機裁剪
        transforms.RandomHorizontalFlip(),  # 水平翻轉
        transforms.ToTensor(),  # 轉換為Tensor
        # transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # 標準化

    ])
    # testing 時不需做 data augmentation
    test_transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize((144, 144)),  # 縮放
        transforms.CenterCrop(128),  # 中心裁剪
        transforms.ToTensor(),  # 轉換為Tensor
        # transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # 標準化
    ])

--3--


In [15]:
from PIL import Image

class ImgDataset(Dataset):
    def __init__(self, x, y=None, transform=None):
        self.x = x
        self.y = y
        if y is not None:
            self.y = torch.LongTensor(y)
        self.transform = transform
    def __len__(self):
        return len(self.x)
    def __getitem__(self, index):
        X = self.x[index]
        if self.transform is not None:
            X = self.transform(X)
        if self.y is not None:
            Y = self.y[index]
            return X, Y
        else:
            return X



In [16]:
    train_set = ImgDataset(x, y, train_transform)
    val_set = ImgDataset(vx, vy, test_transform)
    test_set = ImgDataset(tx, ty, test_transform)
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, worker_init_fn=lambda _: np.random.seed(42))
    val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)



In [56]:
    run = wandb.init(
    # Set the project where this run will be logged
    project="task2_2",
    # Track hyperparameters and run metadata
    config={
        "model": "dat",
        "learning_rate": lr,
        "epochs": num_epoch,
        "img_size": img_size,
    },)
    wandb.define_metric("Train/epoch")
    wandb.define_metric("Train/*", step_metric="Train/epoch")
    wandb.define_metric("Val/epoch")
    wandb.define_metric("Val/*", step_metric="Val/epoch")

<wandb.sdk.wandb_metric.Metric at 0x7f6c40fb11f0>

In [None]:

    # print("--4--")
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # device = torch.device("cpu")
    model = DATCNN_3(num_classes=50).to(device)

    loss = nn.CrossEntropyLoss() # 因為是 classification task，所以 loss 使用 CrossEntropyLoss
    optimizer = torch.optim.Adam(model.parameters(), lr=lr) # optimizer 使用 Adam


    for epoch in range(num_epoch):
        print(epoch)
        epoch_start_time = time.time()
        train_acc = 0.0
        train_loss = 0.0
        val_acc = 0.0
        val_loss = 0.0

        model.train() # 確保 model 是在 train model (開啟 Dropout 等...)
        for i, data in enumerate(train_loader):
            optimizer.zero_grad() # 用 optimizer 將 model 參數的 gradient 歸零
            train_pred = model(data[0].to(device)) # 利用 model 得到預測的機率分佈 這邊實際上就是去呼叫 model 的 forward 函數
            batch_loss = loss(train_pred, data[1].to(device)) # 計算 loss （注意 prediction 跟 label 必須同時在 CPU 或是 GPU 上）
            batch_loss.backward() # 利用 back propagation 算出每個參數的 gradient
            optimizer.step() # 以 optimizer 用 gradient 更新參數值

            train_acc += np.sum(np.argmax(train_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
            train_loss += batch_loss.item()

        if True:
            model.eval()
            with torch.no_grad():
                for i, data in enumerate(val_loader):
                    val_pred = model(data[0].to(device))
                    batch_loss = loss(val_pred, data[1].to(device))

                    val_acc += np.sum(np.argmax(val_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
                    val_loss += batch_loss.item()

                
                wandb.log({"Train/epoch": epoch,
                            "Train/acc": train_acc/train_set.__len__(),
                           "Train/loss": train_loss/train_set.__len__(),
                           "Val/epoch": epoch,
                           "Val/acc": val_acc/val_set.__len__(),
                           "Val/loss": val_loss/val_set.__len__(),
                          })

    print("--5--")
    model.eval()
    test_acc = 0.0
    test_loss = 0.0
    with torch.no_grad():
        for i, data in enumerate(test_loader):
            test_pred = model(data[0].to(device))
            batch_loss = loss(test_pred, data[1].to(device))
            test_acc += np.sum(np.argmax(test_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
            test_loss += batch_loss.item()
        wandb.log({"Test/test acc": test_acc/test_set.__len__(),
                  "Test/loss": test_loss/test_set.__len__(),})
    run.finish()

0
1
2
3
4
5
6
7
8
9
10


In [None]:
import gc
del model
gc.collect()

In [None]:
    import torchvision.models as models
    # print("--4--")
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # device = torch.device("cpu")
    # 加载 ResNet34 模型
    model = models.resnet34()
    num_ftrs = model.fc.in_features
    model.fc = nn.Linear(num_ftrs, num_classes)  # 设置最后一层输出为分类数目
    model.to(device)
    loss = nn.CrossEntropyLoss() # 因為是 classification task，所以 loss 使用 CrossEntropyLoss
    optimizer = torch.optim.Adam(model.parameters(), lr=lr) # optimizer 使用 Adam


    for epoch in range(num_epoch):
        print(epoch)
        epoch_start_time = time.time()
        train_acc = 0.0
        train_loss = 0.0
        val_acc = 0.0
        val_loss = 0.0

        model.train() # 確保 model 是在 train model (開啟 Dropout 等...)
        for i, data in enumerate(train_loader):
            optimizer.zero_grad() # 用 optimizer 將 model 參數的 gradient 歸零
            train_pred = model(data[0].to(device)) # 利用 model 得到預測的機率分佈 這邊實際上就是去呼叫 model 的 forward 函數
            batch_loss = loss(train_pred, data[1].to(device)) # 計算 loss （注意 prediction 跟 label 必須同時在 CPU 或是 GPU 上）
            batch_loss.backward() # 利用 back propagation 算出每個參數的 gradient
            optimizer.step() # 以 optimizer 用 gradient 更新參數值

            train_acc += np.sum(np.argmax(train_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
            train_loss += batch_loss.item()

        if True:
            model.eval()
            with torch.no_grad():
                for i, data in enumerate(val_loader):
                    val_pred = model(data[0].to(device))
                    batch_loss = loss(val_pred, data[1].to(device))

                    val_acc += np.sum(np.argmax(val_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
                    val_loss += batch_loss.item()

                
                wandb.log({"Train/epoch": epoch,
                            "Train/acc": train_acc/train_set.__len__(),
                           "Train/loss": train_loss/train_set.__len__(),
                           "Val/epoch": epoch,
                           "Val/acc": val_acc/val_set.__len__(),
                           "Val/loss": val_loss/val_set.__len__(),
                          })

    print("--5--")
    model.eval()
    test_acc = 0.0
    test_loss = 0.0
    with torch.no_grad():
        for i, data in enumerate(test_loader):
            test_pred = model(data[0].to(device))
            batch_loss = loss(test_pred, data[1].to(device))
            test_acc += np.sum(np.argmax(test_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
            test_loss += batch_loss.item()
        wandb.log({"Test/test acc": test_acc/test_set.__len__(),
                  "Test/loss": test_loss/test_set.__len__(),})
    run.finish()

0
1
2
3
4
5
6


In [None]:
import gc
del model
gc.collect()