In [7]:
import os
import datetime
import shutil
import logging
import yaml
import importlib
import time
from path import Path
from abc import ABC, abstractmethod
from PIL import Image as Im
import numpy as np
import torch.nn.functional as F
import tqdm

from matplotlib.colors import ListedColormap, LinearSegmentedColormap
from matplotlib import cm

import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torchsummary import summary
from tensorboardX import SummaryWriter

import torch.distributed as dist
from torch.utils.data.distributed import DistributedSampler

from dataloader import aachen_loader
from torch.utils.data import DataLoader

from feature_descriptors import backbone
from feature_descriptors import detection_net
# from feature_descriptors import my_model

from tqdm import tqdm
import cv2
import copy
import matplotlib
import matplotlib.pyplot as plt

In [8]:
def window_partition(x, window_size):
    # B, H, W, C : x.size -> B*Window_num, window_size, window_size, C
    B, H, W, C = x.size() 
    x = x.view(B, H // window_size, window_size, W // window_size, window_size, C)
    windows = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, C)
    return windows

In [16]:
class Mlp(nn.Module):
    def __init__(self, C, ffn_dim, act_layer=nn.GELU, drop=0.1):
        super().__init__()
        self.fc1 = nn.Linear(C, ffn_dim)
        self.act = act_layer()
        self.fc2 = nn.Linear(ffn_dim, C)
        self.drop = nn.Dropout(drop)

    def forward(self, x):
        x = self.fc1(x)
        x = self.act(x)
        x = self.drop(x)
        x = self.fc2(x)
        x = self.drop(x)
        return x

In [17]:
class SwinTransformerLayer(nn.Module):
    def __init__(self, C, num_heads, window_size, ffn_dim, act_layer = nn.GELU, dropout = 0.1):
        super().__init__()
        self.mlp1 = Mlp(C, ffn_dim, act_layer=nn.GELU, drop=dropout)
        self.mlp2 = Mlp(C, ffn_dim, act_layer=nn.GELU, drop=dropout)

        self.norm1 = nn.LayerNorm(C)
        self.norm2 = nn.LayerNorm(C)
        self.norm3 = nn.LayerNorm(C)
        self.norm4 = nn.LayerNorm(C)


        self.shift_size = window_size // 2
        self.window_size = window_size
        self.W_MSA = SwinAttention(num_heads=num_heads, C=C, dropout=dropout )
        self.SW_MSA = SwinAttention(num_heads=num_heads, C=C, dropout=dropout )

    def forward(self, x): # BS, L, C
        BS, L, C = x.shape 
        S = int(math.sqrt(L))

        shortcut = x

        x = self.norm1(x) # BS, L, C

        x_windows = self.window_to_attention(x, S, C)

        attn_x = self.W_MSA(x_windows)

        x = self.attention_to_og(attn_x, S, C)

        x = x + shortcut

        shorcut = x

        x = self.norm2(x)
        x = self.mlp1(x)

        x = x + shortcut

        shortcut = x

        x = self.norm3(x)

        x_windows = self.window_to_attention(x, S, C ,shift=True) # cyclic shift for SW_MSA

        x_attn = self.SW_MSA(x_windows)

        x = self.attention_to_og(x, S, C ,shift=True) # reverse cyclic shift for SW_MSA

        x = x+ shortcut

        shortcut = x

        x = self.norm4(x)
        x = self.mlp2(x)

        return x + shortcut

    def window_to_attention(self, x, S, C, shift = False):
        x = x.view(-1, S, S, C)
        if shift :
            x = torch.roll(x, shifts=(-self.shift_size, -self.shift_size), dims=(1, 2))
        x_windows = window_partition(x, self.window_size)
        x_windows = x_windows.view(-1, self.window_size * self.window_size, C)
        return x_windows

    def attention_to_og(self, attn_x, S, C,shift=False):
        attn_x = attn_x.view(-1, self.window_size, self.window_size, C)
        x = window_reverse(attn_x, self.window_size, S, S)
        if shift :
            x = torch.roll(x, shifts=(self.shift_size, self.shift_size), dims=(1, 2))
        x = x.view(-1, S*S, C)
        return x
class SwinAttention(nn.Module):
    def __init__(self, num_heads, C, dropout):
        super().__init__()

        self.scale = C ** -0.5

        self.qkv = nn.Linear(C, C * 3, bias=True)
        self.num_heads = num_heads

        self.softmax = nn.Softmax(dim=-1)

        self.attn_drop = nn.Dropout(0.1)

        self.proj = nn.Linear(C, C)
        self.proj_drop = nn.Dropout(0.1)

    def forward(self ,x):# BS, L, C
        # x = [B, H, W, C]
        B, L, C = x.shape


        qkv = self.qkv(x).reshape(B, L, 3, self.num_heads, C//self.num_heads).permute(2,0,3,1,4) # 3, B, Head, L, C_v

        q, k, v= qkv[0], qkv[1], qkv[2]

        q = q*self.scale

        attn = (q @ k.transpose(-1,-2)) # dot product


        """
        여기서부터 attention 작업
        """

        attn_score = self.softmax(attn)
        attn_score = self.attn_drop(attn_score) # L, L
        # B, Head, L, C_v

        out = (attn @ v).transpose(1,2).flatten(-2) # B, L, C 


        out = self.proj(out)
        out = self.proj_drop(out)

        return out

In [18]:
net = SwinTransformerLayer(C=32, num_heads=16, window_size=224, ffn_dim=5120)

In [19]:
summary(net, (3, 224, 224))

NVIDIA GeForce RTX 3090 with CUDA capability sm_86 is not compatible with the current PyTorch installation.
The current PyTorch install supports CUDA capabilities sm_37 sm_50 sm_60 sm_70.
If you want to use the NVIDIA GeForce RTX 3090 GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/



ValueError: too many values to unpack (expected 3)