In [40]:
timm.list_models()

['adv_inception_v3',
 'bat_resnext26ts',
 'beit_base_patch16_224',
 'beit_base_patch16_224_in22k',
 'beit_base_patch16_384',
 'beit_large_patch16_224',
 'beit_large_patch16_224_in22k',
 'beit_large_patch16_384',
 'beit_large_patch16_512',
 'botnet26t_256',
 'botnet50ts_256',
 'cait_m36_384',
 'cait_m48_448',
 'cait_s24_224',
 'cait_s24_384',
 'cait_s36_384',
 'cait_xs24_384',
 'cait_xxs24_224',
 'cait_xxs24_384',
 'cait_xxs36_224',
 'cait_xxs36_384',
 'coat_lite_mini',
 'coat_lite_small',
 'coat_lite_tiny',
 'coat_mini',
 'coat_tiny',
 'convit_base',
 'convit_small',
 'convit_tiny',
 'convmixer_768_32',
 'convmixer_1024_20_ks9_p14',
 'convmixer_1536_20',
 'convnext_base',
 'convnext_base_384_in22ft1k',
 'convnext_base_in22ft1k',
 'convnext_base_in22k',
 'convnext_large',
 'convnext_large_384_in22ft1k',
 'convnext_large_in22ft1k',
 'convnext_large_in22k',
 'convnext_small',
 'convnext_tiny',
 'convnext_tiny_hnf',
 'convnext_xlarge_384_in22ft1k',
 'convnext_xlarge_in22ft1k',
 'convnext_x

In [2]:
import cv2
import os
import albumentations as A
from albumentations.pytorch import ToTensorV2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
from glob import glob
from tqdm import tqdm
from easydict import EasyDict
import timm
from sklearn.model_selection import train_test_split, StratifiedKFold
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader


# forward_features 시 LayerNorm까지 통과한 결과임

In [3]:
class BackBone(nn.Module) :
    def __init__(self, model_name, num_classes) :
        super(BackBone, self).__init__()
        self.model = timm.create_model(model_name=model_name, num_classes=num_classes, pretrained=True)
    
    def forward(self, x) :
        output = self.model(x)
        return output

In [None]:
class MLP(nn.Module) :
    def __init__(self, in_features, dropout_rate, num_state) :
        super(MLP, self).__init__()
        #forward_features 시 LayerNorm까지 통과한 결과임
        # 따라서 LayerNorm 와 AdaptiveAvgPool1d는 필요없음    
        self.linear_1 = nn.Linear(in_features=in_features, out_features=in_features/2, bias=True)
        self.gelu = nn.GELU()
        self.dropout = nn.Dropout(p=dropout_rate, inplace=False)
        self.linear_2 = nn.Linear(in_features=in_features/2, out_features=num_state, bias=True)
        
    def forward(self, x):
        x = self.linear_1(x)
        x = self.gelu(x)
        x = self.dropout(x)
        x = self.linear_2(x)
        return x

In [None]:
# class MLP(nn.Module) :
#     def __init__(self, in_features, dropout_rate, num_class, num_state) :
#         super(MLP, self).__init__()
#         self.num_class = num_class
#         self.mlps = [MLP_state(in_features, dropout_rate, num_state[i]) for i in range(num_class)]
    
#     def forward(self, x) :
#         outputs = [self.mlps[i](x) for i in range(num_class)]
#         return outputs
        

In [None]:
class CustomSwinTransformer(nn.Mudule) :
    def __init__(self, model_path, model_name, num_classes, num_class, num_state) :
        self.backbone = self.get_backbone(model_path,
                                         model_name,
                                         num_classes)
        self.mlps = [MLP_state(in_features=1024, dropout_rate, num_state[i]) for i in range(num_class)]
#         self.MLP = MLP(in_features=1024,
#                       dropout_rate=0.5,
#                       num_class=num_class,
#                       num_state=num_state)

    def forward(self, x) :
        feature_map = self.backbone.forward_features(x)
        
        pred = self.backbone(x)
        pred = torch.argmax(pred, dim=1)
        labels = list(map(lambda x : label_decoder[x.item()], pred))

    def get_backbone(self, model_path, model_name, num_classes) :
        checkpoint = torch.load(model_path)
        backbone = BackBone(model_name, num_classes)
        backbone.load_state_dict(checkpoint["model_state_dict"])
        return backbone.model

In [3]:
train_df = pd.read_csv("../data/train_df.csv")
labels = list(sorted(train_df['label'].unique()))
label_decoder = {i : k for i, k in enumerate(labels)}
label_decoder

{0: 'bottle-broken_large',
 1: 'bottle-broken_small',
 2: 'bottle-contamination',
 3: 'bottle-good',
 4: 'cable-bent_wire',
 5: 'cable-cable_swap',
 6: 'cable-combined',
 7: 'cable-cut_inner_insulation',
 8: 'cable-cut_outer_insulation',
 9: 'cable-good',
 10: 'cable-missing_cable',
 11: 'cable-missing_wire',
 12: 'cable-poke_insulation',
 13: 'capsule-crack',
 14: 'capsule-faulty_imprint',
 15: 'capsule-good',
 16: 'capsule-poke',
 17: 'capsule-scratch',
 18: 'capsule-squeeze',
 19: 'carpet-color',
 20: 'carpet-cut',
 21: 'carpet-good',
 22: 'carpet-hole',
 23: 'carpet-metal_contamination',
 24: 'carpet-thread',
 25: 'grid-bent',
 26: 'grid-broken',
 27: 'grid-glue',
 28: 'grid-good',
 29: 'grid-metal_contamination',
 30: 'grid-thread',
 31: 'hazelnut-crack',
 32: 'hazelnut-cut',
 33: 'hazelnut-good',
 34: 'hazelnut-hole',
 35: 'hazelnut-print',
 36: 'leather-color',
 37: 'leather-cut',
 38: 'leather-fold',
 39: 'leather-glue',
 40: 'leather-good',
 41: 'leather-poke',
 42: 'metal_nut

In [5]:
model = BackBone('swin_base_patch4_window7_224_in22k', 88)
checkpoint = torch.load("../model/swin_aug_v4_mixup/19E_0.0394_swin_base_patch4_window7_224_in22k.pt")
model.load_state_dict(checkpoint["model_state_dict"])
verified_model = model.model
del model

train_df = pd.read_csv("../data/train_df.csv")
labels = list(sorted(train_df['label'].unique()))
label_decoder = {i : k for i, k in enumerate(labels)}


img = cv2.imread("../data/train/aug_v5/carpet-color/aug_8_11065.png")
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

v_transforms = A.Compose([
    A.Normalize(),
    A.Resize(224, 224),
    ToTensorV2()
])

img = v_transforms(image=img)['image']
img= torch.unsqueeze(img,0)
pred = verified_model(img)
pred = torch.argmax(pred, dim=1)
display(pred)
labels = list(map(lambda x : label_decoder[x.item()], pred))
display(labels)
features = verified_model.forward_features(img)
display(features)
display(features.shape)
display(verified_model.default_cfg)
display(verified_model.default_cfg['num_classes'])

tensor([19])

['carpet-color']

tensor([[ 1.4045, -1.2245, -0.0029,  ...,  0.1885,  0.4477,  0.9608]],
       grad_fn=<ReshapeAliasBackward0>)

torch.Size([1, 1024])

{'url': 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window7_224_22k.pth',
 'num_classes': 21841,
 'input_size': (3, 224, 224),
 'pool_size': None,
 'crop_pct': 0.9,
 'interpolation': 'bicubic',
 'fixed_input_size': True,
 'mean': (0.485, 0.456, 0.406),
 'std': (0.229, 0.224, 0.225),
 'first_conv': 'patch_embed.proj',
 'classifier': 'head',
 'architecture': 'swin_base_patch4_window7_224_in22k'}

21841

In [8]:
checkpoint = torch.load("../model/swin_aug_v4_mixup/19E_0.0394_swin_base_patch4_window7_224_in22k.pt")
model.load_state_dict(checkpoint["model_state_dict"])
model

ABC(
  (model): SwinTransformer(
    (patch_embed): PatchEmbed(
      (proj): Conv2d(3, 128, kernel_size=(4, 4), stride=(4, 4))
      (norm): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
    )
    (pos_drop): Dropout(p=0.0, inplace=False)
    (layers): Sequential(
      (0): BasicLayer(
        dim=128, input_resolution=(56, 56), depth=2
        (blocks): ModuleList(
          (0): SwinTransformerBlock(
            (norm1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
            (attn): WindowAttention(
              (qkv): Linear(in_features=128, out_features=384, bias=True)
              (attn_drop): Dropout(p=0.0, inplace=False)
              (proj): Linear(in_features=128, out_features=128, bias=True)
              (proj_drop): Dropout(p=0.0, inplace=False)
              (softmax): Softmax(dim=-1)
            )
            (drop_path): Identity()
            (norm2): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
            (mlp): Mlp(
              (fc1)

In [9]:
for i in (model.parameters()) :
    print(i)

Parameter containing:
tensor([[[[ 3.5529e-02,  6.2973e-02, -2.9607e-02, -4.2225e-02],
          [ 1.3486e-02,  6.3659e-02,  1.5895e-02,  4.4560e-02],
          [-2.6367e-02, -9.5893e-03,  1.6560e-02,  6.6024e-03],
          [-6.0858e-02, -6.3047e-02, -7.8614e-02,  3.4480e-03]],

         [[-1.0920e-03,  3.1727e-02, -6.9573e-02, -8.2584e-02],
          [ 4.3398e-02,  9.7116e-02,  1.1044e-02,  4.3220e-02],
          [-1.4330e-02,  3.3674e-03,  1.8763e-02, -4.3324e-03],
          [-4.7983e-02, -2.5950e-02, -2.2483e-02,  3.6387e-02]],

         [[-4.0901e-02, -2.7044e-02, -7.1259e-02, -7.2735e-02],
          [-1.1772e-02,  4.7536e-02,  2.5983e-02,  4.5346e-02],
          [-6.5341e-03, -1.2049e-02,  3.0898e-02,  1.0964e-02],
          [-1.9473e-02,  3.3950e-03,  1.8762e-02,  3.6945e-02]]],


        [[[-1.0510e-01,  5.4497e-02,  7.9076e-02,  1.1355e-01],
          [-3.9726e-02,  1.1237e-02, -4.8774e-02, -3.9157e-02],
          [-1.6030e-02, -2.3818e-02, -1.0007e-01,  4.4518e-02],
          

Parameter containing:
tensor([0.6175, 0.7124, 0.7548, 0.8229, 0.7352, 0.8911, 0.7609, 0.7401, 0.7996,
        0.7493, 0.7161, 0.7091, 0.8132, 0.7979, 0.7833, 0.6676, 0.6734, 0.7748,
        0.8440, 0.7635, 0.8503, 0.7675, 0.7378, 0.7834, 0.6475, 0.7894, 0.7734,
        0.7843, 0.8176, 0.7215, 0.7711, 0.7271, 0.6923, 0.9119, 0.8605, 0.8265,
        0.7357, 0.7952, 0.7614, 0.8334, 0.7772, 0.7853, 0.7271, 0.7365, 0.7622,
        0.7017, 0.7119, 0.7713, 0.8260, 0.7540, 0.7439, 0.6803, 0.7463, 0.7878,
        0.7245, 0.7322, 0.6876, 0.7470, 0.7446, 0.6983, 0.8119, 0.7524, 0.5869,
        0.7036, 0.7305, 0.7313, 0.7716, 0.7562, 0.8103, 0.7922, 0.6424, 0.6280,
        0.7426, 0.6503, 0.7569, 0.5659, 0.8218, 0.7155, 0.7287, 0.7813, 0.7809,
        0.8132, 0.7962, 0.8213, 0.7467, 0.8185, 0.7879, 0.7902, 0.7235, 0.7847,
        0.6919, 0.7004, 0.7941, 0.7108, 0.7067, 0.7431, 0.7929, 0.8489, 0.7239,
        0.7974, 0.6965, 0.8372, 0.7206, 0.7937, 0.8162, 0.7835, 0.8600, 0.7191,
        0.8171, 0.

Parameter containing:
tensor([-2.0590e-02, -5.7541e-03,  5.8006e-03,  2.6935e-02, -5.8180e-02,
         8.0774e-01, -5.0591e-02, -1.9828e-01, -2.2867e-01, -3.5502e-01,
        -1.1917e-01,  8.3445e-02, -1.3160e-02, -2.1791e-01, -3.0191e-01,
         1.3659e-01, -6.5559e-02, -3.2308e-01, -2.8134e-01, -3.8315e-03,
        -1.8955e-01,  5.1702e-02,  4.3744e-01, -1.2508e-01,  3.9309e-02,
         3.7531e-01,  1.2360e-01,  4.6619e-01, -2.8950e-01,  2.6474e-01,
         2.4013e-01, -1.6279e-01,  1.1098e-01, -1.0370e-01, -3.8086e-02,
        -5.5207e-03, -3.8258e-01, -2.9054e-02, -2.2575e-01, -5.5888e-02,
        -3.0633e-02, -1.6107e-01, -1.9898e-01, -1.2534e-01,  5.5636e-01,
        -3.2059e-02,  2.3923e-01,  1.9332e-02,  4.7481e-01, -2.5287e-01,
         1.2583e-01, -1.6430e-01,  5.6932e-02,  5.2258e-02,  3.0234e-01,
         2.6172e-01,  1.6688e-01,  1.2040e-01, -3.8349e-01, -2.7505e-01,
        -3.8356e-01, -5.4918e-01,  8.6833e-03,  2.6987e-01, -5.0258e-01,
        -7.7027e-02, -1.1307e

In [7]:
test_data = torch.randn(1,3,224,224)
output = verified_model.forward_features(test_data)
output.shape

torch.Size([1, 1024])

In [10]:
verified_model.default_cfg

{'url': 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window7_224_22k.pth',
 'num_classes': 21841,
 'input_size': (3, 224, 224),
 'pool_size': None,
 'crop_pct': 0.9,
 'interpolation': 'bicubic',
 'fixed_input_size': True,
 'mean': (0.485, 0.456, 0.406),
 'std': (0.229, 0.224, 0.225),
 'first_conv': 'patch_embed.proj',
 'classifier': 'head',
 'architecture': 'swin_base_patch4_window7_224_in22k'}

In [13]:
test_data = torch.randn(16,224,224)
m = nn.AdaptiveAvgPool1d(output_size=1)
output_ = m(test_data)
output_.shape

torch.Size([16, 224, 1])

In [9]:
ln = nn.LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
output_ln = ln(output)
output_ln.shape

torch.Size([1, 1024])

In [6]:
test_data2 = torch.randn(1,1024,7,7)
abcde = m(test_data2)
abcde.shape

RuntimeError: Expected 2 to 3 dimensions, but got 4-dimensional tensor for argument #1 'self' (while checking arguments for adaptive_avg_pool1d)

In [12]:
wo = model.model
test_data = torch.randn(1,3,224,224)
output = wo.forward_features(test_data)
output

tensor([[-0.4669, -0.1555,  0.0109,  ..., -0.6617, -0.0997, -0.9116]],
       grad_fn=<ReshapeAliasBackward0>)

In [14]:
del model
wo

SwinTransformer(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(3, 128, kernel_size=(4, 4), stride=(4, 4))
    (norm): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
  )
  (pos_drop): Dropout(p=0.0, inplace=False)
  (layers): Sequential(
    (0): BasicLayer(
      dim=128, input_resolution=(56, 56), depth=2
      (blocks): ModuleList(
        (0): SwinTransformerBlock(
          (norm1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
          (attn): WindowAttention(
            (qkv): Linear(in_features=128, out_features=384, bias=True)
            (attn_drop): Dropout(p=0.0, inplace=False)
            (proj): Linear(in_features=128, out_features=128, bias=True)
            (proj_drop): Dropout(p=0.0, inplace=False)
            (softmax): Softmax(dim=-1)
          )
          (drop_path): Identity()
          (norm2): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
          (mlp): Mlp(
            (fc1): Linear(in_features=128, out_features=512, bias=True)
     

In [15]:
for i in (wo.parameters()) :
    print(i)

Parameter containing:
tensor([[[[ 3.5529e-02,  6.2973e-02, -2.9607e-02, -4.2225e-02],
          [ 1.3486e-02,  6.3659e-02,  1.5895e-02,  4.4560e-02],
          [-2.6367e-02, -9.5893e-03,  1.6560e-02,  6.6024e-03],
          [-6.0858e-02, -6.3047e-02, -7.8614e-02,  3.4480e-03]],

         [[-1.0920e-03,  3.1727e-02, -6.9573e-02, -8.2584e-02],
          [ 4.3398e-02,  9.7116e-02,  1.1044e-02,  4.3220e-02],
          [-1.4330e-02,  3.3674e-03,  1.8763e-02, -4.3324e-03],
          [-4.7983e-02, -2.5950e-02, -2.2483e-02,  3.6387e-02]],

         [[-4.0901e-02, -2.7044e-02, -7.1259e-02, -7.2735e-02],
          [-1.1772e-02,  4.7536e-02,  2.5983e-02,  4.5346e-02],
          [-6.5341e-03, -1.2049e-02,  3.0898e-02,  1.0964e-02],
          [-1.9473e-02,  3.3950e-03,  1.8762e-02,  3.6945e-02]]],


        [[[-1.0510e-01,  5.4497e-02,  7.9076e-02,  1.1355e-01],
          [-3.9726e-02,  1.1237e-02, -4.8774e-02, -3.9157e-02],
          [-1.6030e-02, -2.3818e-02, -1.0007e-01,  4.4518e-02],
          

Parameter containing:
tensor([[-7.2756e-02,  3.6091e+00, -5.0409e-01,  ..., -5.7917e-01,
         -3.7488e-01, -2.4284e-01],
        [-2.8829e-02,  2.0551e+00, -5.6591e-01,  ..., -5.2333e-01,
         -6.6773e-01, -3.0600e-01],
        [-1.1287e-01,  1.8659e+00, -5.4818e-01,  ..., -3.5699e-01,
         -6.5563e-01, -2.7390e-01],
        ...,
        [-2.2278e-02,  2.7251e+00, -2.9355e-01,  ..., -6.7502e-01,
         -6.5894e-01, -3.1120e-01],
        [-3.4644e-03,  2.5767e+00, -2.2184e-01,  ..., -5.7866e-01,
         -3.5686e-01, -3.4993e-01],
        [ 9.6417e-02,  4.1309e+00, -6.0555e-01,  ..., -5.2148e-01,
         -1.2413e-01, -3.3047e-01]], requires_grad=True)
Parameter containing:
tensor([[ 0.0517,  0.1057,  0.0341,  ...,  0.0364, -0.0451, -0.0655],
        [-0.0999,  0.0775,  0.0603,  ...,  0.1256,  0.1121, -0.0295],
        [ 0.0525,  0.0121,  0.0473,  ...,  0.0489, -0.1134, -0.1177],
        ...,
        [ 0.0412,  0.0313, -0.0004,  ...,  0.0480,  0.0091, -0.0143],
        [ 0

Parameter containing:
tensor([3.2232, 3.6738, 3.7922, 3.7944, 3.8298, 3.4596, 3.3812, 3.4778, 4.1489,
        3.7249, 3.7815, 3.5800, 3.8344, 3.6255, 3.6047, 3.4446, 3.5413, 3.8504,
        3.5469, 3.8546, 3.8482, 3.8428, 3.4149, 3.6262, 3.3308, 3.8283, 3.7635,
        3.4998, 3.7928, 3.8692, 3.7473, 3.7356, 3.5675, 3.8434, 3.7653, 3.8459,
        4.0931, 3.7427, 4.0847, 3.4790, 3.7104, 3.8151, 4.0060, 3.8575, 3.8025,
        3.6283, 3.4647, 3.7042, 3.8451, 3.8445, 3.5204, 3.5887, 3.7454, 3.8957,
        3.5341, 3.6278, 3.6485, 3.7970, 3.6448, 3.4900, 3.9035, 4.0695, 4.7018,
        4.1577, 3.6779, 3.6362, 4.3864, 3.7675, 4.1487, 3.4621, 3.2331, 3.4603,
        3.7470, 3.7895, 3.9131, 3.1741, 4.1621, 3.7928, 3.6111, 3.5697, 4.0854,
        3.6675, 3.0552, 3.7895, 3.8048, 3.8125, 3.6237, 3.8653, 3.6442, 3.6869,
        3.7890, 3.6261, 3.5641, 3.5882, 4.2910, 3.9946, 3.7540, 3.4685, 3.4168,
        3.5247, 3.7108, 3.5720, 3.7135, 3.4605, 3.6816, 3.8482, 3.8804, 3.7407,
        3.6872, 3.

In [10]:
model.model.reset_classifier(0)


In [12]:
a = model
a

CNN(
  (model): SwinTransformer(
    (patch_embed): PatchEmbed(
      (proj): Conv2d(3, 128, kernel_size=(4, 4), stride=(4, 4))
      (norm): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
    )
    (pos_drop): Dropout(p=0.0, inplace=False)
    (layers): Sequential(
      (0): BasicLayer(
        dim=128, input_resolution=(56, 56), depth=2
        (blocks): ModuleList(
          (0): SwinTransformerBlock(
            (norm1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
            (attn): WindowAttention(
              (qkv): Linear(in_features=128, out_features=384, bias=True)
              (attn_drop): Dropout(p=0.0, inplace=False)
              (proj): Linear(in_features=128, out_features=128, bias=True)
              (proj_drop): Dropout(p=0.0, inplace=False)
              (softmax): Softmax(dim=-1)
            )
            (drop_path): Identity()
            (norm2): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
            (mlp): Mlp(
              (fc1)

In [34]:
test_data = torch.randn(1,3,224,224)
test_data.shape

torch.Size([1, 3, 224, 224])

In [35]:
output = model.forward_features(test_data)
output

TypeError: 'NoneType' object is not callable

In [27]:
output = model.forward_features(test_data)
output.shape

torch.Size([1, 1024])

In [22]:
def a (b):
    for i in range(len(b)) :
        b[i] = torch.tensor(b[i])
    return b

q,w,e,r,t= a([1,2,3,4,5])
print(q,w,e,r,t)

tensor(1) tensor(2) tensor(3) tensor(4) tensor(5)
