In [None]:
# data
import glob
import os
from torch.utils.data import DataLoader, Dataset
from PIL import Image
from tqdm import tqdm
import albumentations as A
from albumentations.pytorch import ToTensorV2
import numpy as np
import cv2 as cv
from config import load_config
from torchvision import transforms

train_transforms = A.Compose(
    [
        A.Resize(height=384,width=384),
    ]
)

class DeeperDataset(Dataset):
    def __init__(self, file_list, transform=None):
        self.file_list = file_list
        self.transform = transform
        self.as_tensor = transforms.Compose([
            transforms.ToTensor(),
            ])

    def __len__(self):
        self.filelength = len(self.file_list)
        return self.filelength

    def __getitem__(self, idx):
        img_path = self.file_list[idx]
        img = Image.open(img_path)
        #print(img_path)
        img_transformed = self.transform(image = np.array(img))
        img_transformed = self.as_tensor(img_transformed['image'])
        label = img_path.split("/")[-1].split(".")[0].split("_")[-1]
        label = 1 if label == "fake" else 0

        return img_transformed, label

def load_data():

    train_dir = './'
    train_list = glob.glob(os.path.join(train_dir,'*.png'))
    train_data = DeeperDataset(train_list, transform=train_transforms)
    train_loader = DataLoader(dataset = train_data, batch_size=1, shuffle=True)

    return train_loader

train_loader = load_data()
# for data,label in train_loader:

In [21]:
import timm
from pprint import pprint
model_names = timm.list_models(pretrained=True)
pprint(model_names)

['adv_inception_v3',
 'cspdarknet53',
 'cspresnet50',
 'cspresnext50',
 'densenet121',
 'densenet161',
 'densenet169',
 'densenet201',
 'densenetblur121d',
 'dla34',
 'dla46_c',
 'dla46x_c',
 'dla60',
 'dla60_res2net',
 'dla60_res2next',
 'dla60x',
 'dla60x_c',
 'dla102',
 'dla102x',
 'dla102x2',
 'dla169',
 'dm_nfnet_f0',
 'dm_nfnet_f1',
 'dm_nfnet_f2',
 'dm_nfnet_f3',
 'dm_nfnet_f4',
 'dm_nfnet_f5',
 'dm_nfnet_f6',
 'dpn68',
 'dpn68b',
 'dpn92',
 'dpn98',
 'dpn107',
 'dpn131',
 'ecaresnet26t',
 'ecaresnet50d',
 'ecaresnet50d_pruned',
 'ecaresnet50t',
 'ecaresnet101d',
 'ecaresnet101d_pruned',
 'ecaresnet269d',
 'ecaresnetlight',
 'efficientnet_b0',
 'efficientnet_b1',
 'efficientnet_b1_pruned',
 'efficientnet_b2',
 'efficientnet_b2_pruned',
 'efficientnet_b2a',
 'efficientnet_b3',
 'efficientnet_b3_pruned',
 'efficientnet_b3a',
 'efficientnet_em',
 'efficientnet_es',
 'efficientnet_lite0',
 'ens_adv_inception_resnet_v2',
 'ese_vovnet19b_dw',
 'ese_vovnet39b',
 'fbnetc_100',
 'gernet_

In [22]:
# VIT_base_patch32_384
VIT = timm.create_model('vit_base_patch16_384',pretrained=False)
print(VIT)

VisionTransformer(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
  )
  (pos_drop): Dropout(p=0.0, inplace=False)
  (blocks): ModuleList(
    (0): Block(
      (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (attn): Attention(
        (qkv): Linear(in_features=768, out_features=2304, bias=True)
        (attn_drop): Dropout(p=0.0, inplace=False)
        (proj): Linear(in_features=768, out_features=768, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (drop_path): Identity()
      (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (mlp): Mlp(
        (fc1): Linear(in_features=768, out_features=3072, bias=True)
        (act): GELU()
        (fc2): Linear(in_features=3072, out_features=768, bias=True)
        (drop): Dropout(p=0.0, inplace=False)
      )
    )
    (1): Block(
      (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (attn): Attention(
        (

In [25]:
# ResNet
ResNet = timm.create_model('resnetv2_50x1_bitm')
print(ResNet)

ResNetV2(
  (stem): Sequential(
    (conv): StdConv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (pad): ConstantPad2d(padding=(1, 1, 1, 1), value=0.0)
    (pool): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (stages): Sequential(
    (0): ResNetStage(
      (blocks): Sequential(
        (0): PreActBottleneck(
          (downsample): DownsampleConv(
            (conv): StdConv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (norm): Identity()
          )
          (norm1): GroupNormAct(
            32, 64, eps=1e-05, affine=True
            (act): ReLU(inplace=True)
          )
          (conv1): StdConv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (norm2): GroupNormAct(
            32, 64, eps=1e-05, affine=True
            (act): ReLU(inplace=True)
          )
          (conv2): StdConv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (norm3)

In [23]:
# ResNet+VIT
resVIT = timm.create_model('vit_base_resnet50_384',pretrained=True)
print(resVIT)

VisionTransformer(
  (patch_embed): HybridEmbed(
    (backbone): ResNetV2(
      (stem): Sequential(
        (conv): StdConv2dSame(3, 64, kernel_size=(7, 7), stride=(2, 2), bias=False)
        (norm): GroupNormAct(
          32, 64, eps=1e-05, affine=True
          (act): ReLU(inplace=True)
        )
        (pool): MaxPool2dSame(kernel_size=(3, 3), stride=(2, 2), padding=(0, 0), dilation=(1, 1), ceil_mode=True)
      )
      (stages): Sequential(
        (0): ResNetStage(
          (blocks): Sequential(
            (0): Bottleneck(
              (downsample): DownsampleConv(
                (conv): StdConv2dSame(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
                (norm): GroupNormAct(
                  32, 256, eps=1e-05, affine=True
                  (act): Identity()
                )
              )
              (conv1): StdConv2dSame(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
              (norm1): GroupNormAct(
                32, 64, eps=1e-05, a

In [24]:
# DenseNet
import torch.nn as nn
import torch.nn.functional as F
DenseNet = timm.create_model('densenet169',pretrained=False)
print(DenseNet)

DenseNet(
  (features): Sequential(
    (conv0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (norm0): BatchNormAct2d(
      64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
      (act): ReLU(inplace=True)
    )
    (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (denseblock1): DenseBlock(
      (denselayer1): DenseLayer(
        (norm1): BatchNormAct2d(
          64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (act): ReLU(inplace=True)
        )
        (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm2): BatchNormAct2d(
          128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (act): ReLU(inplace=True)
        )
        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (denselayer2): DenseLayer(
        (norm1): BatchNormAct2d(
          96, eps=1

In [114]:

class Dense(nn.Module):
    
    def __init__(self,conv0,norm0,pool0,block1,trans1,block2,trans2):
        
        super(Dense,self).__init__()
        self.conv0 = conv0
        self.norm0 = norm0
        self.pool0 = pool0
        self.block1 = block1
        self.trans1 = trans1
        self.block2 = block2
        self.trans2 = trans2
    
    def forward(self,x):
        conv0 = self.conv0(x)
        norm0 = self.norm0(conv0)
        pool0 = self.pool0(norm0)
        denseblock1 = self.block1(pool0)
        transition1 = self.trans1(denseblock1)
        denseblock2 = self.block2(transition1)
        transition2 = self.trans2(denseblock2)
        bn = nn.BatchNorm2d(256)(transition2)
        act = nn.ReLU()(bn)
        
        return output
            

In [115]:
Dense_backbone = Dense(DenseNet.features.conv0,DenseNet.features.norm0,DenseNet.features.pool0,DenseNet.features.denseblock1,DenseNet.features.transition1,DenseNet.features.denseblock2,DenseNet.features.transition2)
print(Dense_backbone)

Dense(
  (conv0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (norm0): BatchNormAct2d(
    64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
    (act): ReLU(inplace=True)
  )
  (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (block1): DenseBlock(
    (denselayer1): DenseLayer(
      (norm1): BatchNormAct2d(
        64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
        (act): ReLU(inplace=True)
      )
      (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (norm2): BatchNormAct2d(
        128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
        (act): ReLU(inplace=True)
      )
      (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    )
    (denselayer2): DenseLayer(
      (norm1): BatchNormAct2d(
        96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
        (act): ReLU(

In [116]:
# DenseNetVIT
resVIT.patch_embed.backbone = Dense_backbone

In [117]:
resVIT.patch_embed.proj = nn.Conv2d(256, 768, kernel_size=(1, 1), stride=(1, 1))

In [118]:
for data,label in train_loader:
    output = resVIT(data)
    print(output.shape)

RuntimeError: Expected 4-dimensional input for 4-dimensional weight [768, 256, 1, 1], but got 2-dimensional input of size [1, 1000] instead

In [7]:
# DenseNet + 边缘 + VIT

In [8]:
# DenseNet + 边缘 + VIT + loss