In [3]:
import torchsummary
import torch
import torch.nn as nn
import segmentation_models_pytorch as smp
from torchvision.models import efficientnet_b0, efficientnet_b1, efficientnet_b2, efficientnet_b3
from torchvision.models import EfficientNet_B0_Weights, EfficientNet_B1_Weights, EfficientNet_B2_Weights, EfficientNet_B3_Weights
from torchvision.models import efficientnet_v2_s, efficientnet_v2_m, efficientnet_v2_l
from torchvision.models import EfficientNet_V2_S_Weights,EfficientNet_V2_M_Weights,EfficientNet_V2_L_Weights

In [1]:
import torch
from torch import nn
from torchsummary import summary


def conv(in_channels, out_channels, kernel_size=3, stride=1, padding=1):
    """Build a 2-D convolution layer.

    With the default arguments this is a 3x3, stride-1, padding-1 convolution,
    which keeps the spatial size of its input.

    Args:
        in_channels: number of channels of the input feature map.
        out_channels: number of channels produced by the convolution.
        kernel_size: size of the convolution kernel.
        stride: stride of the convolution.
        padding: zero-padding added to both sides of each spatial dimension.

    Returns:
        A freshly constructed ``nn.Conv2d``.
    """
    layer_kwargs = dict(kernel_size=kernel_size, stride=stride, padding=padding)
    return nn.Conv2d(in_channels, out_channels, **layer_kwargs)


def upconv2x2(in_channels, out_channels, mode='transpose'):
    """Build a layer that doubles the spatial resolution of a feature map.

    Args:
        in_channels: number of channels of the input feature map.
        out_channels: number of channels of the upsampled output.
        mode: ``'transpose'`` selects a learnable transposed convolution
            (kernel 4, stride 2, padding 1). Any other value selects bilinear
            upsampling followed by a 1x1 convolution that changes the channel
            count.

    Returns:
        An ``nn.ConvTranspose2d`` or an ``nn.Sequential(Upsample, Conv2d)``.
    """
    if mode != 'transpose':
        # Fixed (non-learned) interpolation, then a 1x1 conv to remap channels.
        upsample = nn.Upsample(mode='bilinear', scale_factor=2)
        project = conv(in_channels, out_channels, kernel_size=1, stride=1, padding=0)
        return nn.Sequential(upsample, project)

    return nn.ConvTranspose2d(in_channels, out_channels, kernel_size=4, stride=2, padding=1)


class UNetDownBlock(nn.Module):
    """Encoder stage of the U-Net: two Conv -> BatchNorm -> ReLU units.

    The first convolution uses the caller-supplied kernel size, stride and
    padding (so it can also downsample); the second one is always the
    ``conv`` helper's default 3x3 / stride 1 / padding 1 and keeps the shape.

    Args:
        in_channels: channels of the incoming feature map.
        out_channels: channels produced by both convolutions.
        kernel_size: kernel size of the first convolution.
        stride: stride of the first convolution.
        padding: padding of the first convolution.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride, padding):
        super(UNetDownBlock, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding

        # Forward `padding` explicitly: previously it was stored but never
        # used, so the helper's default of 1 was applied whatever the caller
        # asked for.
        self.conv1 = conv(self.in_channels, self.out_channels, kernel_size=self.kernel_size,
                          stride=self.stride, padding=self.padding)
        self.bn1 = nn.BatchNorm2d(self.out_channels)
        self.relu1 = nn.ReLU()

        self.conv2 = conv(self.out_channels, self.out_channels)
        self.bn2 = nn.BatchNorm2d(self.out_channels)
        self.relu2 = nn.ReLU()

    def forward(self, x):
        """Apply both Conv-BN-ReLU units to ``x`` and return the result."""
        x = self.relu1(self.bn1(self.conv1(x)))
        x = self.relu2(self.bn2(self.conv2(x)))

        return x


class UNetUpBlock(nn.Module):
    """Decoder stage of the U-Net.

    Upsamples the deeper feature map by 2x, merges it with the matching
    encoder feature map, then refines the result with two
    Conv -> BatchNorm -> ReLU units.

    Args:
        in_channels: channels of the deeper feature map to upsample.
        out_channels: channels after upsampling and after each convolution.
        merge_mode: ``'concat'`` concatenates along the channel axis; any
            other value adds the two maps element-wise.
        up_mode: forwarded to ``upconv2x2`` as its ``mode``.
    """

    def __init__(self, in_channels, out_channels, merge_mode='concat', up_mode='transpose'):
        super().__init__()

        self.in_channels, self.out_channels = in_channels, out_channels
        self.merge_mode, self.up_mode = merge_mode, up_mode

        self.upconv = upconv2x2(in_channels, out_channels, mode=up_mode)

        # Concatenation doubles the channels seen by the first convolution;
        # element-wise addition leaves the channel count unchanged.
        merged_channels = 2 * out_channels if merge_mode == 'concat' else out_channels
        self.conv1 = conv(merged_channels, out_channels)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu1 = nn.ReLU()
        self.conv2 = conv(out_channels, out_channels)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.relu2 = nn.ReLU()

    def forward(self, from_up, from_down):
        """Merge the upsampled ``from_up`` with the skip tensor ``from_down``.

        Args:
            from_up: feature map coming from the deeper decoder stage.
            from_down: skip-connection feature map from the encoder.

        Returns:
            The refined feature map with ``out_channels`` channels.
        """
        upsampled = self.upconv(from_up)

        if self.merge_mode == 'concat':
            merged = torch.cat((upsampled, from_down), 1)
        else:
            merged = upsampled + from_down

        hidden = self.relu1(self.bn1(self.conv1(merged)))
        return self.relu2(self.bn2(self.conv2(hidden)))


class UNet(nn.Module):
    """Five-level U-Net that maps an image to a 3-channel map in [0, 1].

    The encoder has one full-resolution stage followed by four stride-2
    stages (64 -> 128 -> 256 -> 512 -> 512 channels). The decoder mirrors it
    with four up blocks that consume the encoder outputs as skip connections.
    The head is a 3x3 convolution to 3 channels followed by a sigmoid; the
    output channel count is fixed at 3 regardless of ``n_channels``.

    Because each of the four downsampling stages halves the resolution and
    each up block doubles it, the input height and width need to be multiples
    of 16 for the skip tensors to line up.

    Args:
        n_channels: number of channels of the input image.
        merge_mode: how skip connections are merged (see ``UNetUpBlock``).
        up_mode: how feature maps are upsampled (see ``upconv2x2``).
    """

    def __init__(self, n_channels=3, merge_mode='concat', up_mode='transpose'):
        super(UNet, self).__init__()
        self.n_channels = n_channels
        # Legacy misspelled alias, kept so existing code that reads
        # `model.n_chnnels` keeps working.
        self.n_chnnels = n_channels
        self.merge_mode = merge_mode
        self.up_mode = up_mode

        # Encoder: (in, out, kernel, stride, padding)
        self.down1 = UNetDownBlock(self.n_channels, 64, 3, 1, 1)
        self.down2 = UNetDownBlock(64, 128, 4, 2, 1)
        self.down3 = UNetDownBlock(128, 256, 4, 2, 1)
        self.down4 = UNetDownBlock(256, 512, 4, 2, 1)
        self.down5 = UNetDownBlock(512, 512, 4, 2, 1)

        # Decoder
        self.up1 = UNetUpBlock(512, 512, merge_mode=self.merge_mode, up_mode=self.up_mode)
        self.up2 = UNetUpBlock(512, 256, merge_mode=self.merge_mode, up_mode=self.up_mode)
        self.up3 = UNetUpBlock(256, 128, merge_mode=self.merge_mode, up_mode=self.up_mode)
        self.up4 = UNetUpBlock(128, 64, merge_mode=self.merge_mode, up_mode=self.up_mode)

        # Output head: sigmoid squashes the 3 output channels into [0, 1].
        self.conv_final = nn.Sequential(conv(64, 3, 3, 1, 1), nn.Sigmoid())

    def forward(self, x):
        """Run the encoder, then the decoder with skip connections.

        Args:
            x: input batch with ``n_channels`` channels.

        Returns:
            A tensor with 3 channels and values in [0, 1].
        """
        x1 = self.down1(x)
        x2 = self.down2(x1)
        x3 = self.down3(x2)
        x4 = self.down4(x3)
        x5 = self.down5(x4)

        # Each up block pairs the running decoder state with the encoder
        # output of the same resolution.
        x = self.up1(x5, x4)
        x = self.up2(x, x3)
        x = self.up3(x, x2)
        x = self.up4(x, x1)
        x = self.conv_final(x)

        return x


if __name__ == "__main__":
    # Print a layer-by-layer summary of the U-Net for a 256x256 RGB input,
    # on the GPU when one is available and on the CPU otherwise.
    if torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    model = UNet().to(device)
    summary(model, (3, 256, 256))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 256, 256]           1,792
       BatchNorm2d-2         [-1, 64, 256, 256]             128
              ReLU-3         [-1, 64, 256, 256]               0
            Conv2d-4         [-1, 64, 256, 256]          36,928
       BatchNorm2d-5         [-1, 64, 256, 256]             128
              ReLU-6         [-1, 64, 256, 256]               0
     UNetDownBlock-7         [-1, 64, 256, 256]               0
            Conv2d-8        [-1, 128, 128, 128]         131,200
       BatchNorm2d-9        [-1, 128, 128, 128]             256
             ReLU-10        [-1, 128, 128, 128]               0
           Conv2d-11        [-1, 128, 128, 128]         147,584
      BatchNorm2d-12        [-1, 128, 128, 128]             256
             ReLU-13        [-1, 128, 128, 128]               0
    UNetDownBlock-14        [-1, 128, 1

In [2]:
from torchvision.models import efficientnet_v2_s, efficientnet_v2_m, efficientnet_v2_l
from torchvision.models import EfficientNet_V2_S_Weights,EfficientNet_V2_M_Weights,EfficientNet_V2_L_Weights
# EfficientNetV2-based autoencoder definitions
class _EfficientNetAutoencoder(nn.Module):
    """Shared implementation of the EfficientNet-encoder autoencoders.

    The encoder is a full torchvision EfficientNet model, of which only the
    ``features`` sub-module is used in ``forward``. The decoder is a stack of
    five stride-2 transposed convolutions (1280 -> 640 -> 256 -> 128 -> 64 -> 3
    channels), each followed by ReLU except the last, which is followed by a
    sigmoid so that pixel values fall in [0, 1].

    Args:
        builder: torchvision model factory accepting a ``weights`` keyword.
        weights: value forwarded to ``builder`` as ``weights`` (``None`` gives
            a randomly initialised encoder).
    """

    # (in_channels, out_channels) of each transposed convolution, in order.
    _DECODER_CHANNELS = ((1280, 640), (640, 256), (256, 128), (128, 64), (64, 3))

    def __init__(self, builder, weights):
        super(_EfficientNetAutoencoder, self).__init__()
        # `weights` is passed by keyword: torchvision deprecates passing it
        # positionally.
        self.encoder = builder(weights=weights)
        self.decoder = self._build_decoder()

    @classmethod
    def _build_decoder(cls):
        """Return the decoder as ``nn.Sequential`` (ConvTranspose2d + activation pairs)."""
        layers = []
        last_idx = len(cls._DECODER_CHANNELS) - 1
        for idx, (in_ch, out_ch) in enumerate(cls._DECODER_CHANNELS):
            layers.append(nn.ConvTranspose2d(in_ch, out_ch, kernel_size=4, stride=2, padding=1))
            # Sigmoid on the final layer maps pixel values into [0, 1].
            layers.append(nn.Sigmoid() if idx == last_idx else nn.ReLU())
        return nn.Sequential(*layers)

    def forward(self, x):
        """Encode ``x`` with the EfficientNet feature extractor, then decode it.

        Args:
            x: input image batch.

        Returns:
            The reconstruction, with 3 channels and values in [0, 1].
        """
        # Extract features with the encoder
        x = self.encoder.features(x)
        # Reconstruct with the decoder
        x = self.decoder(x)
        return x


class EfficientNetV2SAutoencoder(_EfficientNetAutoencoder):
    """Autoencoder with an EfficientNetV2-S encoder.

    Args:
        weights: encoder weights; defaults to the pretrained
            ``EfficientNet_V2_S_Weights.DEFAULT``.
    """

    def __init__(self, weights=EfficientNet_V2_S_Weights.DEFAULT):
        super(EfficientNetV2SAutoencoder, self).__init__(efficientnet_v2_s, weights)


class EfficientNetV2MAutoencoder(_EfficientNetAutoencoder):
    """Autoencoder with an EfficientNetV2-M encoder.

    Args:
        weights: encoder weights; defaults to the pretrained
            ``EfficientNet_V2_M_Weights.DEFAULT``.
    """

    def __init__(self, weights=EfficientNet_V2_M_Weights.DEFAULT):
        super(EfficientNetV2MAutoencoder, self).__init__(efficientnet_v2_m, weights)


class EfficientNetV2LAutoencoder(_EfficientNetAutoencoder):
    """Autoencoder with an EfficientNetV2-L encoder.

    Args:
        weights: encoder weights; defaults to the pretrained
            ``EfficientNet_V2_L_Weights.DEFAULT``.
    """

    def __init__(self, weights=EfficientNet_V2_L_Weights.DEFAULT):
        super(EfficientNetV2LAutoencoder, self).__init__(efficientnet_v2_l, weights)


In [7]:
class EfficientNetB0Decoder(nn.Module):
    """Autoencoder with an EfficientNet-B0 encoder and a conv-refined decoder.

    Only ``encoder.features`` is used in ``forward``. The decoder is five
    upsampling stages (1280 -> 512 -> 256 -> 128 -> 64 -> 3 channels). Each
    stage doubles the resolution with a transposed convolution and refines it
    with two 3x3 convolutions. A final sigmoid maps the output into [0, 1].
    """

    def __init__(self):
        super(EfficientNetB0Decoder, self).__init__()
        # Use EfficientNet-B0 as the encoder. `weights` is passed by keyword:
        # torchvision deprecates passing it positionally.
        self.encoder = efficientnet_b0(weights=EfficientNet_B0_Weights.DEFAULT)

        def CBR(in_ch, out_ch, final_relu=True):
            """One decoder stage: 2x upsample, then two Conv-BN(-ReLU) units.

            ``final_relu=False`` omits the trailing ReLU. ReLU has no
            parameters, so the stage's state_dict keys are unaffected.
            """
            layers = [
                nn.ConvTranspose2d(in_ch, out_ch, kernel_size=2, stride=2),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.Conv2d(out_ch, out_ch, kernel_size=3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.Conv2d(out_ch, out_ch, kernel_size=3, padding=1),
                nn.BatchNorm2d(out_ch),
            ]
            if final_relu:
                layers.append(nn.ReLU())
            return nn.Sequential(*layers)

        self.decoder = nn.Sequential(
            CBR(1280, 512),
            CBR(512, 256),
            CBR(256, 128),
            CBR(128, 64),
            # No ReLU right before the sigmoid: sigmoid of a non-negative
            # value is always >= 0.5, which would make pixel values in
            # [0, 0.5) unreachable.
            CBR(64, 3, final_relu=False),
            nn.Sigmoid()
        )

    def forward(self, x):
        """Encode ``x`` with EfficientNet-B0 features, then decode it.

        Args:
            x: input image batch.

        Returns:
            The reconstruction, with 3 channels and values in (0, 1).
        """
        # Extract features with the encoder
        x = self.encoder.features(x)
        # Reconstruct with the decoder
        x = self.decoder(x)
        return x

In [8]:
# Instantiate the EfficientNet-B0 encoder/decoder model (the constructor loads the default pretrained encoder weights).
model = EfficientNetB0Decoder()



In [10]:
# Print a per-layer summary (output shapes and parameter counts) for a 224x224 RGB input, on the CPU.
torchsummary.summary(model, (3,224,224), device='cpu')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 112, 112]             864
       BatchNorm2d-2         [-1, 32, 112, 112]              64
              SiLU-3         [-1, 32, 112, 112]               0
            Conv2d-4         [-1, 32, 112, 112]             288
       BatchNorm2d-5         [-1, 32, 112, 112]              64
              SiLU-6         [-1, 32, 112, 112]               0
 AdaptiveAvgPool2d-7             [-1, 32, 1, 1]               0
            Conv2d-8              [-1, 8, 1, 1]             264
              SiLU-9              [-1, 8, 1, 1]               0
           Conv2d-10             [-1, 32, 1, 1]             288
          Sigmoid-11             [-1, 32, 1, 1]               0
SqueezeExcitation-12         [-1, 32, 112, 112]               0
           Conv2d-13         [-1, 16, 112, 112]             512
      BatchNorm2d-14         [-1, 16, 1

In [22]:
# Instantiate the EfficientNetV2-S autoencoder; on first use the pretrained checkpoint is downloaded (see the log below).
model = EfficientNetV2SAutoencoder()

Downloading: "https://download.pytorch.org/models/efficientnet_v2_s-dd5fe13b.pth" to /home/leejinhyeok/.cache/torch/hub/checkpoints/efficientnet_v2_s-dd5fe13b.pth
100%|██████████| 82.7M/82.7M [00:07<00:00, 11.6MB/s]


In [20]:
# Print a per-layer summary of the current `model` for a 224x224 RGB input, on the CPU.
torchsummary.summary(model,(3,224,224), device='cpu')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 112, 112]             864
       BatchNorm2d-2         [-1, 32, 112, 112]              64
              SiLU-3         [-1, 32, 112, 112]               0
            Conv2d-4         [-1, 32, 112, 112]           9,216
       BatchNorm2d-5         [-1, 32, 112, 112]              64
              SiLU-6         [-1, 32, 112, 112]               0
   StochasticDepth-7         [-1, 32, 112, 112]               0
       FusedMBConv-8         [-1, 32, 112, 112]               0
            Conv2d-9         [-1, 32, 112, 112]           9,216
      BatchNorm2d-10         [-1, 32, 112, 112]              64
             SiLU-11         [-1, 32, 112, 112]               0
  StochasticDepth-12         [-1, 32, 112, 112]               0
      FusedMBConv-13         [-1, 32, 112, 112]               0
           Conv2d-14         [-1, 32, 1

In [12]:
from torchvision.models import efficientnet_b1
from torchvision.models import EfficientNet_B1_Weights
# EfficientNet-based autoencoder definition
class EfficientNetB1Autoencoder(nn.Module):
    """Autoencoder with an EfficientNet-B1 encoder.

    Only ``encoder.features`` is used in ``forward``. The decoder is a stack
    of five stride-2 transposed convolutions (1280 -> 640 -> 256 -> 128 ->
    64 -> 3 channels), each followed by ReLU except the last, which is
    followed by a sigmoid.
    """

    def __init__(self):
        super(EfficientNetB1Autoencoder, self).__init__()
        # Use EfficientNet-B1 as the encoder. `weights` is passed by keyword:
        # torchvision deprecates passing it positionally.
        self.encoder = efficientnet_b1(weights=EfficientNet_B1_Weights.DEFAULT)

        # Decoder definition
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(1280, 640, kernel_size=4, stride=2, padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(640, 256, kernel_size=4, stride=2, padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(256, 128, kernel_size=4, stride=2, padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(64, 3, kernel_size=4, stride=2, padding=1),
            nn.Sigmoid(),  # maps pixel values into the [0, 1] range
        )

    def forward(self, x):
        """Encode ``x`` with EfficientNet-B1 features, then decode it.

        Args:
            x: input image batch.

        Returns:
            The reconstruction, with 3 channels and values in [0, 1].
        """
        # Extract features with the encoder
        x = self.encoder.features(x)
        # Reconstruct with the decoder
        x = self.decoder(x)
        return x


In [13]:
# Instantiate the EfficientNet-B1 autoencoder and print its per-layer summary for a 256x256 RGB input, on the CPU.
model = EfficientNetB1Autoencoder()
torchsummary.summary(model,(3,256,256),device='cpu')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 128, 128]             864
       BatchNorm2d-2         [-1, 32, 128, 128]              64
              SiLU-3         [-1, 32, 128, 128]               0
            Conv2d-4         [-1, 32, 128, 128]             288
       BatchNorm2d-5         [-1, 32, 128, 128]              64
              SiLU-6         [-1, 32, 128, 128]               0
 AdaptiveAvgPool2d-7             [-1, 32, 1, 1]               0
            Conv2d-8              [-1, 8, 1, 1]             264
              SiLU-9              [-1, 8, 1, 1]               0
           Conv2d-10             [-1, 32, 1, 1]             288
          Sigmoid-11             [-1, 32, 1, 1]               0
SqueezeExcitation-12         [-1, 32, 128, 128]               0
           Conv2d-13         [-1, 16, 128, 128]             512
      BatchNorm2d-14         [-1, 16, 1

In [57]:
import torch
import torch.nn as nn
from torchvision.models import efficientnet_b0, EfficientNet_B0_Weights

class EfficientNetUNet(nn.Module):
    """U-Net-style network built on the EfficientNet-B0 feature stages.

    The encoder is the list of ``efficientnet_b0(...).features`` stages. The
    outputs of the stages listed in ``selected_layers`` are kept as skip
    connections. The decoder walks back up: a 2x transposed convolution,
    concatenation with the matching skip tensor, then two 3x3 Conv + ReLU
    layers. A last 2x transposed convolution, a 1x1 convolution to
    ``num_classes`` channels and a sigmoid form the output head.

    Args:
        num_classes: number of output channels.
    """

    def __init__(self, num_classes=1):
        super().__init__()

        # EfficientNet-B0 encoder; its feature stages are run one by one in forward().
        backbone = efficientnet_b0(weights=EfficientNet_B0_Weights.DEFAULT)
        self.encoder_layers = nn.ModuleList(backbone.features)

        # Indices of the encoder stages whose outputs are kept, and the
        # channel count of each of those outputs (shallowest first).
        # All nine stage widths, for reference:
        # [32, 16, 24, 40, 80, 112, 192, 320, 1280]
        self.selected_layers = [0, 2, 3, 5, 8]
        encoder_out_channels = [32, 24, 40, 112, 1280]

        # (deeper, shallower) channel pairs, walking from the bottleneck up.
        deepest_first = encoder_out_channels[::-1]
        stage_pairs = list(zip(deepest_first[:-1], deepest_first[1:]))

        # 2x upsampling from each depth to the width of the next shallower skip.
        self.up_convs = nn.ModuleList(
            [nn.ConvTranspose2d(deep, shallow, kernel_size=2, stride=2)
             for deep, shallow in stage_pairs]
        )
        # After concatenation the tensor has twice the skip's channels.
        self.decoders = nn.ModuleList(
            [nn.Sequential(
                nn.Conv2d(shallow + shallow, shallow, kernel_size=3, padding=1),
                nn.ReLU(),
                nn.Conv2d(shallow, shallow, kernel_size=3, padding=1),
                nn.ReLU(),
            ) for _, shallow in stage_pairs]
        )

        # Output head: final 2x upsample, then 1x1 projection and sigmoid.
        self.final_conv = nn.Sequential(
            nn.ConvTranspose2d(encoder_out_channels[0], 3, kernel_size=2, stride=2),
            nn.ReLU(),
            nn.Conv2d(3, num_classes, kernel_size=1),
            nn.Sigmoid()
        )

    def forward(self, x):
        """Run the encoder, then decode with skip connections.

        Args:
            x: input image batch.

        Returns:
            A tensor with ``num_classes`` channels and values in [0, 1].
        """
        # Encoder: keep only the outputs of the selected stages.
        skips = []
        for idx, stage in enumerate(self.encoder_layers):
            x = stage(x)
            if idx in self.selected_layers:
                skips.append(x)

        # Decoder: skips[-1] is the bottleneck itself, so step `depth` pairs
        # with the skip at position -(depth + 2).
        for depth, (upsample, refine) in enumerate(zip(self.up_convs, self.decoders)):
            x = upsample(x)
            x = torch.cat([x, skips[-(depth + 2)]], dim=1)
            x = refine(x)

        return self.final_conv(x)
# Create the model with 3 output channels
model = EfficientNetUNet(num_classes=3)


In [58]:
# Smoke test: push a random batch of 33 RGB 224x224 images through the model and show the output shape.
model(torch.randn((33,3,224,224))).shape

torch.Size([33, 3, 224, 224])

In [59]:
torchsummary.summary(model,(3,224,224),device='cpu')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 112, 112]             864
       BatchNorm2d-2         [-1, 32, 112, 112]              64
              SiLU-3         [-1, 32, 112, 112]               0
            Conv2d-4         [-1, 32, 112, 112]             288
       BatchNorm2d-5         [-1, 32, 112, 112]              64
              SiLU-6         [-1, 32, 112, 112]               0
 AdaptiveAvgPool2d-7             [-1, 32, 1, 1]               0
            Conv2d-8              [-1, 8, 1, 1]             264
              SiLU-9              [-1, 8, 1, 1]               0
           Conv2d-10             [-1, 32, 1, 1]             288
          Sigmoid-11             [-1, 32, 1, 1]               0
SqueezeExcitation-12         [-1, 32, 112, 112]               0
           Conv2d-13         [-1, 16, 112, 112]             512
      BatchNorm2d-14         [-1, 16, 1

In [6]:
# Stand-alone pretrained EfficientNet-B0, used below to inspect per-stage output shapes.
# `weights` is passed by keyword: torchvision deprecates passing it positionally.
encoder = efficientnet_b0(weights=EfficientNet_B0_Weights.DEFAULT)



In [15]:
# Feed one random 224x224 RGB image through the encoder stage by stage,
# printing the feature-map shape after each stage.
x = torch.randn((1, 3, 224, 224))
for stage in encoder.features:
    x = stage(x)
    print(x.shape)

torch.Size([1, 32, 112, 112])
torch.Size([1, 16, 112, 112])
torch.Size([1, 24, 56, 56])
torch.Size([1, 40, 28, 28])
torch.Size([1, 80, 14, 14])
torch.Size([1, 112, 14, 14])
torch.Size([1, 192, 7, 7])
torch.Size([1, 320, 7, 7])
torch.Size([1, 1280, 7, 7])


In [13]:
from torchvision.models.segmentation.deeplabv3 import deeplabv3_mobilenet_v3_large
import torchsummary

In [16]:
# Instantiate torchvision's DeepLabV3 with a MobileNetV3-Large backbone, configured for 3 output classes.
model = deeplabv3_mobilenet_v3_large(num_classes=3)

In [26]:
import torch
import torch.nn as nn
import segmentation_models_pytorch as smp

class EfficientNetb0Unet(nn.Module):
    """``smp.Unet`` with an EfficientNet-B0 encoder and a 3-channel output head.

    The first layer of the stock segmentation head is replaced by a 3x3
    convolution from 16 to 3 channels. No activation is applied in
    ``forward``, so the raw head output is returned.
    """

    def __init__(self):
        super().__init__()
        unet = smp.Unet(encoder_name='efficientnet-b0')
        # Swap the head's first layer for a 16 -> 3 channel 3x3 convolution
        # (presumably 16 is the width of smp's last decoder stage — TODO confirm).
        unet.segmentation_head[0] = nn.Conv2d(16, 3, 3, 1, 1)
        self.unet_model = unet

    def forward(self, x):
        """Return the U-Net output for the image batch ``x``."""
        return self.unet_model(x)
# Create the model
model = EfficientNetb0Unet()


In [3]:
import torch
import torch.nn as nn
import segmentation_models_pytorch as smp
class _EfficientNetUnet(nn.Module):
    """Shared implementation of the ``smp.Unet`` wrappers below.

    Builds an ``smp.Unet`` with the given EfficientNet encoder and replaces
    the first layer of its segmentation head with a 3x3 convolution from 16
    to 3 channels. A sigmoid is optionally applied to the output.

    Args:
        encoder_name: encoder identifier passed to ``smp.Unet``.
        apply_sigmoid: when true, ``forward`` squashes the output into [0, 1].
    """

    def __init__(self, encoder_name, apply_sigmoid):
        super(_EfficientNetUnet, self).__init__()
        self.unet_model = smp.Unet(encoder_name=encoder_name)
        # 16 -> 3 channel head (presumably 16 is the width of smp's last
        # decoder stage — TODO confirm).
        self.unet_model.segmentation_head[0] = nn.Conv2d(16,3,3,1,1)
        # nn.Sigmoid has no parameters, so state_dict keys do not depend on this flag.
        self.sigmoid = nn.Sigmoid() if apply_sigmoid else None

    def forward(self, x):
        """Return the U-Net output for ``x``, through the sigmoid if enabled."""
        x = self.unet_model(x)
        if self.sigmoid is not None:
            x = self.sigmoid(x)
        return x


class EfficientNetB0Unet(_EfficientNetUnet):
    """U-Net with an EfficientNet-B0 encoder; applies a sigmoid by default."""

    def __init__(self, apply_sigmoid=True):
        super(EfficientNetB0Unet, self).__init__('efficientnet-b0', apply_sigmoid)


# NOTE(review): unlike EfficientNetB0Unet, the B1/B2/B3 variants return the raw
# head output by default. That is the existing behaviour and is preserved here;
# pass apply_sigmoid=True if outputs in [0, 1] are wanted.
class EfficientNetB1Unet(_EfficientNetUnet):
    """U-Net with an EfficientNet-B1 encoder; no output activation by default."""

    def __init__(self, apply_sigmoid=False):
        super(EfficientNetB1Unet, self).__init__('efficientnet-b1', apply_sigmoid)


class EfficientNetB2Unet(_EfficientNetUnet):
    """U-Net with an EfficientNet-B2 encoder; no output activation by default."""

    def __init__(self, apply_sigmoid=False):
        super(EfficientNetB2Unet, self).__init__('efficientnet-b2', apply_sigmoid)


class EfficientNetB3Unet(_EfficientNetUnet):
    """U-Net with an EfficientNet-B3 encoder; no output activation by default."""

    def __init__(self, apply_sigmoid=False):
        super(EfficientNetB3Unet, self).__init__('efficientnet-b3', apply_sigmoid)


In [89]:
import piqa
# Sanity check that 1 - SSIM behaves as a differentiable loss on random images.
# The tensors are created directly on the target device: calling .cuda() on a
# tensor built with requires_grad=True returns a non-leaf copy, whose .grad is
# never populated by backward().
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
x = torch.rand(5, 3, 256, 256, device=device, requires_grad=True)
y = torch.rand(5, 3, 256, 256, device=device, requires_grad=True)
ssim = piqa.SSIM().to(device)
l = 1 - ssim(x, y)
l

tensor(0.9935, device='cuda:0', grad_fn=<RsubBackward1>)