In [1]:
from google.colab import drive
import os
drive.mount('/content/gdrive')

ModuleNotFoundError: No module named 'google.colab'

In [None]:
!unzip gdrive/MyDrive/images.zip

In [1]:
!pip install wandb

Defaulting to user installation because normal site-packages is not writeable
You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.[0m


In [1]:
import cv2
import numpy as np
from matplotlib import pyplot as plt
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
import torch
import torch.nn as nn
import time
import tqdm
import random
import torch.nn.functional as F
import wandb

from torch.nn import init
from torch.nn.modules.utils import _pair
import math

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class AttentionLayer(nn.Module):
    def __init__(self, c_dim, hidden_dim, nof_kernels):
        super().__init__()
        self.global_pooling = nn.Sequential(nn.AdaptiveAvgPool2d(1), nn.Flatten())
        self.to_scores = nn.Sequential(nn.Linear(c_dim, hidden_dim),
                                       nn.ReLU(inplace=True),
                                       nn.Linear(hidden_dim, nof_kernels)
                                      )

    def forward(self, c, x, temperature=1, pr=False):
        # out = self.global_pooling(x)
        # print("c", c)
        scores = self.to_scores(c)
        # print("score", scores)
        return F.softmax(scores / temperature, dim=-1)

In [3]:
class DynamicConv2d(nn.Module):
    def __init__(self, nof_kernels, reduce, in_channels, out_channels, kernel_size,
                 stride=1, padding=0, dilation=1, groups=1, bias=True):
        """
        Implementation of Dynamic convolution layer
        :param in_channels: number of input channels.
        :param out_channels: number of output channels.
        :param kernel_size: size of the kernel.
        :param groups: controls the connections between inputs and outputs.
        in_channels and out_channels must both be divisible by groups.
        :param nof_kernels: number of kernels to use.
        :param reduce: Refers to the size of the hidden layer in attention: hidden = in_channels // reduce
        :param bias: If True, convolutions also have a learnable bias
        """
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels

        self.groups = groups
        self.conv_args = {'stride': stride, 'padding': padding, 'dilation': dilation}
        self.nof_kernels = nof_kernels
        self.attention = AttentionLayer(in_channels, max(1, in_channels // reduce), nof_kernels)
        self.kernel_size = _pair(kernel_size)
        # kernels_weights: (nof_kernels, out_channels, in_channels // groups, kernel_size, kernel_size)
        # why groups? because we want to have the same number of kernels for each group. what is the group? it is the   # number of kernels that are applied to each input channel. So, if we have 2 groups, we will have 2 kernels for each input channel.
        self.kernels_weights = nn.Parameter(torch.Tensor(
            nof_kernels, out_channels, in_channels // self.groups, *self.kernel_size), requires_grad=True)
        if bias:
            self.kernels_bias = nn.Parameter(torch.Tensor(nof_kernels, out_channels), requires_grad=True)
        else:
            self.register_parameter('kernels_bias', None)
        self.initialize_parameters()

    def initialize_parameters(self):
        for i_kernel in range(self.nof_kernels):
            init.kaiming_uniform_(self.kernels_weights[i_kernel], a=math.sqrt(5))
        if self.kernels_bias is not None:
            bound = 1 / math.sqrt(self.kernels_weights[0, 0].numel())
            nn.init.uniform_(self.kernels_bias, -bound, bound)

    def forward(self, x, temperature=1):
        # x: (batch_size , in_channels , H , W)
        batch_size = x.shape[0]
        # alphas: (batch_size , nof_kernels)
        alphas = self.attention(x, temperature)
        # agg_weights: (batch_size , out_channels , in_channels // groups, kernel_size, kernel_size)
        # sum over the kernels with the attention weights
        agg_weights = torch.sum(
            torch.mul(self.kernels_weights.unsqueeze(0), alphas.view(batch_size, -1, 1, 1, 1, 1)), dim=1)
        # Group the weights for each batch to conv2 all at once

        # agg_weights: (batch_size * out_channels , in_channels // groups, kernel_size, kernel_size)
        agg_weights = agg_weights.view(-1, *agg_weights.shape[-3:])  # filters of shape ( out_channels , in_channels groups , 𝑘 𝐻 , 𝑘 𝑊 ) (out_channels, groups in_channels ​ ,kH,kW) \


        if self.kernels_bias is not None:
            agg_bias = torch.sum(torch.mul(self.kernels_bias.unsqueeze(0), alphas.view(batch_size, -1, 1)), dim=1)
            agg_bias = agg_bias.view(-1)
        else:
            agg_bias = None

        # why view(1, -1, *x.shape[-2:])? because we want to group the input channels. So, if we have 2 groups, we will have 2 kernels for each input channel.
        x_grouped = x.view(1, -1, *x.shape[-2:])  # (1 , batch_size*out_c , H , W)
        #   out: (1 , batch_size*out_C , H' , W')
        out = F.conv2d(x_grouped, agg_weights, agg_bias, groups=self.groups * batch_size,
                       **self.conv_args)
        # out: (batch_size , out_channels , H' , W')
        out = out.view(batch_size, -1, *out.shape[-2:])

        return out

In [4]:
class DynamicCNN(nn.Module):
    def __init__(self, num_classes=50):
        super(DynamicCNN, self).__init__()

        self.dycnn_1 = DynamicConv2d(nof_kernels=4, reduce=4, in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1, dilation=1, groups=1, bias=True)
        self.dycnn_2 = DynamicConv2d(nof_kernels=4, reduce=4, in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1, dilation=1, groups=1, bias=True)
        self.dycnn_3 = DynamicConv2d(nof_kernels=4, reduce=4, in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1, dilation=1, groups=1, bias=True)
        self.dycnn_4 = DynamicConv2d(nof_kernels=4, reduce=4, in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1, dilation=1, groups=1, bias=True)

        
        self.fc = nn.Sequential(
            nn.Linear(256*8*8, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, num_classes)
        )

    def forward(self, c, x):
        out = F.avg_pool2d(F.relu(self.dycnn_1(c, x)), 2, 2, 0)# [32, 64, 64]
        out = F.avg_pool2d(F.relu(self.dycnn_2(c, out)), 2, 2, 0)# [64, 32, 32]
        out = F.avg_pool2d(F.relu(self.dycnn_3(c, out)), 2, 2, 0)# [128, 16, 16]
        out = F.avg_pool2d(F.relu(self.dycnn_4(c, out)), 2, 2, 0)# [256, 8, 8]
        out = out.view(out.size()[0], -1)
        out = self.fc(out)
        return out

In [5]:
class DynamicConv2d_2(nn.Module):
    def __init__(self, nof_kernels, reduce, in_channels, out_channels, kernel_size,
                 stride=1, padding=0, dilation=1, groups=1, bias=True):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels

        self.groups = groups
        self.conv_args = {'stride': stride, 'padding': padding, 'dilation': dilation}
        self.nof_kernels = nof_kernels
        self.kernel_size = _pair(kernel_size)
        
        self.kernels_weights = nn.Parameter(torch.Tensor(
            nof_kernels, out_channels, in_channels // self.groups, *self.kernel_size), requires_grad=True)
        if bias:
            self.kernels_bias = nn.Parameter(torch.Tensor(nof_kernels, out_channels), requires_grad=True)
        else:
            self.register_parameter('kernels_bias', None)

        self.attention = AttentionLayer(3, max(8, in_channels // reduce), nof_kernels)
        self.initialize_parameters()

    def initialize_parameters(self):
        for i_kernel in range(self.nof_kernels):
            init.kaiming_uniform_(self.kernels_weights[i_kernel], a=math.sqrt(5))
        if self.kernels_bias is not None:
            bound = 1 / math.sqrt(self.kernels_weights[0, 0].numel())
            nn.init.uniform_(self.kernels_bias, -bound, bound)

    def forward(self, c, x, temperature=1, pr=False):
        # x: (batch_size , in_channels , H , W)
        batch_size = x.shape[0]
        # alphas: (batch_size , nof_kernels)
        alphas = self.attention(c, x, temperature)
        # alphas = F.softmax(self.to_scores(c)/ temperature, dim=-1)
        if pr:
            print("c", c)
            print("alphas：", alphas)
            print("kernels_weights", self.kernels_weights)
        
        agg_weights = torch.sum(
            torch.mul(self.kernels_weights.unsqueeze(0), alphas.view(batch_size, -1, 1, 1, 1, 1)), dim=1)

        # agg_weights: (batch_size * out_channels , in_channels // groups, kernel_size, kernel_size)
        agg_weights = agg_weights.view(-1, *agg_weights.shape[-3:])  # filters of shape ( out_channels , in_channels groups , 𝑘 𝐻 , 𝑘 𝑊 ) (out_channels, groups in_channels ​ ,kH,kW) \


        if self.kernels_bias is not None:
            agg_bias = torch.sum(torch.mul(self.kernels_bias.unsqueeze(0), alphas.view(batch_size, -1, 1)), dim=1)
            agg_bias = agg_bias.view(-1)
        else:
            agg_bias = None

        x_grouped = x.view(1, -1, *x.shape[-2:])  # (1 , batch_size*out_c , H , W)
        #   out: (1 , batch_size*out_C , H' , W')
        out = F.conv2d(x_grouped, agg_weights, agg_bias, groups=self.groups * batch_size,
                        **self.conv_args)
        # out: (batch_size , out_channels , H' , W')
        out = out.view(batch_size, -1, *out.shape[-2:])

        return out

In [6]:
class DynamicCNN_2(nn.Module):
    def __init__(self, num_classes=50):
        super(DynamicCNN_2, self).__init__()
        
        self.global_pooling = nn.Sequential(nn.AdaptiveAvgPool2d(1), nn.Flatten())
        
        self.dycnn = nn.ModuleList([DynamicConv2d_2(nof_kernels=4, reduce=4, in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1, dilation=1, groups=1, bias=True),
                    DynamicConv2d_2(nof_kernels=4, reduce=4, in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1, dilation=1, groups=1, bias=True),
                      DynamicConv2d_2(nof_kernels=4, reduce=4, in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1, dilation=1, groups=1, bias=True),
                      DynamicConv2d_2(nof_kernels=4, reduce=4, in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1, dilation=1, groups=1, bias=True)
                     ])
        self.norm =  nn.ModuleList([nn.BatchNorm2d((2**i) * 32) for i in range(4)])
        self.act = nn.ModuleList([nn.ReLU() for i in range(4)])
        self.pool = nn.ModuleList([nn.MaxPool2d(2, 2, 0) for i in range(4)])

            
        self.fc = nn.Sequential(
            nn.Linear(256*8*8, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, num_classes)
        )

    def forward(self, c, x, temperature=1, pr=False):
        # channel identifier
        # x_pool = self.global_pooling(x)
        # channel_tensor = torch.cat((x_pool, c), dim=1)
        channel_tensor = c.float()
        out = x
        for i in range(4):
            out = self.dycnn[i](channel_tensor, out, temperature=temperature, pr = pr).to(device)
            out = self.norm[i](out)
            out = self.act[i](out)
            out = self.pool[i](out)
            
        out = out.view(out.size()[0], -1)
        out = self.fc(out)
        return out

In [7]:
class DynamicCNN_3(nn.Module):
    def __init__(self, num_classes=50):
        super(DynamicCNN_3, self).__init__()
        
        self.global_pooling = nn.Sequential(nn.AdaptiveAvgPool2d(1), nn.Flatten())
        
        self.dycnn = nn.ModuleList([DynamicConv2d(nof_kernels=4, reduce=4, in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1, dilation=1, groups=1, bias=True),
                    DynamicConv2d(nof_kernels=4, reduce=4, in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1, dilation=1, groups=1, bias=True),
                      DynamicConv2d(nof_kernels=4, reduce=4, in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1, dilation=1, groups=1, bias=True),
                      DynamicConv2d(nof_kernels=4, reduce=4, in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1, dilation=1, groups=1, bias=True)
                     ])
        self.norm =  nn.ModuleList([nn.BatchNorm2d((2**i) * 32) for i in range(4)])
        self.act = nn.ModuleList([nn.ReLU() for i in range(4)])
        self.pool = nn.ModuleList([nn.MaxPool2d(2, 2, 0) for i in range(4)])

            
        self.fc = nn.Sequential(
            nn.Linear(256*8*8, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, num_classes)
        )

    def forward(self, c, x):
        # channel identifier
        out = x
        for i in range(4):
            out = self.dycnn[i](out, temperature=30).to(device)
            out = self.norm[i](out)
            out = self.act[i](out)
            out = self.pool[i](out)
            
        out = out.view(out.size()[0], -1)
        out = self.fc(out)
        return out

In [8]:
class SimpleDynamicCNN(nn.Module):
    def __init__(self, num_classes=50):
        super(SimpleDynamicCNN, self).__init__()
        
        self.dycnn = DynamicConv2d(nof_kernels=4, reduce=4, in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1, dilation=1, groups=1, bias=True)
        self.cnn = nn.Sequential(
            # [32, 128, 128]
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [32, 64, 64]

            nn.Conv2d(32, 64, 3, 1, 1), # [64, 64, 64]
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [64, 32, 32]

            nn.Conv2d(64, 128, 3, 1, 1), # [128, 32, 32]
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [128, 16, 16]

            nn.Conv2d(128, 256, 3, 1, 1), # [256, 16, 16]
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),       # [256, 8, 8]
        )
        self.fc = nn.Sequential(
            nn.Linear(256*8*8, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, num_classes)
        )

    def forward(self, c, x):
        out = self.dycnn(c, x)
        out = self.cnn(out)
        out = out.view(out.size()[0], -1)
        out = self.fc(out)
        return out

In [9]:
class SimpleDynamicCNN_2(nn.Module):
    def __init__(self, num_classes=50):
        super(SimpleDynamicCNN_2, self).__init__()
        self.dycnn = DynamicConv2d_2(nof_kernels=4, reduce=4, in_channels=3, out_channels=3, kernel_size=3, stride=1, padding=1, dilation=1, groups=1, bias=True)
        self.cnn = nn.Sequential(
            nn.BatchNorm2d(3),
            nn.ReLU(),
            # [32, 128, 128]
            nn.Conv2d(3, 32, 3, 1, 1), 
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [32, 64, 64]

            nn.Conv2d(32, 64, 3, 1, 1), # [64, 64, 64]
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [64, 32, 32]

            nn.Conv2d(64, 128, 3, 1, 1), # [128, 32, 32]
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [128, 16, 16]

            nn.Conv2d(128, 256, 3, 1, 1), # [256, 16, 16]
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),       # [256, 8, 8]
        )
        self.fc = nn.Sequential(
            nn.Linear(256*8*8, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, num_classes)
        )

    def forward(self, c, x, temperature=1, pr=False):
        # x_pool = self.global_pooling(x)
        # channel_tensor = torch.cat((x_pool, c), dim=1)
        channel_tensor = c.float()
        out = self.dycnn(channel_tensor, x, temperature=temperature, pr=pr)
        out = self.cnn(out)
        out = out.view(out.size()[0], -1)
        out = self.fc(out)
        return out

In [10]:
class SimpleCNN(nn.Module):
    def __init__(self, num_classes=50):
        super(SimpleCNN, self).__init__()
        # torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', device=None, dtype=None)
        self.cnn = nn.Sequential(
            nn.Conv2d(3, 32, 3, 1, 1), # [32, 128, 128]
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [32, 64, 64]

            nn.Conv2d(32, 64, 3, 1, 1), # [64, 64, 64]
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [64, 32, 32]

            nn.Conv2d(64, 128, 3, 1, 1), # [128, 32, 32]
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [128, 16, 16]

            nn.Conv2d(128, 256, 3, 1, 1), # [256, 16, 16]
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),       # [256, 8, 8]
        )
        self.fc = nn.Sequential(
            nn.Linear(256*8*8, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, num_classes)
        )

    def forward(self, c, x, temperature=1, pr=False):
        out = self.cnn(x)
        out = out.view(out.size()[0], -1)
        out = self.fc(out)
        return out

In [11]:
class SimpleCNN_4(nn.Module):
    def __init__(self, num_classes=50):
        super(SimpleCNN_4, self).__init__()
        # torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', device=None, dtype=None)
        self.cnn = nn.Sequential(
            nn.Conv2d(3, 32, 3, 1, 1), # [32, 128, 128]
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [32, 64, 64]

            nn.Conv2d(32, 64, 3, 1, 1), # [64, 64, 64]
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [64, 32, 32]

            nn.Conv2d(64, 128, 3, 1, 1), # [128, 32, 32]
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [128, 16, 16]
        )
        self.fc = nn.Sequential(
            nn.Linear(128*16*16, 1024),
            nn.ReLU(),
            nn.Linear(1024, num_classes)
        )

    def forward(self, c, x, temperature=1, pr=False):
        out = self.cnn(x)
        out = out.view(out.size()[0], -1)
        out = self.fc(out)
        return out

In [12]:
class DynamicCNN_4(nn.Module):
    def __init__(self, num_classes=50):
        super(DynamicCNN_4, self).__init__()
        
        self.global_pooling = nn.Sequential(nn.AdaptiveAvgPool2d(1), nn.Flatten())
        
        self.dycnn = nn.ModuleList([DynamicConv2d_2(nof_kernels=4, reduce=4, in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1, dilation=1, groups=1, bias=True),
                    DynamicConv2d_2(nof_kernels=4, reduce=4, in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1, dilation=1, groups=1, bias=True),
                      DynamicConv2d_2(nof_kernels=4, reduce=4, in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1, dilation=1, groups=1, bias=True),
                                   ])
        self.norm =  nn.ModuleList([nn.BatchNorm2d((2**i) * 32) for i in range(3)])
        self.act = nn.ReLU()
        self.pool = nn.MaxPool2d(2, 2, 0)

            
        self.fc = nn.Sequential(
            nn.Linear(128*16*16, 1024),
            nn.ReLU(),
            nn.Linear(1024, num_classes)
        )

    def forward(self, c, x, temperature=1, pr=False):
        # channel identifier
        # x_pool = self.global_pooling(x)
        # channel_tensor = torch.cat((x_pool, c), dim=1)
        channel_tensor = c.float()
        out = x
        for i in range(4):
            out = self.dycnn[i](channel_tensor, out, temperature=temperature, pr = pr).to(device)
            out = self.norm[i](out)
            out = self.act(out)
            out = self.pool(out)
            
        out = out.view(out.size()[0], -1)
        out = self.fc(out)
        return out

In [13]:
from PIL import Image

class ImgDataset(Dataset):
    def __init__(self, channel, x, y=None, transform=None):
        self.channel = channel
        self.x = x
        self.y = y
        if y is not None:
            self.y = torch.LongTensor(y)
        self.transform = transform
    def __len__(self):
        return len(self.x)
    def __getitem__(self, index):
        X = self.x[index]
        C = self.channel[index]
        if self.transform is not None:
            X = self.transform(X)
        if self.y is not None:
            Y = self.y[index]
            return C, X, Y
        else:
            return C, X



In [14]:
def random_channel(images, seed):
    # 设置种子
    random.seed(seed)

    new_images = []
    channel_dict = {0:'BGR', 1:'GR', 2:'BG', 3:'R', 4:'G', 5:'B'}
    numbers = list(channel_dict.keys())
    channels = []
    for image in images:
        channel_idx = random.choice(numbers)
        # 修改通道
        if channel_dict[channel_idx] == 'BGR':
            img = image[:, :, :]
        elif channel_dict[channel_idx] == 'GR':
            img = image[:, :, 1:]
        elif channel_dict[channel_idx] == 'BG':
            img = image[:, :, :2]
        elif channel_dict[channel_idx] == 'R':
            img = image[:, :, 2:3]
        elif channel_dict[channel_idx] == 'G':
            img = image[:, :, 1:2]
        elif channel_dict[channel_idx] == 'B':
            img = image[:, :, 0:1]
        new_images.append(img)
        channels.append(channel_dict[channel_idx])
    return channels, new_images

In [15]:
def load_img(f):
    shapes = []
    f=open(f)
    lines=f.readlines()
    imgs, lab=[], []
    for i in range(len(lines)):
        fn, label = lines[i].split(' ')
        im1=cv2.imread(fn)

        if im1.shape[2] not in shapes:
            shapes.append(im1.shape[2])
        # im1=cv2.resize(im1, (img_size,img_size))
        # im1 = cv2.cvtColor(im1, cv2.COLOR_BGR2GRAY)

        # im1 = preprocessing(im1, op_list)
        # vec = np.reshape(im1, [-1])

        imgs.append(im1)
        lab.append(int(label))
    print(i)

    # imgs= np.asarray(imgs, np.uint8)
    lab= np.asarray(lab, np.uint8)
    # print(shapes)
    return imgs, lab


In [16]:
def resize_input(channels, images, img_size):
    channel_map = {"BGR": torch.tensor([1,1,1]), "BG":torch.tensor([1,1,0]), "GR":torch.tensor([0,1,1]), "B":torch.tensor([1,0,0]), "G":torch.tensor([0,1,0]), "R":torch.tensor([0,0,1])}
    x = np.zeros((len(images), img_size, img_size, 3), dtype=np.uint8)
    c = np.zeros((len(images), 3),  dtype=np.uint8)
    for i, img in enumerate(images):
        img = cv2.resize(img, (img_size, img_size))

        if channels[i] == "B" or channels[i] == "G" or channels[i] == "R": # 1維
            img_3channel = np.stack((img,) * 3, axis=-1)
        elif channels[i] == "BGR":
            img_3channel = img
        elif channels[i] == "BG":
            # 生成第三個通道，可以根據具體需求來決定其值
            third_channel = np.mean(img, axis=2)  # 這裡用前兩個通道的平均值作為示例
            # 合併成三通道圖像
            img_3channel = np.zeros((img_size, img_size, 3), dtype=img.dtype)
            img_3channel[:, :, :2] = img  # 前兩個通道保持不變
            img_3channel[:, :, 2] = third_channel  # 第三個通道為生成的數據
        elif channels[i] == "GR":
            # 生成第三個通道，可以根據具體需求來決定其值
            third_channel = np.mean(img, axis=2)  # 這裡用前兩個通道的平均值作為示例
            # 合併成三通道圖像
            img_3channel = np.zeros((img_size, img_size, 3), dtype=img.dtype)
            img_3channel[:, :, 1:] = img  # 後兩個通道保持不變
            img_3channel[:, :, 0] = third_channel  # 第一個通道為生成的數據
        else:
            print("error")
        # 變c, w, h
        # img_3channel = np.transpose(img_3channel, (2, 0, 1))
        x[i, :, :, :] = img_3channel
        c[i, :] = channel_map[channels[i]]
    return c, x

In [17]:
wandb.login()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33msunny2021137[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [18]:

    #############
    eval_time = 1
    num_epoch = 30
    num_classes = 50
    img_size = 144
    batch_size = 128
    lr = 0.001
    #############




In [24]:
    x, y = load_img('train.txt')
    vx, vy = load_img('val.txt')
    tx, ty = load_img('test.txt')

63324
449
449


In [29]:
    print("--1--")
    c, x_new = random_channel(x, seed=42)
    vc, vx_new = random_channel(vx, seed=42)
    tc, tx_new = random_channel(tx, seed=42)




--1--


In [None]:
tc

In [31]:
del x, vx, tx

In [32]:
    print("--2--")
    c, x_resize = resize_input(c, x_new, img_size=img_size)
    vc, vx_resize = resize_input(vc, vx_new, img_size=img_size)
    tc, tx_resize = resize_input(tc, tx_new, img_size=img_size)

--2--


In [33]:
print(tc)

[[1 0 0]
 [1 1 1]
 [1 1 1]
 ...
 [1 0 0]
 [1 1 0]
 [0 1 1]]


In [34]:
    print("--3--")
    # training 時做 data augmentation
    train_transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize((144, 144)),  # 縮放
        transforms.RandomRotation(degrees=30),  # 旋轉
        transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),  # 平移
        transforms.RandomCrop(128),  # 隨機裁剪
        transforms.RandomHorizontalFlip(),  # 水平翻轉
        # transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),  # 調整顏色
        transforms.ToTensor(),  # 轉換為Tensor
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # 標準化

    ])
    # testing 時不需做 data augmentation
    test_transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize((144, 144)),  # 縮放
        transforms.CenterCrop(128),  # 中心裁剪
        transforms.ToTensor(),  # 轉換為Tensor
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # 標準化
    ])

--3--


In [35]:
del x_new, vx_new, tx_new

In [36]:
    train_set = ImgDataset(c, x_resize, y, train_transform)
    val_set = ImgDataset(vc, vx_resize, vy, test_transform)
    test_set = ImgDataset(tc, tx_resize, ty, test_transform)
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)



In [37]:
    run = wandb.init(
    # Set the project where this run will be logged
    project="task1_4",
    # Track hyperparameters and run metadata
    config={
        "model": "simple_4",
        "learning_rate": lr,
        "epochs": num_epoch,
        "img_size": img_size,
    },)
    wandb.define_metric("Train/epoch")
    wandb.define_metric("Train/*", step_metric="Train/epoch")
    wandb.define_metric("Val/epoch")
    wandb.define_metric("Val/*", step_metric="Val/epoch")

<wandb.sdk.wandb_metric.Metric at 0x7f4c455d2280>

In [None]:

    # print("--4--")
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # device = torch.device("cpu")
    model = SimpleCNN_4(num_classes=num_classes).to(device)

    loss = nn.CrossEntropyLoss() # 因為是 classification task，所以 loss 使用 CrossEntropyLoss
    optimizer = torch.optim.Adam(model.parameters(), lr=lr) # optimizer 使用 Adam

    temperature = 1
    for epoch in range(num_epoch):
        print(epoch)
        
        epoch_start_time = time.time()
        train_acc = 0.0
        train_loss = 0.0
        val_acc = 0.0
        val_loss = 0.0

        model.train() # 確保 model 是在 train model (開啟 Dropout 等...)
        for i, data in enumerate(train_loader):
            # print(data[0].shape, data[1].shape)
            optimizer.zero_grad() # 用 optimizer 將 model 參數的 gradient 歸零
            temperature = 1
            # if epoch < 5:
            #     temperature = 30 - 6 * epoch
            
            
            train_pred = model(data[0].to(device), data[1].to(device), temperature=temperature) # 利用 model 得到預測的機率分佈 這邊實際上就是去呼叫 model 的 forward 函數
            batch_loss = loss(train_pred, data[2].to(device)) # 計算 loss （注意 prediction 跟 label 必須同時在 CPU 或是 GPU 上）
            batch_loss.backward() # 利用 back propagation 算出每個參數的 gradient
            optimizer.step() # 以 optimizer 用 gradient 更新參數值

            train_acc += np.sum(np.argmax(train_pred.cpu().data.numpy(), axis=1) == data[2].numpy())
            train_loss += batch_loss.item()

        if True:
            print(temperature)
            model.eval()
            with torch.no_grad():
                for i, data in enumerate(val_loader):
                    pr1 = False
                    if i == 0:
                        pr1 = True
                    val_pred = model(data[0].to(device), data[1].to(device), temperature=temperature, pr =pr1)
                    batch_loss = loss(val_pred, data[2].to(device))

                    val_acc += np.sum(np.argmax(val_pred.cpu().data.numpy(), axis=1) == data[2].numpy())
                    val_loss += batch_loss.item()

                #將結果 print 出來
                # print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f | Val Acc: %3.6f loss: %3.6f' % \
                #     (epoch + 1, num_epoch, time.time()-epoch_start_time, \
                #      train_acc/train_set.__len__(), train_loss/train_set.__len__(), val_acc/val_set.__len__(), val_loss/val_set.__len__()))

                wandb.log({"Train/epoch": epoch,
                            "Train/acc": train_acc/train_set.__len__(),
                           "Train/loss": train_loss/train_set.__len__(),
                           "Val/epoch": epoch,
                           "Val/acc": val_acc/val_set.__len__(),
                           "Val/loss": val_loss/val_set.__len__(),
                          })

    print("--5--")
    model.eval() 
    test_acc = 0.0
    test_loss = 0.0
    with torch.no_grad():
        for i, data in enumerate(test_loader):
            test_pred = model(data[0].to(device), data[1].to(device), temperature=temperature)
            batch_loss = loss(test_pred, data[2].to(device))
            test_acc += np.sum(np.argmax(test_pred.cpu().data.numpy(), axis=1) == data[2].numpy())
            test_loss += batch_loss.item()
        wandb.log({"Test/test acc": test_acc/test_set.__len__(),
                  "Test/loss": test_loss/test_set.__len__(),})
    run.finish()

0
1
1
1
2
1
3
1
4
1
5
1
6


In [None]:
import gc
del model
gc.collect()