# Notebook Learning

## Variational Autoencoders

In [1]:
import torch 
import torch.nn as nn

class Reshape(nn.Module):
    """Module wrapper around ``Tensor.view`` with a fixed target shape.

    The positional arguments given at construction time are kept as a tuple
    in ``self.shape`` and handed unchanged to ``view`` on every forward call,
    which lets a reshape be used as a layer inside a module pipeline.
    """

    def __init__(self, *args):
        super().__init__()
        # ``args`` is already a tuple, e.g. Reshape(-1, 64, 7, 7).shape == (-1, 64, 7, 7).
        self.shape = args

    def forward(self, x):
        """Return ``x`` viewed with the shape stored at construction."""
        target_shape = self.shape
        return x.view(target_shape)


class Trim(nn.Module):
    """Crop the last two dimensions of a 4-D tensor to a fixed maximum size.

    Args:
        *args: Accepted and ignored, so existing ``Trim(...)`` calls with
            positional arguments keep working.
        height: Maximum size kept along dimension 2 (default 28).
        width: Maximum size kept along dimension 3 (default 28).

    With the defaults the module keeps ``x[:, :, :28, :28]``. Inputs that are
    already smaller than the limits pass through unchanged, because slicing
    past the end of a dimension is a no-op.
    """

    def __init__(self, *args, height=28, width=28):
        super().__init__()
        self.height = height
        self.width = width

    def forward(self, x):
        """Return ``x`` cropped to at most ``height`` x ``width``."""
        return x[:, :, :self.height, :self.width]
    

class Encoder(nn.Module):
    """Convolutional feature extractor that returns a flat vector per sample.

    Four 3x3 convolutions (1 -> 32 -> 64 -> 64 -> 64 channels) are applied;
    the second and third use stride 2. LeakyReLU(0.01) follows the first
    three convolutions, the fourth is left linear, and the result is
    flattened. For a 1x28x28 input the spatial size goes 28 -> 14 -> 7, so
    the output has 64 * 7 * 7 = 3136 features.
    """

    def __init__(self):
        super().__init__()

        # Layers are registered in the order they run in forward().
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
        self.relu1 = nn.LeakyReLU(0.01)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1)
        self.relu2 = nn.LeakyReLU(0.01)
        self.conv3 = nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1)
        self.relu3 = nn.LeakyReLU(0.01)
        self.conv4 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1)
        self.flatten = nn.Flatten()

    def forward(self, x):
        """Run the convolution stack and flatten everything but the batch dim."""
        activated_stages = (
            (self.conv1, self.relu1),
            (self.conv2, self.relu2),
            (self.conv3, self.relu3),
        )
        for conv, activation in activated_stages:
            x = activation(conv(x))
        # The last convolution has no activation before flattening.
        return self.flatten(self.conv4(x))
    
class Decoder(nn.Module):
    """Map a 2-D latent vector back to a single-channel image tensor.

    A linear layer expands the latent code to 3136 values, which are viewed
    as a 64x7x7 feature map. Four transposed 3x3 convolutions follow
    (64 -> 64 -> 64 -> 32 -> 1 channels); with the configured strides and
    paddings the spatial size goes 7 -> 7 -> 13 -> 27 -> 29. LeakyReLU(0.01)
    follows the first three transposed convolutions. The final map is handed
    to ``self.trim`` before being returned. No output activation is applied
    here.
    """

    def __init__(self):
        super().__init__()

        # Layers are registered in the order they run in forward().
        self.linear = nn.Linear(2, 3136)
        self.trans_conv1 = nn.ConvTranspose2d(64, 64, kernel_size=3, stride=1, padding=1)
        self.relu1 = nn.LeakyReLU(0.01)
        self.trans_conv2 = nn.ConvTranspose2d(64, 64, kernel_size=3, stride=2, padding=1)
        self.relu2 = nn.LeakyReLU(0.01)
        self.trans_conv3 = nn.ConvTranspose2d(64, 32, kernel_size=3, stride=2, padding=0)
        self.relu3 = nn.LeakyReLU(0.01)
        self.trans_conv4 = nn.ConvTranspose2d(32, 1, kernel_size=3, stride=1, padding=0)
        self.trim = Trim()

    def forward(self, x):
        """Decode latent codes of shape (batch, 2) into image tensors."""
        # 3136 = 64 * 7 * 7: unflatten the linear output into a feature map.
        x = self.linear(x).view(-1, 64, 7, 7)
        activated_stages = (
            (self.trans_conv1, self.relu1),
            (self.trans_conv2, self.relu2),
            (self.trans_conv3, self.relu3),
        )
        for upsample, activation in activated_stages:
            x = activation(upsample(x))
        # The last transposed convolution has no activation before trimming.
        return self.trim(self.trans_conv4(x))

class VAE(nn.Module):
    """Variational autoencoder with a 2-D latent space.

    The encoder output (3136 features) feeds two linear heads that predict
    the mean and the log-variance of the latent Gaussian. A latent sample is
    drawn with the reparameterization trick, decoded, and squashed with a
    sigmoid.
    """

    def __init__(self):
        super().__init__()

        self.encoder = Encoder()
        self.decoder = Decoder()
        self.z_mean = torch.nn.Linear(3136, 2)
        self.z_log_var = torch.nn.Linear(3136, 2)
        self.out = nn.Sigmoid()

    def _latent_distribution(self, x):
        """Return ``(z_mean, z_log_var)`` predicted for the input batch."""
        features = self.encoder(x)
        return self.z_mean(features), self.z_log_var(features)

    def encoding_fn(self, x):
        """Encode ``x`` and return one sampled latent vector per input."""
        z_mean, z_log_var = self._latent_distribution(x)
        return self.reparameterize(z_mean, z_log_var)

    def reparameterize(self, z_mu, z_log_var):
        """Sample ``z = mu + eps * exp(log_var / 2)`` with ``eps ~ N(0, I)``.

        The noise is created with ``torch.randn_like`` so it always matches
        the device and dtype of ``z_mu``. The previous hard-coded ``.cuda()``
        failed on machines without a GPU and mismatched a CPU-resident model.
        On a GPU the noise now comes from the CUDA generator rather than
        being drawn on the CPU and copied over.
        """
        eps = torch.randn_like(z_mu)
        # exp(log_var / 2) is the standard deviation.
        return z_mu + eps * torch.exp(z_log_var / 2.)

    def forward(self, x):
        """Return ``(encoded, z_mean, z_log_var, decoded)`` for a batch.

        ``encoded`` is the sampled latent vector and ``decoded`` is the
        sigmoid of the decoder output.
        """
        z_mean, z_log_var = self._latent_distribution(x)
        encoded = self.reparameterize(z_mean, z_log_var)
        decoded = self.out(self.decoder(encoded))
        return encoded, z_mean, z_log_var, decoded
    

# Smoke test: push one random 1x28x28 sample through a freshly built VAE.
# Use the GPU when one is available instead of calling .cuda() unconditionally,
# which raises on CPU-only machines.
# NOTE: the forward pass itself only runs on the CPU if VAE creates its
# sampling noise on the same device as its inputs.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
x = torch.randn(1, 1, 28, 28).to(device)
model = VAE().to(device)
encoded, z_mean, z_log_var, decoded = model(x)

## My Modules

### Effective Receptive Field with Convolution

In [None]:
import torch
import torch.nn as nn
import numpy as np
from PIL import Image
from modules.convolution import Conv2d

# Convolution kernel shared by the two implementations below:
# effective_receptive_field_np() stores its layer's weight here and
# effective_receptive_field_torch() loads it into nn.Conv2d, so the NumPy
# function has to run first. Stays None until then.
common_weight = None

def effective_receptive_field_torch(x: np.ndarray) -> None:
    """Print PyTorch gradients of a 3x3 convolution probed at the output centre.

    A bias-free ``nn.Conv2d(3 -> 4, kernel 3, stride 1, padding 1)`` is loaded
    with the module-level ``common_weight``. The output values at the central
    spatial position are summed over batch and channels and backpropagated.
    Three things are printed: the shape of the input gradient for sample 0 /
    channel 0, the row of that gradient passing through the probed position,
    and the weight gradient for output channel 0 / input channel 0.

    Args:
        x: Input batch of shape (N, 3, H, W); any numeric dtype, converted to
            float32.

    Raises:
        RuntimeError: If ``common_weight`` has not been set yet.
    """
    if common_weight is None:
        raise RuntimeError(
            "common_weight is not set; run effective_receptive_field_np() first"
        )

    x = torch.tensor(x, dtype=torch.float32).requires_grad_(True)
    conv = nn.Conv2d(in_channels=3, out_channels=4,
                     kernel_size=3, stride=1,
                     padding=1, bias=False)
    # Cast explicitly: a float64 NumPy weight would otherwise clash with the
    # float32 input inside the convolution.
    conv.weight = nn.Parameter(torch.tensor(common_weight, dtype=torch.float32))
    z = conv(x)

    # Probe the central output position of every sample and channel.
    center_row, center_col = z.size(-2) // 2, z.size(-1) // 2
    loss = z[:, :, center_row, center_col].sum()
    loss.backward()

    grad_x = x.grad[0, 0].detach().numpy()
    grad_weight = conv.weight.grad[0, 0].detach().numpy()
    print(grad_x.shape)
    # Stride 1 with padding 1 keeps H and W, so the centre row of z is also
    # the row of x that carries the gradient (row 112 for a 224x224 input).
    print(grad_x[center_row])
    print(grad_weight)
    

def effective_receptive_field_np(x: np.ndarray) -> None:
    """Print gradients of the project's ``Conv2d`` probed at the output centre.

    Builds a ``Conv2d(3 -> 4, kernel 3, stride 1, padding 1)`` from
    ``modules.convolution``, publishes its weight in the module-level
    ``common_weight``, and runs the forward pass. It then backpropagates an
    upstream gradient that is 1 at the central spatial position of every
    sample and channel and 0 elsewhere. Three things are printed: the shape
    of the input gradient, the row of it through the probed position for
    sample 0 / channel 0, and the weight gradient for output channel 0 /
    input channel 0.

    Args:
        x: Input batch; presumably (N, 3, H, W) as required by the
            3-input-channel layer - confirm against ``Conv2d.forward``.
    """
    global common_weight

    conv = Conv2d(in_channels=3, out_channels=4,
                  padding=1, stride=1, kernel_size=3)
    # Publish the weight so another implementation can reuse the same kernel.
    common_weight = conv.weight
    z = conv.forward(x)

    # One-hot upstream gradient at the central output position.
    center_row, center_col = z.shape[-2] // 2, z.shape[-1] // 2
    grad_z = np.zeros_like(z)
    grad_z[:, :, center_row, center_col] = 1

    grad_x, grad_weight = conv.backpropagation(np.float32(grad_z))
    print(grad_x.shape)
    # Row through the probed position (row 112 for a 224x224 input); assumes
    # the layer preserves H and W, as stride 1 / padding 1 / kernel 3 suggests.
    print(grad_x[0, 0, center_row])
    print(grad_weight[0, 0])
    
if __name__ == "__main__":
    # Load both sample images at 224x224 and move the channel axis to the
    # front (presumably H x W x C -> C x H x W for RGB files).
    image = np.array(
        Image.open("samples/cock.jpg").resize((224, 224))
    ).transpose(2, 0, 1)
    image2 = np.array(
        Image.open("samples/cats.jpg").resize((224, 224))
    ).transpose(2, 0, 1)

    # Two-sample batch along a new leading axis.
    batch = np.stack((image, image2))

    # The NumPy run comes first: it sets the shared kernel the torch run reads.
    effective_receptive_field_np(batch)
    effective_receptive_field_torch(batch)

### Torch Classifier for Benchmark

In [None]:
import torch
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from tqdm import tqdm

# Preprocessing shared by both splits: resize to 224x224, convert to a tensor
# and normalise per channel (the constants look like the usual ImageNet
# mean/std - confirm if that matters for the benchmark).
transform = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# ImageFolder derives the class labels from the sub-directory names.
train_dataset = datasets.ImageFolder("dataset/train", transform=transform)
test_dataset = datasets.ImageFolder("dataset/test", transform=transform)

# Only the training split is shuffled.
train_loader = DataLoader(train_dataset, batch_size=24, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=24, shuffle=False)


class Model(nn.Module):
    """Small two-stage CNN classifier.

    Each stage is Conv3x3 -> BatchNorm -> ReLU -> 2x2 max-pool (3 -> 64 -> 128
    channels). Global average pooling then reduces the feature map to 128
    values per sample and a linear layer produces ``num_classes`` logits.

    Args:
        num_classes: Number of output logits (default 2).
    """

    def __init__(self, num_classes=2):
        super().__init__()
        # Stage 1
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu1 = nn.ReLU(inplace=True)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Stage 2
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(128)
        self.relu2 = nn.ReLU(inplace=True)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Classification head
        self.avg = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return raw class logits of shape (batch, num_classes)."""
        x = self.pool1(self.relu1(self.bn1(self.conv1(x))))
        x = self.pool2(self.relu2(self.bn2(self.conv2(x))))
        # (batch, 128, 1, 1) -> (batch, 128)
        pooled = torch.flatten(self.avg(x), 1)
        return self.fc(pooled)

import os  # local to this cell: only needed to create the checkpoint folder

# Train on the GPU when available instead of failing on CPU-only machines.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = Model().to(device)
loss_func = nn.CrossEntropyLoss()
optimizer = optim.AdamW(params=model.parameters(), lr=0.001)
epochs = 60

# torch.save does not create missing parent directories.
os.makedirs(".cache/models", exist_ok=True)

step = len(train_loader)  # number of batches per epoch
# Best mean epoch loss so far. It must live outside the epoch loop; resetting
# it every epoch made the "is this the best model?" check always compare
# against the initial value, so the checkpoint was overwritten each epoch.
min_loss = float("inf")

for epoch in range(epochs):
    total_loss = 0
    # Iterating the loader directly lets tqdm show the total batch count.
    for inputs, targets in tqdm(train_loader):
        optimizer.zero_grad()
        inputs = inputs.to(device)
        targets = targets.to(device)
        logits = model(inputs)
        loss = loss_func(logits, targets)
        total_loss += loss.item()
        loss.backward()
        optimizer.step()

    cur_loss = total_loss/step
    # Keep only the weights of the epoch with the lowest mean training loss.
    if cur_loss < min_loss:
        min_loss = cur_loss
        torch.save(model.state_dict(), ".cache/models/model.pt")

    print(f"EPOCH: {epoch+1} ===> Loss: {cur_loss}") # min loss: 0.22870734333992004