## Import

In [4]:
import torch
import torch.nn as nn

from torchvision import models
import torch.optim as opt
from torch.autograd import Variable

from torchvision.datasets import MNIST
from torchvision import transforms
from torch.utils.data import DataLoader

## Set the Device

In [27]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## Dataset & DataLoader

In [5]:
train_dataset = MNIST(root = './data', train=True, download=True, transform=transforms.ToTensor())
test_dataset= MNIST(root = './data', train=False, download=True, transform=transforms.ToTensor())

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [7]:
# hyperparameter 
train_batch_size = 64
test_batch_szie = 1000

# train dataloader
train_loader = DataLoader(
    dataset=train_dataset, 
    batch_size=train_batch_size, 
    shuffle=True)

# test dataloader
test_loader = DataLoader(
    dataset=test_dataset, 
    batch_size=test_batch_szie, 
    shuffle=False)

## Ensemble Model

### Example 1.

In [8]:
class MyEnsemble(nn.Module):
    def __init__(self, modelA, modelB, num_classes=10):
        super(MyEnsemble, self).__init__()
        
        self.modelA = modelA
        self.modelB = modelB

        # remove the last linear layer
        self.modelA.fc = nn.Identity()
        self.modelB.fc = nn.Identity()

        self.relu = nn.ReLU()

        # create new classifier
        self.classifier = nn.Linear(2048+512, num_classes)

    
    def forward(self, x):

        # clone to make sure x is not changed by inplace methods
        outputA = self.modelA(x.clone())
        ooutputA = outputA.view(outputA.size(), -1)

        outputB = self.modelB(x)
        outputB = outputB.view(outputB.size(0), -1)

        output = torch.cat((outputA, outputB), dim=1)
        output = self.relu(output)
        output = self.classifier(output)
        
        return output

In [10]:
resnet50 = models.resnet50(pretrained=True)
resnet18 = models.resnet18(pretrained=True)

In [11]:
# Freeze these models
for param in resnet50.parameters():
    param.requires_grad_(False)

for param in resnet18.parameters():
    param.requires_grad_(False)

In [13]:
# Create ensemble model
model = MyEnsemble(resnet50, resnet18)

# smaple images
x = torch.randn(1, 3, 224, 224)
output = model(x)

print('- output shape:')
output

- output shape:


tensor([[-0.2263, -0.1464,  0.1453,  0.1640, -0.1385,  0.1174,  0.3282, -0.1426,
         -0.0771, -0.4128]], grad_fn=<AddmmBackward>)

### Example 2. LSTM + CNN 

#### CNN Mdel:

In [14]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()

        self.conv_layers = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(stride=2, kernel_size=2)
        )

        
        self.dense = nn.Sequential(
            nn.Linear(in_features=14*14*128, out_features=1024),
            nn.ReLU(),
            nn.Linear(1024, 10)
        )
        

    
    def forward(self, x):
        output = self.conv_layers(x)
        output = output.view(-1, 14*14*128)
        output = self.dense(output)

        return output    

In [35]:
cnn = CNN().to(device)
print(cnn)

for idx, (images, labels) in enumerate(train_loader):
    images = images.to(device)
    labels = labels.to(device)
    print('\n- image(input) shape:')
    print(images.shape)
    print('\n- output shape:')
    output = cnn(images)

    # (batch_size, num_class)
    print(output.shape) 
    break

CNN(
  (conv_layers): Sequential(
    (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (dense): Sequential(
    (0): Linear(in_features=25088, out_features=1024, bias=True)
    (1): ReLU()
    (2): Linear(in_features=1024, out_features=10, bias=True)
  )
)

- image(input) shape:
torch.Size([64, 1, 28, 28])

- output shape:
torch.Size([64, 10])


#### LSTM Model

In [31]:
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_class):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True) # batch_first batch 為第一個維度
        self.fc = nn.Linear(hidden_size, num_class)

    def forward(self, x):
        x = x.view(x.size(0), 1, -1)

        h0 = Variable(
            # layer num, batch size, hidden size
            torch.zeros(self.num_layers, x.size(0), self.hidden_size)
            ).to(device)
        c0 = Variable(torch.zeros(
            self.num_layers, x.size(0), self.hidden_size)
            ).to(device)
        

        out, (h_out, c) = self.lstm(x, (h0, c0))

        # (b, 1, 10)
        out = self.fc(out)
        
        # from (b, 1, 10) to shape (b, 10)
        return torch.squeeze(out)

In [36]:
lstm = LSTM(
    input_size = 28*28,
    hidden_size = 784,
    num_layers = 1,
    num_class = 10
).to(device)

print(lstm)

for idx, (images, labels) in enumerate(train_loader):
    images = images.to(device)
    labels = labels.to(device)
    print('\n- image shape:')
    print(images.shape)
    print('\n- output shape:')
    output = lstm(images)
    
    # (batch_size, num_class)
    print(output.shape)
    break

LSTM(
  (lstm): LSTM(784, 784, batch_first=True)
  (fc): Linear(in_features=784, out_features=10, bias=True)
)

- image shape:
torch.Size([64, 1, 28, 28])

- output shape:
torch.Size([64, 10])


#### Ensemble (CNN + LSTM)

In [21]:
class LSTMCNNEnsemble(nn.Module):
    def __init__(self, cnn, lstm, num_class):
        super(LSTMCNNEnsemble, self).__init__()
        self.cnn = cnn
        self.lstm = lstm
        self.relu = nn.ReLU()
        self.fc = nn.Linear(20, 10)

    def forward(self, x):
        
        out_cnn = self.cnn(x.clone())
        out_lstm = self.lstm(x)
        out = torch.cat((out_cnn, out_lstm), dim=1)

        out = self.fc(out)
        return out

In [37]:
ensemble_model = LSTMCNNEnsemble(cnn, lstm, 10).to(device)
print(ensemble_model)

for idx, (images, labels) in enumerate(train_loader):
    images = images.to(device)
    labels = labels.to(device)
    print('\n- image(input) shape:')
    print(images.shape)
    output = ensemble_model(images)
    print('\n- output shape:')
    print(output.shape)
    break

LSTMCNNEnsemble(
  (cnn): CNN(
    (conv_layers): Sequential(
      (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU()
      (2): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (3): ReLU()
      (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    (dense): Sequential(
      (0): Linear(in_features=25088, out_features=1024, bias=True)
      (1): ReLU()
      (2): Linear(in_features=1024, out_features=10, bias=True)
    )
  )
  (lstm): LSTM(
    (lstm): LSTM(784, 784, batch_first=True)
    (fc): Linear(in_features=784, out_features=10, bias=True)
  )
  (relu): ReLU()
  (fc): Linear(in_features=20, out_features=10, bias=True)
)

- image(input) shape:
torch.Size([64, 1, 28, 28])

- output shape:
torch.Size([64, 10])


## Loss Function & Optimizer

In [39]:
# hypyerperameter
learning_rate = 0.0001

loss_func = nn.CrossEntropyLoss()
optimizer = opt.Adam(ensemble_model.parameters(), lr=learning_rate)

## Training Ensemble Model

In [40]:
# hyperparameter 
num_epochs = 5

ensemble_model.to(device)

for epoch in range(num_epochs):
    for idx, (images, labels) in enumerate(train_loader):
        images = Variable(images.to(device))
        labels = Variable(labels.to(device))

        optimizer.zero_grad()
        outputs = ensemble_model(images)
        loss = loss_func(outputs, labels)
        loss.backward()
        optimizer.step()

        if (idx+1)%100 == 0:
            print("Epoch: %d, Batch: %d, Loss: %.4f" %(epoch+1, idx+1, loss.data))

Epoch: 1, Batch: 100, Loss: 0.6099
Epoch: 1, Batch: 200, Loss: 0.3531
Epoch: 1, Batch: 300, Loss: 0.3945
Epoch: 1, Batch: 400, Loss: 0.1301
Epoch: 1, Batch: 500, Loss: 0.3692
Epoch: 1, Batch: 600, Loss: 0.1536
Epoch: 1, Batch: 700, Loss: 0.1410
Epoch: 1, Batch: 800, Loss: 0.0392
Epoch: 1, Batch: 900, Loss: 0.1138
Epoch: 2, Batch: 100, Loss: 0.0337
Epoch: 2, Batch: 200, Loss: 0.1520
Epoch: 2, Batch: 300, Loss: 0.0563
Epoch: 2, Batch: 400, Loss: 0.1713
Epoch: 2, Batch: 500, Loss: 0.0532
Epoch: 2, Batch: 600, Loss: 0.0240
Epoch: 2, Batch: 700, Loss: 0.1004
Epoch: 2, Batch: 800, Loss: 0.0665
Epoch: 2, Batch: 900, Loss: 0.0518
Epoch: 3, Batch: 100, Loss: 0.0247
Epoch: 3, Batch: 200, Loss: 0.0818
Epoch: 3, Batch: 300, Loss: 0.0423
Epoch: 3, Batch: 400, Loss: 0.0074
Epoch: 3, Batch: 500, Loss: 0.1040
Epoch: 3, Batch: 600, Loss: 0.0303
Epoch: 3, Batch: 700, Loss: 0.0726
Epoch: 3, Batch: 800, Loss: 0.0949
Epoch: 3, Batch: 900, Loss: 0.0147
Epoch: 4, Batch: 100, Loss: 0.0413
Epoch: 4, Batch: 200