In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.models as models
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from tqdm import tqdm

In [2]:
transform = {
'train': transforms.Compose(
    [transforms.Resize(256),
     transforms.CenterCrop(224),
     transforms.ToTensor(),
     transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))]),
'test' : transforms.Compose(
    [transforms.Resize(224),
     transforms.ToTensor(),
     transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))])
}

batch_size = 16

In [3]:
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform['train'])
train_loader = DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform['test'])
test_loader = DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2)

Files already downloaded and verified
Files already downloaded and verified


In [9]:
class ModelA(nn.Module):
    def __init__(self, in_feature):
        super().__init__()
        self.FC = nn.Sequential(
            nn.Linear(in_feature, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 10)
        )
    def forward(self, x):
        x = self.FC(x)
        return x

In [10]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

modelA = models.resnet18(pretrained=True)
num_feature = modelA.fc.in_features
modelA.fc = ModelA(num_feature)
# summary(modelA, (3, 224, 224), device='cpu')

modelB = models.resnet50(pretrained=True)
num_feature = modelB.fc.in_features
modelB.fc = ModelA(num_feature)
# summary(modelB, (3, 224, 224), device='cpu')

In [11]:
def train(model, criterion, optimizer, Epoch, Data, total=1500):
    criterion = criterion.to(device)
    optimizer = optimizer
    model.to(device)
    model.train()
    
    for epoch in range(Epoch):
        running_loss = 0.0
        running_corrects = 0.0
        T = 0
        for idx, data in enumerate(tqdm(Data)):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            _, preds = torch.max(outputs, 1)
            running_loss += loss.item()
            running_corrects += torch.sum(preds == labels.data)
            T += labels.size(0)

            if idx % total == (total - 1):
                print('Epoch: [%d/%d, %d] loss: %.3f'%((epoch+1), Epoch, idx, (running_loss / T)))
                print('Training AC: %.3f %%'%(running_corrects / T * 100))
                running_loss = 0.0
                running_corrects = 0.0
                T = 0

In [12]:
criiterion = nn.CrossEntropyLoss()
lr = 3e-4
Epoch = 10
optimizer_A = optim.Adam(modelA.parameters(), lr=lr)
optimizer_B = optim.Adam(modelB.parameters(), lr=lr)

In [13]:
train(modelA, criterion=criiterion, optimizer=optimizer_A, Epoch=Epoch, Data=train_loader)

 48%|████▊     | 1504/3125 [00:48<00:50, 31.84it/s]

Epoch: [1/10, 1499] loss: 0.061
Training AC: 69.171 %


 96%|█████████▌| 3004/3125 [01:33<00:03, 34.16it/s]

Epoch: [1/10, 2999] loss: 0.041
Training AC: 79.246 %


100%|██████████| 3125/3125 [01:37<00:00, 32.03it/s]
 48%|████▊     | 1505/3125 [00:47<00:50, 32.08it/s]

Epoch: [2/10, 1499] loss: 0.032
Training AC: 83.912 %


 96%|█████████▌| 3005/3125 [01:32<00:03, 34.05it/s]

Epoch: [2/10, 2999] loss: 0.029
Training AC: 85.050 %


100%|██████████| 3125/3125 [01:36<00:00, 32.54it/s]
 48%|████▊     | 1505/3125 [00:46<00:48, 33.53it/s]

Epoch: [3/10, 1499] loss: 0.023
Training AC: 88.354 %


 96%|█████████▌| 3005/3125 [01:31<00:03, 34.30it/s]

Epoch: [3/10, 2999] loss: 0.022
Training AC: 88.750 %


100%|██████████| 3125/3125 [01:34<00:00, 32.95it/s]
 48%|████▊     | 1505/3125 [00:46<00:48, 33.32it/s]

Epoch: [4/10, 1499] loss: 0.017
Training AC: 91.254 %


 96%|█████████▌| 3005/3125 [01:31<00:03, 33.94it/s]

Epoch: [4/10, 2999] loss: 0.018
Training AC: 90.946 %


100%|██████████| 3125/3125 [01:34<00:00, 32.97it/s]
 48%|████▊     | 1504/3125 [00:46<00:47, 33.96it/s]

Epoch: [5/10, 1499] loss: 0.013
Training AC: 93.746 %


 96%|█████████▌| 3004/3125 [01:30<00:03, 34.01it/s]

Epoch: [5/10, 2999] loss: 0.014
Training AC: 92.800 %


100%|██████████| 3125/3125 [01:34<00:00, 33.04it/s]
 48%|████▊     | 1505/3125 [00:46<00:47, 34.14it/s]

Epoch: [6/10, 1499] loss: 0.010
Training AC: 94.938 %


 96%|█████████▌| 3005/3125 [01:30<00:03, 33.86it/s]

Epoch: [6/10, 2999] loss: 0.011
Training AC: 94.108 %


100%|██████████| 3125/3125 [01:34<00:00, 33.19it/s]
 48%|████▊     | 1505/3125 [00:46<00:47, 34.20it/s]

Epoch: [7/10, 1499] loss: 0.008
Training AC: 95.896 %


 96%|█████████▌| 3005/3125 [01:30<00:03, 34.09it/s]

Epoch: [7/10, 2999] loss: 0.009
Training AC: 95.404 %


100%|██████████| 3125/3125 [01:34<00:00, 33.17it/s]
 48%|████▊     | 1505/3125 [00:46<00:47, 33.93it/s]

Epoch: [8/10, 1499] loss: 0.007
Training AC: 96.617 %


 96%|█████████▌| 3005/3125 [01:30<00:03, 32.30it/s]

Epoch: [8/10, 2999] loss: 0.008
Training AC: 96.104 %


100%|██████████| 3125/3125 [01:34<00:00, 33.13it/s]
 48%|████▊     | 1505/3125 [00:46<00:48, 33.56it/s]

Epoch: [9/10, 1499] loss: 0.006
Training AC: 96.946 %


 96%|█████████▌| 3005/3125 [01:30<00:03, 33.03it/s]

Epoch: [9/10, 2999] loss: 0.006
Training AC: 96.717 %


100%|██████████| 3125/3125 [01:34<00:00, 33.05it/s]
 48%|████▊     | 1505/3125 [00:45<00:47, 34.04it/s]

Epoch: [10/10, 1499] loss: 0.005
Training AC: 97.463 %


 96%|█████████▌| 3005/3125 [01:29<00:03, 34.17it/s]

Epoch: [10/10, 2999] loss: 0.006
Training AC: 97.133 %


100%|██████████| 3125/3125 [01:33<00:00, 33.51it/s]


In [14]:
train(modelB, criterion=criiterion, optimizer=optimizer_B, Epoch=Epoch, Data=train_loader)

 48%|████▊     | 1501/3125 [01:59<02:06, 12.86it/s]

Epoch: [1/10, 1499] loss: 0.072
Training AC: 62.658 %


 96%|█████████▌| 3001/3125 [03:56<00:09, 12.71it/s]

Epoch: [1/10, 2999] loss: 0.051
Training AC: 73.567 %


100%|██████████| 3125/3125 [04:06<00:00, 12.69it/s]
 48%|████▊     | 1501/3125 [02:01<02:07, 12.76it/s]

Epoch: [2/10, 1499] loss: 0.041
Training AC: 79.113 %


 96%|█████████▌| 3001/3125 [04:00<00:09, 12.77it/s]

Epoch: [2/10, 2999] loss: 0.037
Training AC: 80.883 %


100%|██████████| 3125/3125 [04:11<00:00, 12.44it/s]
 48%|████▊     | 1501/3125 [02:00<02:11, 12.32it/s]

Epoch: [3/10, 1499] loss: 0.031
Training AC: 84.262 %


 96%|█████████▌| 3001/3125 [04:05<00:09, 12.52it/s]

Epoch: [3/10, 2999] loss: 0.029
Training AC: 84.917 %


100%|██████████| 3125/3125 [04:14<00:00, 12.26it/s]
 48%|████▊     | 1501/3125 [02:03<02:12, 12.22it/s]

Epoch: [4/10, 1499] loss: 0.025
Training AC: 87.383 %


 96%|█████████▌| 3001/3125 [04:02<00:09, 12.71it/s]

Epoch: [4/10, 2999] loss: 0.024
Training AC: 87.783 %


100%|██████████| 3125/3125 [04:12<00:00, 12.40it/s]
 48%|████▊     | 1501/3125 [02:00<02:07, 12.71it/s]

Epoch: [5/10, 1499] loss: 0.020
Training AC: 89.846 %


 96%|█████████▌| 3001/3125 [03:58<00:09, 12.78it/s]

Epoch: [5/10, 2999] loss: 0.021
Training AC: 89.246 %


100%|██████████| 3125/3125 [04:07<00:00, 12.61it/s]
 48%|████▊     | 1501/3125 [01:59<02:07, 12.72it/s]

Epoch: [6/10, 1499] loss: 0.016
Training AC: 91.804 %


 96%|█████████▌| 3001/3125 [03:57<00:09, 12.72it/s]

Epoch: [6/10, 2999] loss: 0.017
Training AC: 91.146 %


100%|██████████| 3125/3125 [04:07<00:00, 12.64it/s]
 48%|████▊     | 1501/3125 [01:59<02:08, 12.68it/s]

Epoch: [7/10, 1499] loss: 0.013
Training AC: 93.083 %


 96%|█████████▌| 3001/3125 [03:57<00:09, 12.76it/s]

Epoch: [7/10, 2999] loss: 0.014
Training AC: 92.608 %


100%|██████████| 3125/3125 [04:07<00:00, 12.64it/s]
 48%|████▊     | 1501/3125 [01:59<02:07, 12.75it/s]

Epoch: [8/10, 1499] loss: 0.011
Training AC: 94.592 %


 96%|█████████▌| 3001/3125 [03:57<00:09, 12.57it/s]

Epoch: [8/10, 2999] loss: 0.013
Training AC: 93.525 %


100%|██████████| 3125/3125 [04:07<00:00, 12.64it/s]
 48%|████▊     | 1501/3125 [01:59<02:07, 12.69it/s]

Epoch: [9/10, 1499] loss: 0.009
Training AC: 95.333 %


 96%|█████████▌| 3001/3125 [03:57<00:09, 12.75it/s]

Epoch: [9/10, 2999] loss: 0.011
Training AC: 94.392 %


100%|██████████| 3125/3125 [04:07<00:00, 12.64it/s]
 48%|████▊     | 1501/3125 [02:00<02:07, 12.78it/s]

Epoch: [10/10, 1499] loss: 0.008
Training AC: 95.704 %


 96%|█████████▌| 3001/3125 [03:57<00:09, 12.83it/s]

Epoch: [10/10, 2999] loss: 0.009
Training AC: 95.179 %


100%|██████████| 3125/3125 [04:07<00:00, 12.64it/s]


In [15]:
torch.save(modelA.state_dict(), 'resnet18.pth')
torch.save(modelB.state_dict(), 'resnet50.pth')

# Test ModelA and ModelB

In [125]:
test_A = models.resnet18(pretrained=True)
num_feature = test_A.fc.in_features
test_A.fc = ModelA(num_feature)
test_A.load_state_dict(torch.load('./resnet18.pth'))

test_B = models.resnet50(pretrained=True)
num_feature = test_B.fc.in_features
test_B.fc = ModelA(num_feature) 
test_B.load_state_dict(torch.load('./resnet50.pth'))

<All keys matched successfully>

In [115]:
def testing(model, Data):
    model.to(device)
    model.eval()
    testing_corrects = 0.0
    T = 0.0
    
    for idx, data in enumerate(tqdm(Data)):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        outputs = model(inputs)

        _, preds = torch.max(outputs, 1)
        testing_corrects += torch.sum(preds == labels.data)
        T += labels.size(0)
    print('Testing AC = %.3f %%'%(testing_corrects / T * 100))
    return

In [126]:
testing(test_A, test_loader)
testing(test_B, test_loader)

100%|██████████| 625/625 [00:06<00:00, 91.67it/s] 


Testing AC = 90.880 %


100%|██████████| 625/625 [00:15<00:00, 41.47it/s]

Testing AC = 89.120 %





# Remove last linear layer of each model

In [127]:
from torchsummary import summary
# summary(test_A, (3, 224, 224), device='cpu')
test_A.fc = nn.Linear(models.resnet18().fc.in_features, 512)

test_B.fc = nn.Linear(models.resnet50().fc.in_features, 512)

# Ensemble Model with two

In [128]:
class Ensemble(nn.Module):
    def __init__(self, modelA, modelB, in_features):
        super(Ensemble, self).__init__()
        self.modelA = modelA
        self.modelB = modelB
        self.fc = nn.Sequential(
            nn.Linear(in_features * 2, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 10)
        )
    def forward(self, x):
        output1 = self.modelA(x)
        output2 = self.modelB(x)
        output = torch.cat((output1, output2), 1)
        x = self.fc(output)
        return x

In [129]:
num_feature = test_A.fc.out_features
Ensemble_model = Ensemble(test_A, test_B, num_feature)

lr = 5e-4
Epoch = 10
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(Ensemble_model.parameters(), lr=lr)
scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)

In [130]:
# Freeze Resnet18 and Resnet50, training FC layer
ct = 0
for child in Ensemble_model.children():
    ct+=1
    if ct < 3:
        for param in child.parameters():
            param.requires_grad = False

# Train Ensemble Model

In [131]:
# Freeze backbone layer and train last linear layer
def Entrain(model, criterion, optimizer, scheduler, Epoch, Data, total=1500):
    criterion = criterion.to(device)
    optimizer = optimizer
    scheduler = scheduler
    model.to(device)
    model.train()
    
    for epoch in range(Epoch):
        running_loss = 0.0
        running_corrects = 0.0
        T = 0
        for idx, data in enumerate(tqdm(Data)):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            _, preds = torch.max(outputs, 1)
            running_loss += loss.item()
            running_corrects += torch.sum(preds == labels.data)
            T += labels.size(0)

            if idx % total == (total - 1):
                print('Epoch: [%d/%d, %d] loss: %.3f'%((epoch+1), Epoch, idx, (running_loss / T)))
                print('Training AC: %.3f %%'%(running_corrects / T * 100))
                running_loss = 0.0
                running_corrects = 0.0
                T = 0
    scheduler.step()

In [132]:
Entrain(Ensemble_model, criterion=criterion, optimizer=optimizer, scheduler=scheduler, Epoch=Epoch, Data=train_loader)

  4%|▍         | 122/3125 [00:06<01:40, 29.99it/s]

In [None]:
testing(Ensemble_model, test_loader)

100%|██████████| 625/625 [00:19<00:00, 32.40it/s]

Testing AC = 92.730 %





In [None]:
torch.save(Ensemble_model, './Ensemble.pth')