# Pytorch

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torchtools import EarlyStopping

In [2]:
train_kwargs = {'batch_size': 32}
test_kwargs = {'batch_size': 16}
if torch.cuda.is_available():
    device = 'cuda'
    cuda_kwargs = {'num_workers': 1,
                   'pin_memory': True,
                   'shuffle': True}
    train_kwargs.update(cuda_kwargs)
    test_kwargs.update(cuda_kwargs)
else:
    device = 'cpu'

In [3]:
dataset1 = datasets.CIFAR100('datasets/data', train=True, download=True,
                   transform=transforms.ToTensor())
dataset2 = datasets.CIFAR100('datasets/data', train=False,
                   transform=transforms.ToTensor())
train_data = torch.utils.data.DataLoader(dataset1,**train_kwargs)
test_data = torch.utils.data.DataLoader(dataset2, **test_kwargs)

Files already downloaded and verified


In [50]:
# Our model class
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 256, 3, padding='same')
        self.bn1 = nn.BatchNorm2d(256)
        self.conv2 = nn.Conv2d(256, 256, 3, padding='same')
        self.bn2 = nn.BatchNorm2d(256)
        self.dropout1 = nn.Dropout(0.2)

        self.conv3 = nn.Conv2d(256, 512, 3, padding='same')
        self.bn3 = nn.BatchNorm2d(512)
        self.conv4 = nn.Conv2d(512, 512, 3, padding='same')
        self.bn4 = nn.BatchNorm2d(512)
        self.dropout2 = nn.Dropout(0.2)

        self.conv5 = nn.Conv2d(512, 512, 3, padding='same')
        self.bn5 = nn.BatchNorm2d(512)
        self.conv6 = nn.Conv2d(512, 512, 3, padding='same')
        self.bn6 = nn.BatchNorm2d(512)
        self.dropout3 = nn.Dropout(0.2)

        self.conv7 = nn.Conv2d(512, 512, 3, padding='same')
        self.bn7 = nn.BatchNorm2d(512)
        self.conv8 = nn.Conv2d(512, 512, 3, padding='same')
        self.bn8 = nn.BatchNorm2d(512)
        self.dropout4 = nn.Dropout(0.2)
        
        self.dropout2 = nn.Dropout(0.5)
        self.fc1 = nn.Linear(2048, 1024)
        self.dropout5 = nn.Dropout(0.2)
        self.bn9 = nn.BatchNorm1d(1024)
        self.fc2 = nn.Linear(1024, 100)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(self.bn1(x))
        x = self.conv2(x)
        x = F.relu(self.bn2(x))
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)

        x = self.conv3(x)
        x = F.relu(self.bn3(x))
        x = self.conv4(x)
        x = F.relu(self.bn4(x))
        x = F.max_pool2d(x, 2)
        x = self.dropout2(x)

        x = self.conv5(x)
        x = F.relu(self.bn5(x))
        x = self.conv6(x)
        x = F.relu(self.bn6(x))
        x = F.max_pool2d(x, 2)
        x = self.dropout3(x)

        x = self.conv7(x)
        x = F.relu(self.bn7(x))
        x = self.conv8(x)
        x = F.relu(self.bn8(x))
        x = F.max_pool2d(x, 2)
        x = self.dropout4(x)

        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout5(x)
        x = self.bn9(x)
        x = self.fc2(x)
        output = F.log_softmax(x, dim=1)
        return output

# train function. get from pytorch github
def train(train_data, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_data):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        print('Train Epoch: {} [{}/{}]\tLoss: {:.6f}'.format(
            epoch, batch_idx * 32, len(train_data.dataset),
            loss.item()), end='\r')

# test function. get from pytorch github
def test(test_data):
    global test_loss
    model.eval()
    correct = 0
    with torch.no_grad():
        for data, target in test_data:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_data.dataset)

    print('\nTest set: Average loss: {:.4f}, Correct: {}/{}, Accuracy: {:.0f}%\n'.format(
        test_loss, correct, len(test_data.dataset),
        100. * correct / len(test_data.dataset)))

# early stopping function
def early_stopping(val_loss, args):
    score = -val_loss
    if args['best_score'] is None:
        args['best_score'] = score
    elif args['best_score'] + args['delta'] <= score <= args['best_score'] + args['delta']:
        args['counter'] += 1
        if args['counter'] >= args['patience']:
            args['early_stop'] = True
    else:
        args['best_score'] = score
        args['counter'] = 0

def save_checkpoint(save_path, args):
    # save model to checkpoint
    torch.save({
        'model': model.state_dict(), 
        'optimizer': optimizer.state_dict(),
        'best_score': args['best_score'],
        'delta': args['delta']
            }, save_path)
    print(f"info: trained model saved {save_path}")

### Training initial model

In [52]:
# create model
model = Net()
model.to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
# train model
epochs = 10

# early stop parameters
early = {
'best_score': None,   # best low loss value
'patience': 5,        # maximum number of similarities
'delta': 0.001,       # minimum difference between losses
'counter': 0,         # early stop counter
'early_stop': False   # should early stopping process
}
save_path = 'models/cifar100.pt' # model save path

global test_loss
for epoch in range(1, epochs + 1):
    test_loss = 0
    train(train_data, epoch)
    test(test_data)
    early_stopping(val_loss, early)
    if early['early_stop']:
        print("Early Stopping: the model loss remained almost unchanged. Stop training process and saving model...")
        save_checkpoint(save_path, early)
        break
if not early['early_stop']:
    save_checkpoint(save_path, early)

Train Epoch: 1 [49984/50000]	Loss: 3.592701
Test set: Average loss: 3.5153, Correct: 1640/10000, Accuracy: 16%

Train Epoch: 2 [49984/50000]	Loss: 3.486880
Test set: Average loss: 3.0785, Correct: 2452/10000, Accuracy: 25%

Train Epoch: 3 [49984/50000]	Loss: 2.210239
Test set: Average loss: 2.7014, Correct: 3089/10000, Accuracy: 31%

Train Epoch: 4 [49984/50000]	Loss: 3.180290
Test set: Average loss: 2.7197, Correct: 2989/10000, Accuracy: 30%

Train Epoch: 5 [49984/50000]	Loss: 2.283243
Test set: Average loss: 2.2998, Correct: 3987/10000, Accuracy: 40%

Train Epoch: 6 [49984/50000]	Loss: 2.812876
Test set: Average loss: 2.0376, Correct: 4550/10000, Accuracy: 46%

Train Epoch: 7 [49984/50000]	Loss: 1.685385
Test set: Average loss: 1.9598, Correct: 4671/10000, Accuracy: 47%

Train Epoch: 8 [49984/50000]	Loss: 1.773578
Test set: Average loss: 1.9078, Correct: 4896/10000, Accuracy: 49%

Train Epoch: 9 [49984/50000]	Loss: 2.942346
Test set: Average loss: 1.7703, Correct: 5167/10000, Accurac

### Training checkpoint model

In [55]:
# load model from checkpoint
model = Net()
optimizer = optim.Adam(model.parameters(), lr=0.001)

checkpoint = torch.load('models/cifar100.pt')
model.load_state_dict(checkpoint['model'])
model.to(device)
optimizer.load_state_dict(checkpoint['optimizer'])

early = {
    'best_score': checkpoint['best_score'],
    'patience': 5,
    'delta': checkpoint['delta'],
    'counter': 0,
    'early_stop': False
}
save_path = 'models/new_cifar100.pt'

# again train model epoch
epochs = 5

for epoch in range(1, epochs + 1):
    val_loss = 0
    train(train_data, epoch)
    test(test_data)
    early_stopping(test_loss, early)
    if early['early_stop']:
        print("Early Stopping: the model loss remained almost unchanged. Stop training process and saving model...")
        save_checkpoint(save_path, early)
        break
if not early['early_stop']:
    save_checkpoint(save_path, early)

Train Epoch: 1 [49984/50000]	Loss: 2.571144
Test set: Average loss: 2.4259, Correct: 3899/10000, Accuracy: 39%

Train Epoch: 2 [49984/50000]	Loss: 1.016148
Test set: Average loss: 1.6442, Correct: 5611/10000, Accuracy: 56%

Train Epoch: 3 [49984/50000]	Loss: 1.354643
Test set: Average loss: 1.8350, Correct: 5479/10000, Accuracy: 55%

Train Epoch: 4 [49984/50000]	Loss: 1.263191
Test set: Average loss: 1.5290, Correct: 5805/10000, Accuracy: 58%

Train Epoch: 5 [49984/50000]	Loss: 1.456159
Test set: Average loss: 1.5875, Correct: 5820/10000, Accuracy: 58%

info: trained model saved models/new_cifar100.pt


# Tensorflow

In [3]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
import numpy as np
from tensorflow.keras.utils import to_categorical

In [4]:
# load dataset
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar100.load_data()
# convert labels to categorical
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)

### Training initial model

In [5]:
from tensorflow.keras.initializers import RandomNormal, Constant

def create_model():
    model = keras.models.Sequential()
    
    model.add(keras.layers.Rescaling(1/255.0, input_shape=(32,32,3)))
    model.add(keras.layers.Conv2D(256,(3,3),padding='same'))
    model.add(keras.layers.BatchNormalization())
    model.add(keras.layers.Activation('relu'))
    model.add(keras.layers.Conv2D(256,(3,3),padding='same'))
    model.add(keras.layers.BatchNormalization())
    model.add(keras.layers.Activation('relu'))
    model.add(keras.layers.MaxPool2D(pool_size=(2,2)))
    model.add(keras.layers.Dropout(0.2))
     
    model.add(keras.layers.Conv2D(512,(3,3),padding='same'))
    model.add(keras.layers.BatchNormalization())
    model.add(keras.layers.Activation('relu'))
    model.add(keras.layers.Conv2D(512,(3,3),padding='same'))
    model.add(keras.layers.BatchNormalization())
    model.add(keras.layers.Activation('relu'))
    model.add(keras.layers.MaxPool2D(pool_size=(2,2)))
    model.add(keras.layers.Dropout(0.2))
    
    model.add(keras.layers.Conv2D(512,(3,3),padding='same'))
    model.add(keras.layers.BatchNormalization())
    model.add(keras.layers.Activation('relu'))
    model.add(keras.layers.Conv2D(512,(3,3),padding='same'))
    model.add(keras.layers.BatchNormalization())
    model.add(keras.layers.Activation('relu'))
    model.add(keras.layers.MaxPool2D(pool_size=(2,2)))
    model.add(keras.layers.Dropout(0.2))
    
    model.add(keras.layers.Conv2D(512,(3,3),padding='same'))
    model.add(keras.layers.BatchNormalization())
    model.add(keras.layers.Activation('relu'))
    model.add(keras.layers.Conv2D(512,(3,3),padding='same'))
    model.add(keras.layers.BatchNormalization())
    model.add(keras.layers.Activation('relu'))
    model.add(keras.layers.MaxPool2D(pool_size=(2,2)))
    model.add(keras.layers.Dropout(0.2))
    
    model.add(keras.layers.Flatten())
    model.add(keras.layers.Dense(1024))
    model.add(keras.layers.Activation('relu'))
    model.add(keras.layers.Dropout(0.2))
    model.add(keras.layers.BatchNormalization(momentum=0.95, 
            epsilon=0.005,
            beta_initializer=RandomNormal(mean=0.0, stddev=0.05), 
            gamma_initializer=Constant(value=0.9)))
    model.add(keras.layers.Dense(100,activation='softmax'))
    
    model.compile(loss='categorical_crossentropy',
                  optimizer=keras.optimizers.Adam(learning_rate=0.001),
                  metrics=['acc'])

    return model

In [6]:
save_path="models/cifar100.hdf5"
checkpoint = ModelCheckpoint(save_path, monitor='val_acc', verbose=1, save_best_only=True, mode='min')
es = EarlyStopping(monitor='val_acc', patience=5)
callbacks_list = [checkpoint, es]

In [8]:
model = create_model()

model.fit(
    x_train, y_train,
    batch_size=64,
    callbacks=callbacks_list,
    steps_per_epoch=100, 
    epochs=10,
    validation_split=0.33,
    verbose=1
    )

Epoch 1/10
Epoch 1: val_acc improved from inf to 0.02006, saving model to models\cifar100.hdf5
Epoch 2/10
Epoch 2: val_acc improved from 0.02006 to 0.01794, saving model to models\cifar100.hdf5
Epoch 3/10
Epoch 3: val_acc did not improve from 0.01794
Epoch 4/10
Epoch 4: val_acc did not improve from 0.01794
Epoch 5/10
Epoch 5: val_acc did not improve from 0.01794
Epoch 6/10
Epoch 6: val_acc did not improve from 0.01794
Epoch 7/10
Epoch 7: val_acc did not improve from 0.01794
Epoch 8/10
Epoch 8: val_acc did not improve from 0.01794
Epoch 9/10
Epoch 9: val_acc did not improve from 0.01794
Epoch 10/10
Epoch 10: val_acc did not improve from 0.01794


<keras.callbacks.History at 0x16081231220>

In [9]:
model.evaluate(x_test, y_test)



[3.5173535346984863, 0.18019999563694]

### Training checkpoint model

In [10]:
model = create_model()
model.load_weights('models/cifar100.hdf5')

model.fit(
    x_train, y_train,
    batch_size=64,
    steps_per_epoch=100, 
    epochs=5,
    validation_split=0.33,
    verbose=1
    )

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x1620d105e50>

In [11]:
model.evaluate(x_test, y_test)



[3.7801249027252197, 0.12290000170469284]

In [12]:
model.save_weights("models/new_cifar100.hdf5")