# Mosaic ML CIFAR 100 performance

In [None]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import random_split
import matplotlib.pyplot as plt
import seaborn as sns
from torch.optim.lr_scheduler import StepLR
from tqdm import tqdm
from torchsummary import summary
from torchvision import datasets, transforms
# from torchcontrib.optim import SWA

import composer
from composer import DataloaderSpec
from composer import Trainer
from composer.algorithms import LabelSmoothing, CutOut, MixUp, BlurPool, scale_schedule, SWA, SAM , SqueezeExcite, ScaleSchedule 
from composer.trainer.devices.device_gpu import DeviceGPU

import shutil
# from pytorch_lightning.callbacks.early_stopping import EarlyStopping
# from pytorch_lightning.callbacks import ModelCheckpoint

In [None]:
# !pip install pytorch-lightning
# !pip install mosaicml
# !pip uninstall pillow
# !pip install pillow-simd
# !pip install torchcontrib

In [None]:
# !nvidia-smi

In [None]:
# Mount Google Drive inside the Colab runtime; the project paths configured
# in the next cell live under this mount point.
from google.colab import drive 
drive.mount('/content/drive/')

# !ls "/content/drive/MyDrive/Colab Notebooks/Masters"

In [None]:
# Root of the project on the mounted drive. Swap in the commented line to run
# against the local external mount instead.
# base_dir = '/media/ext_mount/Project/BuildingCNN'
base_dir = '/content/drive/MyDrive/Colab Notebooks/Masters'

# Everything dataset-related sits under one GC10-DET folder.
_gc10_root = f"{base_dir}/Dataset/GC10-DET"
raw_dataset_directory = f"{_gc10_root}/images"
dataset_directory = f"{_gc10_root}/dataset"

# Per-split image folders consumed by ImageFolder further down.
train_set_directory = f"{dataset_directory}/train"
test_set_directory = f"{dataset_directory}/test"

# Checkpoints and the final model are written here.
model_directory = f"{base_dir}/models/mosaic_cnn_600K"

In [None]:
# Make sure the model output directory exists (no-op when it is already there).
os.makedirs(model_directory, exist_ok=True)


#### PYTORCH DATASET LOADER



In [None]:
# Global run configuration.
# use_cuda selects which DataLoader keyword set is built below.
use_cuda = True
# Batch size used by the PyTorch loaders (CUDA branch) and by the Composer trainer.
batch_size = 32

# NOTE(review): image_size is not referenced by any transform in the visible
# cells — confirm the images on disk are already this size.
image_size = (32, 32)
# Maximum number of training epochs passed to the Composer trainer.
epochs = 100

In [None]:
# Pre-processing / augmentation pipeline shared by every dataset split below.
# It is bound to `transforms_obj` rather than `transforms` so the torchvision
# `transforms` module is not shadowed (which also broke re-running this cell)
# and so the name matches the one the ImageFolder calls actually use.
# NOTE(review): Normalize is given a single mean/std value; presumably it is
# broadcast over all image channels — confirm against the dataset statistics.
# NOTE(review): RandomRotation is also applied to the validation and test
# splits because they share this pipeline — confirm that is intended.
transforms_obj = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.2860,), (0.3530,)),
    transforms.RandomRotation(degrees=10),
    # Optional augmentations for the trial cells at the end of the notebook:
    # transforms.RandomHorizontalFlip(p=0.5),
    # transforms.ColorJitter(brightness=(0.3, 0.6), contrast=(0.3, 0.6)),
    # transforms.RandomAffine(degrees=(30, 70), translate=(0.1, 0.3)),
    # transforms.RandomPerspective(distortion_scale=0.5, p=0.5),
])

# train = datasets.CIFAR10('./data10', train=True, download=True, transform=transforms_obj)
# test = datasets.CIFAR10('./data10', train=False, download=True, transform=transforms_obj)

# All labelled training images; split 80/20 into train and validation below.
train_ds_all = datasets.ImageFolder(train_set_directory, transform=transforms_obj)

train_set_count = int(0.8 * len(train_ds_all))
# Despite its name, this is the size of the validation split.
test_set_count = len(train_ds_all) - train_set_count

# Fixed seed so the train/validation membership is identical across runs.
train_ds, val_ds = random_split(
    train_ds_all,
    [train_set_count, test_set_count],
    generator=torch.Generator().manual_seed(555),
)

# Held-out test images live in their own folder.
test_ds = datasets.ImageFolder(test_set_directory, transform=transforms_obj)

In [None]:
# Keyword arguments shared by all three PyTorch loaders. The CUDA branch adds
# worker processes and pinned memory; the CPU branch uses a fixed batch of 64.
if use_cuda:
    dataloader_args = {
        "shuffle": True,
        "batch_size": batch_size,
        "num_workers": 2,
        "pin_memory": True,
    }
else:
    dataloader_args = {"shuffle": True, "batch_size": 64}

# One loader per split, created in train / val / test order.
train_loader, val_loader, test_loader = (
    torch.utils.data.DataLoader(split, **dataloader_args)
    for split in (train_ds, val_ds, test_ds)
)

#### MOSAIC ML DATASET LOADER

In [None]:
# Composer dataloader specifications, one per split. No split drops its last
# partial batch; train and validation are shuffled, test is not.
train_dataloader_spec = DataloaderSpec(dataset=train_ds, drop_last=False, shuffle=True)

val_dataloader_spec = DataloaderSpec(dataset=val_ds, drop_last=False, shuffle=True)

test_dataloader_spec = DataloaderSpec(dataset=test_ds, drop_last=False, shuffle=False)

#### CIFAR 10 - 600K parameters

##### CIFAR 10 600K DEPTHWISE

In [None]:
# Tag for this architecture variant; it is embedded in the saved model's file name.
model_type = '600k_depthwise'

In [None]:
def _conv_bn(in_channels, out_channels, dilation=1):
    """Return ``[3x3 Conv2d, ReLU, BatchNorm2d]`` as a list of layers.

    Padding is set equal to the dilation, so the 3x3 convolution leaves the
    spatial size unchanged.
    """
    return [
        nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=(3, 3),
            padding=dilation,
            dilation=dilation,
        ),
        nn.ReLU(),
        nn.BatchNorm2d(out_channels),
    ]


def _separable_conv_bn(in_channels, out_channels):
    """Return ``[depthwise 3x3 Conv2d, pointwise 1x1 Conv2d, ReLU, BatchNorm2d]``.

    The depthwise convolution uses one group per input channel and padding 1,
    so the spatial size is preserved; the 1x1 convolution changes the channel
    count.
    """
    return [
        nn.Conv2d(
            in_channels,
            in_channels,
            kernel_size=(3, 3),
            padding=1,
            groups=in_channels,
        ),
        nn.Conv2d(in_channels, out_channels, kernel_size=(1, 1)),
        nn.ReLU(),
        nn.BatchNorm2d(out_channels),
    ]


class SimpleModel(composer.models.MosaicClassifier):
    """Depthwise-separable CNN classifier wrapped as a Composer classifier.

    The network is a single flat ``nn.Sequential``; the layer order is the
    same as the original hand-written list, so ``state_dict`` keys are
    unchanged. Spatial-size notes below assume 32x32 inputs (the size used
    for the model summary) — three 2x2 max-pools reduce that to 4x4 before
    the final 4x4 average pool.

    Args:
        num_classes: Number of output classes; sets the width of the final
            linear layer.
    """

    def __init__(self, num_classes: int):
        layers = [
            # ---- Stage 1: full resolution ----
            *_conv_bn(3, 16),
            *_conv_bn(16, 32),
            nn.Dropout(0.03),
            *_separable_conv_bn(32, 64),
            *_conv_bn(64, 64),
            nn.Dropout(0.03),
            nn.MaxPool2d(2, 2),  # halves height and width

            # ---- Stage 2: 1/2 resolution ----
            *_separable_conv_bn(64, 96),
            *_conv_bn(96, 96, dilation=2),  # dilated conv, size preserved
            nn.Dropout(0.07),
            *_separable_conv_bn(96, 128),
            *_separable_conv_bn(128, 192),
            nn.Dropout(0.05),
            nn.MaxPool2d(2, 2),  # halves height and width

            # ---- Stage 3: 1/4 resolution ----
            *_separable_conv_bn(192, 260),
            *_separable_conv_bn(260, 320),
            nn.Dropout(0.02),
            nn.MaxPool2d(2, 2),  # halves height and width

            # ---- Stage 4: 1/8 resolution ----
            *_separable_conv_bn(320, 370),
            *_separable_conv_bn(370, 415),

            # ---- Classifier head ----
            nn.AvgPool2d(kernel_size=4),
            nn.Flatten(),
            # Output width follows num_classes instead of a hard-coded 10.
            nn.Linear(415, num_classes),
        ]
        module = nn.Sequential(*layers)
        self.num_classes = num_classes
        super().__init__(module=module)

In [None]:
# Build the classifier and move it to the GPU.
my_model = SimpleModel(num_classes=10).to('cuda')
# torchsummary expects `input_size` WITHOUT the batch dimension and feeds a
# plain tensor through the model. The Composer wrapper's forward takes an
# (inputs, targets) batch instead (see how it is called in the accuracy
# helpers), so summarize the wrapped network.
# NOTE(review): assumes MosaicClassifier exposes the wrapped network as
# `.module` — confirm against the installed composer version.
summary(my_model.module, input_size=(3, 32, 32))

### Callbacks

In [None]:
class EarlyStopping(composer.Callback):
    """Track validation accuracy after each evaluation and checkpoint the best model.

    After every evaluation pass the callback recomputes accuracy over
    ``state.eval_dataloader``. When the accuracy reaches at least
    ``best + delta`` the model's ``state_dict`` is saved to ``path`` and the
    patience counter is reset; otherwise the counter is incremented and, once
    it reaches ``patience``, ``early_stop`` is set to ``True``.

    NOTE(review): nothing in this notebook reads ``early_stop``, so setting it
    does not by itself halt training — wire it into the trainer if stopping is
    required.
    """

    def __init__(self, patience=7, verbose=False, delta=0, path='checkpoint.pt', trace_func=print):
        """
        Args:
            patience (int): Number of consecutive evaluations without
                            improvement before ``early_stop`` is set.
                            Default: 7
            verbose (bool): If True, prints a message for each validation accuracy improvement.
                            Default: False
            delta (float): Minimum increase in validation accuracy to qualify as an improvement.
                            Default: 0
            path (str): Path for the checkpoint to be saved to.
                            Default: 'checkpoint.pt'
            trace_func (function): trace print function.
                            Default: print
        """
        super().__init__()
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_acc_max = 0
        self.delta = delta
        self.path = path
        self.trace_func = trace_func

    def eval_end(self, state: composer.State, logger: composer.Logger):
        """Recompute validation accuracy and update checkpoint / patience state."""
        model, eval_dataloader = state.model, state.eval_dataloader

        # Evaluate on whatever device the model currently lives on rather than
        # assuming CUDA.
        val_acc = self.get_val_acc(eval_dataloader, model, self._model_device(model))

        score = val_acc

        if self.best_score is None:
            # First evaluation: always becomes the baseline.
            self.best_score = score
            self.save_checkpoint(val_acc, model)
        elif score < self.best_score + self.delta:
            self.counter += 1
            self.trace_func(f'Validation accuracy did not improve from: {self.val_acc_max}. EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_acc, model)
            self.counter = 0

    @staticmethod
    def _model_device(model):
        """Return the device of the model's first parameter (CPU if it has none)."""
        try:
            return next(model.parameters()).device
        except StopIteration:
            return torch.device('cpu')

    def get_val_acc(self, test_loader, model: nn.Module, device):
        """Return the accuracy (in percent) of ``model`` over ``test_loader``.

        The model is called with the whole ``(data, labels)`` batch. It is put
        into eval mode for the pass and switched to train mode afterwards, as
        in the original implementation. An empty loader yields ``0.0`` instead
        of raising ``ZeroDivisionError``.
        """
        num_correct = 0
        total = 0
        model.eval()

        with torch.no_grad():
            for data, labels in test_loader:
                data = data.to(device=device)
                labels = labels.to(device=device)

                targets = model((data, labels))
                predictions = torch.argmax(targets, dim=1)

                num_correct += (predictions == labels).sum()
                total += labels.size(0)

        model.train()

        if total == 0:
            return 0.0
        return float(num_correct) / float(total) * 100

    def save_checkpoint(self, val_acc, model):
        '''Saves model when validation acc increases.'''
        if self.verbose:
            self.trace_func(f'Validation acc increased ({self.val_acc_max:.6f} --> {val_acc:.6f}).  Saving model ...')
        torch.save(model.state_dict(), self.path)
        self.val_acc_max = val_acc

# early_stop_callback = EarlyStopping(
#     monitor="val_loss", mode="min",
#     min_delta=0.00, patience=30, verbose=False)

# checkpoint_callback = ModelCheckpoint(
#     monitor='val_loss', mode='min',
#     dirpath=model_directory,
#     filename='pytorch-{}-{}'.format(
#         model_type, '{epoch:02d}-val_loss{val/loss:.2f}'))

# Destination of the best-model checkpoint written by the callback.
filepath = f'{model_directory}/{model_type}_final_model.pt'

# Patience of 30 evaluations, no minimum delta, progress messages enabled.
early_stop_callback = EarlyStopping(path=filepath, patience=30, delta=0.00, verbose=True)

# Callbacks handed to the Composer trainer.
callbacks = [early_stop_callback]  # checkpoint_callback

#### Algorithm Trials

In [None]:
# !nvidia-smi
# torch.cuda.empty_cache()
# torch.cuda.set_per_process_memory_fraction(1., device=None)
# torch.cuda.device_count()

In [None]:

# Optionally wipe previous checkpoints before a new run. Only an explicit "y"
# (any case, surrounding whitespace ignored) deletes anything.
# `isdir` is used so a plain file at this path is never handed to rmtree.
if os.path.isdir(model_directory):
    input_val = input(f'Delete directory: {model_directory}? [y/N] ')
    if input_val.strip().lower() == 'y':
        shutil.rmtree(model_directory)
        # Recreate the (now empty) directory so later cells can write to it.
        os.makedirs(model_directory, exist_ok=True)
        print('Directory Truncated')
    else:
        print('Skipping Deletion')

In [None]:
# Composer algorithms enabled for this run; commented-out entries are kept as
# optional alternatives.
training_algorithms = [
    # CutOut(n_holes=2, length=4),
    # LabelSmoothing(alpha=0.2),
    # MixUp(0.5),
    BlurPool(replace_convs=True, replace_maxpools=True, blur_first=True),
    # ScaleSchedule(ratio=0.4),
    SWA(swa_start=0.8),
    # SAM(rho=0.05, epsilon=1e-12),
    SqueezeExcite(latent_channels=0.5, min_channels=128),
]

# Train on the train split, evaluate on the validation split, and checkpoint
# into the model directory every epoch.
trainer = Trainer(
    model=my_model,
    train_dataloader_spec=train_dataloader_spec,
    eval_dataloader_spec=val_dataloader_spec,
    max_epochs=epochs,
    train_batch_size=batch_size,
    eval_batch_size=batch_size,
    algorithms=training_algorithms,
    checkpoint_interval_unit="ep",
    checkpoint_folder=model_directory,
    checkpoint_interval=1,
    num_workers=2,
    callbacks=callbacks,
)
# The GPU device is assigned after construction, then training starts.
trainer.device = DeviceGPU(prefetch_in_cuda_stream=True)
trainer.fit()

In [None]:
# Intended to evaluate the trained model on the test split.
# NOTE(review): confirm that `Trainer.eval` in the installed composer version
# accepts a DataloaderSpec here and does not evaluate on the validation
# dataloader given at construction.
trainer.eval(test_dataloader_spec)

### Test Model

In [None]:
def check_accuracy(test_loader, model: nn.Module, device):
    """Print and return the accuracy (in percent) of ``model`` over ``test_loader``.

    Args:
        test_loader: Iterable yielding ``(data, labels)`` tensor batches.
        model: Model whose forward accepts the whole ``(data, labels)`` batch
            and returns per-class scores of shape ``(batch, classes)``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Accuracy as a float percentage, or ``0.0`` when the loader yields no
        samples (the original raised ``ZeroDivisionError`` in that case).
    """
    # Remember the current mode so it can be restored afterwards instead of
    # unconditionally forcing train mode.
    was_training = model.training
    model.eval()

    num_correct = 0
    total = 0
    try:
        with torch.no_grad():
            for data, labels in test_loader:
                data = data.to(device=device)
                labels = labels.to(device=device)

                targets = model((data, labels))
                predictions = torch.argmax(targets, dim=1)

                num_correct += (predictions == labels).sum()
                total += labels.size(0)
    finally:
        model.train(was_training)

    if total == 0:
        print("Test Accuracy of the model: n/a (no samples in loader)")
        return 0.0

    accuracy = float(num_correct) / float(total) * 100
    print(f"Test Accuracy of the model: {accuracy:.2f}")
    return accuracy
    
# def test_model(model, test_loader, device):
#     model.eval()
#     test_loss = 0
#     correct = 0
#     with torch.no_grad():
#         for data, target in test_loader:
#             data, target = data.to(device), target.to(device)
#             output = model(data)
#             test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
#             pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
#             correct += pred.eq(target.view_as(pred)).sum().item()

#     test_loss /= len(test_loader.dataset)
#     # test_losses.append(test_loss)

#     print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
#         test_loss, correct, len(test_loader.dataset),
#         100. * correct / len(test_loader.dataset)))

#     # test_acc.append(100. * correct / len(test_loader.dataset))

# def check_accuracy(loader, model, device):
#     num_correct = 0
#     num_samples = 0
#     model.eval()
    
#     with torch.no_grad():
#         for x, y in loader:
#             x = x.to(device=device)
#             y = y.to(device=device)
            
#             scores = model(x)
#             _, predictions = scores.max(1)
#             num_correct += (predictions == y).sum()
#             num_samples += predictions.size(0)
        
#         print(f'Got {num_correct} / {num_samples} with accuracy {float(num_correct)/float(num_samples)*100:.2f}') 
    
#     model.train()

In [None]:
# Use the first GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Report accuracy over the validation loader.
check_accuracy(val_loader, my_model, device)

### Save Model

In [None]:
# Target file for the final model weights; make sure its folder exists.
filepath = f'{model_directory}/{model_type}_final_model.pt'
os.makedirs(model_directory, exist_ok=True)
# filepath

In [None]:
# torch.save(my_model.state_dict(), filepath)

### Load Model

In [None]:
# filepath = f'{model_directory}/ep78.pt'

In [None]:
# my_model.load_state_dict(torch.load(filepath))

In [None]:
# Raises ZeroDivisionError. Presumably a deliberate guard so that "Run all"
# stops here and does not execute the trial cells below — confirm before removing.
1/0

#### Mixup

In [None]:
# MixUp trial. The original cell referenced `m` and `eval_dataloader_spec`,
# neither of which is defined anywhere in this notebook; build a fresh model
# here and evaluate on the validation spec defined earlier.
m = SimpleModel(num_classes=10).to('cuda')
trainer = Trainer(
    model=m,
    train_dataloader_spec=train_dataloader_spec,
    eval_dataloader_spec=val_dataloader_spec,
    max_epochs=70,
    train_batch_size=128,
    eval_batch_size=128,
    checkpoint_interval=1,
    # device =  'DeviceGPU',
    # device ='cuda',
    algorithms=[
        MixUp(0.5)
    ],
)
trainer.device = DeviceGPU(prefetch_in_cuda_stream=True)
trainer.fit()

#### Random Rotation

In [None]:
# Random-rotation trial. This cell does not select the augmentation itself;
# presumably the transform pipeline is edited and the datasets rebuilt before
# running it. The original referenced the undefined names `m` and
# `eval_dataloader_spec`; use a fresh model and the validation spec instead.
m = SimpleModel(num_classes=10).to('cuda')
trainer = Trainer(
    model=m,
    train_dataloader_spec=train_dataloader_spec,
    eval_dataloader_spec=val_dataloader_spec,
    max_epochs=70,
    train_batch_size=128,
    eval_batch_size=128,
    checkpoint_interval=1,
)
trainer.device = DeviceGPU(prefetch_in_cuda_stream=True)
trainer.fit()

#### Horizontal Flip

In [None]:
# Horizontal-flip trial. This cell does not select the augmentation itself;
# presumably the transform pipeline is edited and the datasets rebuilt before
# running it. The original referenced the undefined names `m` and
# `eval_dataloader_spec`; use a fresh model and the validation spec instead.
m = SimpleModel(num_classes=10).to('cuda')
trainer = Trainer(
    model=m,
    train_dataloader_spec=train_dataloader_spec,
    eval_dataloader_spec=val_dataloader_spec,
    max_epochs=70,
    train_batch_size=128,
    eval_batch_size=128,
    checkpoint_interval=1,
)
trainer.device = DeviceGPU(prefetch_in_cuda_stream=True)
trainer.fit()

#### Color Jitter

In [None]:
# Color-jitter trial. This cell does not select the augmentation itself;
# presumably the transform pipeline is edited and the datasets rebuilt before
# running it. The original referenced the undefined names `m` and
# `eval_dataloader_spec`; use a fresh model and the validation spec instead.
m = SimpleModel(num_classes=10).to('cuda')
trainer = Trainer(
    model=m,
    train_dataloader_spec=train_dataloader_spec,
    eval_dataloader_spec=val_dataloader_spec,
    max_epochs=70,
    train_batch_size=128,
    eval_batch_size=128,
    checkpoint_interval=1,
)
trainer.device = DeviceGPU(prefetch_in_cuda_stream=True)
trainer.fit()

#### Random Affine

In [None]:
# Random-affine trial. This cell does not select the augmentation itself;
# presumably the transform pipeline is edited and the datasets rebuilt before
# running it. The original referenced the undefined names `m` and
# `eval_dataloader_spec`; use a fresh model and the validation spec instead.
m = SimpleModel(num_classes=10).to('cuda')
trainer = Trainer(
    model=m,
    train_dataloader_spec=train_dataloader_spec,
    eval_dataloader_spec=val_dataloader_spec,
    max_epochs=70,
    train_batch_size=128,
    eval_batch_size=128,
    checkpoint_interval=1,
)
trainer.device = DeviceGPU(prefetch_in_cuda_stream=True)
trainer.fit()

#### Random Perspective

In [None]:
# Random-perspective trial. This cell does not select the augmentation itself;
# presumably the transform pipeline is edited and the datasets rebuilt before
# running it. The original referenced the undefined names `m` and
# `eval_dataloader_spec`; use a fresh model and the validation spec instead.
m = SimpleModel(num_classes=10).to('cuda')
trainer = Trainer(
    model=m,
    train_dataloader_spec=train_dataloader_spec,
    eval_dataloader_spec=val_dataloader_spec,
    max_epochs=70,
    train_batch_size=128,
    eval_batch_size=128,
    checkpoint_interval=1,
)
trainer.device = DeviceGPU(prefetch_in_cuda_stream=True)
trainer.fit()