In [None]:
# Print the GPU / driver status table reported by nvidia-smi for this runtime.
!nvidia-smi

Thu Nov 18 12:42:17 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 495.44       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   35C    P0    25W / 300W |      0MiB / 16160MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
!pip install torch==1.9.1+cu111 torchvision==0.10.1+cu111 torchaudio==0.9.1 -f https://download.pytorch.org/whl/torch_stable.html

Looking in links: https://download.pytorch.org/whl/torch_stable.html
Collecting torch==1.9.1+cu111
  Downloading https://download.pytorch.org/whl/cu111/torch-1.9.1%2Bcu111-cp37-cp37m-linux_x86_64.whl (2041.3 MB)
[K     |█████████████                   | 834.1 MB 1.2 MB/s eta 0:16:39tcmalloc: large alloc 1147494400 bytes == 0x556eee5de000 @  0x7fb26443a615 0x556eb48534cc 0x556eb493347a 0x556eb48562ed 0x556eb4947e1d 0x556eb48c9e99 0x556eb48c49ee 0x556eb4857bda 0x556eb48c9d00 0x556eb48c49ee 0x556eb4857bda 0x556eb48c6737 0x556eb4948c66 0x556eb48c5daf 0x556eb4948c66 0x556eb48c5daf 0x556eb4948c66 0x556eb48c5daf 0x556eb4858039 0x556eb489b409 0x556eb4856c52 0x556eb48c9c25 0x556eb48c49ee 0x556eb4857bda 0x556eb48c6737 0x556eb48c49ee 0x556eb4857bda 0x556eb48c5915 0x556eb4857afa 0x556eb48c5c0d 0x556eb48c49ee
[K     |████████████████▌               | 1055.7 MB 1.2 MB/s eta 0:13:54tcmalloc: large alloc 1434370048 bytes == 0x556f32c34000 @  0x7fb26443a615 0x556eb48534cc 0x556eb493347a 0x556eb48562e

In [None]:
# Print the version of the CUDA compiler (nvcc) installed in this runtime.
!nvcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2020 NVIDIA Corporation
Built on Mon_Oct_12_20:09:46_PDT_2020
Cuda compilation tools, release 11.1, V11.1.105
Build cuda_11.1.TC455_06.29190527_0


In [None]:
# Dump PyTorch's environment report (PyTorch/CUDA/OS/Python/GPU/cuDNN details) for reproducibility.
!python -m torch.utils.collect_env

Collecting environment information...
PyTorch version: 1.9.1+cu111
Is debug build: False
CUDA used to build PyTorch: 11.1
ROCM used to build PyTorch: N/A

OS: Ubuntu 18.04.5 LTS (x86_64)
GCC version: (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0
Clang version: 6.0.0-1ubuntu2 (tags/RELEASE_600/final)
CMake version: version 3.12.0
Libc version: glibc-2.26

Python version: 3.7 (64-bit runtime)
Python platform: Linux-5.4.104+-x86_64-with-Ubuntu-18.04-bionic
Is CUDA available: True
CUDA runtime version: 11.1.105
GPU models and configuration: GPU 0: A100-SXM4-40GB
Nvidia driver version: 460.32.03
cuDNN version: Probably one of the following:
/usr/lib/x86_64-linux-gnu/libcudnn.so.7.6.5
/usr/lib/x86_64-linux-gnu/libcudnn.so.8.0.5
/usr/lib/x86_64-linux-gnu/libcudnn_adv_infer.so.8.0.5
/usr/lib/x86_64-linux-gnu/libcudnn_adv_train.so.8.0.5
/usr/lib/x86_64-linux-gnu/libcudnn_cnn_infer.so.8.0.5
/usr/lib/x86_64-linux-gnu/libcudnn_cnn_train.so.8.0.5
/usr/lib/x86_64-linux-gnu/libcudnn_ops_infer.so.8.0.5
/usr/lib

## If the user wants to run the code on Colab, then run the next cells

- If the code files are present in the same folder as this notebook, then proceed to the next step.

In [1]:
# Clone the miossl repository into ./miossl so the `miossl.*` imports below can be resolved.
# NOTE(review): re-running this cell when ./miossl already exists makes git refuse to clone again — confirm that is acceptable.
!git clone https://github.com/miossl/miossl.git

Cloning into 'miossl'...
remote: Enumerating objects: 49, done.[K
remote: Counting objects: 100% (49/49), done.[K
remote: Compressing objects: 100% (47/47), done.[K
remote: Total 49 (delta 20), reused 0 (delta 0), pack-reused 0[K
Unpacking objects: 100% (49/49), done.


## Training

In [1]:
import os
from argparse import ArgumentParser
from tabulate import tabulate
from datetime import datetime
import torch
import torch.nn as nn
import torchvision
import miossl.mio as mio
from miossl.dataloader_modules import CIFAR10ArrayDataModule, STL10DataModule, CIFAR100DataModule
from miossl.trainer import Trainer
from miossl.utils import run_command
from miossl.model_utils import ClassificationModel
from miossl.model_transforms import MIOTransform

Global seed set to 16


## Declaring the model

In [2]:
# Build the MIO model with its pre-training hyperparameters
# (one keyword per line so individual settings are easy to tweak).
model = mio.MIOModel(
    optim='lars',
    proj_last_bn=True,
    warmup_epochs=10,
    pretrain_batch_size=128,
    lr=1.5,
    data_dims='32x32',
    max_epochs=1000,
    temperature=0.5,
    lambda_loss=0.0,
    proj_num_layers=2,
    projector_hid_dim=2048,
    projector_out_dim=128,
)

Adjusting learning rate of group 0 to 1.0000e-04.


## Transformations for Augmentation

In [3]:
# Augmentation transform passed to the data module.
#   s: scaling factor for brightness, contrast, hue and saturation
#   l: dimension of the input images, which is also the output image dimension
transforms = MIOTransform(s=0.5, l=32)

## Datamodules



In [4]:
# CIFAR-10 data module.
# When the dataset is already on disk, set download=False and point
# dataset_path at the existing 'cifar-10-python' dataset folder.
dm = CIFAR10ArrayDataModule(
    pretrain_batch_size=128,
    other_batch_size=32,
    download=True,
    dataset_path='/content/cifar-10-python',
    transformations=transforms,
)

In [None]:
# # If the dataset is already downloaded, set download = False and set dataset_path to the location of the dataset folder 'stl10_binary'
# dm = STL10DataModule(pretrain_batch_size = 128, 
#                      other_batch_size = 32, 
#                      download = True,
#                      dataset_path = '/content/stl10_binary', 
#                      transformations = transforms)

In [None]:
# # If the dataset is already downloaded, set download = False and set dataset_path to the location of the dataset folder 'cifar-100-python'
# # if fine_labels = True, then total number of classes will be 100, if fine_labels is set to False, total number of classes is 20
# dm = CIFAR100DataModule(pretrain_batch_size = 128, 
#                         other_batch_size = 32, 
#                         fine_labels = True, 
#                         download = True, 
#                         dataset_path = '/content/cifar-100-python/', 
#                         transformations = transforms)

## Initialize the Trainer

- The Trainer will download the dataset if `download` was set to `True` in the previous step.

In [5]:
# Wire the model and data module into the trainer.
# If training needs to be resumed, additionally pass: resume = True, model_path = ''
trainer = Trainer(
    model=model,
    datamodule=dm,
    train_epochs=250,
    modelsaveinterval=25,
    max_epochs=1000,
)

166503it [00:11, 14070.51it/s]


## Start Training

In [None]:
# Start training; per-epoch progress (batches/s and current loss) is printed while it runs.
trainer.fit()


Epoch 1


 39%|███▉      | 122/312 [01:19<01:57,  1.62batch/s, loss=1.39]

## Declare the model for linear classification

In [None]:
# Build the classifier used for linear evaluation (ResNet-50 name, one output per dataset class,
# '32x32' data dims), then move it to CUDA device 0.
ds_model = ClassificationModel('resnet50', dm.num_classes, '32x32')
ds_model = ds_model.to('cuda:0')

## Linear Evaluation training

In [None]:
 # LINEAR EVALUATION
# Run linear evaluation with the classifier and keep the returned metrics.
# Optional extra argument: net_model_path = '/content/moco_29-10-21-04-29-53_final_net.pt'
lin_eval_metrics = trainer.linear_eval(
    ds_model,
    patience=50,
)

## Print the Linear Evaluation Metrics

In [None]:
# Display the metrics returned by the linear evaluation (validation/test accuracy and loss).
lin_eval_metrics

{'linear_eval_1p0_test_acc': 0.8479567307692307,
 'linear_eval_1p0_test_loss': 0.44369119403358454,
 'linear_eval_1p0_val_acc': 0.8549679487179487,
 'linear_eval_1p0_val_loss': 0.43928597504511857}

In [None]:
# Record the hyperparameters together with the linear-evaluation metrics on the trainer's writer
# (presumably a TensorBoard SummaryWriter — confirm), under the run name 'mio_cifar10'.
trainer.writer.add_hparams(
    {'LR': 0.01},
    lin_eval_metrics,
    run_name='_'.join(['mio', 'cifar10']),
)

In [None]:
# Load the TensorBoard notebook extension so the %tensorboard magic becomes available.
%load_ext tensorboard

In [None]:
# Open TensorBoard inline, reading logs from /content/runs.
%tensorboard --logdir /content/runs