In [None]:
import os
import itertools
from attrs import define
# from code.train import train
from codes.train import train
from codes.optimizers import Optimizer
# from code.problems import Problem
from codes import Loss, Scheduler
from codes.datasets import Dataset
from codes.models import Model

# %matplotlib widget
%load_ext autoreload
%autoreload 2

In [2]:
def zip_dict(**kwargs):
    """Yield one dict per position, pairing each keyword with its i-th value.

    Every keyword argument must be an iterable. The iterables are walked in
    lockstep with ``zip``, so iteration stops at the shortest one and nothing
    is yielded when no keyword arguments are given.

    Example: ``zip_dict(a=[1, 2], b=[3, 4])`` yields ``{'a': 1, 'b': 3}``
    followed by ``{'a': 2, 'b': 4}``.
    """
    names = tuple(kwargs)
    for row in zip(*kwargs.values()):
        yield {name: value for name, value in zip(names, row)}


def product_dict(**kwargs):
    """Yield one dict for every combination of the given keyword values.

    Every keyword argument must be an iterable of candidate values. The
    Cartesian product is taken in ``itertools.product`` order (the last
    keyword varies fastest). With no keyword arguments a single empty dict
    is yielded; if any iterable is empty, nothing is yielded.

    Example: ``product_dict(a=[1, 2], b=[3])`` yields ``{'a': 1, 'b': 3}``
    followed by ``{'a': 2, 'b': 3}``.
    """
    names = tuple(kwargs)
    yield from (
        dict(zip(names, combo))
        for combo in itertools.product(*kwargs.values())
    )

In [3]:
# Process-wide environment for this session, set before any training call.
# NOTE(review): "AMD" does not appear among the threading-layer names listed
# in the oneMKL documentation (INTEL, GNU, TBB, SEQUENTIAL) -- confirm this
# value is intended.
os.environ.update({
    "MKL_THREADING_LAYER": "AMD",
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "TORCH_DEVICE": "cuda",  # presumably read by the project code -- TODO confirm
    "CUDA_VISIBLE_DEVICES": "5",
})
# CPU variant: same MKL_THREADING_LAYER and CUDA_DEVICE_ORDER values, with
# os.environ["TORCH_DEVICE"] = "cpu" and CUDA_VISIBLE_DEVICES left unset.

In [4]:
# MLflow-related switches, passed to the training code through the
# environment. The experiment is named after the current working directory.
os.environ.update({
    'MLFLOW_VERBOSE': 'True',
    'MLFLOW_CHECK_EXIST': 'True',  # alternative value: 'False'
    'MLFLOW_EXPERIMENT_NAME': os.path.basename(os.getcwd()),
})

# Comparison with tuned Adam

In [5]:
@define
class BaseConfig:
    """Settings for one training run in the tuned-Adam comparison.

    attrs generates the initializer from the annotated fields, so any field
    can be overridden by keyword (``BaseConfig(seed=0, eps=1e-3)``) or
    reassigned on the instance afterwards. Instances are passed to
    ``train``; how each field is interpreted is decided there.
    """

    # Run length and seeding.
    nepochs: int = 200
    seed: int = None

    # Task definition.
    loss: Loss = Loss.CrossEntropyLoss
    model: Model = Model.ResNet18
    dataset: Dataset = Dataset.CIFAR10

    # Learning-rate schedule; both default to None.
    scheduler: Scheduler = None
    decay: float = None

    # Optimizer selection and its common hyper-parameters.
    optimizer: Optimizer = None
    weight_decay: float = 0
    batchsize: int = 128
    lr: float = 3e-4  # alternative: 1e-3

    eps: float = 1e-4

    # presumably the Adam moment coefficients -- TODO confirm in codes.train
    beta1_: float = 0.9
    beta2_: float = 0.999

    # presumably a KATE-specific parameter -- TODO confirm in codes.train
    eta_: float = None

args_grid = dict(
    seed=[0],
    eta_=[1e-3],
    eps=[1e-3],
    decay=[0.2],
)

os.environ['MLFLOW_RUN_TAGS'] = str(dict(about=f'cutout aug'))

for d in product_dict(**args_grid):

    AdaGrad
    config = BaseConfig(**d)
    config.optimizer = None
    config.beta1_ = None
    config.beta2_ = None
    config.eta_ = None
    os.environ['MLFLOW_RUN_NAME'] = 'AdaGrad'
    %time train(config)

    config = BaseConfig(**d)
    config.optimizer = Optimizer.ADAM
    config.eta_ = None
    config.eps = 1e-4
    os.environ['MLFLOW_RUN_NAME'] = str(config.optimizer)
    %time train(config)

    config = BaseConfig(**d)
    config.optimizer = Optimizer.KATE
    config.beta1_ = None
    config.beta2_ = None
    os.environ['MLFLOW_RUN_NAME'] = str(config.optimizer)
    %time train(config)

Step 78200: train-loss: 0.06561 train-accuracy: 97.79600 test-loss: 0.32186 test-accuracy: 92.09000
CPU times: user 1h 46min 8s, sys: 35min 26s, total: 2h 21min 35s
Wall time: 1h 35min 38s


# CIFAR10

In [None]:
@define
class BaseConfig:
    """Settings for one training run in the CIFAR10 section.

    attrs generates the initializer from the annotated fields, so any field
    can be overridden by keyword or reassigned on the instance afterwards.
    Instances are passed to ``train``; how each field is interpreted is
    decided there.
    """

    # Run length and seeding.
    nepochs: int = 50
    seed: int = None

    # Task definition.
    loss: Loss = Loss.CrossEntropyLoss
    model: Model = Model.ResNet18
    dataset: Dataset = Dataset.CIFAR10

    # Optimizer selection and its common hyper-parameters.
    optimizer: Optimizer = None
    batchsize: int = 500
    lr: float = 1e-5

    eps: float = 1e-4

    # presumably the Adam moment coefficients -- TODO confirm in codes.train
    beta1_: float = 0.9
    beta2_: float = 0.999

    # presumably a KATE-specific parameter -- TODO confirm in codes.train
    eta_: float = None
    
args_grid = dict(
    seed=[0],
    eta_=[1e-3, 1e-1, 0],
    eps=[1e-4, 1e-6, 1e-8, 1e-10],
)

os.environ['MLFLOW_RUN_TAGS'] = str(dict(about=f'full dataset'))

for d in product_dict(**args_grid):

    config = BaseConfig(**d)
    config.optimizer = None
    config.beta1_ = None
    config.beta2_ = None
    config.eta_ = None
    os.environ['MLFLOW_RUN_NAME'] = 'AdaGrad'
    %time train(config)

    config = BaseConfig(**d)
    config.optimizer = Optimizer.ADAM
    config.eta_ = None
    os.environ['MLFLOW_RUN_NAME'] = str(config.optimizer)
    %time train(config)

    config = BaseConfig(**d)
    config.optimizer = Optimizer.KATE
    config.beta1_ = None
    config.beta2_ = None
    os.environ['MLFLOW_RUN_NAME'] = str(config.optimizer)
    %time train(config)

# Emotion

In [None]:
@define
class BaseConfig:
    """Settings for one training run in the Emotion section.

    attrs generates the initializer from the annotated fields, so any field
    can be overridden by keyword or reassigned on the instance afterwards.
    Instances are passed to ``train``; how each field is interpreted is
    decided there.
    """

    # Run length and seeding.
    nepochs: int = 20
    seed: int = None

    # Task definition.
    loss: Loss = Loss.CrossEntropyLoss
    model: Model = Model.BERT
    dataset: Dataset = Dataset.Emotion

    # Optimizer selection and its common hyper-parameters.
    optimizer: Optimizer = None
    batchsize: int = 160
    lr: float = 1e-5

    eps: float = 1e-4

    # presumably the Adam moment coefficients -- TODO confirm in codes.train
    beta1_: float = 0.9
    beta2_: float = 0.999

    # presumably a KATE-specific parameter -- TODO confirm in codes.train
    eta_: float = None
    
    
args_grid = dict(
    seed=[0],
    eta_=[1e-3, 1e-1, 0],
    eps=[1e-4, 1e-6, 1e-8, 1e-10], #for adam and adagrad
    # eps=[1e-3, 1e-4, 1e-5, 1e-6], #for kate
)

os.environ['MLFLOW_RUN_TAGS'] = str(dict(about=f'full dataset'))

for d in product_dict(**args_grid):

#     config = BaseConfig(**d)
#     config.optimizer = None
#     config.beta1_ = None
#     config.beta2_ = None
#     config.eta_ = None
#     os.environ['MLFLOW_RUN_NAME'] = 'AdaGrad'
#     %time train(config)

    config = BaseConfig(**d)
    config.optimizer = Optimizer.ADAM
    config.eta_ = None
    os.environ['MLFLOW_RUN_NAME'] = str(config.optimizer)
    %time train(config)

    # config = BaseConfig(**d)
    # config.optimizer = Optimizer.KATE
    # # config.eps = 1e-4
    # config.beta1_ = None
    # config.beta2_ = None
    # os.environ['MLFLOW_RUN_NAME'] = str(config.optimizer)
    # %time train(config)