In [1]:
# Import the loader class under an alias: this cell binds the name `config` to the
# loaded configuration below, and without the alias that rebinding would shadow the
# class, so running this cell a second time would fail when calling `config()`.
from tinybig.config import config as config_loader
from tinybig.util import set_random_seed

print('loading configs...')
config_file_name = 'cifar10_configs'
config_obj = config_loader()
# `config` holds the loaded configuration; later cells index it via config['configurations'].
config = config_obj.load_yaml(cache_dir='./configs', config_file=config_file_name + '.yaml')
print(config)

loading configs...
{'configurations': {'device': 'mps', 'random_seed': 1234, 'data_configs': {'data_class': 'tinybig.data.cifar10', 'data_parameters': {'name': 'mnist', 'train_batch_size': 64, 'test_batch_size': 64}}, 'learner_configs': {'learner_class': 'tinybig.learner.backward_learner', 'learner_parameters': {'name': 'error_backward_propagation', 'n_epochs': 11, 'optimizer_configs': {'optimizer_class': 'torch.optim.AdamW', 'optimizer_parameters': {'lr': 0.0017, 'weight_decay': 1e-05}}, 'lr_scheduler_configs': {'lr_scheduler_class': 'torch.optim.lr_scheduler.ExponentialLR', 'lr_scheduler_parameters': {'gamma': 0.65}}, 'loss_configs': {'loss_class': 'torch.nn.CrossEntropyLoss', 'loss_parameters': {'reduction': 'mean'}}}}, 'model_configs': {'model_class': 'tinybig.model.rpn', 'model_parameters': {'device': 'mps', 'name': 'reconciled_polynomial_network', 'depth': 3, 'depth_alloc': [1, 1, 1], 'layer_configs': [{'layer_class': 'tinybig.module.rpn_layer', 'layer_parameters': {'name': 'rpn_

In [2]:
print('setting up environments...')
# Index instead of .pop(): `config` is left intact, so this cell can be executed
# again on the same kernel without raising KeyError on the already-removed keys.
device = config['configurations']['device']
random_seed = config['configurations']['random_seed']
# Seed before any object is instantiated so the run is reproducible.
set_random_seed(random_seed)
print('device: ', device, '; random_seed: ', random_seed)

setting up environments...
device:  mps ; random_seed:  1234


In [3]:
print('instantiating objects from config...')
# Build one object per component, in this fixed order. Each stem selects the
# '<stem>_configs' section and supplies the '<stem>_class' / '<stem>_parameters'
# key names passed to the config loader.
data_obj, model_obj, learner_obj, metric_obj, result_obj = (
    config_obj.instantiation_from_configs(
        config['configurations'][f'{component}_configs'],
        device=device,
        class_name=f'{component}_class',
        parameter_name=f'{component}_parameters',
    )
    for component in ('data', 'model', 'learner', 'metric', 'output')
)

# Sum of element counts over everything returned by model_obj.parameters().
print('parameter num: ', sum(weights.numel() for weights in model_obj.parameters()))


instantiating objects from config...
parameter num:  1706496


In [4]:
# Ask the data object built from the config for its loaders; a later cell reads
# the 'test_loader' entry from the returned value.
print('loading dataset...')
data_loader = data_obj.load()

loading dataset...
Files already downloaded and verified
Files already downloaded and verified


In [5]:
print('training model...')
# Train the model with the configured learner; the returned records are kept so
# they can be saved alongside the test result later on.
training_records = learner_obj.train(
    model=model_obj,
    data_loader=data_loader,
    device=device,
    metric=metric_obj,
)
# Save a model checkpoint under ./ckpt, named after the config file.
model_obj.save_ckpt(
    cache_dir='./ckpt',
    checkpoint_file=f'{config_file_name}_checkpoint',
)

training model...


100%|██████████| 782/782 [00:07<00:00, 110.46it/s, epoch=0/11, loss=1.22, lr=0.0017, metric_score=0.562, time=7.09]


Epoch: 0, Test Loss: 1.4943674499062216, Test Score: 0.4684, Time Cost: 0.7942781448364258


100%|██████████| 782/782 [00:06<00:00, 119.26it/s, epoch=1/11, loss=1.44, lr=0.00111, metric_score=0.438, time=14.4]


Epoch: 1, Test Loss: 1.4050911573847389, Test Score: 0.5037, Time Cost: 0.7902069091796875


100%|██████████| 782/782 [00:06<00:00, 120.40it/s, epoch=2/11, loss=1.68, lr=0.000718, metric_score=0.562, time=21.7] 


Epoch: 2, Test Loss: 1.3539206761463431, Test Score: 0.5221, Time Cost: 0.8560678958892822


100%|██████████| 782/782 [00:06<00:00, 117.75it/s, epoch=3/11, loss=1, lr=0.000467, metric_score=0.5, time=29.2]      


Epoch: 3, Test Loss: 1.3075649457372678, Test Score: 0.5445, Time Cost: 0.7910540103912354


100%|██████████| 782/782 [00:06<00:00, 119.53it/s, epoch=4/11, loss=1.04, lr=0.000303, metric_score=0.688, time=36.6] 


Epoch: 4, Test Loss: 1.3032362020699082, Test Score: 0.5447, Time Cost: 0.7970099449157715


100%|██████████| 782/782 [00:06<00:00, 121.06it/s, epoch=5/11, loss=1.2, lr=0.000197, metric_score=0.562, time=43.8]  


Epoch: 5, Test Loss: 1.3027622308700708, Test Score: 0.5514, Time Cost: 0.7972416877746582


100%|██████████| 782/782 [00:06<00:00, 123.37it/s, epoch=6/11, loss=0.839, lr=0.000128, metric_score=0.562, time=51]  


Epoch: 6, Test Loss: 1.2988257206928957, Test Score: 0.5552, Time Cost: 0.7944300174713135


100%|██████████| 782/782 [00:06<00:00, 123.74it/s, epoch=7/11, loss=0.795, lr=8.33e-5, metric_score=0.625, time=58.1]


Epoch: 7, Test Loss: 1.3129910245822494, Test Score: 0.5597, Time Cost: 0.7942390441894531


100%|██████████| 782/782 [00:06<00:00, 118.96it/s, epoch=8/11, loss=1.1, lr=5.42e-5, metric_score=0.562, time=65.4]  


Epoch: 8, Test Loss: 1.3102326032462392, Test Score: 0.5609, Time Cost: 0.8306679725646973


100%|██████████| 782/782 [00:06<00:00, 122.90it/s, epoch=9/11, loss=0.337, lr=3.52e-5, metric_score=0.875, time=72.6]


Epoch: 9, Test Loss: 1.3199590900141722, Test Score: 0.5605, Time Cost: 0.7934930324554443


100%|██████████| 782/782 [00:06<00:00, 126.97it/s, epoch=10/11, loss=1.05, lr=2.29e-5, metric_score=0.562, time=79.6] 


Epoch: 10, Test Loss: 1.3242781253377343, Test Score: 0.5623, Time Cost: 0.7955598831176758
model checkpoint saving to ./ckpt/cifar10_configs_checkpoint...


In [6]:
print('testing model...')
# Run the learner's test pass using only the 'test_loader' entry of the loaders.
test_result = learner_obj.test(
    model=model_obj,
    test_loader=data_loader['test_loader'],
    device=device,
    metric=metric_obj,
)

testing model...


In [7]:
print('evaluating result...')
# Print the metric's class name next to the score it computes from the test result.
# The three fields are forwarded as keyword arguments of the same names.
print(
    metric_obj.__class__.__name__,
    metric_obj.evaluate(**{field: test_result[field] for field in ('y_true', 'y_pred', 'y_score')}),
)

evaluating result...
accuracy 0.5623


In [8]:
print('saving result...')
# Two artifacts are written to ./result, both named after the config file:
# the test result and the training records.
result_obj.save(
    test_result,
    cache_dir='./result',
    output_file='{}_result'.format(config_file_name),
)
result_obj.save(
    training_records,
    cache_dir='./result',
    output_file='{}_record'.format(config_file_name),
)

saving result...
