In [15]:
# Enable IPython's autoreload extension in mode 2, so edits to imported modules
# are picked up without restarting the kernel.
# Re-running this cell in a live kernel only prints an "already loaded" notice.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [16]:
# add the parent directory to the path
import sys
import os
import numpy as np
import torch
import random

import logging

sys.path.append('..')
from mldec.pipelines.seq2seq import build_model, train_model
from mldec.pipelines.dataloader import Sampler
from mldec.pipelines.logger import get_logger
from mldec.pipelines.utils import gpu_init_pytorch

In [17]:
# Folder names for logs, saved models and results.
log_folder, model_folder, result_folder = 'logs', 'models', './out/'

# A notebook has no real __file__, so one is faked from the current working directory.
# Taking dirname() of it makes 'data/' resolve to <parent of cwd>/data, i.e. a sibling
# of the directory the notebook runs in.
__file__ = os.path.abspath('')
data_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data/'))
print(data_path)
def load_data(config, logger, add_sos_eos=False, preload_device=None):
	'''
		Build the train and test samplers for the dataset selected by `config`.

		Both samplers read from the same location,
		<data_path>/<config.experiment_name>/<config.dataset>; they differ only in
		`mode` and batch size. No separate validation sampler is created.

		This lives in the main run file to account for how data will be passed
		around during tuning runs.

		Note that the test loader is "unweighted", and performance will need to be
		manually weighted by the underlying distribution.

		Args:
			config : object exposing `mode`, `experiment_name`, `dataset`,
				`batch_size` and `n`
			logger (logger) : logger object for logging
			add_sos_eos : forwarded unchanged to both Samplers
			preload_device : forwarded unchanged to both Samplers

		Returns:
			(train_loader, test_loader) tuple of Sampler objects.

		Raises:
			Exception : if config.mode is neither 'train' nor 'tune'.
	'''
	# Reject unsupported modes up front.
	if config.mode not in ('train', 'tune'):
		logger.critical('Invalid Mode Specified')
		raise Exception('{} is not a valid mode'.format(config.mode))

	logger.debug('Loading Training Data...')

	# Train and test examples are read from the same dataset directory.
	dataset_dir = os.path.join(data_path, config.experiment_name, config.dataset)
	sampler_kwargs = dict(add_sos_eos=add_sos_eos, preload_device=preload_device)

	train_loader = Sampler(dataset_dir, config.batch_size, mode='train', **sampler_kwargs)
	# test dataset is designed to contain all examples (batch size 2 ** config.n)
	test_loader = Sampler(dataset_dir, 2 ** config.n, mode='test', **sampler_kwargs)

	summary = 'Training and Validation Data Loaded:\nTrain Size: {}\nTest Size: {}. Output shape: {}'.format(
		train_loader.n_data, test_loader.n_data, train_loader.output_shape)
	logger.info(summary)

	return train_loader, test_loader


c:\Users\peter\Desktop\projects\quantum_error_correction\decoding-nonpauli-errors\data


In [18]:
# Initialize the notebook-wide logger at DEBUG level.
# NOTE(review): every log record in the captured outputs of later cells appears
# twice; re-running this cell may be attaching an additional handler each time —
# confirm against get_logger.
run_name = "test"
# NOTE(review): log_file is not used in this cell — get_logger is called with
# log_file_path=None, so presumably nothing is written to "test.log"; confirm.
log_file = "test.log"
logger = get_logger(run_name, log_file_path=None, logging_level=logging.DEBUG)

In [70]:
class DummyArgs:
    """Dummy class to hold arguments"""
    def __init__(self):
        self.mode = 'train'
        self.seed = 1235
        self.gpu = 0 # TODO
        self.checkpoint = None # TODO
        self.experiment_name = 'repetition_code_v1'

        self.n = 8
        # self.dataset = f'n{self.n}_N10000_p101_p2007'
        self.dataset = f'grok_n8_N102_p101_p2007'
        self.batch_size = 8

        self.heads = 4
        self.depth = 4
        self.d_model = 16
        self.d_ffn = 8
        # no dropout for noiseless data
        self.dropout = 0.0

        self.epochs = 1000
        self.opt = 'adam'
        self.lr = 5e-5
        self.max_grad_norm = 0


# Alternative logging setup that works better in a Jupyter notebook:
# logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)

# Hard-coded configuration used in place of command-line parsing:
# parser = build_parser()
# config = parser.parse_args()
config = DummyArgs()
is_train, is_tune = (config.mode == 'train'), (config.mode == 'tune')

# Seed every random number source from the same configured value.
torch.manual_seed(config.seed)
random.seed(config.seed)
np.random.seed(config.seed)

# Select the compute device: CPU when no GPU index is configured.
device = torch.device('cpu') if config.gpu is None else gpu_init_pytorch(config.gpu)
print("device", device)

device cuda:0


In [71]:
# Load the data: build the train and test samplers with load_data (defined in an
# earlier cell). add_sos_eos and preload_device are forwarded to Sampler;
# presumably they add start/end tokens to the targets and keep the data on
# `device` — confirm in mldec.pipelines.dataloader.
train_loader, test_loader = load_data(config, logger, add_sos_eos=True, preload_device=device)

12-12 17 | 646936226.py : load_data() ::	 Loading Training Data...
12-12 17 | 646936226.py : load_data() ::	 Loading Training Data...
12-12 17 | 646936226.py : load_data() ::	 Training and Validation Data Loaded:
Train Size: 102
Test Size: 256. Output shape: (102, 10)
12-12 17 | 646936226.py : load_data() ::	 Training and Validation Data Loaded:
Train Size: 102
Test Size: 256. Output shape: (102, 10)


In [72]:
# Initialize the model from the settings in `config`, for the selected device.
model = build_model(config, device, logger)

# Disabled check: run the evaluator on the first test batch before any training.
# X, Y = test_loader.get_batch(0)
# model.evaluator(X, Y, weights=test_loader.weights)

12-12 17 | seq2seq.py : __init__() ::	 Initalizing Model...
12-12 17 | seq2seq.py : __init__() ::	 Initalizing Model...
12-12 17 | seq2seq.py : __init__() ::	 Initalizing Optimizer and Criterion...
12-12 17 | seq2seq.py : __init__() ::	 Initalizing Optimizer and Criterion...
12-12 17 | seq2seq.py : build_model() ::	 Model has 16196 trainable parameters
12-12 17 | seq2seq.py : build_model() ::	 Model has 16196 trainable parameters


In [73]:
# Train the model. test_loader is passed as train_model's second loader, and the
# run log below reports that phase as validation.
# NOTE(review): confirm that validating (and checkpointing) on the test set is
# intended.
logger.info('Starting Training Procedure')
train_model(model, train_loader, test_loader, device, config, logger)

12-12 17 | 3923464849.py : <module>() ::	 Starting Training Procedure
12-12 17 | 3923464849.py : <module>() ::	 Starting Training Procedure
12-12 17 | seq2seq.py : train_model() ::	 Training for epoch 1 completed...
Time Taken: 0 mins and 0.5483837127685547 secs
12-12 17 | seq2seq.py : train_model() ::	 Training for epoch 1 completed...
Time Taken: 0 mins and 0.5483837127685547 secs
12-12 17 | seq2seq.py : train_model() ::	 Starting Validation
12-12 17 | seq2seq.py : train_model() ::	 Starting Validation
12-12 17 | utils.py : save_checkpoint() ::	 Validation loss decreased (inf --> -0.000000).  Saving model ...
12-12 17 | utils.py : save_checkpoint() ::	 Validation loss decreased (inf --> -0.000000).  Saving model ...
12-12 17 | seq2seq.py : train_model() ::	 Epoch: 1 | Train Loss: 1.2295284271240234 | Train Acc: 0.0 | Val Acc: 0.0 | LR: 5e-05
12-12 17 | seq2seq.py : train_model() ::	 Epoch: 1 | Train Loss: 1.2295284271240234 | Train Acc: 0.0 | Val Acc: 0.0 | LR: 5e-05
12-12 17 | seq2s

In [66]:
# Inspect predictions on the first test batch: for every example print the input,
# the target and the model's prediction, followed by a blank line.
# NOTE(review): this cell's execution count (66) is lower than the training
# cell's (73), so the saved output may come from a different model state —
# re-run after training to confirm.
X, Y = test_loader.get_batch(0)
Y_preds = model.predict(X)
for x, y, y_pred in zip(X, Y, Y_preds):
    print(x, y, y_pred, sep='\n', end='\n\n')


tensor([0, 0, 0, 0, 0, 0, 0], device='cuda:0')
tensor([2, 0, 0, 0, 0, 0, 0, 0, 0, 3], device='cuda:0')
tensor([2, 0, 0, 0, 0, 0, 0, 0, 0, 3], device='cuda:0')

tensor([0, 0, 0, 0, 0, 0, 1], device='cuda:0')
tensor([2, 0, 0, 0, 0, 0, 0, 0, 1, 3], device='cuda:0')
tensor([2, 0, 0, 0, 0, 0, 0, 0, 1, 3], device='cuda:0')

tensor([0, 0, 0, 0, 0, 1, 1], device='cuda:0')
tensor([2, 0, 0, 0, 0, 0, 0, 1, 0, 3], device='cuda:0')
tensor([2, 0, 0, 0, 0, 0, 0, 0, 1, 3], device='cuda:0')

tensor([0, 0, 0, 0, 1, 1, 0], device='cuda:0')
tensor([2, 0, 0, 0, 0, 0, 1, 0, 0, 3], device='cuda:0')
tensor([2, 0, 0, 0, 0, 0, 0, 0, 1, 3], device='cuda:0')

tensor([0, 0, 0, 1, 1, 0, 0], device='cuda:0')
tensor([2, 0, 0, 0, 0, 1, 0, 0, 0, 3], device='cuda:0')
tensor([2, 0, 0, 0, 0, 0, 0, 0, 1, 3], device='cuda:0')

tensor([0, 0, 1, 1, 0, 0, 0], device='cuda:0')
tensor([2, 0, 0, 0, 1, 0, 0, 0, 0, 3], device='cuda:0')
tensor([2, 0, 0, 0, 0, 0, 0, 0, 1, 3], device='cuda:0')

tensor([0, 1, 1, 0, 0, 0, 0], device='cu

In [None]:
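# TODO: clean up this scratch cell; it has not been executed, and `main()` below references names that are not defined in this notebook.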


class DummyArgs:
	"""Minimal placeholder for the results of an argument parser.

	Holds only the run mode, the random seed and the GPU selection.
	"""
	# NOTE(review): this redefines the DummyArgs class from the earlier config
	# cell; running this cell replaces that fuller definition for later code.
	def __init__(self):
		self.mode, self.seed, self.gpu = 'train', 1234, None
		
def main():
	'''
	Scratch driver for a training run or a hyperparameter-tuning run.

	Takes no arguments; it reads the module-level names is_train, is_tune,
	config, device, logger and train_loader.

	NOTE(review): as written this function cannot run in this notebook. It uses
	names that are not defined or imported in any visible cell: config_file (its
	assignment is commented out below), pickle, count_parameters, val_loader,
	voc, tune and tune_model.
	'''

	'''Run Config files/paths'''
	# Disabled setup of run directories, the config file path and a file logger:
	# run_name = config.run_name
	# config.log_path = os.path.join(log_folder, run_name)
	# config.model_path = os.path.join(model_folder, config.dataset, run_name)
	# config_file = os.path.join(config.model_path, 'config.p')
	# log_file = os.path.join(config.log_path, 'log.txt')

	# if config.results:
	# 	config.result_path = os.path.join(result_folder, 'val_results_{}.json'.format(config.dataset))
	
	# if is_train or is_tune:
	# 	create_save_directories(config.log_path, config.model_path, result_folder)
	# else:
	# 	create_save_directories(config.log_path, None, result_folder)

	# logger = get_logger(run_name, log_file, logging.DEBUG)
	# logger.debug('Created Relevant Directories')
	# logger.info('Experiment Name: {}'.format(config.run_name)
	# )
	

	if is_train:
		# Checkpoint lookup is disabled, so `checkpoint` is always None and the
		# resume branch below is currently never taken.
		# checkpoint = get_latest_checkpoint(config.model_path, logger)
		checkpoint = None
		# Positive infinity as a plain Python float.
		min_val_loss = torch.tensor(float('inf')).item()
		epoch_offset = 0

		if checkpoint:
			# Resume: restore learning rate, model weights and optimizer state.
			# The map_location callable returns the storage unchanged (see the
			# torch.load documentation for the resulting device placement).
			ckpt = torch.load(checkpoint, map_location=lambda storage, loc: storage)
			config.lr = ckpt['lr']
			model = build_model(config=config, device=device, logger=logger)
			model.load_state_dict(ckpt['model_state_dict'])
			model.optimizer.load_state_dict(ckpt['optimizer_state_dict'])
		else:
			# Fresh start: build the model from the current config.
			model = build_model(config=config, device=device, logger=logger)
			

            
		logger.info('Initialized Model')
		
		# Persist the configuration (all instance attributes of `config`) as a pickle.
		# NOTE(review): config_file and pickle are not defined/imported here.
		with open(config_file, 'wb') as f:
			pickle.dump(vars(config), f, protocol=pickle.HIGHEST_PROTOCOL)
		logger.debug('Config File Saved')

		# NOTE(review): count_parameters is not imported in this notebook.
		num_params =count_parameters(model)
		logger.info('Number of parameters {}'.format(num_params))
		
		logger.info('Starting Training Procedure')
		# NOTE(review): an earlier cell calls
		# train_model(model, train_loader, test_loader, device, config, logger);
		# the argument list here differs (val_loader, voc, epoch_offset,
		# min_val_loss) — confirm which signature is current.
		train_model(model, train_loader, val_loader, voc, device, config, logger, epoch_offset, min_val_loss)

	elif is_tune:
		# Hyperparameter tuning happens here. I don't use command line inputs for
		# this config because it would be like pulling teeth.
		# NOTE(review): `tune` is not imported in this notebook; presumably it is
		# ray.tune, given the Ray Tune link below.
		hyper_config = {
			'lr': tune.choice([1e-4]),
			'd_model': tune.choice([32]),
			'depth': tune.choice([1, 2]),
			'd_ffn': tune.choice([64]),
			'heads':tune.choice([2])
		}

		# The way raytune distributes compute resources is to use all possible resources,
		# then maximize the number of trials such that cpus/gpus per worker below are satisfied.
		# To avoid overutilization, set `max_concurrent_trials`
		# https://docs.ray.io/en/latest/tune/tutorials/tune-resources.html
		hyper_settings = {
			"cpus_per_worker": 0,
			"gpus_per_worker": 1,
			"max_concurrent_trials": 1,
			"epochs": 100,
			"num_samples": 10,
		}

		# Positive infinity as a plain Python float.
		min_val_loss = torch.tensor(float('inf')).item()
		epoch_offset= 0

		logger.info('Starting Tuning Procedure')

		# Overwriting the config settings with the hyperparameters.
		# The values stored on config are the objects returned by tune.choice(...)
		# in hyper_config, not concrete numbers.
		config.tune = True
		for key, value in hyper_config.items():
			setattr(config, key, value)

		# NOTE(review): tune_model, val_loader and voc are not defined in this notebook.
		tune_model(hyper_settings, hyper_config, train_loader, val_loader, voc, config, logger, epoch_offset, min_val_loss)
		

