In [1]:
# standard libraries
import numpy as np
import random
import time
from collections import namedtuple, Counter
import operator
import os
from copy import deepcopy
import heapq
# pytorch
import torch
import torch.nn as nn
import torch.optim as optim
# import from other files
from toric_model import Toric_code
from toric_model import Action
from toric_model import Perspective
from Replay_memory import Replay_memory_uniform, Replay_memory_prioritized
# import networks 
from NN import NN_11, NN_17
from ResNet import ResNet18, ResNet34, ResNet50, ResNet101, ResNet152
from util import incremental_mean, convert_from_np_to_tensor, Transition

from RL import RL

In [8]:
# ---- run configuration ----

# Network class used by the agent. Valid choices (all imported above):
#   NN_11, NN_17, ResNet18, ResNet34, ResNet50, ResNet101, ResNet152
NETWORK = NN_11

# Name of the file (kept in the network folder) that contains the trained agent.
NETWORK_FILE_NAME = 'size_5_NN_11'

# Set to True to continue the training of a previously saved agent.
continue_training = False

# Grid size: must be odd! Common system sizes are 3, 5, 7 and 9.
SYSTEM_SIZE = 5

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# ---- initialize the RL class and its training parameters ----
rl = RL(
    Network=NETWORK,
    Network_name=NETWORK_FILE_NAME,
    system_size=SYSTEM_SIZE,
    device=device,
    p_error=0.05,
    learning_rate=0.00025,
    discount_factor=0.95,
    max_nbr_actions_per_episode=50,
    replay_memory_capacity=20000,
    replay_memory='proportional',  # the other option is 'uniform'
)


In [9]:
# Generate the output folder structure for this training run.
# The directory name carries a timestamp, so each run writes to its own folder.
timestamp = time.strftime("%y_%m_%d__%H_%M_%S__")
PATH = 'data/training__' +str(NETWORK_FILE_NAME) +'_'+str(SYSTEM_SIZE)+'__' + timestamp
PATH_epoch = PATH + '/network_epoch'
# makedirs on the leaf directory also creates PATH itself. exist_ok=True makes
# re-running this cell harmless and still creates network_epoch when PATH
# already exists without it.
os.makedirs(PATH_epoch, exist_ok=True)

# Load the saved network when training should continue from an earlier agent.
if continue_training:
    print('continue training')
    PATH2 = 'network/'+str(NETWORK_FILE_NAME)+'.pt'
    rl.load_network(PATH2)

In [10]:
# Train the agent for the given number of epochs.
# NOTE: the values below are test parameters.
rl.train_for_n_epochs(
    # training arguments
    epochs=20,
    training_steps=5,
    batch_size=32,
    optimizer='Adam',
    target_update=10,
    replay_start_size=48,
    minimum_nbr_of_qubit_errors=0,
    # prediction-related arguments
    num_of_predictions=10,
    num_of_steps_prediction=50,
    prediction_list_p_error=[0.01, 0.1],
    # output location created in the folder-structure cell
    directory_path=PATH,
)

training done, epoch:  1
[0. 0.]
training done, epoch:  2
[0.5 0.2]
training done, epoch:  3
[0. 0.]


KeyboardInterrupt: 

In [12]:
# Run rl.prediction and unpack its eight return values.
# Each unpack target has a distinct name so no returned value is overwritten
# during unpacking. `ground_state_list` is bound to the 6th slot; the 2nd slot
# is kept separately as `ground_state_list_first`.
# NOTE(review): the names suggest slots 2 and 6 carry the same kind of data --
# confirm against RL.prediction's return statement.
(error_corrected_list,
 ground_state_list_first,
 average_number_of_steps_list,
 mean_q_list,
 failed_syndroms,
 ground_state_list,
 prediction_list_p_error,
 failure_rate) = rl.prediction(
    num_of_predictions=100,
    prediction_list_p_error=[0.01, 0.05, 0.1],
    # int(system_size / 2) + 1, e.g. 3 for a system size of 5
    minimum_nbr_of_qubit_errors=int(rl.system_size/2)+1,
    #save_prediction=True,
    num_of_steps=50)



In [36]:
# Show one of the values unpacked from rl.prediction as the cell output.
# To inspect a different result, make one of the commented-out names below
# the cell's active expression instead.
error_corrected_list
# ground_state_list
# average_number_of_steps_list
# mean_q_list
# failed_syndroms[3]
# ground_state_list
# prediction_list_p_error
# failure_rate

array([0.11, 0.08, 0.11])