# Pip install all needed packages

In [1]:
!pip install gym

Defaulting to user installation because normal site-packages is not writeable


# Imports

In [2]:
from gym import Env
from gym.spaces import Discrete, Box
import numpy as np
import math

# Global variables

In [3]:
# Number of slices (time steps of the code) stacked in one state; first axis of the state array.
depthOfCode = 10
# Height of the qubit grid (second axis of the state array).
rows = 3
# Width of the qubit grid (third axis of the state array); also used to number positions row by row.
cols = 3

# Functions

In [4]:
# swap is given `actions`, which is either a tuple of actions or a single action. Every action is a pair
# (x, y) of qubit positions whose values should be swapped. Positions are numbered row by row, e.g. for a
# 3x3 grid:
#         [[0, 1, 2],
#          [3, 4, 5],
#          [6, 7, 8]]

# ex. of a tuple of actions: ((0, 3), (4, 5), (7, 8))

# ex. of a single action: (0, 1)

def swap(state, actions):
    """Apply one or several swaps to every slice of ``state``, in place.

    Parameters
    ----------
    state : sequence of 2-D grids (e.g. an array indexed as state[slice][row][col])
        All slices receive the same swaps. The grid width used to decode the
        position numbers is read from the first row of the first slice.
    actions : pair of ints, or sequence of such pairs
        A single action ``(x, y)`` or several actions ``((x0, y0), (x1, y1), ...)``.
        An empty sequence means "no swap".

    Returns
    -------
    None. ``state`` is modified in place.
    """
    # Nothing to swap when there are no slices or no actions.
    if len(state) == 0 or len(actions) == 0:
        return

    # A single action is a flat pair of ints; wrap it so it can be iterated like a sequence of actions.
    if not isinstance(actions[0], (tuple, list)):
        actions = [actions]

    # Decode positions against the grid that is actually being swapped.
    width = len(state[0][0])

    for pos0, pos1 in actions:
        # Row-major numbering: position = row * width + col.
        row0, col0 = divmod(pos0, width)
        row1, col1 = divmod(pos1, width)

        for layer in state:
            layer[row0][col0], layer[row1][col1] = layer[row1][col1], layer[row0][col0]


In [5]:
# getNeighbors returns the values stored directly above, to the left of, to the right of and below a
# given cell of a 2-D grid, in that order. Neighbors that fall outside the grid are reported as -1.

def getNeighbors(state, row_number, column_number):
    """Return [up, left, right, down] neighbor values of a cell, with -1 for positions off the grid."""
    height = len(state)
    # Only look at the first row when there is one; an empty grid has no valid positions.
    width = len(state[0]) if height else 0

    neighbors = []
    for row_step, col_step in ((-1, 0), (0, -1), (0, 1), (1, 0)):
        r = row_number + row_step
        c = column_number + col_step
        if 0 <= r < height and 0 <= c < width:
            neighbors.append(state[r][c])
        else:
            neighbors.append(-1)
    return neighbors

In [6]:
# Takes a state, i.e. a stack of slices such as
#
#     [[1,0,0],
#      [1,0,2],
#      [2,0,0]]
#
# and inspects only the first slice: every positive number in it must also appear in one of its
# four direct neighbors (above, left, right, below). Returns True if that holds, else False.

def isExecutableState(state):
    """Return True when every positive entry of state[0] has an equal value among its direct neighbors."""
    front = state[0]
    # A positive entry without an equal-valued neighbor makes the slice non-executable.
    has_unpaired_entry = any(
        value > 0 and value not in getNeighbors(front, row, col)
        for row, line in enumerate(front)
        for col, value in enumerate(line)
    )
    return not has_unpaired_entry

In [7]:
# We use this once to get all the different swap combinations, i.e. all acceptable combinations of one up
# to maxSwapsPerTimeStep swaps. These are the different actions we could make in one timestep.

def getPossibleActions(maxSwapsPerTimeStep=None):
    """Return the list of all actions available in one time step.

    Parameters
    ----------
    maxSwapsPerTimeStep : int, optional
        Largest number of swaps combined into one action. When omitted it is
        (rows*cols)//2, computed from the current values of the globals at call
        time rather than frozen when the function was defined.

    Returns
    -------
    list
        Single swaps as pairs ``(a, b)`` and combined swaps as tuples of pairs,
        each sorted so that duplicates found in a different order collapse to one
        entry, followed by the no-op action ``(0, 0)`` as the last element.
        The order of the other entries is the iteration order of a set.
    """
    if maxSwapsPerTimeStep is None:
        maxSwapsPerTimeStep = (rows*cols) // 2

    # Grid whose cell values are the position numbers themselves.
    state = np.arange(rows*cols).reshape((rows,cols))

    possibleActions = getPossibleActionsSub(state, [], maxSwapsPerTimeStep)

    # Sort every action so equal combinations compare equal, then drop duplicates.
    possibleActions = set(map(lambda x: tuple(sorted(x)), possibleActions ))

    possibleActions = list(possibleActions)
    # Swapping position 0 with itself changes nothing: the "do nothing" action.
    possibleActions.append((0, 0))

    return possibleActions
    
def getPossibleActionsSub(state, used, maxSwapsPerTimeStep):
    """Recursively collect swaps, and combinations of swaps, between neighboring cells.

    Parameters
    ----------
    state : 2-D grid whose cell values identify the positions (indexed state[i][j]).
    used : list of cell values that already take part in a swap and must not be reused.
    maxSwapsPerTimeStep : int, number of further swaps that may still be added.

    Returns
    -------
    A list mixing tuples (a single swap ``(cell, neighbor)``) and lists of such
    tuples (several swaps combined). When maxSwapsPerTimeStep is 0 an empty
    numpy array is returned instead, so the caller's loop simply iterates nothing.
    NOTE(review): the same combination presumably shows up several times in
    different orders; the visible caller sorts and de-duplicates the result.
    """
    # Budget exhausted: nothing more can be added.
    if maxSwapsPerTimeStep == 0:
        return np.asarray([])
    
    possibleActions = []
    
    for i in range(len(state)):
        for j in range(len(state[0])):
            
            # Work on a copy so the caller's list is left untouched; a fresh copy is taken per cell.
            usedtmp = used.copy()
            
            # Skip cells that already take part in a swap.
            if not state[i][j] in usedtmp:
                neighbors = getNeighbors(state, i, j)
                for neighbor in neighbors:
                    # Valid neighbor (-1 marks off-grid), reversed pair not yet recorded in this call,
                    # and neighbor not already used.
                    if neighbor >= 0 and not (neighbor, state[i][j]) in possibleActions and not neighbor in usedtmp:
                        # The swap on its own is an action.
                        possibleActions.append((state[i][j], neighbor))
                        # NOTE(review): usedtmp is not reset between neighbors of the same cell, so cells
                        # marked for an earlier neighbor stay excluded for the later ones - confirm intended.
                        usedtmp.append(state[i][j])
                        usedtmp.append(neighbor)
 
                        # Extend this swap with every action that fits in the remaining budget.
                        for action in getPossibleActionsSub(state, usedtmp, maxSwapsPerTimeStep-1):
                            if type(action) == tuple:
                                # Single swap from the recursion: pair it with the current swap.
                                possibleActions.append([(state[i][j], neighbor), action])
                            elif type(action) == list:
                                # Combination from the recursion: add the current swap to it (mutates that list).
                                action.append((state[i][j], neighbor))
                                possibleActions.append(action)
                                
        
    return possibleActions

In [8]:
# Creates a shuffled matrix simulating one slice of quantum code. Each operation is
# written as a pair of equal positive numbers; cells without an operation hold 0.

# Ex1. [[0, 1, 0],
#       [1, 2, 2],
#       [3, 0, 3]]

# Ex2. [[2, 1],
#       [2, 1]]

def makeStateSlice():
    """Return a random (rows, cols) slice holding pairs 1,1,2,2,... padded with zeros."""
    cellCount = rows*cols
    # Odd numbers 3, 5, ... up to at most cellCount+1; the draw minus one is the number of occupied cells.
    oddSizes = list(range(3, cellCount+2, 2))
    drawn = np.random.choice(oddSizes)
    # 1, 1, 2, 2, ... : every operation id appears exactly twice.
    pairedIds = np.ceil(np.arange(1, drawn)/2)
    emptyCells = np.zeros(cellCount-drawn+1)
    stateSlice = np.concatenate((pairedIds, emptyCells))
    np.random.shuffle(stateSlice)
    return stateSlice.reshape((rows, cols))

In [9]:
# Builds a state by stacking depthOfCode independently generated slices
def makeState():
    """Return an array of shape (depthOfCode, rows, cols) filled slice by slice with makeStateSlice()."""
    layers = np.zeros((depthOfCode, rows, cols))
    for depth in range(depthOfCode):
        layers[depth] = makeStateSlice()
    return layers

# Tests some of the functions

In [10]:
# Build a test state in which every slice holds the position numbers 0..rows*cols-1,
# so that after a swap the moved positions can be read off directly.
state = np.zeros((depthOfCode,rows,cols))
for i in range(len(state)):
    state[i] = np.arange(rows*cols).reshape((rows,cols))

#print(state)

# Enumerate all actions and show how many there are.
possibleActions = getPossibleActions()
print(len(possibleActions))
#print(possibleActions)

# Take the first action of the list as the example to apply.
a = possibleActions[0]

print(a)

# NOTE(review): statetmp is not used anywhere in the visible cells - confirm whether it is still needed.
statetmp = np.arange(rows*cols).reshape((rows,cols))

# swap modifies `state` in place, so re-running this cell alone starts from a fresh state again.
swap(state, a)

# Only the first slice is printed; swap applies the same swaps to every slice.
print(state[0])

# Leftover debugging code (de-duplication check), kept commented out:
#for i in range(len(possibleActions)):
#output = set(map(lambda x: tuple(sorted(x)),possibleActions))

#print(len(output))

131
((2, 5), (7, 8))
[[0. 1. 5.]
 [3. 4. 2.]
 [6. 8. 7.]]


Test function makeState

In [11]:
# Print one randomly generated state (depthOfCode slices of rows x cols) for visual inspection.
print(makeState())

[[[0. 3. 1.]
  [2. 2. 0.]
  [0. 3. 1.]]

 [[1. 4. 0.]
  [2. 4. 3.]
  [1. 2. 3.]]

 [[1. 0. 2.]
  [3. 4. 4.]
  [2. 1. 3.]]

 [[0. 1. 1.]
  [0. 2. 0.]
  [3. 3. 2.]]

 [[3. 1. 4.]
  [2. 1. 4.]
  [2. 0. 3.]]

 [[2. 1. 0.]
  [0. 0. 3.]
  [1. 3. 2.]]

 [[4. 1. 3.]
  [2. 4. 0.]
  [2. 1. 3.]]

 [[3. 4. 4.]
  [3. 0. 2.]
  [2. 1. 1.]]

 [[4. 3. 2.]
  [1. 2. 4.]
  [0. 3. 1.]]

 [[3. 2. 2.]
  [4. 1. 1.]
  [4. 3. 0.]]]


# Environment definition and sub functions

In [12]:
# Our environment
class Kvant(Env):
    """Gym environment for making slices of a quantum code executable through qubit swaps.

    The state is an array of shape (depthOfCode, rows, cols). Each step applies one
    entry of ``possibleActions`` (one or several swaps, or the no-op) to all slices.
    When the first slice becomes executable (see isExecutableState) the agent gets
    reward 5, the slice is removed and a new random slice is appended at the end.
    Every other step gives reward -1.

    An episode ends when MAX_LAYERS_PER_EPISODE slices have been executed, or when
    MAX_SWAPS_PER_LAYER steps in a row did not make the first slice executable.
    """

    # Number of steps the agent may spend on one slice before the episode is aborted.
    MAX_SWAPS_PER_LAYER = 5

    # Number of slices that have to be executed to finish an episode.
    MAX_LAYERS_PER_EPISODE = 10

    def __init__(self):
        # Array of possible actions
        self.possibleActions = getPossibleActions()
        #self.possibleActions = getPossibleActions(1) #this for only 1 swap at a time

        # Number of actions we can take
        self.action_space = Discrete(len(self.possibleActions))

        # Cell values are 0 (empty) or an operation id. Ids come in pairs, so at most
        # (rows*cols)//2 different ids fit on the grid. The dtype matches the float64
        # arrays produced by makeState, so returned observations lie inside the space.
        self.observation_space = Box(
            low=0,
            high=(rows*cols) // 2,
            shape=(depthOfCode, rows, cols),
            dtype=np.float64,
        )

        # The start state
        self.state = makeState()

        # Remaining steps for the current slice
        self.maxSwaps = self.MAX_SWAPS_PER_LAYER

        # Remaining slices to execute in this episode
        self.maxLayers = self.MAX_LAYERS_PER_EPISODE

    def step(self, action):
        """Apply the action with index ``action`` and return (observation, reward, done, info)."""
        swap(self.state, self.possibleActions[action])

        self.maxSwaps -= 1

        if isExecutableState(self.state):
            # The current slice is executable: reward 5
            reward = 5

            # A new slice gets the full step budget again
            self.maxSwaps = self.MAX_SWAPS_PER_LAYER

            # Remove the executable slice and add a new random slice at the tail
            self.state = np.roll(self.state, -1, axis=0)
            self.state[depthOfCode - 1] = makeStateSlice()

            self.maxLayers -= 1

            # Done only if this was the last slice we can work on this episode
            done = self.maxLayers <= 0
        else:
            # Not executable yet: penalize the step and stop once the budget is used up
            reward = -1
            done = self.maxSwaps <= 0

        info = {}

        # Hand out a copy: the internal array is modified in place by later steps and
        # would otherwise silently change observations the caller has stored.
        return self.state.copy(), reward, done, info

    def render(self, mode="human"):
        """Rendering is not implemented; ``mode`` is accepted for compatibility with gym's render signature."""
        pass

    def reset(self):
        """Start a new episode with a fresh random state and full budgets; return the first observation."""
        self.state = makeState()
        self.maxSwaps = self.MAX_SWAPS_PER_LAYER
        self.maxLayers = self.MAX_LAYERS_PER_EPISODE
        return self.state.copy()

# Testing the environment

In [13]:
# Create the environment; this enumerates all possible actions and draws a random start state.
env = Kvant()

  logger.warn(


## Test with random actions only

In [14]:
# Baseline: play `episodes` episodes with uniformly sampled actions and record the total reward of each.
# No random seed is set, so the resulting score distribution differs from run to run.
episodes = 1000
scores = []
for episode in range(1, episodes+1):
    # NOTE: this rebinds the notebook-level name `state` used by the earlier test cell.
    state = env.reset()
    done = False
    score = 0 
    
    while not done:
        # Sample an action index uniformly from the discrete action space.
        action = env.action_space.sample()
        # Only reward and done are used here; n_state and info are ignored.
        n_state, reward, done, info = env.step(action)
        score+=reward
    scores.append(score)

In [15]:
# Print how often each total episode score occurred, in ascending order of score.
# Only the scores that actually occurred are visited, and an empty `scores` list
# simply prints nothing.
for i in sorted(set(scores)):
    print('Number of', i, 'is', scores.count(i))

Number of -5 is 638
Number of -4 is 17
Number of -3 is 32
Number of -2 is 37
Number of -1 is 57
Number of 0 is 113
Number of 1 is 19
Number of 2 is 14
Number of 3 is 17
Number of 4 is 14
Number of 5 is 17
Number of 6 is 4
Number of 7 is 5
Number of 9 is 3
Number of 10 is 3
Number of 11 is 2
Number of 15 is 2
Number of 18 is 1
Number of 19 is 1
Number of 21 is 1
Number of 25 is 1
Number of 34 is 1
Number of 40 is 1
