In [1]:
import sympy
from sympy.combinatorics import Permutation, PermutationGroup
from sympy.combinatorics.named_groups import SymmetricGroup

import numpy as np
np.set_printoptions(threshold=np.inf)

import matplotlib.pyplot as plot

In [2]:
n = 10  # degree of the symmetric group S_n

# Generator list: the identity permutation at index 0, followed by the
# adjacent transpositions (i, i+1) for i = 0, ..., n-2.
sigma = [Permutation()] + [Permutation(i, i + 1) for i in range(n - 1)]

# Permutation group generated by the entries of sigma.
G = PermutationGroup(sigma)

In [3]:
def rand_onehot(gens, length):
    """Return a random one-hot matrix with `length` rows and `gens` columns.

    Each row holds a single 1.0 in a column drawn by ``np.random.choice`` from
    ``range(gens)`` (global NumPy random state); every other entry is 0.0.
    """
    # Row-indexing an identity matrix maps each drawn column index to its
    # one-hot row; see
    # https://stackoverflow.com/questions/45093615/random-one-hot-matrix-in-numpy
    identity = np.eye(gens)
    hot_columns = np.random.choice(gens, length)
    return identity[hot_columns]

In [4]:
def gen_onehot_tensor(gens, max_word_len, samples):
    """Return a random one-hot tensor of shape (max_word_len, gens, samples).

    Each slice ``tensor[:, :, s]`` is a one-hot matrix with ``max_word_len``
    rows and ``gens`` columns: row ``j`` has a single 1.0 in a column drawn by
    ``np.random.choice(gens, max_word_len)`` (global NumPy random state).

    Parameters
    ----------
    gens : int
        Number of columns (size of the index range drawn from).
    max_word_len : int
        Number of rows in every slice.
    samples : int
        Number of slices along the last axis. ``samples == 1`` yields a tensor
        with a last axis of length 1 and ``samples == 0`` an empty last axis,
        so the result is always 3-D.

    Returns
    -------
    numpy.ndarray
        float64 array of shape ``(max_word_len, gens, samples)``.
    """
    # Allocate once and fill in place; stacking inside the loop would copy the
    # whole tensor on every iteration.
    onehot_tensor = np.zeros((max_word_len, gens, samples))
    row_idx = np.arange(max_word_len)

    for sample_idx in range(samples):
        # One draw of max_word_len column indices per sample, in sample order.
        hot_columns = np.random.choice(gens, max_word_len)
        onehot_tensor[row_idx, hot_columns, sample_idx] = 1.0

    return onehot_tensor

In [5]:
def perm_tensor_orders(onehot_tensor):
    """Return the permutation order encoded by each sample of `onehot_tensor`.

    For every slice ``onehot_tensor[:, :, i]`` the entries equal to 1 are
    visited row by row (left to right within a row); an entry in column ``k``
    multiplies the running product on the right by the module-level generator
    ``sigma[k]``. The result is the list of ``order()`` values of those
    products, one per slice, in slice order.
    """
    num_samples = onehot_tensor.shape[-1]
    permutation_orders = []

    for sample_idx in range(num_samples):
        word = onehot_tensor[:, :, sample_idx]
        product = Permutation()

        # np.argwhere reports matching positions in row-major order, which is
        # the order a nested row/column scan would visit them.
        for _, gen_idx in np.argwhere(word == 1):
            product = product * sigma[gen_idx]

        permutation_orders.append(product.order())

    return permutation_orders

In [13]:
# Sampling parameters.
samples = 10000      # number of random words to draw
max_word_len = 64    # number of rows (generator factors) per word
gens = len(sigma)    # one one-hot column per entry of sigma

# One random one-hot word matrix per slice of the last axis.
onehot_tensor = gen_onehot_tensor(gens, max_word_len, samples)

# The word matrix at sample index 1, pulled out as a standalone matrix.
onehot_matrix = onehot_tensor[..., 1]

In [14]:
# Sanity check: the generated tensor is laid out as (max_word_len, gens, samples).
print(onehot_tensor.shape)

(64, 10, 10000)


In [15]:
# Order of the permutation spelled out by each sampled word (one entry per
# slice of the last axis of onehot_tensor).
perm_orders = perm_tensor_orders(onehot_tensor)

In [20]:
# One order is appended per sample slice, so this should equal `samples`.
print(len(perm_orders))

10000


In [39]:
from tqdm import tqdm
import json
from pathlib import Path
def save_to_json(onehot_tensor, perm_orders, num_gens, max_word_len, filename, save_dir = None):
    """Write sampled words and their permutation orders to a JSON file.

    The file holds one object with the keys ``"num_gens"``, ``"max_word_len"``
    and ``"data"``; ``"data"`` is a list with one ``[indices, order]`` entry
    per sample, where ``indices`` are the column indices of the nonzero
    entries of ``onehot_tensor[:, :, i]`` in row order.

    Parameters
    ----------
    onehot_tensor : numpy.ndarray
        3-D array whose slice ``[:, :, i]`` is the matrix of sample ``i``.
    perm_orders : sequence
        Order of each sample; its length decides how many samples are written.
        Every entry is converted with ``int()`` before serialisation.
    num_gens : int
        Stored under ``"num_gens"``.
    max_word_len : int
        Stored under ``"max_word_len"``.
    filename : str
        Name of the output file.
    save_dir : str or pathlib.Path, optional
        Directory for the output file, created (with parents) when missing.
        When omitted or empty, ``filename`` is used as given.
    """
    # Use the num_gens argument rather than a notebook global, so the file
    # always describes the data that was actually passed in.
    data_dict = {"num_gens": num_gens, "max_word_len": max_word_len, "data": []}

    if save_dir:
        out_dir = Path(save_dir)
        out_dir.mkdir(parents = True, exist_ok = True)
        # Joining with "/" inserts the separator even when save_dir has no
        # trailing slash, and also accepts Path objects.
        out_path = out_dir / filename
    else:
        out_path = Path(filename)

    for i, order in enumerate(perm_orders):
        inds = onehot_tensor[:,:,i].nonzero()[1].tolist()
        data_dict["data"].append((inds, int(order)))

    with open(out_path, "w") as f:
        json.dump(data_dict, f)

In [57]:
# Inline export of the sampled words to S10_data.json: one
# (generator indices, permutation order) record per sample.
records = []
for i in tqdm(range(samples)):
    word = onehot_tensor[:, :, i]
    inds = np.nonzero(word)[1].tolist()
    order = int(perm_orders[i])
    records.append((inds, order))

data_dict = {"num_gens": gens, "max_word_len": max_word_len, "data": records}
with open("S10_data.json", "w") as f:
    json.dump(data_dict, f)

100%|██████████| 10000/10000 [00:00<00:00, 144349.94it/s]


In [44]:
# Hot column index of every row of sample 0. Computed with nonzero() directly
# so this cell does not depend on a helper that is only defined further down
# the notebook (which fails on a fresh top-to-bottom run).
print(onehot_tensor[:, :, 0].nonzero()[1])

[2 7 3 1 5 0 1 4 7 2 7 7 5 3 8 0 0 2 3 5 1 9 7 1 8 1 7 6 9 1 1 5 3 0 5 8 1
 5 4 6 3 4 0 4 7 5 0 0 0 1 0 1 5 1 3 5 1 2 7 0 4 1 6 1]


In [56]:
# Inspect the type of the first stored order; the int() cast applied when the
# records were built should make this a built-in int.
type(data_dict["data"][0][1])

int

In [29]:
# Show the full one-hot matrix stored at sample index 1.
test = onehot_tensor[..., 1]
print(test)

[[0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0.

array([2, 8, 1, 1, 8, 8, 8, 6, 1, 3, 2, 3, 0, 7, 4, 1, 3, 9, 1, 3, 3, 5,
       1, 6, 5, 1, 4, 7, 2, 8, 9, 3, 3, 1, 1, 2, 2, 5, 8, 7, 8, 3, 2, 7,
       7, 1, 9, 0, 3, 5, 8, 6, 6, 5, 6, 4, 4, 1, 9, 6, 0, 2, 9, 9])

In [33]:
# Tuple of index arrays (one per axis) locating every nonzero entry of the tensor.
temp = np.nonzero(onehot_tensor)

In [38]:
# Axis-1 (column) index of every nonzero entry of onehot_tensor.
# NOTE(review): with the print threshold set to np.inf this displays the whole
# array; consider slicing before display.
temp[1]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [22]:
def onehot_tolist(onehot):
    """Return, as a Python list, the column index of the 1 in each row of `onehot`.

    Rows are processed in order. Every row must contain exactly one entry
    equal to 1; otherwise ``.item()`` raises ``ValueError`` because the match
    array does not hold exactly one element.
    """
    return [
        np.argwhere(onehot[row_idx, :] == 1).item()
        for row_idx in range(len(onehot))
    ]
    
def onehot_to_npvec(onehot):
    """Return a 1-D NumPy array with the column index of the 1 in each row of `onehot`.

    Rows are processed in order. Every row must contain exactly one entry
    equal to 1; otherwise ``.item()`` raises ``ValueError``.
    """
    hot_positions = [
        np.argwhere(onehot[row_idx, :] == 1).item()
        for row_idx in range(len(onehot))
    ]
    return np.array(hot_positions)

In [23]:
def onehot_tensor_to_matrix(onehot_tensor):
    """Collapse a one-hot tensor into a (samples x word length) index matrix.

    Row ``i`` of the result describes the slice ``onehot_tensor[:, :, i]``:
    its ``j``-th entry is the hot column position of row ``j`` of that slice,
    as computed by ``onehot_to_npvec``. The matrix is allocated with
    ``np.empty``'s default dtype, so the indices are stored as floats.
    """
    num_samples = onehot_tensor.shape[-1]
    word_len = onehot_tensor.shape[0]

    gens_mat = np.empty((num_samples, word_len))
    for sample_idx in range(num_samples):
        gens_mat[sample_idx, :] = onehot_to_npvec(onehot_tensor[:, :, sample_idx])
    return gens_mat
        

In [12]:
# Hot-position sequence of the sample at index 2 (row 2 of the
# samples x word-length matrix).
onehot_tensor_to_matrix(onehot_tensor)[2]

array([7., 6., 0., 9., 1., 1., 1., 5., 8., 3., 7., 3., 2., 8., 5., 9., 7.,
       8., 5., 7., 7., 4., 7., 1., 6., 7., 8., 3., 8., 0., 1., 4., 3., 2.,
       2., 3., 6., 4., 6., 2., 2., 1., 8., 1., 6., 4., 3., 6., 4., 5., 2.,
       1., 0., 1., 8., 1., 3., 1., 4., 7., 5., 7., 9., 2., 2., 3., 1., 0.,
       3., 8., 7., 9., 1., 7., 3., 4., 5., 8., 2., 0., 6., 4., 6., 9., 8.,
       4., 5., 1., 9., 4., 6., 4., 7., 5., 8., 3., 4., 9., 5., 9., 9., 9.,
       4., 7., 8., 9., 8., 8., 2., 6., 1., 2., 5., 0., 1., 2., 1., 6., 1.,
       5., 1., 6., 2., 1., 6., 2., 5., 6.])

In [30]:
# Hot column index of every row of the sample at index 1.
print(onehot_to_npvec(onehot_tensor[:,:,1]))

[2 8 1 1 8 8 8 6 1 3 2 3 0 7 4 1 3 9 1 3 3 5 1 6 5 1 4 7 2 8 9 3 3 1 1 2 2
 5 8 7 8 3 2 7 7 1 9 0 3 5 8 6 6 5 6 4 4 1 9 6 0 2 9 9]
