## This notebook shows how to generate data for the RSK dataset with Sage.

Note that although the variables and files are named "output_tableau_pairs" and "input_permutations", we ultimately used the tableau pairs as the model input and the permutations as the model output. It would also be interesting to train a model in the other direction, i.e. to predict the tableau pairs from the permutations.

In [1]:
import numpy as np
import random
from sage.all import Permutations, RSK
import math

# Size of the permutations to enumerate; N is also embedded in the output file names.
N = 8
# Materialise everything produced by Sage's Permutations(N) as a plain Python list.
input_permutations = list(Permutations(N))

In [2]:
# Sanity check on the dataset size: N! permutations are expected (8! = 40320).
len(input_permutations)

40320

In [3]:
# Run RSK on every permutation and keep the string form of each result,
# in the same order as `input_permutations`.
output_tableau_pairs = [str(RSK(perm)) for perm in input_permutations]

In [4]:
# Fraction of the examples assigned to the training split.
split = 0.8
ds_size = len(input_permutations)

# Seed NumPy's global RNG immediately before shuffling so the resulting
# index order (and therefore the train/test split) is reproducible.
np.random.seed(32)
random_idx = list(range(ds_size))
np.random.shuffle(random_idx)

In [5]:
# Convert and shuffle the permutations once, then cut at a single split index.
# The first `split` fraction (rounded up) of the shuffled rows is the training
# set; the remainder is the test set.
split_idx = math.ceil(ds_size * split)
shuffled_permutations = np.array(input_permutations)[random_idx]
input_permutations_train = shuffled_permutations[:split_idx]
input_permutations_test = shuffled_permutations[split_idx:]

In [6]:
# Convert and shuffle the tableau-pair strings once, then cut at a single split
# index. The same `random_idx` order and the same boundary formula are used for
# the permutations, so row i here corresponds to row i of the permutation split.
split_idx = math.ceil(ds_size * split)
shuffled_tableau_pairs = np.array(output_tableau_pairs, dtype=str)[random_idx]
output_tableau_pairs_train = shuffled_tableau_pairs[:split_idx]
output_tableau_pairs_test = shuffled_tableau_pairs[split_idx:]

In [7]:
# Write each tableau-pair split to its own file, one string per line.
# NOTE(review): the stringified tableau pairs presumably contain commas
# themselves, so readers should treat each line as a single field — confirm
# against the loader.
tableau_csv_targets = {
    f"./output_tableau_pairs_{N}_train.csv": output_tableau_pairs_train,
    f"./output_tableau_pairs_{N}_test.csv": output_tableau_pairs_test,
}
for csv_path, tableau_rows in tableau_csv_targets.items():
    np.savetxt(csv_path, tableau_rows, fmt="%s", delimiter=",")

In [8]:
# Write each permutation split to its own CSV file with integer formatting.
# NOTE(review): assumes each array row holds the entries of one permutation — confirm.
permutation_csv_targets = {
    f"./input_permutations_{N}_train.csv": input_permutations_train,
    f"./input_permutations_{N}_test.csv": input_permutations_test,
}
for csv_path, permutation_rows in permutation_csv_targets.items():
    np.savetxt(csv_path, permutation_rows, fmt="%d", delimiter=",")