In [82]:
import numpy as np
from collections import defaultdict
from pathlib import Path
import pickle

class Splitter:
    def __init__(self, k_splits:int):
        """
        Class to load and split into training and testing into k_splits folds.
        Can save state and add more data later.
        """
        
        # k_splits: how many folds are created
        self.k_splits = k_splits
        
        # dictionary of lists for each split
        # e.g. {0: [a,d,e], 1:[b,c,f]}
        self.splits = defaultdict(list)
        
        # keep track how many items are in each split
        self.split_lengths = np.zeros(k_splits)
    
    def add(self, item):
        # reduce probability for splits with more data
        distribution = ((sum(self.split_lengths) + 1) / (self.split_lengths + 1)) / self.k_splits
        probabilities = distribution / sum(distribution)
        
        # chose a split depending on the probabilities
        split = np.random.choice(self.k_splits, 1, p= probabilities)[0]
        
        # add item to the split
        self.splits[split].append(item)
        
        # update the split_lengths
        self.split_lengths[split] += 1
        
    def save(self, path):
        # save state to path
        with open(path, 'wb') as handle:
            pickle.dump(self.__dict__, handle, protocol=pickle.HIGHEST_PROTOCOL)
    
    def load(self, path):
        with open(path, 'rb') as handle:
            loaded_state = pickle.load(handle)
            
        self.__dict__.update(loaded_state)

In [83]:
path = "E:\\python\\work\\fafnir_data\\"
splitter = Splitter(10)

p = Path(path)
file_list = [_ for _ in p.glob('*.pkl')]

for file in file_list:
    splitter.add(file)

splitter.splits

defaultdict(list,
            {1: [WindowsPath('E:/python/work/fafnir_data/100.pkl'),
              WindowsPath('E:/python/work/fafnir_data/35.pkl'),
              WindowsPath('E:/python/work/fafnir_data/39.pkl'),
              WindowsPath('E:/python/work/fafnir_data/70.pkl'),
              WindowsPath('E:/python/work/fafnir_data/83.pkl'),
              WindowsPath('E:/python/work/fafnir_data/Pat_11.pkl'),
              WindowsPath('E:/python/work/fafnir_data/Pat_21.pkl'),
              WindowsPath('E:/python/work/fafnir_data/Pat_24.pkl'),
              WindowsPath('E:/python/work/fafnir_data/Pat_26.pkl'),
              WindowsPath('E:/python/work/fafnir_data/Pat_35.pkl'),
              WindowsPath('E:/python/work/fafnir_data/Pat_4.pkl')],
             3: [WindowsPath('E:/python/work/fafnir_data/101.pkl'),
              WindowsPath('E:/python/work/fafnir_data/33.pkl'),
              WindowsPath('E:/python/work/fafnir_data/45.pkl'),
              WindowsPath('E:/python/work/fafnir_data/

In [84]:
splitter.save("splitstate.pkl")

In [85]:
splitter = Splitter(0)
splitter.load("splitstate.pkl")

In [86]:
splitter.splits

defaultdict(list,
            {1: [WindowsPath('E:/python/work/fafnir_data/100.pkl'),
              WindowsPath('E:/python/work/fafnir_data/35.pkl'),
              WindowsPath('E:/python/work/fafnir_data/39.pkl'),
              WindowsPath('E:/python/work/fafnir_data/70.pkl'),
              WindowsPath('E:/python/work/fafnir_data/83.pkl'),
              WindowsPath('E:/python/work/fafnir_data/Pat_11.pkl'),
              WindowsPath('E:/python/work/fafnir_data/Pat_21.pkl'),
              WindowsPath('E:/python/work/fafnir_data/Pat_24.pkl'),
              WindowsPath('E:/python/work/fafnir_data/Pat_26.pkl'),
              WindowsPath('E:/python/work/fafnir_data/Pat_35.pkl'),
              WindowsPath('E:/python/work/fafnir_data/Pat_4.pkl')],
             3: [WindowsPath('E:/python/work/fafnir_data/101.pkl'),
              WindowsPath('E:/python/work/fafnir_data/33.pkl'),
              WindowsPath('E:/python/work/fafnir_data/45.pkl'),
              WindowsPath('E:/python/work/fafnir_data/

In [92]:
dictionary = {0:{5:{7:{8:{3:{0}}}}}}

np.array(dictionary)

array({0: {5: {7: {8: {3: {0}}}}}}, dtype=object)

In [110]:
np.ix_(0, 10)

ValueError: Cross index must be 1 dimensional

In [187]:
from collections import defaultdict
from copy import deepcopy


class DynamicArray:
    def __init__(self, dimensionality=5):
        self.dimensionality = dimensionality
        
        self.dimensions = {}
                
        for dimension in range(dimensionality):
            self.dimensions[dimension] = 1
            
        # dimensions 0 and 1 are constant
        self.dimensions[0] = 16
        self.dimensions[1] = 8
        
        self.data = np.zeros(list(self.dimensions.values()), dtype=float)
        
    def add_data(self, data, position):
        old_dimensions = deepcopy(self.dimensions)
        
        # first check if our matrix is big enough
        for index, min_dimension_size in enumerate(position):
            # if the previous size is smaller than the new size, increase the dimensions size
            # this ignores the first two dimensions
            if min_dimension_size+1 > self.dimensions[index+2]:
                self.dimensions[index+2] = min_dimension_size +1
        
        # check if we have to change the data size
        if old_dimensions != self.dimensions:
            new_data = np.zeros(list(self.dimensions.values()), dtype=float)
            
            ind=tuple([slice(0,old_len) for old_len in old_dimensions.values()])
            
            new_data[ind] = self.data
            self.data = new_data

        self.data[:, :, position[0], position[1], position[2]] = data.reshape(16,8)

In [190]:
pos_1 = (1, 0, 3)
dat_1 = np.ones((size_a*size_b,), dtype=float) * 1.1

pos_2 = (0, 0, 1)
dat_2 = np.ones((size_a*size_b,), dtype=float) * 2.2

In [212]:
dd = DynamicDictionary(5)

time_a = time()
dd.add_data(dat_1, pos_1)

time_b = time()
for i in range(1000):
    dd.add_data(dat_2, pos_2)
time_c = time()

print(time_b - time_a, time_c - time_b)

0.0 0.01199960708618164


In [197]:
dd.data.shape

(16, 8, 2, 1, 4)

In [213]:
import numpy as np
from time import time

def add_data(dat, pos):
    dat = np.reshape(dat, (size_a, size_b))         # reshape input data to correct shape
    arr_size = np.array(arr.shape)[2:]              # get current target array size, skip first two dims since they do not change
    pos_size = np.array(pos) + 1                    # new size is (insert_position + 1)
    new_size = np.vstack((arr_size, pos_size))
    new_size = np.amax(new_size, axis=0)            # new size is max of current size and new size determined by insert position
    arr.resize(size_a, size_b, new_size[0], new_size[1], new_size[2])
    arr[:, :, pos[0], pos[1], pos[2]] = dat         # add data to resized target array

size_a = 16                                       # first two dimensions of target array do not change
size_b = 8
arr = np.ndarray(shape=(1,1,1,1,1), dtype=float)  # create empty array with correct number of dimensions

time_a = time()

add_data(dat_1, pos_1)
time_b = time()

for i in range(1000):
    add_data(dat_2, pos_2)
time_c = time()

print(time_b - time_a, time_c - time_b)

print(f"array shape: {arr.shape}")
print(arr)


0.0 0.02100086212158203
array shape: (16, 8, 2, 1, 4)
[[[[[0.  2.2 0.  0. ]]

   [[0.  0.  0.  1.1]]]


  [[[0.  2.2 0.  0. ]]

   [[0.  0.  0.  1.1]]]


  [[[0.  2.2 0.  0. ]]

   [[0.  0.  0.  1.1]]]


  ...


  [[[0.  2.2 0.  0. ]]

   [[0.  0.  0.  1.1]]]


  [[[0.  2.2 0.  0. ]]

   [[0.  0.  0.  1.1]]]


  [[[0.  2.2 0.  0. ]]

   [[0.  0.  0.  1.1]]]]



 [[[[0.  2.2 0.  0. ]]

   [[0.  0.  0.  1.1]]]


  [[[0.  2.2 0.  0. ]]

   [[0.  0.  0.  1.1]]]


  [[[0.  2.2 0.  0. ]]

   [[0.  0.  0.  1.1]]]


  ...


  [[[0.  2.2 0.  0. ]]

   [[0.  0.  0.  1.1]]]


  [[[0.  2.2 0.  0. ]]

   [[0.  0.  0.  1.1]]]


  [[[0.  2.2 0.  0. ]]

   [[0.  0.  0.  1.1]]]]



 [[[[0.  2.2 0.  0. ]]

   [[0.  0.  0.  1.1]]]


  [[[0.  2.2 0.  0. ]]

   [[0.  0.  0.  1.1]]]


  [[[0.  2.2 0.  0. ]]

   [[0.  0.  0.  1.1]]]


  ...


  [[[0.  2.2 0.  0. ]]

   [[0.  0.  0.  1.1]]]


  [[[0.  2.2 0.  0. ]]

   [[0.  0.  0.  1.1]]]


  [[[0.  2.2 0.  0. ]]

   [[0.  0.  0.  1.1]]]]



 ...



 [[[[0. 

In [210]:
arr[:,:,0,0,0]

array([[1.1, 0. , 0. , 0. , 0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ]])

In [211]:
dd.data[:,:,0,0,0]

array([[0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.]])