In [None]:
import random
import numpy as np

In [76]:
# An example with a set of "fat" columns accompanied by three different sets of "lean" columns.
# The latter are distinguished by their number-of-degrees-of-freedom
# (i.e. simply the number of distinct categories encoded as integers that can be found in those columns).

total_col_count = 1245
total_row_count = 100

# number of fat columns
fat_signif_count = 222
fat_nonsignif_count = 542
fat_contrast_count = 200
fat_dof = 5
fat_needs_dicretization = True

# number of lean columns A
lean_A_signif_count = 13
lean_A_nonsignif_count = 42
lean_A_contrast_count = 124
lean_A_dof = 6 
lean_A_needs_dicretization = False

# number of lean columns B
lean_B_signif_count = 12
lean_B_nonsignif_count = 34
lean_B_contrast_count = 0
lean_B_dof = 7
lean_B_needs_dicretization = False

# number of lean columns C
lean_C_signif_count = 11
lean_C_nonsignif_count = 0 
lean_C_contrast_count = 45
lean_C_dof = 8
lean_C_needs_dicretization = False

In [77]:
assert fat_signif_count + fat_nonsignif_count + fat_contrast_count +\
lean_A_signif_count + lean_A_nonsignif_count + lean_A_contrast_count +\
lean_B_signif_count + lean_B_nonsignif_count + lean_B_contrast_count +\
lean_C_signif_count + lean_C_nonsignif_count + lean_C_contrast_count \
== total_col_count

In [89]:
# The assumed order of the regions in the data-table is the following:
# signif-fat, signif-lean, nonsignif-fat, nonsignif-lean, contrast-fat, contrast-lean.
# Hence, it is useful to define,

region_column_counts = \
[fat_signif_count,
 lean_A_signif_count,
 lean_B_signif_count,
 lean_C_signif_count,
 fat_nonsignif_count,
 lean_A_nonsignif_count,
 lean_B_nonsignif_count,
 lean_C_nonsignif_count,
 fat_contrast_count,
 lean_A_contrast_count,
 lean_B_contrast_count,
 lean_C_contrast_count
]

region_discretizations = \
[fat_needs_dicretization,
 lean_A_needs_dicretization,
 lean_B_needs_dicretization,
 lean_C_needs_dicretization
] * 3

region_dofs = \
[fat_dof,
 lean_A_dof,
 lean_B_dof,
 lean_C_dof    
] * 3

region_bounds = np.cumsum(region_column_counts).tolist()
region_bounds

[222, 235, 247, 258, 800, 842, 876, 876, 1076, 1200, 1200, 1245]

In [92]:
# So the subsequent regions in the hypothetical data can be characterized as follows:
# by lower (left) and upper (right) column indeces which form
# the boundary (left-closed, right-open) of each region, number
# of dof in that region, and whether or not columns in that region
# need to be discretized. If the answer to the latter question is
# positive the region will also need to be further split into smaller
# subregions that correspond to the notion of tiles in downstream computation.

for region_bound_1, region_bound_2, region_dof, region_dicretization\
in zip([0] + region_bounds[:-1], region_bounds, region_dofs, region_discretizations):
    print(region_bound_1, region_bound_2, region_dof, region_dicretization)

0 222 5 True
222 235 6 False
235 247 7 False
247 258 8 False
258 800 5 True
800 842 6 False
842 876 7 False
876 876 8 False
876 1076 5 True
1076 1200 6 False
1200 1200 7 False
1200 1245 8 False


In [None]:
fat_tile_width = 100

tile_bounds = [0]
tile_group_bounds = [0]
tile_discretizations = []
tile_dofs = []
in_lean_tile_group = False

for region_bound_1, region_bound_2, region_dof, region_dicretization\
in zip([0] + region_bounds[:-1], region_bounds, region_dofs, region_discretizations):
    
    if region_dicretization:
        if in_lean_tile_group:
            
        number_of_tiles = (region_bound_2 - region_bound_1) // fat_tile_width
        for _ in range(number_of_tiles):
            tile__bounds.append(tile_bounds[-1] + fat_tile_width)
            tile_discretizations.append(True)
            tile_dofs.append(region_dof)
        tile_group_bounds.append(tile_group_bounds[-1] + number_of_tiles - 1)
        width_of_leftover_margin = (tile_bound_2 - tile_bound_1) % fat_tile_width
        if width_of_leftover_margin:
            tile__bounds.append(tile_bounds[-1] + width_of_leftover_margin)
            tile_discretizations.append(True)
            tile_dofs.append(region_dof)
    else:
        if in_fat_tile_group:
            
        

In [68]:
for tilediscretization_flags

[True,
 False,
 False,
 False,
 True,
 False,
 False,
 False,
 True,
 False,
 False,
 False]

In [59]:
arr = np.zeros((total_row_count, total_col_count))

In [None]:
for col_indx in range

In [41]:
total_col_count = 1245
total_row_count = 10

tile_bounds = \
[0,
 100,
 200,
 222,
 235,
 247,
 258,
 358,
 458,
 558,
 658,
 758,
 800,
 842,
 876,
 876,
 976,
 1076,
 1200,
 1200,
 1245
]

needs_discretization = \
[True,
 True,
 True,
 False,
 False,
 False,
 True,
 True,
 True,
 True,
 True,
 True,
 False,
 False,
 False,
 True,
 True,
 True,
 False,
 False,
 False
]

region_bounds = \
[0, 2, 6, 11, 15, 17, 20]

dofs =\
[5,
 5,
 5,
 6,
 7,
 8,
 5,
 5,
 5,
 5,
 5,
 5,
 6,
 7,
 8,
 5,
 5,
 6,
 7,
 8]

In [42]:
len(tile_bounds)

21

In [43]:
len(dofs)

20

In [16]:
arr = np.zeros((total_row_count, total_col_count))

In [31]:
for tile_index, (tile_bound_a, tile_bound_b) in enumerate(zip(tile_bounds[:-1], tile_bounds[1:])):
    dof = dofs[tile_index]
    #print(tile_bound_a, tile_bound_b, dof)
    for column_index in range(tile_bound_a, tile_bound_b):
        for row_index in range(total_row_count):
            arr[row_index, column_index] = random.randint(1, dof)

In [32]:
arr

array([[1., 1., 4., ..., 8., 6., 5.],
       [2., 2., 4., ..., 6., 3., 8.],
       [1., 2., 5., ..., 3., 7., 3.],
       ...,
       [1., 5., 2., ..., 3., 4., 5.],
       [4., 3., 1., ..., 4., 5., 3.],
       [5., 4., 3., ..., 2., 3., 6.]])

In [23]:
for region_index, (region_bound_a, region_bound_b) in enumerate(zip(region_bounds[:-1], region_bounds[1:])):
    column_a = tile_bounds[region_bound_a]
    column_b = tile_bounds[region_bound_b]
    dof = dofs[region_index]
    print(column_a, column_b, dof)
    for column_index in range(column_a, column_b):
        for row_index in range(total_row_count):
            arr[row_index, column_index] = random.randint(1, dof)

0 200 5
200 258 5
258 758 5
758 876 5
876 1076 6
1076 1245 7


In [12]:
random.randint(1, 5)

5