In [1]:
import numpy as np
import h5py
import random
# Seed every random generator this notebook draws from so that
# Restart Kernel -> Run All reproduces the same samples and shuffles.
random.seed(42)      # used by random.sample in sampleDataset
np.random.seed(42)   # used by np.random.shuffle in createRandomIndices

In [2]:
def removeBrokenCells(cell_noiseSigma, neighbor):
    """Build the neighbor pairs that involve no broken cell.

    The broken cells are looked up with ``getBrokenCells(cell_noiseSigma)``
    and handed, together with ``neighbor``, to ``getNeighborPairs``.

    Returns
    -------
    Whatever ``getNeighborPairs`` returns for those two inputs.
    """
    return getNeighborPairs(getBrokenCells(cell_noiseSigma), neighbor)

In [3]:
def getBrokenCells(cell_noiseSigma):
    """Return the indices at which ``cell_noiseSigma[0]`` is exactly zero.

    Parameters
    ----------
    cell_noiseSigma : array-like
        Only its first entry, ``cell_noiseSigma[0]``, is inspected.
        (Presumably one noise value per cell -- confirm against the caller.)

    Returns
    -------
    list of int
        Each zero position listed once, in ascending order.  If
        ``cell_noiseSigma[0]`` has more than one dimension, the coordinates
        of every zero entry are flattened into the same list.
    """
    zero_mask = np.asarray(cell_noiseSigma[0]) == 0
    # np.argwhere yields one row of coordinates per match.  The previous
    # nested loop re-iterated the whole coordinate array for every row, so N
    # broken cells produced N*N one-element arrays instead of N indices.
    return np.argwhere(zero_mask).ravel().tolist()

In [4]:
def getNeighborPairs(broken_cells, neighbor):
    """List every ``(cell, neighbor_cell)`` pair in which neither cell is broken.

    Parameters
    ----------
    broken_cells : iterable
        Indices of the cells to exclude.  Plain integers as well as
        one-element index arrays (the raw rows of ``np.argwhere``) are
        accepted; duplicates are harmless.
    neighbor : sequence
        ``neighbor[i]`` is iterated to obtain the cells paired with cell ``i``.

    Returns
    -------
    list of tuple
        ``(i, cell)`` for every ``i`` and every ``cell`` in ``neighbor[i]``
        where neither is in ``broken_cells``, in iteration order.
    """
    # Flatten to plain Python scalars and use a set: membership is then O(1)
    # instead of a linear scan per lookup, and no longer relies on
    # truth-testing one-element arrays.
    broken = set(np.asarray(list(broken_cells)).ravel().tolist())
    neighbor_pairs = []
    for i in range(len(neighbor)):
        if i in broken:
            continue
        for cell in neighbor[i]:
            if cell in broken:
                continue
            neighbor_pairs.append((i, cell))
    return neighbor_pairs

In [5]:
def writeH5File(fileName, datasetName, data):
    """Store ``data`` as a single dataset in an HDF5 file.

    The file at ``fileName`` is opened in ``"w"`` mode and a dataset named
    ``datasetName`` is created from ``data``.  The file is closed when the
    ``with`` block exits.  Nothing is returned.
    """
    with h5py.File(fileName, "w") as h5_out:
        h5_out.create_dataset(datasetName, data=data)

In [6]:
def readH5File(fileName, datasetName):
    """Read one dataset from an HDF5 file into memory.

    Parameters
    ----------
    fileName : path of the HDF5 file, opened read-only.
    datasetName : name of the dataset to load.

    Returns
    -------
    numpy.ndarray
        The full contents of the dataset.

    Raises
    ------
    TypeError
        If ``datasetName`` is not present: ``get`` then yields ``None``,
        which cannot be sliced.
    """
    # The context manager closes the file even when the lookup or the slice
    # raises; the previous explicit close() was skipped in that case.
    with h5py.File(fileName, "r") as h5_in:
        data = h5_in.get(datasetName)[:]
    return np.array(data)

In [7]:
def sampleDataTraining(true, bkg_lone, bkg_cluster_lone, bkg_cluster_cluster):
    """Sample the four datasets using the training sample sizes.

    ``getTrainingSampleSizes`` supplies one size for ``true`` and one size
    shared by all three background datasets; ``sampleData`` then draws the
    samples.

    Returns
    -------
    The result of ``sampleData`` for the four datasets and those sizes.
    """
    n_true, n_bkg = getTrainingSampleSizes(true, bkg_lone, bkg_cluster_lone, bkg_cluster_cluster)
    datasets = (true, bkg_lone, bkg_cluster_lone, bkg_cluster_cluster)
    sizes = (n_true, n_bkg, n_bkg, n_bkg)
    return sampleData(*datasets, *sizes)
    

In [8]:
def sampleDataTesting(true, bkg_lone, bkg_cluster_lone, bkg_cluster_cluster):
    """Sample the four datasets, each with its own testing sample size.

    The size for every dataset is obtained independently from
    ``getTestingSampleSize``; ``sampleData`` then draws the samples.

    Returns
    -------
    The result of ``sampleData`` for the four datasets and their sizes.
    """
    datasets = (true, bkg_lone, bkg_cluster_lone, bkg_cluster_cluster)
    sizes = [getTestingSampleSize(dataset) for dataset in datasets]
    return sampleData(*datasets, *sizes)

In [9]:
def getTestingSampleSize(data):
    """Return the shortest row length of ``data`` rounded down to a multiple of 100.

    The shortest row length comes from ``getMinimum(data)``.
    """
    smallest = getMinimum(data)
    return 100 * (smallest // 100)

In [10]:
def getMinimum(data):
    """Return the length of the shortest row in ``data``.

    Raises ``ValueError`` when ``data`` has no rows.
    """
    return min(map(len, data))

In [11]:
def getTrainingSampleSizes(true, bkg_lone, bkg_cluster_lone, bkg_cluster_cluster):
    """Choose sample sizes so that the true sample is three times each background sample.

    The background size starts as the shortest row length across the three
    background datasets, rounded down to a multiple of 100, and the true size
    as three times that.  If the true dataset's shortest row cannot supply
    that many entries, the true size is instead derived from that row
    (rounded down to a multiple of 100, then to a multiple of 3) and the
    background size becomes one third of it.

    Returns
    -------
    (true_sample_size, bkg_sample_size) : tuple of int
    """
    true_min = getMinimum(true)
    bkg_min = min(
        getMinimum(bkg) for bkg in (bkg_lone, bkg_cluster_lone, bkg_cluster_cluster)
    )
    bkg_sample_size = bkg_min - (bkg_min % 100)
    true_sample_size = bkg_sample_size * 3
    if true_sample_size > true_min:
        true_sample_size = true_min - (true_min % 100)
        true_sample_size -= true_sample_size % 3
        # Floor division keeps the size an int.  True division ("/") produced
        # a float such as 66.0, which random.sample rejects as a sample size.
        bkg_sample_size = true_sample_size // 3
    return true_sample_size, bkg_sample_size

In [12]:
def sampleData(true, bkg_lone, bkg_cluster_lone, bkg_cluster_cluster, true_sample_size, bkg_lone_sample_size, bkg_cluster_lone_sample_size, bkg_cluster_cluster_sample_size):
    """Sample each of the four datasets with its own sample size.

    Every dataset is passed to ``sampleDataset`` with the matching size, in
    the order true, bkg_lone, bkg_cluster_lone, bkg_cluster_cluster.

    Returns
    -------
    tuple of four numpy.ndarray
        The samples, in that same order.
    """
    requests = (
        (true, true_sample_size),
        (bkg_lone, bkg_lone_sample_size),
        (bkg_cluster_lone, bkg_cluster_lone_sample_size),
        (bkg_cluster_cluster, bkg_cluster_cluster_sample_size),
    )
    # Draw all four samples before converting any of them to an array.
    drawn = [sampleDataset(dataset, size) for dataset, size in requests]
    return tuple(np.array(sample) for sample in drawn)

In [13]:
def sampleDataset(data, data_sample_size):
    """Draw ``data_sample_size`` entries without replacement from every row of ``data``.

    Parameters
    ----------
    data : iterable of rows
        Each row may be a list, tuple or numpy array.
    data_sample_size : int
        Number of entries drawn from each row.

    Returns
    -------
    list of list
        One sampled list per row, drawn with the ``random`` module's global
        generator.

    Raises
    ------
    ValueError
        If a row holds fewer than ``data_sample_size`` entries.
    """
    # random.sample only accepts Sequence populations and raises TypeError
    # for numpy arrays, so each row is copied to a list first.  For rows that
    # already are lists the drawn sample is unchanged.
    return [random.sample(list(row), data_sample_size) for row in data]

In [14]:
def createRandomIndices(total_indices_shape):
    """Create one independently shuffled index range per row.

    Parameters
    ----------
    total_indices_shape : sequence
        ``total_indices_shape[0]`` is the number of rows to create and
        ``total_indices_shape[1]`` is the length of each shuffled range.

    Returns
    -------
    numpy.ndarray
        Each row is ``np.arange(total_indices_shape[1])`` shuffled in place
        with numpy's global random generator.
    """
    def _shuffled_range(length):
        order = np.arange(length)
        np.random.shuffle(order)
        return order

    return np.array([
        _shuffled_range(total_indices_shape[1])
        for _ in range(total_indices_shape[0])
    ])

In [15]:
def randomize2DArray(rand_indices, unrandomized_array):
    """Reorder every row of ``unrandomized_array`` by the matching row of ``rand_indices``.

    Row ``i`` of the result is ``unrandomized_array[i][rand_indices[i]]``.

    Returns
    -------
    numpy.ndarray
        The reordered rows stacked into one array.
    """
    row_count = unrandomized_array.shape[0]
    return np.array([
        unrandomized_array[row][rand_indices[row]] for row in range(row_count)
    ])

In [16]:
def randomizeEdges(rand_indices, unrandomized_edges):
    """Apply every row of ``rand_indices`` as an ordering to the same edge array.

    Entry ``i`` of the result is ``unrandomized_edges[rand_indices[i]]``.

    Returns
    -------
    numpy.ndarray
        One reordered copy of ``unrandomized_edges`` per row of
        ``rand_indices``, stacked into one array.
    """
    n_orderings = rand_indices.shape[0]
    reordered = [unrandomized_edges[rand_indices[k]] for k in range(n_orderings)]
    return np.array(reordered)

In [17]:
def createEdgeArrays(inputData):
    """Build source/destination arrays for every entry of ``inputData``.

    Each ``inputData[i]`` is converted by ``createBDAndNoBDArrays`` into four
    lists; the lists are collected per kind across all entries.

    Returns
    -------
    tuple of four numpy.ndarray
        ``(source_BD, dest_BD, source_noBD, dest_noBD)``, each holding one
        row per entry of ``inputData``.
    """
    per_entry = [
        createBDAndNoBDArrays(inputData[idx]) for idx in range(inputData.shape[0])
    ]
    source_BD = [src_bd for src_bd, _, _, _ in per_entry]
    dest_BD = [dst_bd for _, dst_bd, _, _ in per_entry]
    source_noBD = [src_nobd for _, _, src_nobd, _ in per_entry]
    dest_noBD = [dst_nobd for _, _, _, dst_nobd in per_entry]
    return np.array(source_BD), np.array(dest_BD), np.array(source_noBD), np.array(dest_noBD)

In [18]:
def createBDAndNoBDArrays(inputData):
    """Split a collection of pairs into source/destination lists.

    For the "BD" lists every pair contributes two entries, one per direction
    (presumably "BD" means bidirectional -- confirm).  For the "noBD" lists
    every pair contributes a single entry in its given direction.

    Returns
    -------
    (source_BD, dest_BD, source_noBD, dest_noBD) : tuple of four lists
        ``source_BD``/``dest_BD`` hold two entries per pair,
        ``source_noBD``/``dest_noBD`` hold one entry per pair.
    """
    source_BD, dest_BD = [], []
    source_noBD, dest_noBD = [], []

    for pair in inputData:
        head, tail = pair[0], pair[1]

        # Both directions: head -> tail followed by tail -> head.
        source_BD.extend((head, tail))
        dest_BD.extend((tail, head))

        # Given direction only.
        source_noBD.append(head)
        dest_noBD.append(tail)

    return source_BD, dest_BD, source_noBD, dest_noBD