## Proof of Concept
- A method for rolling up multiple candidate "solutions" into a single "best" solution by taking, at each position, the value that appears most often.

In [1]:
%config IPCompleter.greedy=True

In [2]:
import warnings

# NOTE(review): with no category or module argument this filter suppresses
# every warning raised after this cell runs, including deprecation warnings
# from numpy. Consider narrowing it to the specific warning being silenced.
warnings.filterwarnings('ignore')

In [3]:
import numpy as np

print('numpy:', np.__version__)

numpy: 1.16.3


In [4]:
# Sample input: three candidate rows of two-character strings. The middle row
# has five entries while the other two have four, so the rows are ragged.
rows = [
    ['01', '00', '01', '11'],
    ['00', '00', '01', '01', '11'],
    ['01', '00', '01', '11'],
]

print(rows)

[['01', '00', '01', '11'], ['00', '00', '01', '01', '11'], ['01', '00', '01', '11']]


In [5]:
def normalize(rows, gene_length):
    """Left-pad every row, in place, so all rows are as long as the longest one.

    Shorter rows receive placeholder entries (``'_' * gene_length``) at the
    front until their length matches the longest row. Rows that are already
    long enough are left untouched.

    Parameters
    ----------
    rows : list of list
        Rows to equalise. The inner lists are modified in place.
    gene_length : int
        Number of ``'_'`` characters that make up one placeholder entry.

    Returns
    -------
    list of list
        The same ``rows`` object that was passed in. It is returned on every
        path (padded or not), so callers may use either the return value or
        the mutated argument.
    """
    # Nothing to pad; also avoids calling max() on an empty sequence.
    if not rows:
        return rows

    target_length = max(len(row) for row in rows)
    placeholder = '_' * gene_length

    for row in rows:
        missing = target_length - len(row)
        if missing > 0:
            # Slice assignment prepends all placeholders at once while still
            # mutating the caller's list object.
            row[:0] = [placeholder] * missing

    return rows

# normalize() pads the shorter rows in place, so `rows` itself is printed
# afterwards rather than the call's return value. The 2 matches the
# two-character entries used in `rows`.
normalize(rows, 2)

print(rows)

[['__', '01', '00', '01', '11'], ['00', '00', '01', '01', '11'], ['__', '01', '00', '01', '11']]


In [6]:
def generate_best_estimate(rows):
    """Collapse several equal-length rows into one row by per-column vote.

    For each column position, the value that occurs most often across all
    rows is chosen. If several values share the highest count, the one that
    appears first when scanning the rows from top to bottom wins. No value is
    treated specially, so a padding placeholder is chosen whenever it is the
    most common entry in its column.

    Parameters
    ----------
    rows : sequence of sequences
        Candidate rows. All rows must have the same length and their entries
        must be hashable.

    Returns
    -------
    list
        One chosen value per column. Empty when ``rows`` is empty or the rows
        have no columns.

    Raises
    ------
    ValueError
        If the rows do not all have the same length.
    """
    if len(rows) == 0:
        return []

    n_cols = len(rows[0])
    if any(len(row) != n_cols for row in rows):
        raise ValueError('all rows must have the same length; pad them first')

    best_estimate = []
    for column in zip(*rows):
        votes = {}
        for value in column:
            votes[value] = votes.get(value, 0) + 1

        # max() returns the first maximal key and dicts preserve insertion
        # order, so ties are resolved in favour of the value seen first.
        best_estimate.append(max(votes, key=votes.get))

    return best_estimate
        
# `rows` was padded to equal length by the normalize() call in the previous
# cell, so every row contributes one vote per column here.
print(generate_best_estimate(rows))

['__', '01', '00', '01', '11']
