In [1]:
### TODO: NB! This notebook defines no methods; every callable should be referred to as a function.

In [2]:
import pandas as pd
from tqdm import tqdm
import ast
import itertools
import operator

In [3]:
### Functions for getting data from files

## Get raw data from file
def get_data_from_file(file_name):
    """Read a text file and return its content split into lines.

    The file is opened with a context manager so the handle is released even
    when reading raises (the previous open/read/close sequence leaked the
    handle in that case).

    Args:
        file_name: Path of the file to read.

    Returns:
        list of str: The content split on the newline character. A file that
        ends with a newline therefore yields a trailing empty string.
    """
    with open(file_name) as file:
        content = file.read()

    return content.split('\n')

## Get trajectories (default file name is trajectories.csv)
def get_trajectories(file_name = 'trajectories.csv'):
    """Load a tab-separated file into a dict keyed by each row's first column.

    The first line is treated as a title row and skipped, as are rows that
    do not contain at least two tab-separated columns.

    Args:
        file_name: Path of the tab-separated file to read.

    Returns:
        dict: First column of each kept row -> list of the remaining columns
        (as strings). If a key repeats, the last row wins.
    """
    rows = get_data_from_file(file_name)
    by_key = {}
    for raw_row in rows[1:]:  # rows[0] is the title row
        fields = raw_row.split('\t')
        if len(fields) > 1:  # skip empty / single-column rows
            by_key[fields[0]] = fields[1:]

    return by_key
        
## Get sections (default file name is diagnoses.csv)
def get_sections(file_name = 'diagnoses.csv'):
    """Load a tab-separated file and parse one literal column per row.

    The first line is treated as a title row and skipped, as are rows that
    do not contain at least two tab-separated columns.

    Args:
        file_name: Path of the tab-separated file to read.

    Returns:
        dict: First column of each kept row -> the Python literal parsed
        (with `ast.literal_eval`) from that row's second-to-last column.

    Raises:
        ValueError, SyntaxError: Propagated from `ast.literal_eval` when the
            second-to-last column is not a valid Python literal.
    """
    parsed = {}
    for position, raw_row in enumerate(get_data_from_file(file_name)):
        if position == 0:  # title row
            continue
        fields = raw_row.split('\t')
        if len(fields) <= 1:  # empty / single-column row
            continue
        parsed[fields[0]] = ast.literal_eval(fields[-2])

    return parsed

In [4]:
### Create containers for data

# Load both data sets once, using the default file names; later cells read
# these module-level names.
# `sections` maps each row's first column to the literal parsed from its
# second-to-last column; `trajectories` maps it to the row's remaining columns.
sections = get_sections()
trajectories = get_trajectories()

In [5]:
### Helper functions for testing the comparison functions

## Return the n best-matching trajectory pairs
## (if n is larger than the number of possible pairs, then all pairs are returned)
def helper_get_n_similar_trajectories(data, compare_function, n=100, name='funtion', **kwargs):
    """Score every unordered pair of entries in `data` and keep the top `n`.

    Args:
        data: Mapping from key to the value handed to `compare_function`.
        compare_function: Callable invoked as
            `compare_function(data[a], data[b], **kwargs)`. Its return values
            are used as the sort key, so they must be mutually comparable.
        n: Maximum number of pairs to keep. When fewer pairs exist, all of
            them are returned.
        name: Label printed when the run starts. The default's spelling is
            kept unchanged so existing output stays the same.
        **kwargs: Extra keyword arguments forwarded to `compare_function`.

    Returns:
        dict: `(key_a, key_b) -> score` for the `n` highest scores, inserted
        in descending score order.
    """
    # Build the full list up front so the progress bar has a known length.
    pairs = list(itertools.combinations(data, 2))
    print('Started working on: %s' % (name))

    results = {}
    for pair in tqdm(pairs):
        results[pair] = compare_function(data[pair[0]], data[pair[1]], **kwargs)

    ranked = sorted(results.items(), key=operator.itemgetter(1), reverse=True)
    return dict(ranked[:n])

In [6]:
### Function for getting the number of matches between two trajectories

def get_overall_similartity(t1, t2):
    """Count the elements of `t1` that also occur in `t2`.

    Every element of `t1` is tested on its own, so a value repeated in `t1`
    is counted once per repetition. The count is not symmetric in its
    arguments.

    Args:
        t1: Iterable whose elements are looked up.
        t2: Container supporting the `in` operator.

    Returns:
        int: Number of elements of `t1` found in `t2`.
    """
    return sum(1 for item in t1 if item in t2)

In [7]:
### Functions for comparing trajectories

## Import all algo implementations
from local_alignment import get_local_alignment
from local_stretch_alignment import get_local_stretch_alignment

In [8]:
### Test functions

n = 100 # Number of best-scoring pairs kept from each comparison run

def local_alignment_template(name, match, mismatch, gap_penalty):
    """Rank the pairs in `sections` with `get_local_alignment`.

    Args:
        name: Label printed when the run starts.
        match: Forwarded to `get_local_alignment` as `match`.
        mismatch: Forwarded to `get_local_alignment` as `mismatch`.
        gap_penalty: Forwarded to `get_local_alignment` as `gap_penalty`.

    Returns:
        dict: The result of `helper_get_n_similar_trajectories` for the
        module-level `sections` and `n`.
    """
    scoring = {'match': match, 'mismatch': mismatch, 'gap_penalty': gap_penalty}
    return helper_get_n_similar_trajectories(sections, get_local_alignment, n, name, **scoring)

# Baseline: the n pairs of `trajectories` that share the most elements.
# NOTE(review): the baseline is built from `trajectories` while the runs below
# use `sections`; the later overlap count compares their pair keys, so both
# files presumably share the same first-column IDs — confirm.
control_set = helper_get_n_similar_trajectories(trajectories, get_overall_similartity, n, 'control_set')
# Local-alignment runs; arguments are (name, match, mismatch, gap_penalty).
# The result names are looked up by string in a later cell, so keep them in sync.
# NOTE(review): the first two runs use identical parameters (1, 0, 0), so the
# second repeats the first — confirm whether different values were intended.
get_local_alignment_test = local_alignment_template('get_local_alignment_test', 1, 0, 0)
get_local_alignment_test_1 = local_alignment_template('get_local_alignment_test_1', 1, 0, 0)
get_local_alignment_test_2 = local_alignment_template('get_local_alignment_test_2', 1, -1, 0)
get_local_alignment_test_3 = local_alignment_template('get_local_alignment_test_3', 4, -2, -1)
get_local_alignment_test_4 = local_alignment_template('get_local_alignment_test_4', 4, -1, -2)
get_local_alignment_test_5 = local_alignment_template('get_local_alignment_test_5', 1, -1, -1)

# Stretch-alignment run is currently disabled.
#get_local_stretch_alignment_test = helper_get_n_similar_trajectories(sections, get_local_stretch_alignment, n, 'get_local_stretch_alignment_test')

Started working on: control_set


100%|██████████| 5565/5565 [00:00<00:00, 1688462.22it/s]


Started working on: get_local_alignment_test


100%|██████████| 44850/44850 [03:38<00:00, 205.53it/s]


Started working on: get_local_alignment_test_1


100%|██████████| 44850/44850 [03:39<00:00, 204.71it/s]


Started working on: get_local_alignment_test_2


100%|██████████| 44850/44850 [23:58<00:00, 31.18it/s]   


Started working on: get_local_alignment_test_3


100%|██████████| 44850/44850 [05:58<00:00, 124.95it/s] 


Started working on: get_local_alignment_test_4


100%|██████████| 44850/44850 [03:44<00:00, 200.21it/s]


Started working on: get_local_alignment_test_5


100%|██████████| 44850/44850 [03:42<00:00, 201.48it/s]


In [9]:
### Count how many of each test's top-n pairs also appear among the control set's top-n pairs

def compare_pairs(pair1, pair2):
    """Tell whether two 2-element pairs hold the same items in either order.

    Args:
        pair1: Indexable with items at positions 0 and 1.
        pair2: Indexable with items at positions 0 and 1.

    Returns:
        The truth value of "same order match or swapped order match".
    """
    same_order = pair1[0] == pair2[0] and pair1[1] == pair2[1]
    if same_order:
        return True
    # Otherwise the pairs are equal only if they match crosswise.
    return pair1[0] == pair2[1] and pair1[1] == pair2[0]

# Overlap counter per test run: how many of its pairs also occur in control_set.
results_for = {
    'get_local_alignment_test': 0,
    'get_local_alignment_test_1': 0,
    'get_local_alignment_test_2': 0,
    'get_local_alignment_test_3': 0,
    'get_local_alignment_test_4': 0,
    'get_local_alignment_test_5': 0,
    #'get_local_stretch_alignment_test': 0
}

for fun in results_for:
    # Each key names a module-level result dict; fetch its pair keys once.
    candidate_pairs = list(globals()[fun].keys())
    results_for[fun] += sum(
        1
        for res in control_set
        for elem in candidate_pairs
        if compare_pairs(res, elem)
    )

results_for


{'get_local_alignment_test': 8,
 'get_local_alignment_test_1': 8,
 'get_local_alignment_test_2': 8,
 'get_local_alignment_test_3': 2,
 'get_local_alignment_test_4': 8,
 'get_local_alignment_test_5': 7}