In [1]:
import platform

# Tell the reader whether the running interpreter is the exact version
# (3.9.6) this notebook was written against.
if platform.python_version() == '3.9.6':
    print('You are using the recommended version for this script!')
else:
    print('You are not using the recommended version for this script!')

You are using the recommended version for this script!


In [2]:
### TODO: NB! There are no methods in this notebook; everything should be referred to as a function.

In [3]:
import pandas as pd
from tqdm import tqdm
import ast
import itertools
import operator

In [4]:
### Functions for getting data from files

## Get raw data from file
def get_data_from_file(file_name):
    """Read a text file and return its content split into lines.

    Args:
        file_name: Path of the file to read (opened in text mode with the
            platform default encoding).

    Returns:
        A list of strings obtained by splitting the whole content on '\\n'.
        A file ending with a newline therefore yields a trailing empty string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # The context manager closes the handle even when read() raises,
    # which the manual open()/close() pair did not guarantee.
    with open(file_name) as file:
        content = file.read()

    return content.split('\n')

## Get trajectories (default file name is trajectories.csv)
def get_trajectories(file_name = 'trajectories.csv'):
    """Load a tab-separated file into a dict keyed by its first column.

    The first line is treated as a header and skipped. Lines that do not
    split into at least two tab-separated fields are ignored.

    Args:
        file_name: Path of the tab-separated file to read.

    Returns:
        A dict mapping the first field of each row to the list of the
        remaining fields (as strings). Later rows overwrite earlier rows
        that share the same first field.
    """
    parsed = {}
    # Slicing off element 0 drops the title row.
    for row in get_data_from_file(file_name)[1:]:
        fields = row.split('\t')
        if len(fields) > 1:
            parsed[fields[0]] = fields[1:]

    return parsed
        
## Get sections (default file name is diagnoses.csv)
def get_sections(file_name = 'diagnoses.csv'):
    """Load a tab-separated file and parse one column per row as a Python literal.

    The first line is treated as a header and skipped. Lines that do not
    split into at least two tab-separated fields are ignored.

    Args:
        file_name: Path of the tab-separated file to read.

    Returns:
        A dict mapping the first field of each row to the value obtained by
        ast.literal_eval on the second-to-last field of that row.

    Raises:
        ValueError, SyntaxError: If the second-to-last field is not a valid
            Python literal (propagated from ast.literal_eval).
    """
    parsed = {}
    # Slicing off element 0 drops the title row.
    for row in get_data_from_file(file_name)[1:]:
        fields = row.split('\t')
        if len(fields) < 2:
            continue
        parsed[fields[0]] = ast.literal_eval(fields[-2])

    return parsed

In [5]:
### Create containers for data

# sections = get_sections()
# trajectories = get_trajectories()

# Load from the '_1' files rather than the loaders' default file names.
# sections: first column -> literal-evaluated second-to-last column of each row.
sections = get_sections('diagnoses_1.csv')
# trajectories: first column -> list of the remaining columns of each row.
trajectories = get_trajectories('trajectories_1.csv')

In [6]:
### Helper functions for testing the comparison functions

## Return the n best-matching trajectory pairs
## (if n is bigger than the number of possible pairs, then all pairs will be returned)
def helper_get_n_similar_trajectories(data, compare_function, n=100, name='funtion', **kwargs):
    """Score every unordered pair of keys in `data` and keep the n highest.

    Args:
        data: Mapping whose values are handed to `compare_function`.
        compare_function: Callable invoked as
            compare_function(data[a], data[b], **kwargs); its return value
            is used as the sort key for the pair (a, b).
        n: Maximum number of pairs to return.
        name: Label printed before the work starts.
        **kwargs: Extra keyword arguments forwarded to `compare_function`.

    Returns:
        A dict mapping (key_a, key_b) tuples to their score, ordered from
        highest to lowest score and truncated to at most n entries.
    """
    # Materialised as a list so the progress bar knows the total up front.
    candidate_pairs = list(itertools.combinations(data, 2))
    print('Started working on: %s' % (name))

    scores = {}
    for first, second in tqdm(candidate_pairs):
        scores[(first, second)] = compare_function(data[first], data[second], **kwargs)

    ranked = sorted(scores.items(), key=operator.itemgetter(1), reverse=True)
    return dict(ranked[:n])

In [7]:
### Function for getting the number of matches between two trajectories

def get_overall_similartity(t1, t2):
    """Count how many elements of t1 also occur in t2.

    Every element of t1 is checked independently, so duplicates in t1 are
    counted once per occurrence.

    Args:
        t1: Iterable of elements to look up.
        t2: Container supporting the `in` operator.

    Returns:
        The number of elements of t1 found in t2 (0 when t1 is empty).
    """
    return sum(1 for item in t1 if item in t2)

In [8]:
### Functions for comparing trajectories

## Import all algo implementations
from local_alignment import get_local_alignment
from local_stretch_alignment import get_local_stretch_alignment
from model_alignment import get_model_alignment

In [9]:
# Maps a run name to the result stored for that run; filled in by the test cell below.
results = {}

In [10]:
### Test functions

n = 100 # Sample number: how many top-scoring pairs each run keeps

def alignment_template(fun, name, match=None, mismatch=None, gap_penalty=None):
    """Run one alignment function over `sections` and store its top-n pairs.

    The result is written to the module-level `results` dict under `name`;
    nothing is returned.

    Args:
        fun: Comparison function; called with two values from `sections`
            plus the match/mismatch/gap_penalty keyword arguments.
        name: Key under which the result is stored (also used as the
            progress label).
        match: Forwarded to `fun` as the `match` keyword argument.
        mismatch: Forwarded to `fun` as the `mismatch` keyword argument.
        gap_penalty: Forwarded to `fun` as the `gap_penalty` keyword argument.
    """
    scoring = {'match': match, 'mismatch': mismatch, 'gap_penalty': gap_penalty}
    results[name] = helper_get_n_similar_trajectories(sections, fun, n, name, **scoring)

# Baseline: the top-n trajectory pairs ranked by raw overlap count
# (get_overall_similartity) on the `trajectories` data.
control_set = helper_get_n_similar_trajectories(trajectories, get_overall_similartity, n, 'control_set')
# alignment_template(get_local_alignment, 'get_local_alignment_test_1_0_0', 1, 0, 0)
# alignment_template(get_local_alignment, 'get_local_alignment_test_1_-1_-1', 1, -1, -1)
# alignment_template(get_local_alignment, 'get_local_alignment_test_1_-1_0', 1, -1, 0)
# alignment_template(get_local_alignment, 'get_local_alignment_test_1_0_-1', 1, 0, -1)
# results['get_model_alignment1'] = get_model_alignment(sections, n, 100, 100)
# results['get_model_alignment2'] = get_model_alignment(sections, n, 50, 50)
# results['get_model_alignment3'] = get_model_alignment(sections, n, 20, 20)
# results['get_model_alignment4'] = get_model_alignment(sections, n, 11, 100)
# Sweep the threshold over 0..39 with the other arguments fixed, storing each
# run under its own 'get_model_alignment_<threshold>' key in `results`.
# NOTE(review): get_model_alignment comes from model_alignment and is not
# visible here; its result is presumably an iterable of key pairs, since the
# scoring cell iterates it and compares elements as pairs -- confirm.
for j in range(40):
    name = 'get_model_alignment_' + str(j)
    results[name] = get_model_alignment(sections, threshold=j, max_length=100, order_reverse=True, n=n, name=name)



#get_local_stretch_alignment_test = helper_get_n_similar_trajectories(sections, get_local_stretch_alignment, n, 'get_local_stretch_alignment_test')

Started working on: control_set


100%|██████████| 5565/5565 [00:00<00:00, 1694099.42it/s]


Started working on: get_model_alignment_0


100%|██████████| 300/300 [00:01<00:00, 242.16it/s]


105
Started working on: get_model_alignment_1


100%|██████████| 300/300 [00:01<00:00, 244.44it/s]


105
Started working on: get_model_alignment_2


100%|██████████| 300/300 [00:01<00:00, 244.77it/s]


105
Started working on: get_model_alignment_3


100%|██████████| 300/300 [00:01<00:00, 246.62it/s]


105
Started working on: get_model_alignment_4


100%|██████████| 300/300 [00:01<00:00, 247.86it/s]


105
Started working on: get_model_alignment_5


100%|██████████| 300/300 [00:01<00:00, 247.31it/s]


105
Started working on: get_model_alignment_6


100%|██████████| 300/300 [00:01<00:00, 247.87it/s]


105
Started working on: get_model_alignment_7


100%|██████████| 300/300 [00:01<00:00, 248.39it/s]


105
Started working on: get_model_alignment_8


100%|██████████| 300/300 [00:01<00:00, 247.70it/s]


105
Started working on: get_model_alignment_9


100%|██████████| 300/300 [00:01<00:00, 247.05it/s]


105
Started working on: get_model_alignment_10


100%|██████████| 300/300 [00:01<00:00, 246.31it/s]


105
Started working on: get_model_alignment_11


100%|██████████| 300/300 [00:01<00:00, 247.00it/s]


105
Started working on: get_model_alignment_12


100%|██████████| 300/300 [00:01<00:00, 247.12it/s]


105
Started working on: get_model_alignment_13


100%|██████████| 300/300 [00:01<00:00, 247.41it/s]


105
Started working on: get_model_alignment_14


100%|██████████| 300/300 [00:01<00:00, 247.22it/s]


105
Started working on: get_model_alignment_15


100%|██████████| 300/300 [00:01<00:00, 246.07it/s]


105
Started working on: get_model_alignment_16


100%|██████████| 300/300 [00:01<00:00, 244.19it/s]


105
Started working on: get_model_alignment_17


100%|██████████| 300/300 [00:01<00:00, 246.08it/s]


105
Started working on: get_model_alignment_18


100%|██████████| 300/300 [00:01<00:00, 241.08it/s]


105
Started working on: get_model_alignment_19


100%|██████████| 300/300 [00:01<00:00, 244.71it/s]


105
Started working on: get_model_alignment_20


100%|██████████| 300/300 [00:01<00:00, 245.68it/s]


105
Started working on: get_model_alignment_21


100%|██████████| 300/300 [00:01<00:00, 245.48it/s]


105
Started working on: get_model_alignment_22


100%|██████████| 300/300 [00:01<00:00, 245.35it/s]


105
Started working on: get_model_alignment_23


100%|██████████| 300/300 [00:01<00:00, 245.53it/s]


105
Started working on: get_model_alignment_24


100%|██████████| 300/300 [00:01<00:00, 212.38it/s]


105
Started working on: get_model_alignment_25


100%|██████████| 300/300 [00:01<00:00, 240.54it/s]


105
Started working on: get_model_alignment_26


100%|██████████| 300/300 [00:01<00:00, 246.58it/s]


105
Started working on: get_model_alignment_27


100%|██████████| 300/300 [00:01<00:00, 246.45it/s]


105
Started working on: get_model_alignment_28


100%|██████████| 300/300 [00:01<00:00, 244.94it/s]


105
Started working on: get_model_alignment_29


100%|██████████| 300/300 [00:01<00:00, 245.46it/s]


105
Started working on: get_model_alignment_30


100%|██████████| 300/300 [00:01<00:00, 245.62it/s]


105
Started working on: get_model_alignment_31


100%|██████████| 300/300 [00:01<00:00, 244.79it/s]


105
Started working on: get_model_alignment_32


100%|██████████| 300/300 [00:01<00:00, 246.13it/s]


105
Started working on: get_model_alignment_33


100%|██████████| 300/300 [00:01<00:00, 230.55it/s]


105
Started working on: get_model_alignment_34


100%|██████████| 300/300 [00:01<00:00, 244.10it/s]


105
Started working on: get_model_alignment_35


100%|██████████| 300/300 [00:01<00:00, 233.05it/s]


105
Started working on: get_model_alignment_36


100%|██████████| 300/300 [00:01<00:00, 246.77it/s]


105
Started working on: get_model_alignment_37


100%|██████████| 300/300 [00:01<00:00, 245.88it/s]


105
Started working on: get_model_alignment_38


100%|██████████| 300/300 [00:01<00:00, 247.09it/s]


105
Started working on: get_model_alignment_39


100%|██████████| 300/300 [00:01<00:00, 246.80it/s]

105





In [11]:
### Find out how many of each function's top-n pairs also appear in the control set's top-n pairs

def compare_pairs(pair1, pair2):
    """Tell whether two 2-element pairs hold the same items in either order.

    Args:
        pair1: Indexable with at least two elements.
        pair2: Indexable with at least two elements.

    Returns:
        True if pair1 equals pair2 element-wise, or equals pair2 with its
        two elements swapped; otherwise False.
    """
    # Same orientation: (a, b) vs (a, b).
    if pair1[0] == pair2[0] and pair1[1] == pair2[1]:
        return True
    # Swapped orientation: (a, b) vs (b, a).
    return pair1[0] == pair2[1] and pair1[1] == pair2[0]

# For every run stored in `results`, count how many of its pairs also occur
# (in either order) among the control set's pairs. Runs with no matching pair
# get no entry in `results_score`.
results_score = {}

for res in control_set:
    for function in results:
        elements = results[function]
        for elem in elements:
            if compare_pairs(res, elem):
                # dict.get supplies the initial 0, replacing a bare `except:`
                # that also swallowed KeyboardInterrupt and unrelated errors.
                results_score[function] = results_score.get(function, 0) + 1

# Last expression of the cell: displays the score table.
results_score


{'get_model_alignment_0': 12,
 'get_model_alignment_1': 12,
 'get_model_alignment_2': 12,
 'get_model_alignment_3': 12,
 'get_model_alignment_4': 12,
 'get_model_alignment_5': 12,
 'get_model_alignment_6': 12,
 'get_model_alignment_7': 12,
 'get_model_alignment_8': 12,
 'get_model_alignment_9': 12,
 'get_model_alignment_10': 12,
 'get_model_alignment_11': 12,
 'get_model_alignment_12': 12,
 'get_model_alignment_13': 8,
 'get_model_alignment_14': 8,
 'get_model_alignment_15': 8,
 'get_model_alignment_16': 8,
 'get_model_alignment_17': 8,
 'get_model_alignment_18': 8,
 'get_model_alignment_19': 8,
 'get_model_alignment_20': 8,
 'get_model_alignment_21': 8,
 'get_model_alignment_22': 8,
 'get_model_alignment_23': 8,
 'get_model_alignment_24': 8,
 'get_model_alignment_25': 8,
 'get_model_alignment_26': 8,
 'get_model_alignment_27': 8,
 'get_model_alignment_28': 8,
 'get_model_alignment_29': 8,
 'get_model_alignment_30': 8,
 'get_model_alignment_31': 8,
 'get_model_alignment_32': 8,
 'get_m