In [1]:
import platform

# Tell the reader whether the running interpreter is exactly the version
# this notebook was written against (3.9.6).
if platform.python_version() == '3.9.6':
    print('You are using the recommended version for this script!')
else:
    print('You are not using the recommended version for this script!')

You are using the recommended version for this script!


In [2]:
### TODO: NB! There are no methods in this notebook; everything should be referred to as functions.

In [3]:
import pandas as pd
from tqdm import tqdm
import ast
import itertools
import operator

In [4]:
### Functions for getting data from files

## Get raw data from file
def get_data_from_file(file_name):
    """Read a text file and return its content split into lines.

    Parameters
    ----------
    file_name : str or path-like
        Path of the file to read. It is opened in text mode with the
        default encoding.

    Returns
    -------
    list of str
        The file content split on the newline character. A file that ends
        with a newline therefore yields a trailing empty string, and an
        empty file yields a list holding one empty string.

    Raises
    ------
    OSError
        If the file cannot be opened or read.
    """
    # The context manager closes the handle even when read() raises, which
    # an explicit close() after the read would not do.
    with open(file_name) as file:
        content = file.read()

    return content.split('\n')

## Get trajectories (default file name is trajectories.csv)
def get_trajectories(file_name = 'trajectories.csv'):
    """Load trajectories from a tab-separated file.

    The first line is treated as a title row and skipped, as is every line
    that does not split into at least two tab-separated columns.

    Returns a dict mapping each row's first column to the list of its
    remaining columns (all kept as strings). A key that appears more than
    once keeps the values of its last row.
    """
    rows = get_data_from_file(file_name)
    # Position 0 is the title row; everything after it is a candidate data row.
    split_rows = (
        row.split('\t')
        for position, row in enumerate(rows)
        if position > 0
    )

    return {fields[0]: fields[1:] for fields in split_rows if len(fields) > 1}
        
## Get sections (default file name is diagnoses.csv)
def get_sections(file_name = 'diagnoses.csv'):
    """Load the sections column from a tab-separated file.

    The first line is treated as a title row and skipped, as is every line
    that does not split into at least two tab-separated columns. For each
    remaining row the second-to-last column is parsed as a Python literal
    with ast.literal_eval and stored under the row's first column.

    Returns a dict mapping first-column value (string) to the parsed literal.
    Raises whatever ast.literal_eval raises when that column is not a valid
    literal.
    """
    rows = get_data_from_file(file_name)
    parsed = {}
    for position, row in enumerate(rows):
        fields = row.split('\t')
        is_data_row = position > 0 and len(fields) > 1
        if is_data_row:
            # fields[-2] is the second-to-last column; for a two-column row
            # that is the key column itself.
            parsed[fields[0]] = ast.literal_eval(fields[-2])

    return parsed

In [5]:
### Create containers for data

# Parsed literal per row of diagnoses.csv (the get_sections default file),
# keyed by the row's first column.
sections = get_sections()
# Remaining columns per row of trajectories.csv (the get_trajectories default
# file), keyed by the row's first column.
trajectories = get_trajectories()

# sections = get_sections('diagnoses_1.csv')
# trajectories = get_trajectories('trajectories_1.csv')

In [6]:
### Helper functions for testing the comparison functions

## Return n number of best matching trajectory pairs 
## (if n is bigger than the number of trajectory pairs, then all pairs will be returned)
def helper_get_n_similar_trajectories(data, compare_function, n=100, name='funtion', **kwargs):
    """Score every pair of keys in data and return the n best-scoring pairs.

    Parameters
    ----------
    data : mapping
        Keys identify the items; data[key] is what gets handed to
        compare_function.
    compare_function : callable
        Called as compare_function(data[a], data[b], **kwargs) for every
        2-combination (a, b) of the keys; its return value is the pair's score.
    n : int
        Maximum number of pairs to return.
    name : str
        Label printed in the progress message.
    **kwargs
        Forwarded unchanged to compare_function.

    Returns
    -------
    dict
        Maps (a, b) key tuples to scores, ordered from highest to lowest
        score. Pairs with equal scores keep the order in which the
        combinations were generated.
    """
    key_pairs = list(itertools.combinations(data, 2))  # materialised so tqdm knows the total
    scores = {}
    print('Started working on: %s' % (name))
    for first_key, second_key in tqdm(key_pairs):
        scores[(first_key, second_key)] = compare_function(
            data[first_key], data[second_key], **kwargs)

    ranked = sorted(scores.items(), key=operator.itemgetter(1), reverse=True)
    return dict(ranked[:n])

In [7]:
### Function for getting the number of matches between two trajectories

def get_overall_similartity(t1, t2):
    """Count how many elements of t1 also occur in t2.

    Every element of t1 is checked on its own, so a value repeated in t1 is
    counted once per repetition; repetitions in t2 do not add to the count.
    The result is therefore not necessarily symmetric in t1 and t2.
    """
    return sum(1 for element in t1 if element in t2)

In [8]:
### Functions for comparing trajectories

## Import all algo implementations
from local_alignment import get_local_alignment
from local_stretch_alignment import get_local_stretch_alignment
from model_alignment import get_model_alignment

In [9]:
results = {}

In [10]:
### Test functions

n = 100 #Sample number

def alignment_template(fun, name, match=None, mismatch=None, gap_penalty=None):
    """Run one alignment function over all section pairs and store its top pairs.

    The module-level sections mapping is ranked with
    helper_get_n_similar_trajectories using fun as the comparison function;
    match, mismatch and gap_penalty are forwarded to fun as keyword
    arguments. The top n pairs are stored in the module-level results dict
    under name. Nothing is returned.
    """
    scoring_options = {
        'match': match,
        'mismatch': mismatch,
        'gap_penalty': gap_penalty,
    }
    results[name] = helper_get_n_similar_trajectories(
        sections, fun, n, name, **scoring_options)

# Baseline ranking: the n trajectory pairs with the highest
# get_overall_similartity score (count of elements of one trajectory found in the other).
control_set = helper_get_n_similar_trajectories(trajectories, get_overall_similartity, n, 'control_set')
# alignment_template(get_local_alignment, 'get_local_alignment_test_1_0_0', 1, 0, 0)
# alignment_template(get_local_alignment, 'get_local_alignment_test_1_-1_-1', 1, -1, -1)
# alignment_template(get_local_alignment, 'get_local_alignment_test_1_-1_0', 1, -1, 0)
# alignment_template(get_local_alignment, 'get_local_alignment_test_1_0_-1', 1, 0, -1)
# results['get_model_alignment1'] = get_model_alignment(sections, n, 100, 100)
# results['get_model_alignment2'] = get_model_alignment(sections, n, 50, 50)
# results['get_model_alignment3'] = get_model_alignment(sections, n, 20, 20)
# results['get_model_alignment4'] = get_model_alignment(sections, n, 11, 100)
# NOTE(review): the output recorded for this cell ends with
# "TypeError: unhashable type: 'slice'" right after the progress line for
# get_model_alignment_reverse - presumably raised inside get_model_alignment
# (defined in model_alignment, not visible here); confirm before relying on
# results['get_model_alignment_reverse'].
results['get_model_alignment_reverse'] = get_model_alignment(
                                            sections, 
                                            motive_limit=(0, 999),
                                            max_motive_length=100,
                                            result_limit=(1, 10),
                                            order_reverse=True, 
                                            n=n, 
                                            name='get_model_alignment_reverse'
                                        )
    


#get_local_stretch_alignment_test = helper_get_n_similar_trajectories(sections, get_local_stretch_alignment, n, 'get_local_stretch_alignment_test')

Started working on: control_set


100%|██████████| 120786/120786 [00:00<00:00, 1750092.25it/s]


Started working on: get_model_alignment_reverse


100%|██████████| 1000/1000 [00:00<00:00, 17545.35it/s]

[('1', 0), ('2', 0), ('3', 0), ('4', 0), ('5', 0), ('6', 0), ('7', 0), ('8', 0), ('9', 0), ('10', 0), ('11', 0), ('12', 0), ('13', 0), ('14', 0), ('15', 0), ('16', 0), ('17', 0), ('18', 0), ('19', 0), ('20', 0), ('21', 0), ('22', 0), ('23', 0), ('24', 0), ('25', 0), ('26', 0), ('27', 0), ('28', 0), ('29', 0), ('30', 0), ('31', 0), ('32', 0), ('33', 0), ('34', 0), ('35', 0), ('36', 0), ('37', 0), ('38', 0), ('39', 0), ('40', 0), ('41', 0), ('42', 0), ('43', 0), ('44', 0), ('45', 0), ('46', 0), ('47', 0), ('48', 0), ('49', 0), ('50', 0), ('51', 0), ('52', 0), ('53', 0), ('54', 0), ('55', 0), ('56', 0), ('57', 0), ('58', 0), ('59', 0), ('60', 0), ('61', 0), ('62', 0), ('63', 0), ('64', 0), ('65', 0), ('66', 0), ('67', 0), ('68', 0), ('69', 0), ('70', 0), ('71', 0), ('72', 0), ('73', 0), ('74', 0), ('75', 0), ('76', 0), ('77', 0), ('78', 0), ('79', 0), ('80', 0), ('81', 0), ('82', 0), ('83', 0), ('84', 0), ('85', 0), ('86', 0), ('87', 0), ('88', 0), ('89', 0), ('90', 0), ('91', 0), ('92', 




TypeError: unhashable type: 'slice'

In [None]:
### Find out how many pairs inside threshold n are the same

def compare_pairs(pair1, pair2):
    """Return True when two 2-element pairs hold the same items in either order.

    Only positions 0 and 1 of each pair are inspected.
    """
    # Same orientation: (a, b) vs (a, b).
    if pair1[0] == pair2[0] and pair1[1] == pair2[1]:
        return True
    # Swapped orientation: (a, b) vs (b, a).
    return pair1[0] == pair2[1] and pair1[1] == pair2[0]

# For every comparison function, count how many of its result pairs also
# appear in control_set (in either orientation, see compare_pairs).
#
# Each function gets exactly one entry that starts at zero and only counts up.
# The previous loop reset the entry to 0 on every non-matching comparison, so
# the final value depended on the last comparison made rather than on the
# number of matches, and its bare `except:` could hide unrelated errors.
results_score = {}

for function, elements in results.items():
    results_score[function] = sum(
        1
        for res in control_set
        for elem in elements
        if compare_pairs(res, elem)
    )

results_score


In [11]:
control_set

{('8', '25'): 7,
 ('8', '235'): 7,
 ('8', '378'): 7,
 ('8', '490'): 7,
 ('8', '754'): 7,
 ('8', '864'): 7,
 ('25', '235'): 7,
 ('25', '378'): 7,
 ('25', '490'): 7,
 ('25', '754'): 7,
 ('25', '864'): 7,
 ('235', '378'): 7,
 ('235', '490'): 7,
 ('235', '754'): 7,
 ('235', '864'): 7,
 ('378', '490'): 7,
 ('378', '754'): 7,
 ('378', '864'): 7,
 ('490', '754'): 7,
 ('490', '864'): 7,
 ('754', '864'): 7,
 ('8', '37'): 6,
 ('8', '59'): 6,
 ('8', '65'): 6,
 ('8', '82'): 6,
 ('8', '83'): 6,
 ('8', '98'): 6,
 ('8', '153'): 6,
 ('8', '199'): 6,
 ('8', '216'): 6,
 ('8', '270'): 6,
 ('8', '290'): 6,
 ('8', '291'): 6,
 ('8', '303'): 6,
 ('8', '304'): 6,
 ('8', '319'): 6,
 ('8', '332'): 6,
 ('8', '348'): 6,
 ('8', '375'): 6,
 ('8', '414'): 6,
 ('8', '416'): 6,
 ('8', '423'): 6,
 ('8', '446'): 6,
 ('8', '458'): 6,
 ('8', '459'): 6,
 ('8', '470'): 6,
 ('8', '499'): 6,
 ('8', '506'): 6,
 ('8', '612'): 6,
 ('8', '716'): 6,
 ('8', '731'): 6,
 ('8', '789'): 6,
 ('8', '821'): 6,
 ('8', '845'): 6,
 ('8', '85