In [1]:
import itertools
import numpy as np
from tqdm import tqdm
from collections import Counter
from collections import ChainMap
import pandas as pd
import heapq
import matplotlib.pyplot as plt
from itertools import permutations as itertools_permutations

In [2]:
# Individual ranked lists (i.e., event sequences), one per cohort.
# A variable's position inside its list is its rank in that cohort.

ADNI = [
    'ABETA', 'LDEL', 'MMSE', 'LIMM', 'PTAU', 'TAU',
    'ENTOR', 'HIPPO', 'VENT', 'FUSIF', 'MIDTEMP',
]

AIBL = ['LIMM', 'LDEL', 'MMSE', 'FIGC']

JADNI = [
    'ABETA', 'PTAU', 'LIMM', 'MMSE', 'VENT', 'LDEL',
    'TAU', 'HIPPO', 'ENTOR', 'FUSIF', 'MIDTEMP',
]

ANM = [
    'MMSE', 'HIPPO', 'ENTOR', 'FUSIF',
    'MIDTEMP', 'CSFVOL', 'VENT',
]

WMHAD = [
    'MMSE', 'FUSIF', 'VENT', 'ENTOR',
    'CSFVOL', 'HIPPO', 'MIDTEMP',
]

ARWIBO = [
    'LDEL', 'VENT', 'MMSE', 'FUSIF', 'LIMM',
    'ENTOR', 'HIPPO', 'MIDTEMP', 'CSFVOL', 'FIGC',
]

EMIF = ['MMSE', 'ABETA', 'TAU', 'PTAU', 'LDEL', 'HIPPO']

OASIS = [
    'MMSE', 'CSFVOL', 'MIDTEMP', 'HIPPO',
    'ENTOR', 'FUSIF', 'VENT',
]

EDSD = [
    'MMSE', 'HIPPO', 'VENT', 'ENTOR',
    'FIGC', 'MIDTEMP', 'FUSIF',
]

NACC = [
    'ABETA', 'LIMM', 'MMSE', 'TAU', 'PTAU', 'ENTOR',
    'MIDTEMP', 'FUSIF', 'HIPPO', 'CSFVOL', 'VENT',
]

In [3]:
# The underlying variables from which candidate meta-sequences are built.
# The order here determines the order in which permutations are generated.

space = [
    'ABETA', 'LDEL', 'MMSE', 'LIMM',
    'ENTOR', 'HIPPO', 'VENT', 'FUSIF', 'MIDTEMP',
    'FIGC', 'CSFVOL',
    'TAU', 'PTAU',
]

In [4]:
# Length of the initial candidate meta-sequences.
# NOTE: `n` is assigned again further down in this notebook.
n = 8

# Every ordered arrangement of `n` distinct variables drawn from `space`,
# materialised as a list of tuples (the scoring cell indexes into it and
# takes its length). This list is large and is held fully in memory.
candidate_init_tuple = list(itertools_permutations(space, n))

In [5]:
# All cohort event sequences, in the order they are scored.
all_cohorts = [
    ADNI,
    AIBL,
    ANM,
    WMHAD,
    ARWIBO,
    EMIF,
    OASIS,
    EDSD,
    NACC,
    JADNI,
]

In [6]:
# Distance function

def Spearman_Footrule(list_1,list_2):
    """Return the Spearman footrule distance between two ranked lists.

    For every element of ``list_1``, the absolute difference between its
    position in ``list_1`` and its position in ``list_2`` is taken, and the
    differences are summed. Positions are obtained with ``.index``, i.e. the
    first occurrence of the element in each list.

    Parameters
    ----------
    list_1 : sequence
        Ranked list whose elements drive the comparison.
    list_2 : sequence
        Ranked list that must contain every element of ``list_1``.

    Returns
    -------
    int
        Sum of absolute position differences (``0`` for an empty ``list_1``).

    Raises
    ------
    ValueError
        Propagated from ``.index`` when an element of ``list_1`` is missing
        from ``list_2``.
    """
    return sum(
        abs(list_1.index(item) - list_2.index(item))
        for item in list_1
    )

In [7]:
# Calculating distance between the potential meta-sequence (i.e., pattern) and individual event sequences

def pattern_sequence_consistency (pattern,all_cohorts):
    """Return the mean normalised footrule distance between a pattern and all cohorts.

    For each cohort sequence, both the pattern and the sequence are restricted
    to the variables they share (each keeping its own order). The Spearman
    footrule distance between the two restricted lists is divided by the
    number of shared variables. Cohorts sharing no variable with the pattern
    are skipped. The result is the mean of the per-cohort values.

    Parameters
    ----------
    pattern : sequence
        Candidate meta-sequence. Any sequence (list or tuple) is accepted;
        it is not modified.
    all_cohorts : iterable of sequences
        The individual event sequences to compare against.

    Returns
    -------
    numpy.float64
        Mean normalised distance, or NaN when no cohort shares any variable
        with ``pattern`` (including an empty ``all_cohorts``).
    """
    per_cohort_distances = []

    # Handling the partially overlapped ranked lists
    for seq in all_cohorts:

        # Variables present in both, in pattern order and in cohort order
        pattern_shared = [x for x in pattern if x in seq]
        seq_shared = [y for y in seq if y in pattern]

        if len(seq_shared) == 0:
            continue

        # Normalise by the overlap size so cohorts of different lengths are comparable
        per_cohort_distances.append(
            Spearman_Footrule(pattern_shared, seq_shared) / len(seq_shared)
        )

    if not per_cohort_distances:
        # np.mean([]) would also give NaN but emits a RuntimeWarning
        return np.float64('nan')

    # Overall distance between the pattern and all cohorts
    return np.mean(per_cohort_distances)

In [8]:
# Score every candidate meta-sequence (i.e., pattern) against all event sequences

patterns_scores_Spearman_Footrule = {}

for candidate in tqdm(candidate_init_tuple):
    # The scoring function is handed a list; the dictionary key stays a tuple
    candidate_score = pattern_sequence_consistency(list(candidate), all_cohorts)
    patterns_scores_Spearman_Footrule[tuple(candidate)] = candidate_score

# Smallest distance reached by any candidate
min_score_Spearman_Footrule = min(patterns_scores_Spearman_Footrule.values())

100%|██████████| 51891840/51891840 [39:37<00:00, 21825.40it/s]  


In [9]:
# Pick the meta-sequence whose distance to all event sequences is minimal.
# When several patterns share the minimum, the last one iterated is kept.

for candidate_pattern, candidate_score in patterns_scores_Spearman_Footrule.items():
    if candidate_score != min_score_Spearman_Footrule:
        continue
    seq = list(candidate_pattern)

In [10]:
# Function for adding a remaining variable (one not included in the initial proposed pattern) at its best position

def add_remaining_feats (feat,list1,all_cohorts):
    """Insert ``feat`` into ``list1`` at the position giving the smallest distance.

    Every insertion slot is evaluated, from before the first element through
    after the last one (``len(list1) + 1`` slots). Each resulting sequence is
    scored with ``pattern_sequence_consistency``, and ``feat`` is inserted at
    the best slot. On ties the smallest position wins.

    Parameters
    ----------
    feat : hashable
        Variable to add. If it is already in ``list1`` nothing changes.
    list1 : list
        Current pattern. It is modified IN PLACE by the final insertion;
        callers rely on this.
    all_cohorts : iterable of sequences
        Event sequences used to score each candidate placement.

    Returns
    -------
    tuple
        The one-element tuple ``(list1,)``. The 1-tuple shape comes from the
        original trailing comma and is kept so existing callers see the same
        value.
    """
    if feat in list1:
        return list1,

    best_position = None
    best_distance = None

    # Iterate over all possible positions the new variable can take,
    # including the slot after the last element.
    for position in range(len(list1) + 1):
        # Build the trial sequence without touching list1
        candidate = list1[:position] + [feat] + list1[position:]
        distance = pattern_sequence_consistency(candidate, all_cohorts)

        # Strict '<' keeps the earliest position on ties
        if best_position is None or distance < best_distance:
            best_position, best_distance = position, distance

    list1.insert(best_position, feat)

    return list1,

In [11]:
# Variables of `space` that are absent from the selected initial pattern `seq`,
# kept in the order in which they appear in `space`.
not_common = [variable for variable in space if variable not in seq]

In [12]:
# Every insertion order must contain ALL remaining variables, so the
# permutation length is derived from `not_common` instead of being hard-coded
# (a fixed 5 is only right when the initial pattern length is 8).
n = len(not_common)

# All orders in which the remaining variables can be added to the pattern
candidate_comple_tuple = list(itertools.permutations(not_common, n))

In [18]:
# Add the remaining variables to the initial proposed pattern (the one with the
# minimum distance to all event sequences).
#
# Each insertion order starts from a fresh copy of the initial pattern. If the
# shared `seq` were mutated directly, the first order would already insert every
# variable and all later orders would have nothing left to add. The completed
# sequence with the smallest distance is kept; the first such order wins on ties.

initial_seq = list(seq)

best_complete = None
best_complete_dist = None

for comb in candidate_comple_tuple:

    trial = list(initial_seq)

    for feat in comb:
        # add_remaining_feats inserts into `trial` in place
        add_remaining_feats(feat, trial, all_cohorts)

    trial_dist = pattern_sequence_consistency(trial, all_cohorts)

    if best_complete is None or trial_dist < best_complete_dist:
        best_complete, best_complete_dist = trial, trial_dist

if best_complete is not None:
    # As before, `seq` ends up holding the completed sequence
    seq[:] = best_complete

# Same shape as before: a one-element tuple wrapping the completed list
complete = (seq,)

print(complete)

(['ABETA', 'LIMM', 'MMSE', 'TAU', 'PTAU', 'LDEL', 'ENTOR', 'HIPPO', 'FUSIF', 'MIDTEMP', 'CSFVOL', 'VENT', 'FIGC'],)
