In [1]:
import numpy as np
from collections import ChainMap
from itertools import permutations

In [None]:
# lists containing individual ranked lists (i.e., base sequences)
ADNI = ['ABETA', 'LDEL', 'MMSE', 'LIMM','PTAU','TAU', 'ENTOR', 'HIPPO', 'VENT', 'FUSIF','MIDTEMP']
AIBL = ['LIMM', 'LDEL', 'MMSE','FIGC']
JADNI = ['ABETA','PTAU','LIMM','MMSE','VENT', 'LDEL', 'TAU','HIPPO', 'ENTOR', 'FUSIF','MIDTEMP']
ANM = ['MMSE', 'HIPPO', 'ENTOR', 'FUSIF', 'MIDTEMP', 'CSFVOL', 'VENT']
WMHAD = ['MMSE', 'FUSIF','VENT', 'ENTOR', 'CSFVOL', 'HIPPO', 'MIDTEMP']
ARWIBO = ['LDEL', 'VENT', 'MMSE', 'FUSIF','LIMM', 'ENTOR', 'HIPPO', 'MIDTEMP', 'CSFVOL','FIGC']
EMIF = ['MMSE', 'ABETA', 'TAU', 'PTAU', 'LDEL', 'HIPPO']
OASIS = ['MMSE', 'CSFVOL', 'MIDTEMP', 'HIPPO', 'ENTOR', 'FUSIF', 'VENT']
EDSD = ['MMSE', 'HIPPO', 'VENT','ENTOR', 'FIGC', 'MIDTEMP','FUSIF']
NACC = ['ABETA', 'LIMM', 'MMSE', 'TAU','PTAU','ENTOR', 'MIDTEMP', 'FUSIF', 'HIPPO', 'CSFVOL', 'VENT']

all_cohorts = [ADNI, AIBL, ANM, WMHAD, ARWIBO, EMIF, OASIS, EDSD, NACC, JADNI]

# list containing the underlying event space (ie. all possible events)
space = ['ABETA', 'LDEL', 'MMSE', 'LIMM', 'ENTOR', 'HIPPO', 'VENT','FUSIF','MIDTEMP', 'FIGC', 'CSFVOL', 'TAU', 'PTAU']

In [4]:
k = 8 # select length of starting sequence

# create all possible starting sequences using k variables
candidate_init_tuple = list(permutations(space, k))

In [6]:
def Spearman_Footrule(list_1,list_2):
    """Distance function"""
    
    dist = 0
    
    for feat in list_1:
        
        list_1_index = list_1.index(feat)
        list_2_index = list_2.index(feat)
        
        dist += abs(list_1_index - list_2_index)
    
    return dist 

In [7]:
def pattern_sequence_consistency (pattern,all_cohorts):
    """Calculate distance between the potential meta-sequence / starting sequence (ie., pattern) 
    and individual event sequence"""
    
    pattern_copy = pattern.copy()
    distance_Spearman_Footrule = []
    patten_dist_Spearman_Footrule = {}

    # Handeling the partially overlapping ranked lists
    for seq in all_cohorts:
        
        pattern_not_seq = [x for x in pattern_copy if x in seq]
        seq_not_pattern = [y for y in seq if y in pattern_copy]

        if len(seq_not_pattern) == 0:
            continue
        
        # Calculating distance between the pattern and individual event sequence
        dists_Spearman_Footrule = (Spearman_Footrule(pattern_not_seq, seq_not_pattern) / len(seq_not_pattern))
        distance_Spearman_Footrule.append(dists_Spearman_Footrule)
    
    # Calculating the overall distance between the pattern and all cohorts
    dist_Spearman_Footrule = np.mean(distance_Spearman_Footrule)
    
    return dist_Spearman_Footrule

In [None]:
# Calculating distance between all possible meta-sequences (ie., patterns) and base sequence

patterns_scores_Spearman_Footrule = {}

for patterns in candidate_init_tuple:   
    
    patterns = list(patterns)
    
    dist_Spearman_Footrule = pattern_sequence_consistency(patterns, all_cohorts)
    patterns_scores_Spearman_Footrule[tuple(patterns)] = dist_Spearman_Footrule

min_score_Spearman_Footrule = min(patterns_scores_Spearman_Footrule.values())

In [9]:
# Extracting the meta-sequence with the minimum distance to all event sequences

for key, value in patterns_scores_Spearman_Footrule.items():
    if value == min_score_Spearman_Footrule:
        seq = list(key)

In [10]:
# Function for adding the remaining variables not included in the initial 
# proposed pattern coming from permutation of variables in space list

def add_remaining_feats(feat, pattern, all_cohorts):
    """Add the remaining variables to the starting sequence (list1)"""
    list1 = pattern[::]
    
    for feat in perm:    
    
        dict_total = []

        tup = tuple()
        dic_Spearman_Footrule = {}

        # Iterate over all possible postions that the new variable can have in initial proposed pattern
        for i in range(len(list1)):
            tup = tuple()

            list1.insert(i, feat)
            tup += (i, feat)
            dic_Spearman_Footrule[tup] = pattern_sequence_consistency(list1, all_cohorts)
            list1.remove(feat)

            dict_total.append(dic_Spearman_Footrule)

        res = ChainMap(*dict_total)
        feat_min = min(res, key=lambda k: res[k])
        list1.insert(feat_min[0], feat_min[1])

    return list1, res[feat_min]

In [18]:
# Add remaining variables to initial proposed pattern coming from  
# permutation of variables in space list having the minimum distance with all event sequences
not_common = [x for x in space if x not in seq]
r = len(space) - n

meta_seqs = dict()

for comb in permutations(not_common, r):
    for feat in comb:
            
        seq, distance = add_remaining_feats(perm, pattern, ac)
        meta_seqs[tuple(seq)] = distance

# print meta-seqs with lowest distance
min_dist = min(meta_seqs.values())
min_meta_seqs = [key for key in meta_seqs.keys() if meta_seqs[key] == min_dist]

for meta_seq in min_meta_seqs:
    print(meta_seq)

(['ABETA', 'LIMM', 'MMSE', 'TAU', 'PTAU', 'LDEL', 'ENTOR', 'HIPPO', 'FUSIF', 'MIDTEMP', 'CSFVOL', 'VENT', 'FIGC'],)
