### Notebook to combine ranked lists using Borda voting

In [1]:
import os
# Directory holding the input ranked lists; the combined output is written here too.
ranked_list_dir = "./downloaded_data/ranked-drug-lists/"
# One dict per input ranked list, consumed by make_ballots():
#   'file'       - path of the file to read
#   'sep'        - separator string each line is split on
#   'drug_field' - zero-based index of the field holding the drug name
#   'header'     - True if the first line is a header row that must be skipped
ranked_list_file_info = [
    # NOTE(review): this file has a .tsv extension but is split on "," - confirm
    # that the separator matches the actual file contents.
    {'file': os.path.join(ranked_list_dir, "chembl_antiviral_cosine_sim.tsv"),
     'sep': ",", 'drug_field': 0,
     'header': False
    },
    {'file': os.path.join(ranked_list_dir, "RANKING_cNMF_top-50.tab"), 
     'sep': "\t", 'drug_field': 1,
     'header': True
    },
    {'file': os.path.join(ranked_list_dir, "RANKING_pStepKernel_top-100.tab"),
     'sep': "\t", 'drug_field': 1,
     'header': True
    },
]
# Destination of the combined ranking (one "<drug>\t<score>" line per drug).
ranked_list_borda_output = os.path.join(ranked_list_dir, "ranked_list_borda_output.tsv")

In [2]:
import itertools
import collections

def borda(ballot):
    """Convert one ranked ballot into normalized Borda scores.

    The first item of the ballot receives 1.0, the last receives 0.0, and
    the items in between are spaced evenly, so ballots of different lengths
    contribute on the same scale.

    Args:
        ballot: sized, ordered collection of hashable items, best first.

    Returns:
        dict mapping each item to its score in [0.0, 1.0]. An empty ballot
        yields an empty dict. If an item occurs more than once, the score of
        its last (lowest-ranked) occurrence is kept.
    """
    n = len(ballot) - 1
    if n == 0:
        # A single-item ballot has no lower rank to normalize against;
        # dividing by n would raise ZeroDivisionError. The lone item is the
        # top choice of its list, so give it the top score.
        return {item: 1.0 for item in ballot}
    result = {}
    for position, item in enumerate(ballot):
        # Raw Borda points run n, n-1, ..., 0; dividing by n maps them to [0, 1].
        result[item] = (n - position) / float(n)
    return result

def tally(ballots):
    """Sum the Borda scores of every item across all ballots.

    Args:
        ballots: iterable of ballots, each one accepted by borda().

    Returns:
        Plain dict mapping each item seen in any ballot to the sum of the
        scores it received.
    """
    totals = collections.defaultdict(int)
    for single_ballot in ballots:
        scores = borda(single_ballot)
        for candidate in scores:
            # Missing candidates start at 0 thanks to the defaultdict.
            totals[candidate] += scores[candidate]
    return dict(totals)

def make_ballots(files_info, downcase=True, drop_blanks=True):
    """Read one ranked list per file description and return them as ballots.

    Args:
        files_info: iterable of dicts, each with the keys 'file' (path to
            open), 'sep' (separator each line is split on), 'drug_field'
            (zero-based index of the field holding the drug name) and,
            optionally, 'header' (truthy if the first line must be skipped).
        downcase: when true, lowercase every drug name.
        drop_blanks: when true, skip blank lines and empty drug names.

    Returns:
        List with one list of drug names per entry of files_info, each in
        file order.

    Raises:
        IndexError: if a processed line has too few fields for 'drug_field'.
    """
    ballot_strings_list = []
    for file_info in files_info:
        this_ballot_drugs = []
        with open(file_info['file']) as f:
            if file_info.get('header'):
                # Discard the header row; the default avoids StopIteration
                # on an empty file.
                next(f, None)
            for line in f:
                # Lines yielded by the file keep their terminator. Without
                # removing it, a drug name taken from the last field would
                # end in "\n" and never match the same drug in other lists.
                record = line.rstrip("\r\n")
                if drop_blanks and not record.strip():
                    # Blank line (e.g. trailing newline at end of file).
                    continue
                this_drug = record.split(file_info['sep'])[file_info['drug_field']].strip()
                if downcase:
                    this_drug = this_drug.lower()
                if not drop_blanks or this_drug != '':
                    this_ballot_drugs.append(this_drug)
        ballot_strings_list.append(this_ballot_drugs)
    return ballot_strings_list

In [3]:
# Read every configured ranked list into a ballot (list of drug names in file order).
ballots = make_ballots(ranked_list_file_info)

In [4]:
# Sum each drug's per-ballot Borda score into a single dict of combined scores.
tallied_ballots = tally(ballots)

In [5]:
# Turn the score dict into [name, score] pairs ordered from highest to lowest score.
tallied_ballots_sorted = [
    list(entry)
    for entry in sorted(tallied_ballots.items(), key=lambda entry: entry[1], reverse=True)
]

In [6]:
# Write one tab-separated "name<TAB>score" row per entry, keeping the sorted order.
with open(ranked_list_borda_output, 'w') as out:
    for item in tallied_ballots_sorted:
        out.write("\t".join(map(str, item)) + "\n")