# In [67]:
import numpy as np
import scipy.stats as sstats
import pandas as pd 

from collections import namedtuple
# Module-level output handle, opened (and truncated) as soon as this file is
# executed. drug_targets() writes one tab-separated line per drug to it and
# main() closes it.
drug_target_enrichment = open("DT_ACR_results.txt","w")

def _read_gene_ids(path, skip_header=False):
    """Read one integer gene id per line from *path*, ignoring blank lines."""
    with open(path) as handle:
        lines = handle.readlines()
    if skip_header:
        lines = lines[1:]
    # int() tolerates surrounding whitespace, including the trailing newline.
    return [int(line) for line in lines if line.strip()]


def drug_targets():
    """Write one enrichment result line per drug to ``drug_target_enrichment``.

    Reads the drug/protein mapping from ``mapped_DB_STITCH.tsv`` and joins it
    with ``biomart.tsv`` on the ``protein`` column to obtain the Entrez ids
    targeted by each ``DrugBank ID``. Each drug's target list is then tested
    with ``set_enrichment`` against the gene list in
    ``test_list_genes1.0.txt`` (first line skipped), using
    ``test_list_genes_homo_sapiens1.0.txt`` as the universe.

    Each output line is the DrugBank id followed by the tab-separated fields
    of the ``set_enrichment`` result. Nothing is returned.
    """
    # Presumably the NCBI taxonomy prefix STITCH puts in front of protein ids
    # (TODO confirm). It must be removed as a *prefix*: str.lstrip('9606.')
    # strips any leading run of the characters 9/6/0/. and could therefore
    # eat into the identifier itself.
    prefix = '9606.'
    mapped = pd.read_csv('mapped_DB_STITCH.tsv', sep='\t')
    mapped['protein'] = mapped['protein'].map(
        lambda x: x[len(prefix):] if x.startswith(prefix) else x)

    biomart_data = pd.read_csv("biomart.tsv",
                  sep='\t',
                  names=["gene", "transcript", "protein", "entrez id", "Uniprot", "name"])

    get_entrez = pd.merge(mapped, biomart_data, on=["protein"])
    # Rows without an Entrez id cannot take part in the enrichment test.
    get_entrez = get_entrez.dropna(subset=['entrez id'])
    get_entrez['entrez id'] = get_entrez['entrez id'].astype(int)

    # Group Entrez ids per drug in a single pass, keeping first-seen drug order.
    targets_by_drug = {}
    for drug, entrez in zip(get_entrez['DrugBank ID'], get_entrez['entrez id']):
        targets_by_drug.setdefault(drug, []).append(entrez)

    # The first line of the test list is skipped; the universe file is read whole.
    other_set = _read_gene_ids("test_list_genes1.0.txt", skip_header=True)
    universe = _read_gene_ids("test_list_genes_homo_sapiens1.0.txt")

    for key, value in targets_by_drug.items():
        results = set_enrichment(value, other_set, universe)
        drug_target_enrichment.write(
            key + "\t" + '\t'.join(str(item) for item in results) + "\n")

# Result record returned by set_enrichment; built once rather than per call.
_SetEnrichmentResult = namedtuple(
    "setEnrichmentResult",
    ['oddsratio', 'c2statistic', 'pvalue', 'table', 'method'])


def set_enrichment(your_set, other_set, universe, abcd_values=False):
    """Test whether ``your_set`` and ``other_set`` overlap more than expected.

    Both sets are first restricted to ``universe``. A 2x2 contingency table is
    then built::

        [[ |your & other| , |other - your|          ],
         [ |your - other| , |universe - (your|other)| ]]

    If any cell of the table is <= 5, Fisher's exact test is used; otherwise
    a chi-square contingency test is used.

    Parameters:
        your_set: iterable of hashable items.
        other_set: iterable of hashable items.
        universe: iterable of hashable items; elements of the two sets that
            are not in it are ignored.
        abcd_values: if True, the ``table`` field holds the four sets
            themselves instead of their sizes.

    Returns:
        A ``setEnrichmentResult`` namedtuple with fields ``oddsratio``,
        ``c2statistic`` (``None`` when Fisher's test was used), ``pvalue``,
        ``table`` and ``method`` (``'fisher'`` or ``'chi2'``).
    """
    universe = set(universe)
    your_set = set(your_set) & universe
    other_set = set(other_set) & universe

    a = your_set & other_set
    b = other_set - your_set
    c = your_set - other_set
    d = universe - (your_set | other_set)

    table = [[len(a), len(b)], [len(c), len(d)]]

    # Smallest of the four cells. min(min(table)) would instead take the
    # lexicographically smallest *row* and could miss a small cell in the
    # other row.
    smallest_cell = min(min(row) for row in table)

    if smallest_cell <= 5:
        method = 'fisher'
        oddsratio, p = sstats.fisher_exact(table)
        chi2 = None
    else:
        method = 'chi2'
        chi2, p, _dof, _expected = sstats.chi2_contingency(table)
        # Every cell is > 5 in this branch, so the denominator is non-zero.
        oddsratio = table[0][0] * table[1][1] / (table[1][0] * table[0][1])

    cells = [[a, b], [c, d]] if abcd_values else table
    return _SetEnrichmentResult(oddsratio, chi2, p, cells, method)
def main():
    """Run ``drug_targets`` and close the module-level output file.

    The file is closed in a ``finally`` clause so buffered results are
    flushed and the handle is released even if the analysis raises.
    """
    try:
        drug_targets()
    finally:
        drug_target_enrichment.close()


# Script entry point. There is no __name__ guard, so the analysis also runs
# when this file is imported.
main()