# DrugBank Network Creation
1.) Download the current version of DrugBank: https://www.drugbank.ca/releases/latest  (Version used: 2018-07-03)  
2.) Parse the XML file and create the DrugBank network  
3.) Create a subnetwork containing only the drugs that are included in the CLOUD library

## Create DrugBank network

In [1]:
# Import the XML parser and NetworkX
import xml.etree.ElementTree as ET
import sys
import networkx as nx

In [None]:
'''
Go through the XML file and parse several features that DrugBank offers
Drug (Node) Properties
1. Name
2. Identifiers
3. Physicochemical Properties
4. Targets (targets/transporters/carriers and enzymes)
5. Approved status
6. ATC code
7. DrugClass family


Drug Interaction
8. Known drug interactions
'''


# Load the complete DrugBank XML export and keep a handle on its root element.
print('Read xml file')
tree = ET.parse('../data/Create_DrugBank_Network/DrugBank_FullDatabase.xml')
root = tree.getroot()

# Empty undirected graph that the parsing loop fills with drugs (nodes) and
# drug-drug interactions (edges), plus a tally of drugs that failed to parse.
G, error_count = nx.Graph(), 0

print('Start creating DrugBank network')
# Namespace prefix that qualifies every tag looked up in the DrugBank XML.
DRUGBANK_NS = '{http://www.drugbank.ca}'


def _tag(name):
    """Return the namespace-qualified form of a DrugBank tag name."""
    return DRUGBANK_NS + name


def _children(parent, name):
    """Return the container element `name` below `parent` (iterating it yields
    its child elements), or an empty tuple when the container is missing."""
    container = parent.find(_tag(name))
    return container if container is not None else ()


def _text(parent, name, default=None):
    """Return the UTF-8 encoded text of child `name`, or `default` when the
    child is missing or has no text."""
    child = parent.find(_tag(name))
    if child is None or child.text is None:
        return default
    return child.text.encode('utf-8')


def _required_text(parent, name):
    """Return the UTF-8 encoded text of child `name`.

    Raises AttributeError when the child or its text is missing, so that the
    per-drug handler below counts the drug as an error.
    """
    return parent.find(_tag(name)).text.encode('utf-8')


def _gene_or_name(entity):
    """Return the gene name of the entity's polypeptide, falling back to the
    entity's own <name> when there is no polypeptide or no gene name."""
    polypeptide = entity.find(_tag('polypeptide'))
    gene = None if polypeptide is None else _text(polypeptide, 'gene-name')
    return gene if gene is not None else _required_text(entity, 'name')


def _with_actions(entity, label):
    """Return one '<label>_<action>' string per action listed for `entity`,
    or a single '<label>_Unkown' entry when no action is listed."""
    labelled = [label + '_' + action.text.encode('utf-8')
                for action in _children(entity, 'actions')]
    # 'Unkown' (sic) is written into the exported node attributes; the
    # spelling is kept so existing consumers of the GML file keep matching.
    return labelled or [label + '_Unkown']


# Walk over every <drug> entry: add a node carrying the drug's annotations and
# one edge per known drug-drug interaction. A drug that raises AttributeError
# (a required element or its text is missing) is reported and counted in
# error_count; whatever was added to the graph before the error stays there.
for drug in root.findall(_tag('drug')):

    try:
        # Family of the drug (small molecule or biotech)
        family = drug.attrib['type']

        # Approved if ANY of the listed groups is 'approved', not just the
        # first one.
        approved = 'False'
        if any(group.text == 'approved' for group in _children(drug, 'groups')):
            approved = 'True'

        # Primary DrugBank ID (first <drugbank-id> child) and drug name
        drug_id = _required_text(drug, 'drugbank-id')
        name = _required_text(drug, 'name')

        # Chemical class / subclass, e.g. benzene; the string 'None' marks a
        # missing value.
        drug_class = drug_subclass = 'None'
        classification = drug.find(_tag('classification'))
        if classification is not None:
            drug_class = _text(classification, 'class', 'None')
            drug_subclass = _text(classification, 'subclass', 'None')

        # Associated pathways as '<name>_<smpdb-id>'; incomplete entries are
        # skipped without dropping the remaining ones.
        pathway_name_id = []
        for pathway in _children(drug, 'pathways'):
            pathway_name = _text(pathway, 'name')
            pathway_id = _text(pathway, 'smpdb-id')
            if pathway_name is not None and pathway_id is not None:
                pathway_name_id.append(pathway_name + '_' + pathway_id)

        # External references, e.g. PubChem_ID, ChEMBL_ID
        external_references = {}
        for reference in _children(drug, 'external-identifiers'):
            resource = _text(reference, 'resource')
            identifier = _text(reference, 'identifier')
            if resource is not None and identifier is not None:
                external_references[resource] = identifier

        # Register the node before the remaining parsing steps, so the drug is
        # present in the graph even if one of them fails.
        G.add_node(drug_id)

        # Associated targets: polypeptide targets are labelled with their gene
        # name (or the target name when no gene name is given) plus action;
        # non-polypeptide targets are listed by plain name.
        drug_targets = []
        for target in _children(drug, 'targets'):
            if target.find(_tag('polypeptide')) is None:
                drug_targets.append(_required_text(target, 'name'))
            else:
                drug_targets.extend(_with_actions(target, _gene_or_name(target)))

        # Associated enzymes, i.e. cytochromes
        drug_enzymes = []
        for enzyme in _children(drug, 'enzymes'):
            drug_enzymes.extend(_with_actions(enzyme, _gene_or_name(enzyme)))

        # Associated carriers (no action suffix)
        drug_carriers = [_gene_or_name(carrier)
                         for carrier in _children(drug, 'carriers')]

        # Associated transporters
        drug_transporters = []
        for transporter in _children(drug, 'transporters'):
            drug_transporters.extend(
                _with_actions(transporter, _gene_or_name(transporter)))

        # Known interactions with other drugs; the description is stored in
        # the edge attribute 'int'. The partner ID is encoded like drug_id so
        # both endpoints use the same string type.
        for drug_int in _children(drug, 'drug-interactions'):
            G.add_edge(drug_id,
                       _required_text(drug_int, 'drugbank-id'),
                       int=_required_text(drug_int, 'description'))

        # Associated ATC codes
        atc_codes = [atc.attrib['code'] for atc in _children(drug, 'atc-codes')]

        # Chemical properties (predicted). Property names are stripped of
        # spaces, parentheses and dashes; structure strings are left out.
        calculated_properties = {}
        for prop in _children(drug, 'calculated-properties'):
            prop_name = _required_text(prop, 'kind')
            for unwanted in (' ', '(', ')', '-'):
                prop_name = prop_name.replace(unwanted, '')
            if 'IUPACName' in prop_name or 'InCh' in prop_name or 'SMILES' in prop_name:
                continue

            # Keep only the first whitespace-separated token of the value.
            value = prop.find(_tag('value')).text.strip().split(' ')
            calculated_properties[prop_name] = value[0].encode('utf-8')

        # Collect all node attributes. Calculated properties and external
        # references are applied last and therefore win on a name clash.
        attributes = {
            'Name': name,
            'DrugClass': drug_class,
            'DrugSubClass': drug_subclass,
            'ATCcode': ', '.join(atc_codes),
            'Targets': ', '.join(drug_targets),
            'Enzymes': ', '.join(drug_enzymes),
            'Transporters': ', '.join(drug_transporters),
            'Carriers': ', '.join(drug_carriers),
            'Family': family,
            'Approved': approved,
            'PathwayNames': ', '.join(pathway_name_id),
        }
        attributes.update(calculated_properties)
        for ext_ref, identifier in external_references.items():
            key = ext_ref.replace(' ', '').replace('(', '').replace(')', '')
            attributes[key] = identifier

        # add_node() on an existing node updates its attributes; unlike the
        # G.node[...] accessor it does not depend on the NetworkX version.
        G.add_node(drug_id, **attributes)

    except AttributeError as inst:
        # A required element (or its text) was missing for this drug.
        error_count += 1
        print(inst)


# Report how many drug entries raised an error while being parsed.
print('Number of errors: %d' % error_count)

# NOTE: one of the node descriptions contains an "&" symbol, which has to be
# removed manually (it only occurs in nodes, not in edges).
nx.write_gml(G, '../results/Create_DrugBank_Network/Drugbank_2018-07-03.gml')

## Create subnetwork including only CLOUD drugs
Takes the whole network and creates a subnetwork of only those drugs that also exist in the CLOUD library

In [2]:
import csv

# Collect the DrugBank IDs of the CLOUD drugs from the second column of the
# mapping table. csv.reader is used so that quoted fields containing commas
# do not shift the columns, and the file is closed once it has been read.
drugBankIDs = []
with open('../data/Create_DrugBank_Network/CLOUD_DrugBank_PubChem_Chembl.csv', 'r') as fp:
    reader = csv.reader(fp)
    next(reader, None)  # skip the header row; tolerates an empty file
    for row in reader:
        # Blank or truncated rows carry no DrugBank ID.
        if len(row) > 1:
            drugBankIDs.append(row[1].strip())

# Reload the full DrugBank network written by the previous step.
G = nx.read_gml('../results/Create_DrugBank_Network/Drugbank_2018-07-03.gml')

# Restrict the network to the CLOUD drugs and export the result.
H = G.subgraph(drugBankIDs)

nx.write_gml(H, '../results/Create_DrugBank_Network/Drugbank_2018-07-03_CLOUD_Only.gml')