In [8]:
from openeye.oechem import *
from openeye.oemedchem import *
import sys, os
import psycopg2
import psycopg2.extras
import traceback
import re
import parse

class Drug:
    def __init__(self, smiles,name,gvk_id):
        self.smiles = smiles
        self.name = name
        self.gvk_ids = [gvk_id]
        
    def addGVKIds(self,gvk_id):
        if gvk_id not in self.gvk_ids:
            self.gvk_ids.append(gvk_id)
            
    def setTarget(self,target):
        self.target = target
        
class BiogenDb:
    def __init__(self):
        self.conn = psycopg2.connect(database='GoStar',user='medchem',host='javelin',password='medchem')


    def getDrugFromDrugName(self,drug_name):
        drug_name = drug_name.upper()
        cursor = None
        try:
            cursor = self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
            cursor.execute("SELECT structure_details.sub_smiles, structure_details.gvk_id FROM biogen.compound_synonyms, biogen.structure_details WHERE structure_details.str_id = compound_synonyms.str_id AND compound_synonyms.synonyms = %s;" ,(drug_name,))
            results = cursor.fetchall()
            drug = None
            if results is not None:
                for r in results:
                    gvk_id = r['gvk_id']
                    smiles = r['sub_smiles']
                    if drug is None:
                        drug = Drug(smiles,drug_name,gvk_id)
                    else:
                        drug.addGVKIds(gvk_id)
                return drug
            else:
                return None
        except:
            if cursor is not None:
                cursor.execute("rollback")
            print >>sys.stderr, drug_name +"\thas error."
            traceback.print_exc()
            return None
        finally:
            if cursor is not None:
                cursor.close()
    
    
    def getDrugTargets(self,gvk_id):
        targets = []
        cursor = None
        try:
            cursor = self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
            cursor.execute("select primary_target from biogen.primary_target where gvk_id = %s",(gvk_id,))
            results = cursor.fetchall()
            if results is not None:
                for r in results:
                    target = r['primary_target']
                    if target not in targets:
                        targets.append(target)
                return targets
            else:
                return None
        except:
            if cursor is not None:
                cursor.execute("rollback")
            print >>sys.stderr, gvk_id +"\thas error."
            traceback.print_exc()
            return None
        finally:
            if cursor is not None:
                cursor.close()
    

    def getSmilesBySubstructure(self,sub_smi):
        smiList = []
        cursor = None
        try:
            print "finding "+sub_smi
            cursor = self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
            cursor.execute("select m,gvk_id from rdk.mols where m@>%s",(sub_smi,))
            results = cursor.fetchall()
            if results is not None:
                for r in results:
                    smiles = r['m']
                    gvk_id = r['gvk_id']
                    smiList.append("%s %s"%(smiles,gvk_id))
                return smiList
            else:
                return None
        except:
            if cursor is not None:
                cursor.execute("rollback")
            print >>sys.stderr, sub_smi +"\thas error."
            traceback.print_exc()
            return None
        finally:
            if cursor is not None:
                cursor.close()

    def getSmilesBySmarts(self,sub_smi):
        smiList = []
        cursor = None
        try:
            sub_smi = sub_smi.replace("*","[*]").replace("[nH]","n")
            print "finding "+sub_smi
            cursor = self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
            cursor.execute("select m,gvk_id from rdk.mols where m@>%s::qmol",(sub_smi,))
            results = cursor.fetchall()
            if results is not None:
                for r in results:
                    smiles = r['m']
                    gvk_id = r['gvk_id']
                    print smiles, gvk_id
                    smiList.append("%s %s"%(smiles,gvk_id))
                return smiList
            else:
                return None
        except:
            if cursor is not None:
                cursor.execute("rollback")
            print >>sys.stderr, sub_smi +"\thas error."
            traceback.print_exc()
            return None
        finally:
            if cursor is not None:
                cursor.close()

    def __del__(self):
        self.conn.close()

print "OK"

OK


In [9]:
db = BiogenDb()
drug = db.getDrugFromDrugName("Gleevec")
print drug.gvk_ids

[1076692L, 1076932L, 1039448L, 20180669L, 121349296L, 121829738L, 122256226L, 20057404L, 121286471L, 122233832L, 6060272L, 1076169L, 121760730L, 122810797L, 1156556L, 20802526L, 1360357L, 14615363L, 14646019L, 14651189L, 20182077L, 121915546L, 2591422L, 20108456L, 2454328L, 20802533L, 14705264L, 121714343L, 13190952L, 120950261L, 122490453L, 6100879L, 2602812L, 15090782L, 121761591L, 121405991L, 1100014L, 2614908L, 1081324L, 14644877L, 2540758L, 2430679L, 20802529L, 20802530L, 20474858L, 1088771L, 3120307L, 20475141L, 121351602L, 121407244L, 122701749L, 650008L, 121681999L, 2305350L, 6123764L, 1184539L, 2557396L, 1122918L, 2479209L, 120909334L, 122919511L, 1180314L, 120891427L, 122464385L, 13088885L, 20913675L, 20580074L, 2581134L, 2457626L, 2547106L, 122227015L, 120798308L, 121613561L, 121830827L, 122676188L, 6107476L, 6129490L, 20237004L, 122288797L, 1011791L, 2479210L, 13170739L, 122402593L, 122838979L, 3103186L, 1064366L, 120960187L, 653672L, 121175432L, 122930713L, 121556288L, 121

In [10]:
txt = open("/Users/jfeng1/CRL_CompoundNames.txt","r")
dict = {}
db = BiogenDb()
for lineno,line in enumerate(txt.read().splitlines()):
    p = parse.parse("{} ({})",line)
    if p is not None:
        drug = db.getDrugFromDrugName(p[1])
        
        if drug is None:
            drug = db.getDrugFromDrugName(p[0])
            if drug is not None:
                dict[lineno] = drug
        else:
            dict[lineno] = drug
    else:
        drug = db.getDrugFromDrugName(line)
        if drug is not None:
            dict[lineno] = drug
print dict.items()

[(1, <__main__.Drug instance at 0x1110dedd0>), (2, <__main__.Drug instance at 0x1110c5bd8>), (4, <__main__.Drug instance at 0x1110c5908>), (6, <__main__.Drug instance at 0x1110c5b90>), (7, <__main__.Drug instance at 0x1110c54d0>), (8, <__main__.Drug instance at 0x1110c5830>), (9, <__main__.Drug instance at 0x1110c5ab8>), (11, <__main__.Drug instance at 0x1110c5098>), (12, <__main__.Drug instance at 0x1110c5200>), (14, <__main__.Drug instance at 0x1110c5f38>), (17, <__main__.Drug instance at 0x1110c5a70>), (19, <__main__.Drug instance at 0x1110c5a28>), (20, <__main__.Drug instance at 0x1110c5c20>), (22, <__main__.Drug instance at 0x1110c5e60>), (23, <__main__.Drug instance at 0x1110c5fc8>), (25, <__main__.Drug instance at 0x1110c5320>), (27, <__main__.Drug instance at 0x1110c5cb0>), (29, <__main__.Drug instance at 0x1110c5368>), (34, <__main__.Drug instance at 0x1110c52d8>), (38, <__main__.Drug instance at 0x1110bf878>), (44, <__main__.Drug instance at 0x1110c5440>), (47, <__main__.Drug

In [12]:
keys = dict.keys()
file = open("/Users/jfeng1/drug_targets.csv","w")
keys.sort()
for key in keys:
    drug = dict[key]
    targets = []
    for gvk_id in drug.gvk_ids:
        targets.append("|".join(db.getDrugTargets(gvk_id)))
    file.write("%s\t%s\t%s\n"%(drug.smiles,drug.name,"|".join(targets)))
file.close()