In [7]:
def CreateDict(data):
    output = {}
    for line in data.split("\n"):
        if line and not line[0] == " ":
            info = line.split(" ", 1)
            if len(info) > 1:
                output[info[0]] = [info[1].lstrip(" ")]
        else:
            output[list(output.keys())[-1]].append(line.lstrip(" "))
    return output

In [8]:
from Bio.KEGG import REST
# from bioservices.kegg import KEGG

#Import pathways
GLY = REST.kegg_get("map00010").read()
TCA = REST.kegg_get("map00020").read()
PPP = REST.kegg_get("map00030").read()

#Import Enzymes for glycolysis
HEX = REST.kegg_get("2.7.1.1").read() 
PGI = REST.kegg_get("5.3.1.9").read()
PFK = REST.kegg_get("2.7.1.11").read()
TPI = REST.kegg_get("5.3.1.1").read()

#Import Enzymes for TCA
CS = REST.kegg_get("2.3.3.1").read() 
AH = REST.kegg_get("4.2.1.3").read()
FH = REST.kegg_get("4.2.1.2").read()
MD = REST.kegg_get("1.1.1.37").read()

#Import Enzymes for PPP
RPE = REST.kegg_get("5.1.3.1").read() 
RPI = REST.kegg_get("5.3.1.6").read()
TK = REST.kegg_get("2.2.1.1").read()
TA = REST.kegg_get("2.2.1.2").read()

#CreateDict(TAL)["PATHWAY"]

In [9]:
import sqlite3
conn = sqlite3.connect('my.db')
c = conn.cursor()

#Create Pathway Table    
c.execute("""DROP TABLE pathway;""")

c.execute("""CREATE TABLE pathway (name TEXT, description TEXT);""")

pathway_data = [(CreateDict(GLY)["NAME"][0], CreateDict(GLY)["DESCRIPTION"][0][:50]),
                (CreateDict(TCA)["NAME"][0], CreateDict(TCA)["DESCRIPTION"][0][:50]),
                (CreateDict(PPP)["NAME"][0], CreateDict(PPP)["DESCRIPTION"][0][:50])
               ]

c.executemany('INSERT INTO pathway VALUES (?,?)', pathway_data)

conn.commit()

c.execute("SELECT * FROM pathway;")
print(c.fetchall())
# for row in c.execute("SELECT * FROM pathway;"):
#     print(row)

[('Glycolysis / Gluconeogenesis', 'Glycolysis is the process of converting glucose in'), ('Citrate cycle (TCA cycle)', 'The citrate cycle (TCA cycle, Krebs cycle) is an i'), ('Pentose phosphate pathway', 'The pentose phosphate pathway is a process of gluc')]


In [10]:
# Create Enzyme Table    
# enzyme_ids = ['HEX','PGI','PFK','TPI','CS','AH','FH','MD','RPE','RPI','TK','TA']

c.execute("""DROP TABLE enzyme;""")

c.execute("""CREATE TABLE enzyme (name TEXT, description TEXT, EC number TEXT);""")

enzyme_data = [(CreateDict(HEX)["NAME"][0], CreateDict(HEX)["REACTION"][0], CreateDict(HEX)["ENTRY"][0][3:15].replace(" ","")),
               (CreateDict(PGI)["NAME"][0], CreateDict(PGI)["REACTION"][0], CreateDict(PGI)["ENTRY"][0][3:15].replace(" ","")),
               (CreateDict(PFK)["NAME"][0], CreateDict(PFK)["REACTION"][0], CreateDict(PFK)["ENTRY"][0][3:15].replace(" ","")),
               (CreateDict(TPI)["NAME"][0], CreateDict(TPI)["REACTION"][0], CreateDict(TPI)["ENTRY"][0][3:15].replace(" ","")),
               (CreateDict(CS)["NAME"][0], CreateDict(CS)["REACTION"][0], CreateDict(CS)["ENTRY"][0][3:15].replace(" ","")),
               (CreateDict(AH)["NAME"][0], CreateDict(AH)["REACTION"][0], CreateDict(AH)["ENTRY"][0][3:15].replace(" ","")),
               (CreateDict(FH)["NAME"][0], CreateDict(FH)["REACTION"][0], CreateDict(FH)["ENTRY"][0][3:15].replace(" ","")),
               (CreateDict(MD)["NAME"][0], CreateDict(MD)["REACTION"][0], CreateDict(MD)["ENTRY"][0][3:15].replace(" ","")),
               (CreateDict(RPE)["NAME"][0], CreateDict(RPE)["REACTION"][0], CreateDict(RPE)["ENTRY"][0][3:15].replace(" ","")),
               (CreateDict(RPI)["NAME"][0], CreateDict(RPI)["REACTION"][0], CreateDict(RPI)["ENTRY"][0][3:15].replace(" ","")),
               (CreateDict(TK)["NAME"][0], CreateDict(TK)["REACTION"][0], CreateDict(TK)["ENTRY"][0][3:15].replace(" ","")),
               (CreateDict(TA)["NAME"][0], CreateDict(TA)["REACTION"][0], CreateDict(TA)["ENTRY"][0][3:15].replace(" ",""))
              ]

c.executemany('INSERT INTO enzyme VALUES (?,?,?)', enzyme_data)

conn.commit()

c.execute("SELECT * FROM enzyme;")
print(c.fetchall())


[('hexokinase;', 'ATP + D-hexose = ADP + D-hexose 6-phosphate [RN:R02848]', '2.7.1.1'), ('glucose-6-phosphate isomerase;', 'D-glucose 6-phosphate = D-fructose 6-phosphate [RN:R00771]', '5.3.1.9'), ('6-phosphofructokinase;', 'ATP + D-fructose 6-phosphate = ADP + D-fructose 1,6-bisphosphate [RN:R00756]', '2.7.1.11'), ('triose-phosphate isomerase;', 'D-glyceraldehyde 3-phosphate = glycerone phosphate [RN:R01015]', '5.3.1.1'), ('citrate (Si)-synthase;', 'acetyl-CoA + H2O + oxaloacetate = citrate + CoA [RN:R00351]', '2.3.3.1'), ('aconitate hydratase;', 'citrate = isocitrate (overall reaction) [RN:R01324];', '4.2.1.3'), ('fumarate hydratase;', '(S)-malate = fumarate + H2O [RN:R01082]', '4.2.1.2'), ('malate dehydrogenase;', '(S)-malate + NAD+ = oxaloacetate + NADH + H+ [RN:R00342]', '1.1.1.37'), ('ribulose-phosphate 3-epimerase;', 'D-ribulose 5-phosphate = D-xylulose 5-phosphate [RN:R01529]', '5.1.3.1'), ('ribose-5-phosphate isomerase;', 'D-ribose 5-phosphate = D-ribulose 5-phosphate [RN:R010

**Above are finished work on pathway table and enzyme table**  
**Below are work in progress**


In [12]:
# example = REST.kegg_get("hsa:9080").read()
Drosophila = REST.kegg_get("dme:K01539").read()
print(Drosophila)
# print(CreateDict(HEX)["GENES"][0:3])

ENTRY       K01539                      KO
NAME        ATP1A
DEFINITION  sodium/potassium-transporting ATPase subunit alpha [EC:3.6.3.9]
PATHWAY     ko04022  cGMP-PKG signaling pathway
            ko04024  cAMP signaling pathway
            ko04260  Cardiac muscle contraction
            ko04261  Adrenergic signaling in cardiomyocytes
            ko04911  Insulin secretion
            ko04918  Thyroid hormone synthesis
            ko04919  Thyroid hormone signaling pathway
            ko04925  Aldosterone synthesis and secretion
            ko04960  Aldosterone-regulated sodium reabsorption
            ko04961  Endocrine and other factor-regulated calcium reabsorption
            ko04964  Proximal tubule bicarbonate reclamation
            ko04970  Salivary secretion
            ko04971  Gastric acid secretion
            ko04972  Pancreatic secretion
            ko04973  Carbohydrate digestion and absorption
            ko04974  Protein digestion and absorption
            ko04976  Bi

In [13]:
# Create Gene Table    

c.execute("""DROP TABLE gene;""")

c.execute("""CREATE TABLE gene (name TEXT, description TEXT, organism TEXT, nucleotide_sequence TEXT);""")

enzyme_data = [(CreateDict(HEX)["NAME"][0], CreateDict(HEX)["REACTION"][0], CreateDict(HEX)["ENTRY"][0][3:15].replace(" ","")),
               (CreateDict(PGI)["NAME"][0], CreateDict(PGI)["REACTION"][0], CreateDict(PGI)["ENTRY"][0][3:15].replace(" ","")),
               (CreateDict(PFK)["NAME"][0], CreateDict(PFK)["REACTION"][0], CreateDict(PFK)["ENTRY"][0][3:15].replace(" ","")),
               (CreateDict(TPI)["NAME"][0], CreateDict(TPI)["REACTION"][0], CreateDict(TPI)["ENTRY"][0][3:15].replace(" ","")),
               (CreateDict(CS)["NAME"][0], CreateDict(CS)["REACTION"][0], CreateDict(CS)["ENTRY"][0][3:15].replace(" ","")),
               (CreateDict(AH)["NAME"][0], CreateDict(AH)["REACTION"][0], CreateDict(AH)["ENTRY"][0][3:15].replace(" ","")),
               (CreateDict(FH)["NAME"][0], CreateDict(FH)["REACTION"][0], CreateDict(FH)["ENTRY"][0][3:15].replace(" ","")),
               (CreateDict(MD)["NAME"][0], CreateDict(MD)["REACTION"][0], CreateDict(MD)["ENTRY"][0][3:15].replace(" ","")),
               (CreateDict(RPE)["NAME"][0], CreateDict(RPE)["REACTION"][0], CreateDict(RPE)["ENTRY"][0][3:15].replace(" ","")),
               (CreateDict(RPI)["NAME"][0], CreateDict(RPI)["REACTION"][0], CreateDict(RPI)["ENTRY"][0][3:15].replace(" ","")),
               (CreateDict(TK)["NAME"][0], CreateDict(TK)["REACTION"][0], CreateDict(TK)["ENTRY"][0][3:15].replace(" ","")),
               (CreateDict(TA)["NAME"][0], CreateDict(TA)["REACTION"][0], CreateDict(TA)["ENTRY"][0][3:15].replace(" ",""))
              ]

c.executemany('INSERT INTO enzyme VALUES (?,?,?)', enzyme_data)

conn.commit()

c.execute("SELECT * FROM enzyme;")
print(c.fetchall())

OperationalError: no such table: gene

In [None]:
from Bio import Entrez
Entrez.email = 'naser.abd@berkeley.edu'
TCA = Entrez.esearch(db="pubmed", retmax=10, term="homo sapiens [ORGN] BRCA1", sort="relevance", idtype="acc")
TCArecord = Entrez.read(TCA)
TCA.close()
TCArecord

In [None]:
import sqlite3
conn = sqlite3.connect('my.db')
c = conn.cursor()

c.execute("""DROP TABLE genes;""")

c.execute("""CREATE TABLE genes (id INT,
                                 name TEXT,
                                 description TEXT,
                                 chromosome TEXT,
                                 start INT,
                                 end INT,
                                 strand VARCHAR(1));""")

c.execute("""INSERT INTO genes (id,
                                name,
                                description,
                                chromosome,
                                start,
                                end,
                                strand)
                        VALUES (58341,
                                'BRCA1',
                                'Breast Cancer 1',
                                'chr17',
                                43044295,
                                43170245,
                                '-');""")

conn.commit()

c.execute("SELECT * FROM genes;")
print(c.fetchone())

In [None]:
from Bio import Entrez
Entrez.email = 'naser.abd@berkeley.edu'
# db options: nucleotide, gene, protein, genome
handle = Entrez.esearch(db="nucleotide", retmax=3, term="homo sapiens[ORGN] BRCA1", sort="relevance", idtype="acc")

for i in Entrez.read(handle)['IdList']:
  handle = Entrez.efetch(db='nucleotide', id=i, rettype='fasta', retmode='text')
  print(handle.read())