# # retrieveBRENDA
# Accesses the BRENDA web client and retrieves all EC data. Creates files with the BRENDA output for every organism and EC number for which there is data.

In [2]:
import cobra
import sys
sys.path.append(r'./code/GECKO/')
from GECKO_function import *

In [3]:
#INPUTS:
#1) Path in which you wish to store all BRENDA queries:
output_path = '../data/Brenda/'
#2) Last field processed (if the program was interrupted), e.g. 'KM'. If you
#   want to start from scratch, leave empty:
last_field = ''
#3) Last EC number processed (if the program was interrupted), e.g. '1.2.3.4'.
#   If you want to start from scratch, leave empty:
last_EC = ''
#Credentials must never be committed with the notebook, so they are taken from
#the environment instead of being written here. Set BRENDA_EMAIL and
#BRENDA_PASSWORD before launching the notebook; both default to an empty
#string when unset.
import os
#4) E-mail in BRENDA:
email = os.environ.get('BRENDA_EMAIL', '')
#5) Password in BRENDA (plain text here; it is hashed before being sent):
password = os.environ.get('BRENDA_PASSWORD', '')
################################################################################

In [5]:
import os
import string
import hashlib
from SOAPpy import SOAPProxy ## for usage without WSDL file
import time

#Create the folder for the BRENDA dumps the first time the cell is run:
output_missing = not os.path.exists(output_path)
if output_missing:
    os.makedirs(output_path)

#SOAP endpoint of BRENDA (https since 2020-09-09, plain http before that):
endpointURL = 'https://www.brenda-enzymes.org/soap/brenda_server.php'
client      = SOAPProxy(endpointURL)
#The password is replaced by its SHA-256 hex digest, and the credentials
#string is the e-mail and that digest separated by a comma:
password    = hashlib.sha256(password).hexdigest()
credentials = ','.join([email, password])

#Fields to retrieve: KM, molecular weight, pathway, specific activity and kcat.
fields = ['KM','MW','PATH','SA','KCAT']

#Query every field, skipping those that precede last_field (all fields are
#queried when last_field is empty).
#NOTE(review): last_EC is passed unchanged for every field, including those
#after the resumed one - confirm retrieveBRENDA handles that as intended.
start = 0
for field in fields:
    if start == 0 and last_field in ('', field):
        start = 1
    if start == 0:
        continue
    retrieveBRENDA(field,last_EC,output_path)


# # createECfiles
# Reads all data in kinetic_data and creates all EC files.

In [4]:
import sys
sys.path.append(r'../code/')
from GECKO_function import *

In [13]:
#INPUTS:
#1) Folder holding the BRENDA query results (written by retrieveBRENDA.py):
input_path = '../data/Brenda/'
#2) Folder that will receive the generated EC files:
output_path = '../data/Brenda_EC/'

#Create the destination folder on first use:
output_missing = not os.path.exists(output_path)
if output_missing:
    os.makedirs(output_path)

#Build the EC files from the BRENDA query results:
createECfiles(input_path, output_path)

cesfully constructed EC5.3.3.12 file.
Succesfully constructed EC5.3.3.13 file.
Succesfully constructed EC5.3.3.14 file.
Succesfully constructed EC5.3.3.16 file.
Succesfully constructed EC5.3.3.17 file.
Succesfully constructed EC5.3.3.18 file.
Succesfully constructed EC5.3.3.19 file.
Succesfully constructed EC5.3.3.1 file.
Succesfully constructed EC5.3.3.21 file.
Succesfully constructed EC5.3.3.2 file.
Succesfully constructed EC5.3.3.3 file.
Succesfully constructed EC5.3.3.4 file.
Succesfully constructed EC5.3.3.5 file.
Succesfully constructed EC5.3.3.6 file.
Succesfully constructed EC5.3.3.7 file.
Succesfully constructed EC5.3.3.8 file.
Succesfully constructed EC5.3.3.9 file.
Succesfully constructed EC5.3.3.B2 file.
Succesfully constructed EC5.3.3.M5 file.
Succesfully constructed EC5.3.4.1 file.
Succesfully constructed EC5.3.99.10 file.
Succesfully constructed EC5.3.99.11 file.
Succesfully constructed EC5.3.99.2 file.
Succesfully constructed EC5.3.99.3 file.
Succesfully constructed EC5

# # findMaxKcats
# Reads all EC files and finds the max value for each substrate for the chosen
# microorganism on the different enzymatic parameters [Kcat, KM, SA, MW]. 
# For each parameter, writes a table with the following columns: EC number, substrate, organism, maximum value, pathways.

In [1]:
import sys
import os
import csv
import numpy

sys.path.append(r'../code/')
from GECKO_function import *

In [2]:
#INPUTS:
#1) Enzymatic parameters to process (one output table per entry):
features_list = [
    'KCAT',
    'SA',
    'MW',
]
#2) Folder containing the EC files (output of createECfiles.py):
input_path = '../data/Brenda_EC/'
#3) Folder in which the final tables are stored:
output_path = '../data/Brenda_EC_max/'

In [3]:
#Make sure the output folder exists before any table is written:
if not os.path.exists(output_path):
    os.makedirs(output_path)

#EC files are processed in sorted order:
dir_files = sorted(os.listdir(input_path))
organism_list,taxonomy,organism_code = orgs_list(input_path,dir_files)

#One table is produced per enzymatic parameter. Each row holds, tab-separated:
#EC number, substrate, organism, maximum value, pathways.
for feature_name in features_list:
    #Rows are collected in a list and joined once at the end, which avoids
    #re-copying an ever-growing string on every appended row.
    rows = []
    for ec in dir_files:
        #Drop the last 4 characters of the file name (the '.txt' extension):
        ec_number = ec[:-4]
        #'with' guarantees the file is closed even if parsing fails:
        with open(input_path+ec,'r') as fid:
            csv_fid = csv.reader(fid,delimiter='\t')
            #Builds a string with all the information in the EC file
            data_string, ec_pathways = EC_string(csv_fid, organism_list, organism_code, taxonomy, feature_name)
        substrates,org_strings,max_values = sub_max_std(data_string)
        #Positions come from enumerate rather than list.index(): index() returns
        #the FIRST match, so a repeated substrate or organism would otherwise be
        #paired with the wrong value.
        for i, sub in enumerate(substrates):
            for j, org in enumerate(org_strings[i]):
                rows.append(ec_number+'\t'+sub+'\t'+org+'\t'+str(max_values[i][j])+'\t'+ ec_pathways+'\n')
        #Single parenthesised argument: valid in both Python 2 and Python 3.
        print('Processed file ' + ec + ' ' + feature_name)
    #Write output:
    output = ''.join(rows)
    with open(output_path+'max_' + feature_name + '.txt','w') as fid:
        fid.write(output)

1.txt MW
Processed file EC5.1.2.2.txt MW
Processed file EC5.1.2.3.txt MW
Processed file EC5.1.2.4.txt MW
Processed file EC5.1.2.5.txt MW
Processed file EC5.1.2.7.txt MW
Processed file EC5.1.2.M1.txt MW
Processed file EC5.1.3.1.txt MW
Processed file EC5.1.3.10.txt MW
Processed file EC5.1.3.11.txt MW
Processed file EC5.1.3.12.txt MW
Processed file EC5.1.3.13.txt MW
Processed file EC5.1.3.14.txt MW
Processed file EC5.1.3.15.txt MW
Processed file EC5.1.3.16.txt MW
Processed file EC5.1.3.17.txt MW
Processed file EC5.1.3.18.txt MW
Processed file EC5.1.3.19.txt MW
Processed file EC5.1.3.2.txt MW
Processed file EC5.1.3.20.txt MW
Processed file EC5.1.3.21.txt MW
Processed file EC5.1.3.22.txt MW
Processed file EC5.1.3.23.txt MW
Processed file EC5.1.3.24.txt MW
Processed file EC5.1.3.25.txt MW
Processed file EC5.1.3.26.txt MW
Processed file EC5.1.3.27.txt MW
Processed file EC5.1.3.28.txt MW
Processed file EC5.1.3.29.txt MW
Processed file EC5.1.3.3.txt MW
Processed file EC5.1.3.30.txt MW
Processed


# # retrieveKEGG
# Access the KEGG API and retrieves all data available for each protein-coding
# gene of the "n" organisms specified. Creates a file for each successful query.


In [None]:
#INPUTS:
#1) KEGG codes of the organisms to query (any number of them). The full list
#   is available at: http://rest.kegg.jp/list/organism
organism_codes = [
    'sce',
]  # alternative: ['eco']
#2) Folder in which all generated files are stored:
output_path = './data/KEGG/'
#3) Last organism processed (only needed if the program was interrupted).
#   Leave empty to start from scratch:
last_organism = ''
#4) Last gene entry processed (only needed if the program was interrupted).
#   Leave empty to start from scratch:
last_entry = ''

In [None]:
#Get current path:
import os
prev_path = os.getcwd()

#Remove organisms already queried from the list
if last_organism!='':
    organism_codes=organism_codes[organism_codes.index(last_organism):]

#Resolve the output folder to an absolute path once, while still in the
#starting directory. The loop below changes the working directory, so a
#relative output_path would otherwise be resolved inside the previous
#organism's folder from the second organism onwards.
base_path = os.path.abspath(output_path)

#extensible library for opening URLs
import urllib2
#Main loop: retrieves all genes found for every organism
#NOTE(review): last_entry is passed unchanged for every organism, including
#those after the resumed one - confirm retrieve_org_genesData expects that.
for organism in organism_codes:

    #Creates (if not present) a subfolder for the organism inside the
    #specified output path
    org_path = os.path.join(base_path, organism)
    if not os.path.exists(org_path):
        os.makedirs(org_path)
    #access to the created organism subfolder
    os.chdir(org_path)
    try:
        #gets and creates files for all the gene entries found for the organism
        organism_genes=retrieve_org_genesData(organism, last_entry)
    finally:
        #Always return to the starting directory, even if the query fails, so
        #the notebook is never left inside an organism subfolder.
        os.chdir(prev_path)