# Gets list of candidate reactions for overexpression
## Written by: Wheaton Schroeder
### Latest version: 08/30/2023

#### make necessary imports

In [13]:
import pandas as pd
import cobra
import re
import shutil

#### Set run parameters

In [14]:
#harvest label; it is spliced into the FVA result file name and the candidate file names below
harvest = "H3"
#path to the SBML file of the drought model that is loaded with cobra further down
drought_file = "./iPotri2999D.xml"

#### read in the FVA results

In [15]:
#read in the drought FVA result for the selected harvest
drought_df = pd.read_table('./gams_files/FVA_result_'+harvest+'.txt')

#clean up by removing the unnecessary spaces from every reaction ID
#done in one vectorised pass over the column: the previous per-row loop called
#list.index() on each iteration (quadratic in the number of reactions) and wrote
#back by position, which is only right while the frame has its default index
drought_df['rxn'] = drought_df['rxn'].str.replace(" ", "", regex=False)

#get list of (cleaned) reactions in the drought, in file order
drought_rxn_list = drought_df['rxn'].to_list()

#set the reaction as the index so rows can be looked up by reaction ID
drought_df = drought_df.set_index('rxn')

#for debugging
drought_df

Unnamed: 0_level_0,LB,min,max,UB,ModelStat,SolveStat
rxn,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
RXN_UDPREDUCT_RXN_p__L___LIGHT,-10000.0,0.000000,0.000000,0.0,1.0,1.0
RXN_E2C_mg2__L___LIGHT,-10000.0,0.000000,0.000000,10000.0,1.0,1.0
RXN_ICOSANOYL_COA_SYNTHASE_RXN_er__L___LIGHT,-10000.0,0.000000,0.000000,10000.0,1.0,1.0
RXN_GPH_RXN_er__L___LIGHT,0.0,0.000000,14.785262,10000.0,1.0,1.0
RXN_RXN_15816__L___LIGHT,-10000.0,0.000000,146.804820,10000.0,1.0,1.0
...,...,...,...,...,...,...
SK_MET_carbon_dioxide_c__S___LIGHT,-1000.0,-0.574867,-0.574850,0.0,1.0,1.0
DM_MET_starch_p__L___LIGHT,0.0,0.351127,0.351149,1000.0,1.0,1.0
DM_MET_starch_p__S___LIGHT,0.0,0.176562,0.176577,1000.0,1.0,1.0
SK_MET_starch_p__L___DARK,-1000.0,-0.351149,-0.351127,0.0,1.0,1.0


In [16]:
#read in the control FVA result
cont_df = pd.read_table('./gams_files/FVA_result_C.txt')

#clean up by removing the unnecessary spaces from every reaction ID
#done in one vectorised pass over the column: the previous per-row loop called
#list.index() on each iteration (quadratic in the number of reactions) and wrote
#back by position, which is only right while the frame has its default index
cont_df['rxn'] = cont_df['rxn'].str.replace(" ", "", regex=False)

#get list of (cleaned) reactions in the control, in file order
cont_rxn_list = cont_df['rxn'].to_list()

#set the reaction as the index so rows can be looked up by reaction ID
cont_df = cont_df.set_index('rxn')

#for debugging
cont_df

Unnamed: 0_level_0,LB,min,max,UB,ModelStat,SolveStat
rxn,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
RXN_UDPREDUCT_RXN_p__L___LIGHT,-10000.0,0.000000,0.000000,0.0,1.0,1.0
RXN_E2C_mg2__L___LIGHT,-10000.0,0.000000,0.000000,10000.0,1.0,1.0
RXN_RXN_11755_er__L___LIGHT,0.0,0.000000,178.893509,10000.0,1.0,1.0
RXN_ICOSANOYL_COA_SYNTHASE_RXN_er__L___LIGHT,-10000.0,0.000000,0.000000,10000.0,1.0,1.0
RXN_GPH_RXN_er__L___LIGHT,0.0,0.000000,18.181554,10000.0,1.0,1.0
...,...,...,...,...,...,...
SK_MET_carbon_dioxide_c__S___LIGHT,-1000.0,-1.141754,-1.141753,0.0,1.0,1.0
DM_MET_starch_p__L___LIGHT,0.0,0.703488,0.703488,1000.0,1.0,1.0
DM_MET_starch_p__S___LIGHT,0.0,0.354921,0.354921,1000.0,1.0,1.0
SK_MET_starch_p__L___DARK,-1000.0,-0.703488,-0.703488,0.0,1.0,1.0


#### Read in the genes that are downregulated in each tissue (leaf, root, stem)

##### leaf

In [17]:
#read the list of genes downregulated in the leaf
#each line of the file is treated as one gene ID
#the with-block closes the file handle as soon as the list is built
with open("./DGE_analysis/downRegulated_LEAF.txt") as down_leaf_f:

  #remove newline characters and unnecessary spaces, then collect into the list
  down_leaf = [gene.replace("\n","").replace(" ","") for gene in down_leaf_f]

##### root

In [18]:
#read the list of genes downregulated in the root
#each line of the file is treated as one gene ID
#the with-block closes the file handle as soon as the list is built
with open("./DGE_analysis/downRegulated_ROOT.txt") as down_root_f:

  #remove newline characters and unnecessary spaces, then collect into the list
  down_root = [gene.replace("\n","").replace(" ","") for gene in down_root_f]

##### stem

In [19]:
#read the list of genes downregulated in the stem
#each line of the file is treated as one gene ID
#the with-block closes the file handle as soon as the list is built
with open("./DGE_analysis/downRegulated_STEM.txt") as down_stem_f:

  #remove newline characters and unnecessary spaces, then collect into the list
  down_stem = [gene.replace("\n","").replace(" ","") for gene in down_stem_f]

#### Read in upregulated gene sets

##### Leaf

In [20]:
#read the list of genes upregulated in the leaf
#each line of the file is treated as one gene ID
#the with-block closes the file handle as soon as the list is built
with open("./DGE_analysis/upregulated_LEAF.txt") as up_leaf_f:

  #remove newline characters and unnecessary spaces, then collect into the list
  up_leaf = [gene.replace("\n","").replace(" ","") for gene in up_leaf_f]

##### Root

In [21]:
#read the list of genes upregulated in the root
#each line of the file is treated as one gene ID
#the with-block closes the file handle as soon as the list is built
with open("./DGE_analysis/upregulated_ROOT.txt") as up_root_f:

  #remove newline characters and unnecessary spaces, then collect into the list
  up_root = [gene.replace("\n","").replace(" ","") for gene in up_root_f]

##### Stem

In [22]:
#read the list of genes upregulated in the stem
#each line of the file is treated as one gene ID
#the with-block closes the file handle as soon as the list is built
with open("./DGE_analysis/upregulated_STEM.txt") as up_stem_f:

  #remove newline characters and unnecessary spaces, then collect into the list
  up_stem = [gene.replace("\n","").replace(" ","") for gene in up_stem_f]

#### Read in the drought model for GPR

In [23]:
#load the drought model from the SBML file named in the run parameters
#the model is used further down to look up the genes attached to each reaction
drought_model = cobra.io.read_sbml_model(drought_file)

#printed only if the line above returned without raising
print("success")

No objective coefficients in model. Unclear what should be optimized


success


#### identify candidates, report on results simultaneously

In [24]:
#Identify candidate reactions and report on each one while doing so.
#A reaction is written to the candidate file when
#  1) it appears in both the control and the drought FVA results,
#  2) its largest absolute FVA flux is greater in drought than in control
#     (by at least the 1E-7 tolerance), and
#  3) at least one of its genes is downregulated in the tissue named in the reaction ID.
#
#Fixes relative to the previous version of this cell:
#  - the leaf branch tested the ROOT downregulated list and the STEM upregulated list,
#    and the root branch tested the STEM upregulated list; every tissue now uses its own lists
#  - the bare except (which also hid file-write errors) is replaced by explicit handling
#    of the two expected failures: no tissue tag in the ID, or reaction missing from the model
#  - the output file is managed by a with-block so it is closed even if an error escapes

#tissue code in the reaction ID -> (name used in messages, downregulated genes, upregulated genes)
#sets are used so each membership test is constant time
tissue_dge = {
    "S": ("stem", set(down_stem), set(up_stem)),
    "R": ("root", set(down_root), set(up_root)),
    "L": ("leaf", set(down_leaf), set(up_leaf)),
}

#pattern that captures the tissue code in front of the LIGHT/DARK suffix
#compiled once here rather than looked up on every iteration
tissue_pattern = re.compile(r'__(\w)___(LIGHT|DARK)')

#set copy of the drought reactions for fast "is it in drought?" checks
drought_rxn_set = set(drought_rxn_list)

#open file to write candidates to
with open("./kd_results/kd_candidates_"+harvest+".txt","w") as kd_cand_f:

    #the list is wrapped in "/" lines (presumably GAMS set syntax, since the file is copied to gams_files)
    kd_cand_f.write("/")

    #since we only want reactions in drought and control, iterating over a single one is OK
    for rxn in cont_rxn_list:

        #check if in drought, do nothing if it is not
        if rxn not in drought_rxn_set:
            continue

        print("\nanalyzing reaction ",rxn)

        #at this point, we know the reaction is in both drought and control
        #get min and max values in control and drought
        fva_min_d = drought_df.loc[rxn,'min']
        fva_max_d = drought_df.loc[rxn,'max']
        fva_min_c = cont_df.loc[rxn,'min']
        fva_max_c = cont_df.loc[rxn,'max']

        print("min_d: ",str(fva_min_d),"\tmax_d: ",str(fva_max_d),"min_c: ",str(fva_min_c),"\tmax_c: ",str(fva_max_c))

        #get the maximum absolute values for drought and control
        max_abs_d = max(abs(fva_min_d),abs(fva_max_d))
        max_abs_c = max(abs(fva_min_c),abs(fva_max_c))

        #do math to make the comparison
        comparison = max_abs_d - max_abs_c

        #ensure comparison is significant in tolerance
        #written as "not >=" so that a NaN difference is skipped, exactly as before
        if not (comparison >= 1E-7):
            continue

        #for debugging
        print("increased flux space magnitude in drought: ",rxn,"\tdrought max abs: ",max_abs_d,"\tcontrol max abs: ",max_abs_c)

        #get the tissue where the change in flux is present from the reaction ID
        match = tissue_pattern.search(rxn)

        if match is None:

            #no tissue tag in the ID, so the DGE lists cannot be consulted automatically
            print("need to analyze ",rxn," manually!!!")
            continue

        tissue = match.group(1)

        #get set of associated genes (GPR) from the drought model
        try:

            genes = drought_model.reactions.get_by_id(rxn).genes

        except KeyError:

            #reaction is in the FVA tables but not in the model
            print("need to analyze ",rxn," manually!!!")
            continue

        #tissue codes other than S/R/L have no DGE lists; their genes are only printed
        tissue_info = tissue_dge.get(tissue)

        #determine if it is downregulated in the DGE analysis
        downregulated = False

        #for each gene
        for gene in genes:

            print(gene.id)

            if tissue_info is None:
                continue

            tissue_name, down_genes, up_genes = tissue_info

            #search if the gene is differentially regulated in that tissue
            if gene.id in down_genes:

                print("gene "+gene.id+" is downregulated in the "+tissue_name+"!")

                downregulated = True

            elif gene.id in up_genes:

                print("gene "+gene.id+" is upregulated in the "+tissue_name+"!")

            else:

                print("gene "+gene.id+" is not siginficantly differently regulated in the "+tissue_name)

        #if here with a downregulated gene then the reaction is a candidate
        if downregulated:

            kd_cand_f.write("\n\""+rxn+"\"")

    kd_cand_f.write("\n/")

#copy the file into the gams folder
shutil.copyfile("./kd_results/kd_candidates_"+harvest+".txt","./gams_files/kd_candidates_"+harvest+".txt")


analyzing reaction  RXN_UDPREDUCT_RXN_p__L___LIGHT
min_d:  0.0 	max_d:  0.0 min_c:  0.0 	max_c:  0.0

analyzing reaction  RXN_E2C_mg2__L___LIGHT
min_d:  0.0 	max_d:  0.0 min_c:  0.0 	max_c:  0.0

analyzing reaction  RXN_ICOSANOYL_COA_SYNTHASE_RXN_er__L___LIGHT
min_d:  0.0 	max_d:  0.0 min_c:  0.0 	max_c:  0.0

analyzing reaction  RXN_GPH_RXN_er__L___LIGHT
min_d:  0.0 	max_d:  14.78526183 min_c:  0.0 	max_c:  18.18155443

analyzing reaction  RXN_RXN_15816__L___LIGHT
min_d:  0.0 	max_d:  146.80481991 min_c:  0.0 	max_c:  130.67464335
increased flux space magnitude in drought:  RXN_RXN_15816__L___LIGHT 	drought max abs:  146.80481991 	control max abs:  130.67464335
Potri.001G191100
gene Potri.001G191100 is not siginficantly differently regulated in the leaf
Potri.017G092400
gene Potri.017G092400 is not siginficantly differently regulated in the leaf
Potri.004G118900
gene Potri.004G118900 is not siginficantly differently regulated in the leaf
Potri.003G162200
gene Potri.003G162200 is not 

'./gams_files/kd_candidates_H3.txt'