In [5]:
import pandas as pd
import json

# 1. Load the data
# The flux table is comma-separated; switch to sep='\t' only for a tab-separated export.
input_file = 'Ecoli_glcn.fluxes.csv'
raw_df = pd.read_csv(input_file, sep=',')

# 2. Inspect and Clean Columns
# The header names in the file are not relied upon. Columns are identified by
# position instead: column 0 is assumed to hold the reaction IDs (strings) and
# column 1 the flux values (floats).
if len(raw_df.columns) < 2:
    # Stop here: every later step needs both columns, so continuing would only
    # fail further down with a less helpful KeyError.
    raise ValueError(
        "Error: The file does not have enough columns. "
        f"Expected at least 2 in {input_file!r}, found {len(raw_df.columns)}."
    )

# rename() returns a new frame with a freshly built column Index. Assigning into
# df.columns.values instead writes into the backing array of an Index that pandas
# treats as immutable, which is unsupported and can leave label lookups out of sync.
# Building `df` from `raw_df` also keeps this cell safe to re-run.
df = raw_df.rename(
    columns={raw_df.columns[0]: 'Reaction_ID', raw_df.columns[1]: 'Flux'}
)

# 3. CRITICAL: Fix Reaction IDs for Escher
# Escher maps usually do NOT use the "R_" prefix (e.g., "EX_glc__D_e" instead of "R_EX_glc__D_e")
# Only a leading "R_" is removed (the pattern is anchored with ^); other IDs are unchanged.
df['Reaction_ID'] = df['Reaction_ID'].astype(str).str.replace(r'^R_', '', regex=True)

In [3]:
# Display the flux table; the saved rendering below elides the middle rows.
# NOTE(review): this cell's execution count (3) is lower than the loading cell's (5)
# and its saved output shows different flux values from the later display of `df`
# -- presumably stale; re-run after the loading cell to refresh it.
df

Unnamed: 0,Reaction_ID,Flux
0,EX_cm_e,-6.775736e-11
1,EX_cmp_e,-5.002221e-11
2,EX_co2_e,4.378669e+00
3,EX_cobalt2_e,-1.560000e-05
4,DM_4crsol_c,1.449501e-04
...,...,...
2578,RNDR4,7.385324e-09
2579,RNDR4b,7.401241e-09
2580,RNTR1c2,1.612324e-02
2581,RNTR2c2,1.664779e-02


In [6]:
# 4. Convert to Escher Dictionary Format
# Escher expects a dictionary: { "Reaction_ID": Flux_Value }
flux_by_reaction = df.set_index('Reaction_ID')['Flux']

# A dict holds each key once, so repeated IDs (possible after the "R_" prefix was
# stripped) would silently collapse to their last flux. Report them instead of hiding it.
repeated_ids = flux_by_reaction.index[flux_by_reaction.index.duplicated()].unique().tolist()
if repeated_ids:
    print(
        f"Warning: {len(repeated_ids)} reaction ID(s) occur more than once; "
        f"only the last flux of each is kept (e.g. {repeated_ids[:5]})"
    )

# Missing or infinite fluxes would be serialised as NaN / Infinity, which are not
# valid JSON and are rejected by strict parsers such as the browser's JSON.parse.
not_exportable = flux_by_reaction.isna() | flux_by_reaction.isin([float('inf'), float('-inf')])
if not_exportable.any():
    print(f"Warning: dropping {int(not_exportable.sum())} reaction(s) with missing or infinite flux")
    flux_by_reaction = flux_by_reaction[~not_exportable]

escher_data = flux_by_reaction.to_dict()

# 5. Save as JSON
output_file = 'escher_fluxes.json'
# Serialise first so a failure cannot leave a truncated file behind; allow_nan=False
# makes json raise rather than emit a non-standard token if one slipped through.
json_text = json.dumps(escher_data, allow_nan=False)
with open(output_file, 'w') as f:
    f.write(json_text)

print(f"Success! Data saved to {output_file}")
print("--- Preview of data for Escher ---")
# Print first 5 items to verify IDs look correct (no 'R_' prefix)
print(dict(list(escher_data.items())[:5]))

Success! Data saved to escher_fluxes.json
--- Preview of data for Escher ---
{'EX_cm_e': -6.798472895752639e-11, 'EX_cmp_e': -5.0022208597511047e-11, 'EX_co2_e': 5.597199118883736, 'EX_cobalt2_e': -1.4160000091578697e-05, 'DM_4crsol_c': 0.0001315700819759}


In [7]:
# Display the flux table again after the JSON export; the first rows shown below
# agree with the preview printed by the export cell.
df

Unnamed: 0,Reaction_ID,Flux
0,EX_cm_e,-6.798473e-11
1,EX_cmp_e,-5.002221e-11
2,EX_co2_e,5.597199e+00
3,EX_cobalt2_e,-1.416000e-05
4,DM_4crsol_c,1.315701e-04
...,...,...
2578,RNDR4,6.425741e-03
2579,RNDR4b,8.209188e-03
2580,RNTR1c2,1.424155e-08
2581,RNTR2c2,1.367630e-08
