In [1]:
# Input configuration used by the cells below.
#   csv_folder_path : folder that is searched (recursively) for the .csv files.
#   split_symbol    : separator used to cut a strain name down to its group
#                     prefix when replicates are grouped in the aggregation cell.
#   sep_symbol      : field delimiter handed to pd.read_csv (',' or ';').
# NOTE: the second group of assignments overrides the first one, so only the
# './csv' settings are in effect when this cell is run top to bottom. Comment
# out the group that is not wanted.
csv_folder_path = './carbon_source'
split_symbol = '_R'
sep_symbol = ';'

csv_folder_path = './csv'
split_symbol = '.'
sep_symbol = ','


In [2]:
def smooth(a,WSZ=5):
    """Moving-average smoothing that keeps the output as long as the input.

    Interior samples are averaged over a full window of ``WSZ`` points. The
    first and last ``(WSZ - 1) / 2`` samples are averaged over centred windows
    of growing odd width (1, 3, ..., WSZ - 2).

    a:   1-D NumPy array with the data to smooth.
    WSZ: window size; must be an odd number, as in the original MATLAB
         implementation this mirrors.
    Returns a 1-D array with one smoothed value per input sample.
    """
    # Interior: plain moving average wherever the whole window fits.
    core = np.convolve(a, np.ones(WSZ, dtype=int), 'valid') / WSZ

    # Odd window widths used for the samples close to either end.
    edge_widths = np.arange(1, WSZ - 1, 2)

    # Running sums from the front and from the back; every second entry is
    # the sum over a window of odd width.
    head_sums = np.cumsum(a[:WSZ - 1])
    tail_sums = np.cumsum(a[:-WSZ:-1])

    head = head_sums[::2] / edge_widths
    tail = (tail_sums[::2] / edge_widths)[::-1]
    return np.concatenate((head, core, tail))

def digitalOnly(s):
    """Return ``s`` with every character that is not an ASCII digit removed.

    The previous implementation relied on ``string.maketrans('', '')`` and the
    two-argument ``str.translate``, which only exist for Python 2 byte
    strings. Filtering character by character gives the same result there and
    also works for unicode strings and on Python 3.

    s: input string.
    Returns a string containing only the characters of ``s`` that are in
    ``string.digits``, in their original order (empty string if there are none).
    """
    import string
    return ''.join(ch for ch in s if ch in string.digits)

def getLinearRegressionCoef(X,y):
    """Fit an ordinary least-squares line and return its coefficient(s).

    X: training data passed unchanged to ``LinearRegression.fit``
       (the caller in this file passes a column of time points).
    y: target values passed unchanged to ``LinearRegression.fit``.
    Returns ``reg.coef_`` of the fitted model.

    The R^2 score that used to be computed here was thrown away, so that
    extra pass over the data has been removed.
    """
    from sklearn.linear_model import LinearRegression
    reg = LinearRegression().fit(X, y)
    return reg.coef_


def computeDerivetive_od(t, sOD, wsz = 5):
    """Estimate d(log OD)/dt with a sliding-window linear regression.

    For every sample whose centred window of ``wsz`` points fits inside the
    data, log(sOD) is regressed on t over that window and the slope is stored.
    Samples too close to either end keep a slope of 0.

    t:   time points; sliced per window and passed as the regressor to
         getLinearRegressionCoef (the caller passes a column vector).
    sOD: smoothed OD values, same length as ``t``; their natural log is fitted.
    wsz: window size. Must be odd: the window always spans
         ``2 * (wsz // 2) + 1`` samples, so an even value never matches and
         every slope stays 0.

    Returns ``(coef_OD, dt_pad)``:
      coef_OD: array of len(sOD) with the fitted slope per sample.
      dt_pad:  array of len(sOD) with the mean window time per sample; the
               ``wsz // 2`` entries at each end are filled by odd reflection
               of the interior values.
    """
    import numpy as np

    # Floor division keeps the slice bounds integral on Python 2 and 3.
    half = wsz // 2
    n = len(sOD)
    coef_OD = np.zeros(n)
    t_mean = np.zeros(n)

    # Only positions with a complete centred window are fitted; the edges
    # keep their initial 0.
    for i in range(half, n - half):
        t_wsz = t[i - half:i + half + 1]
        sOD_wsz = np.expand_dims(np.log(sOD[i - half:i + half + 1]), axis=1)
        if len(sOD_wsz) != wsz or len(t_wsz) != len(sOD_wsz):
            continue

        where_are_NaNs = np.isnan(sOD_wsz)
        if where_are_NaNs.any():
            # log() of a negative OD gives NaN; patch it with the mean of the
            # remaining window values so the regression can still run.
            sOD_wsz[where_are_NaNs] = np.mean(sOD_wsz[~where_are_NaNs])
            print("nan Found in array, replaced by mean value of the rest %s" % i)

        # The regression returns an array of coefficients; take the single
        # slope explicitly instead of relying on implicit array-to-scalar
        # conversion.
        coef_OD[i] = np.ravel(getLinearRegressionCoef(t_wsz, sOD_wsz))[0]
        t_mean[i] = np.mean(t_wsz)

    dt = t_mean[half:len(t_mean) - half]
    dt_pad = np.pad(dt, [half, half], 'reflect', reflect_type='odd')

    return coef_OD, dt_pad


def draw_tangent(x1,y1,ind,e=0.1,plot=False):
    """Fit a tangent to the curve at ``ind`` and find where the curve leaves it.

    The data is interpolated with a spline; the tangent is built from the
    spline value and first derivative at ``x[ind]``. Walking away from ``ind``
    in both directions, the first sample whose distance to the tangent exceeds
    ``abs(y[ind] * e)`` marks the end of the tangent region on that side.

    x1, y1: abscissa and ordinate of the curve (squeezed to 1-D internally).
    ind:    index of the tangent point.
    e:      tolerance as a fraction of ``y[ind]``.
    plot:   when true, draw the tangent point, the tangent and the curve on
            the current matplotlib axes.

    Returns ``(left, right, small_t, tan)``:
      left, right: indices where the curve departs from the tangent
                   (0 / ``len(x) - 1`` when it never does on that side;
                   index 0 itself is not tested on the left side).
      small_t:     10 evenly spaced x values from ``x[left]`` to ``x[right]``.
      tan:         tangent values at ``small_t``.
    """
    from scipy import interpolate
    import numpy as np

    x = np.squeeze(x1)
    y = np.squeeze(y1)

    # Interpolate the data with a spline and evaluate f(a) and f'(a).
    spl = interpolate.splrep(x, y)
    a = x[ind]
    fa = interpolate.splev(a, spl, der=0)
    fprime = interpolate.splev(a, spl, der=1)
    tolerance = abs(y[ind] * e)

    def _first_departure(indices, default):
        # First index (in the given order) where the curve is further than
        # the tolerance away from the tangent line.
        for i in indices:
            tan_i = fa + fprime * (x[i] - a)
            if abs(tan_i - y[i]) > tolerance:
                return i
        return default

    left = _first_departure(range(ind, 0, -1), 0)
    right = _first_departure(range(ind + 1, len(x)), len(x) - 1)

    small_t = np.linspace(x[left], x[right], 10)
    tan = fa + fprime * (small_t - a)

    if plot:
        # Imported lazily so the function works without matplotlib when no
        # figure is requested.
        import matplotlib.pyplot as plt
        plt.plot(a, fa, 'om', small_t, tan, '--r')
        plt.plot(x, y)
    return left, right, small_t, tan


def getTimepoints_old(coef_OD,sOD_log, dt_pad,start_ind, thres=0.03, thres_range=0.8):
    """Locate the peak growth rate and the window where it stays near the peak.

    The peak is the largest value of ``coef_OD`` at or after ``start_ind``.
    On each side of the peak, the window ends at the first sample whose rate
    has dropped by more than ``1 - thres_range`` relative to the peak.

    coef_OD:     per-sample growth-rate estimates.
    sOD_log:     unused; kept for signature compatibility.
    dt_pad:      time value for every sample of ``coef_OD``.
    start_ind:   first index considered when searching for the peak.
    thres:       unused; kept for signature compatibility.
    thres_range: fraction of the peak rate a sample must keep to stay inside
                 the window.

    Returns ``(time_of_peak, time_left, time_right, left_index, right_index)``.
    The indices default to 0 and ``len(coef_OD) - 1`` when no drop is found.
    """
    peak = np.argmax(coef_OD[start_ind:]) + start_ind
    peak_rate = coef_OD[peak]
    peak_time = dt_pad[peak]
    allowed_drop = 1 - thres_range

    def _dropped(k):
        # Relative decrease of the rate at k with respect to the peak.
        return (peak_rate - coef_OD[k]) / peak_rate > allowed_drop

    total = len(coef_OD)
    right_side_exp = next(
        (k for k in range(peak + 1, total) if _dropped(k)), total - 1)
    left_side_exp = next(
        (k for k in range(peak - 1, -1, -1) if _dropped(k)), 0)

    return (peak_time, dt_pad[left_side_exp], dt_pad[right_side_exp],
            left_side_exp, right_side_exp)

def getTimepoints(coef_OD,sOD_log,dt_pad,start_ind):
    """Find the peak growth rate and the tangent window around it.

    The peak is the largest value of ``coef_OD`` at or after ``start_ind``.
    ``draw_tangent`` is then applied to (dt_pad, sOD_log) at that index to get
    the left/right limits of the region that follows the tangent.

    Returns ``(time_of_peak, time_left, time_right, left_index, right_index,
    small_t, tan, peak_index)``, where ``small_t`` / ``tan`` are the tangent
    samples produced by ``draw_tangent``.
    """
    peak = np.argmax(coef_OD[start_ind:]) + start_ind
    peak_time = dt_pad[peak]

    tangent_fit = draw_tangent(dt_pad, sOD_log, peak)
    lo, hi, tangent_t, tangent_y = tangent_fit

    return (peak_time, dt_pad[lo], dt_pad[hi], lo, hi,
            tangent_t, tangent_y, peak)


def getPostiveStart(sOD):
    """Return the index right after the last non-positive value of ``sOD``.

    Every sample from the returned index onward is greater than zero.
    Returns 0 when no value is <= 0, and -1 when the last sample is itself
    non-positive or the input is empty (there is no positive tail).
    """
    total = len(sOD)
    non_positive = [pos for pos in range(total) if sOD[pos] <= 0]
    first_clean = non_positive[-1] + 1 if non_positive else 0
    return -1 if first_clean >= total else first_clean

def mkdir_p(path):
    """Create ``path`` and any missing parents, like ``mkdir -p``.

    An already existing directory is not an error. Any other OSError
    (including ``path`` existing as a regular file) is re-raised.
    """
    import errno
    import os
    try:
        os.makedirs(path)
    except OSError as exc:  # Python >2.5
        directory_exists = exc.errno == errno.EEXIST and os.path.isdir(path)
        if not directory_exists:
            raise
            
            
def getStationaryTime(sOD):
    """Return the index where the curve settles onto its final value.

    Scanning backwards from the second-to-last sample down to index 1, the
    first sample that differs from the last value by more than 5 % (relative
    to the last value) is returned. If no such sample exists, the index of
    the last sample is returned. Index 0 is never examined.
    """
    values = np.asarray(sOD)
    final_index = len(values) - 1
    final_value = values[final_index]

    for idx in reversed(range(1, final_index)):
        relative_gap = abs(final_value - values[idx]) / final_value
        if relative_gap > 0.05:
            return idx
    return final_index

def mkdir_p(path):
    """Create directory ``path`` (with parents); ignore "already exists".

    NOTE: this repeats the ``mkdir_p`` defined earlier in this cell with the
    same behaviour; being the later definition, it is the one in effect.
    """
    import errno
    import os
    try:
        os.makedirs(path)
    except OSError as exc:  # Python >2.5
        if exc.errno != errno.EEXIST or not os.path.isdir(path):
            # Anything other than "the directory is already there" is a
            # genuine failure.
            raise

In [44]:
import os
import pandas as pd

def getfiles(folderPath):
    """Collect the raw measurement csv files below ``folderPath``.

    The folder is walked recursively. A file is kept when its name ends with
    '.csv' and does not contain 'stats' (which skips the '-stats.csv'
    summaries written by this notebook).

    Returns a list of paths in os.walk order.
    """
    return [
        os.path.join(root, fn)
        for root, dirs, files in os.walk(folderPath)
        for fn in files
        if fn.endswith('.csv') and 'stats' not in fn
    ]

# Per-file growth-curve analysis.
# For every csv file: smooth each strain's OD curve, estimate growth rate,
# lag phase and yield, save two diagnostic figures per strain and write one
# '<file>-stats.csv' summary per input file.
pths = getfiles(csv_folder_path)

# Imported once, before the loop. The helper functions defined earlier read
# `np` from the notebook namespace, so this import must run before they are
# called.
import numpy as np
import matplotlib.pyplot as plt

blank_od = 0.04   # OD subtracted from every reading (blank value)
pre_max_v = 0.7   # growth rate assumed for the first, rough lag estimate

for path in pths:
    fn = os.path.split(path)[1]
    folder = fn.split('.')[0]
    folder = os.path.join(csv_folder_path,folder)
    print(folder)
    mkdir_p(folder)
    # pay attention to the csv format: sep_symbol must match ',' or ';'
    data = pd.read_csv(path,sep=sep_symbol)

    # First column is time, every other column is one strain's OD readings.
    time = data.iloc[:,0]
    m_od = data.iloc[0:,1:]
    strains = list(m_od.columns)
    t = time.values

    # Elapsed time as a column vector, starting at 0. Dividing by 3600.0
    # (presumably seconds -> hours; confirm against the instrument export)
    # also avoids integer truncation on Python 2 for integer time stamps.
    t_k = (t[1:len(t)] - t[0:len(t)-1])/3600.0
    t_k = np.cumsum(t_k)
    t_k = np.insert(t_k,0,0)
    t_k = t_k.reshape(-1,1)
    t = t_k

    growth_rate_dict = dict()
    lag_phase_dict = dict()
    yield_dict = dict()
    for name in strains:
        if "Unnamed" in name:
            print("jump empty or unnamed strain")
            continue
        od = m_od[name].values
        od_array = np.squeeze(np.asarray(od - blank_od))
        sOD = smooth(od_array)

        start_positive = getPostiveStart(sOD)
        if start_positive < 0:
            # The curve ends on a non-positive OD, so there is no positive
            # tail whose log could be analysed.
            print('%s has no positive od at the end of the curve, skipped' % name)
            continue
        t = t_k
        if start_positive > 0:
            # Drop everything up to the last non-positive reading.
            sOD = sOD[start_positive:]
            t = t_k[start_positive:]
            print('%s found negative od  %s' % (name, start_positive))

        # Baseline OD is the first sample of the (possibly trimmed) curve.
        # Indexing with start_positive after trimming pointed at the wrong
        # sample and could run past the end of the array.
        od_start = sOD[0]

        sOD_log = np.log(sOD)
        coef_OD, dt_pad = computeDerivetive_od(t, sOD)

        ind_max_dODdt = np.argmax(coef_OD)
        OD_exp = sOD[ind_max_dODdt]
        time_max_dODdt = dt_pad[ind_max_dODdt]

        # Rough lag estimate with the assumed rate; it only selects where
        # the search for the real maximum starts.
        lag_phase_pre = time_max_dODdt - np.log(OD_exp/od_start)/pre_max_v
        start_ind = int(np.sum(t < lag_phase_pre))
        # Keep at least one sample to search in.
        start_ind = min(start_ind, len(coef_OD) - 1)

        time_max_od,time_left, time_right,left_side_exp,right_side_exp,small_t,tan,ind_max = getTimepoints(coef_OD,sOD_log,dt_pad,start_ind)

        max_v = coef_OD[ind_max]
        growth_rate = max_v

        # Update lag_phase using the growth rate found in the estimated window.
        # NOTE(review): time_max_dODdt / OD_exp still come from the global
        # maximum, not from ind_max - confirm this is intended.
        lag_phase_pre = time_max_dODdt - np.log(OD_exp/od_start)/max_v
        lag_phase = lag_phase_pre

        sta_index = getStationaryTime(sOD)
        t_sta = dt_pad[sta_index]

        yield_mean = np.mean(sOD[sta_index:])
        print(name+' yield'+"="+str(yield_mean))
        print(name+' growth_rate'+"="+str(growth_rate))
        print("---------------------------------------")

        # Figure 1: log(OD), its tangent at the maximum rate, and the rate.
        x = t
        y = sOD_log
        plt.plot(x,y,label='log(OD)')
        plt.plot(small_t,tan,'--r',label='tangent')
        plt.title(name)
        plt.xlabel("time")
        w = coef_OD
        plt.plot(x, w,label='dOD/dt/OD')
        plt.axvline(x=time_max_od,linestyle=':',color='orange')
        plt.legend()
        plt.savefig(os.path.join(folder,name +'-dODdtOD.png'))
        plt.close()

        # Figure 2: OD with the detected time points marked.
        plt.plot(x,sOD,label='OD')
        plt.axvline(x=time_left,linestyle=':',color='red')
        plt.axvline(x=time_max_od,linestyle=':',color='green')
        plt.axvline(x=t_sta,linestyle=':',color='yellow')
        plt.axvline(x=time_right,linestyle=':',color='red')
        plt.axvline(x=lag_phase,linestyle=':',color='purple')
        plt.xlabel('time')
        plt.title(name)
        plt.legend()
        plt.savefig(os.path.join(folder,name +'-OD.png'))
        plt.close()

        growth_rate_dict[name] = growth_rate
        lag_phase_dict[name] = lag_phase
        yield_dict[name] = yield_mean

    # One row per strain (in file order), one column per statistic. Skipped
    # strains show up as empty rows.
    df = pd.DataFrame([growth_rate_dict,lag_phase_dict, yield_dict])
    df = df.transpose()
    df2 = df.reindex(strains)
    df2.columns = ['Growth Rate', 'Lag Phase', 'yield']
    df2.to_csv(folder +'-stats.csv')
    

./csv/MG1655 14062019
alr yield=0.39226238095238103
alr growth_rate=0.6079346636233247
---------------------------------------
alr.1 yield=0.4148306666666667
alr.1 growth_rate=0.5885050663519565
---------------------------------------
alr.2 yield=0.40272864583333334
alr.2 growth_rate=0.5829030720469603
---------------------------------------
alr.3 yield=0.40975190476190476
alr.3 growth_rate=0.5663529978963175
---------------------------------------
alr.4 yield=0.4050523333333334
alr.4 growth_rate=0.566144137664822
---------------------------------------
alr.5 yield=0.40559493333333335
alr.5 growth_rate=0.5523075142934734
---------------------------------------
alr.6 yield=0.39551231746031745
alr.6 growth_rate=0.5466463040340803
---------------------------------------
alr.7 yield=0.41720654088050324
alr.7 growth_rate=0.5525209644312993
---------------------------------------
cbpM yield=0.3963681481481482
cbpM growth_rate=0.6595236644013542
---------------------------------------
cbpM.1 

ydcP.3 yield=0.4020963636363636
ydcP.3 growth_rate=0.5268058931393982
---------------------------------------
ydcP.4 yield=0.3815136805555556
ydcP.4 growth_rate=0.5254570448590031
---------------------------------------
ydcP.5 yield=0.3953481944444445
ydcP.5 growth_rate=0.5224372201077162
---------------------------------------
ydcP.6 yield=0.37746853932584273
ydcP.6 growth_rate=0.5134592208100583
---------------------------------------
ydcP.7 yield=0.39277181434599157
ydcP.7 growth_rate=0.4993664372500656
---------------------------------------
./csv/MG1655 02082019
uxaA yield=0.30367685714285714
uxaA growth_rate=0.5542799388096489
---------------------------------------
uxaA.1 yield=0.31956470085470084
uxaA.1 growth_rate=0.5444557957431314
---------------------------------------
uxaA.2 yield=0.3193667632850242
uxaA.2 growth_rate=0.5474790182856848
---------------------------------------
uxaA.3 yield=0.3300003980099503
uxaA.3 growth_rate=0.5550848564412892
----------------------------

uxaC.6 yield=0.3529005633802818
uxaC.6 growth_rate=0.5069955085560018
---------------------------------------
uxaC.7 yield=0.46412423076923087
uxaC.7 growth_rate=0.51656985566169
---------------------------------------
WT yield=0.3720386666666667
WT growth_rate=0.5221438151413547
---------------------------------------
WT.1 yield=0.33038768361581916
WT.1 growth_rate=0.4994210444985371
---------------------------------------
WT.2 yield=0.31589705128205126
WT.2 growth_rate=0.4968415370143071
---------------------------------------
WT.3 yield=0.3144204255319149
WT.3 growth_rate=0.48096634627596524
---------------------------------------
WT.4 yield=0.35363488372093027
WT.4 growth_rate=0.4910364511087066
---------------------------------------
WT.5 yield=0.3396099290780142
WT.5 growth_rate=0.4835846569189552
---------------------------------------
WT.6 yield=0.35088494623655914
WT.6 growth_rate=0.4815058648535623
---------------------------------------
WT.7 yield=0.3220820430107527
WT.7 gro

ybda.2 yield=0.3298207619047619
ybda.2 growth_rate=0.5332860765135229
---------------------------------------
ybda.3 yield=0.32218083333333336
ybda.3 growth_rate=0.5160101258321326
---------------------------------------
ybda.4 yield=0.32036375
ybda.4 growth_rate=0.5223006586733366
---------------------------------------
ybda.5 yield=0.3176093827160494
ybda.5 growth_rate=0.5050315757690912
---------------------------------------
ybda.6 yield=0.31862123456790126
ybda.6 growth_rate=0.5161900925061645
---------------------------------------
ybda.7 yield=0.3202027586206897
ybda.7 growth_rate=0.5043213790480287
---------------------------------------
ydcP yield=0.3606803571428571
ydcP growth_rate=0.4885273910358847
---------------------------------------
ydcP.1 yield=0.3565158490566038
ydcP.1 growth_rate=0.47660896798161084
---------------------------------------
ydcP.2 yield=0.354551975308642
ydcP.2 growth_rate=0.484305993269289
---------------------------------------
ydcP.3 yield=0.352055

WT.6 yield=0.26955405797101456
WT.6 growth_rate=0.47403184796884473
---------------------------------------
WT.7 yield=0.2680728787878788
WT.7 growth_rate=0.4661094598549042
---------------------------------------
ybdk yield=0.22478333333333325
ybdk growth_rate=0.47237192534572064
---------------------------------------
ybdk.1 yield=0.22234857142857142
ybdk.1 growth_rate=0.4666453175362432
---------------------------------------
ybdk.2 yield=0.22127813953488365
ybdk.2 growth_rate=0.46669654378855907
---------------------------------------
ybdk.3 yield=0.21187093333333334
ybdk.3 growth_rate=0.44352889908360377
---------------------------------------
ybdk.4 yield=0.21530506666666668
ybdk.4 growth_rate=0.43818165517029106
---------------------------------------
ybdk.5 yield=0.2101914074074074
ybdk.5 growth_rate=0.4324985697663764
---------------------------------------
ybdk.6 yield=0.21738046511627906
ybdk.6 growth_rate=0.4367547574429971
---------------------------------------
ybdk.7 yie

ykfI.1 yield=0.4
ykfI.1 growth_rate=0.5330140603339291
---------------------------------------
ykfI.2 yield=0.4156906666666667
ykfI.2 growth_rate=0.5238060002862175
---------------------------------------
ykfI.3 yield=0.4024613333333334
ykfI.3 growth_rate=0.5061999283146882
---------------------------------------
ykfI.4 yield=0.4038300000000001
ykfI.4 growth_rate=0.5184875280053562
---------------------------------------
ykfI.5 yield=0.4043722222222222
ykfI.5 growth_rate=0.5092463205904836
---------------------------------------
ykfI.6 yield=0.4082077777777778
ykfI.6 growth_rate=0.502212259168844
---------------------------------------
ykfI.7 yield=0.4079693333333333
ykfI.7 growth_rate=0.5127255441058752
---------------------------------------
ytfF yield=0.39364571428571427
ytfF growth_rate=0.5416209567453428
---------------------------------------
ytfF.1 yield=0.39775333333333335
ytfF.1 growth_rate=0.5365920453734169
---------------------------------------
ytfF.2 yield=0.3988026666666

appA.4 yield=0.08481555555555557
appA.4 growth_rate=0.5319318683653994
---------------------------------------
appA.5 yield=0.135625
appA.5 growth_rate=0.4898860782740515
---------------------------------------
appA.6 yield=0.04281111111111111
appA.6 growth_rate=0.4906799007523451
---------------------------------------
appA.7 yield=0.04298222222222222
appA.7 growth_rate=0.47721411466933567
---------------------------------------
yecP yield=0.35734831275720164
yecP growth_rate=0.5322173253650642
---------------------------------------
yecP.1 yield=0.34047520325203257
yecP.1 growth_rate=0.5222530675332917
---------------------------------------
yecP.2 yield=0.34247283333333334
yecP.2 growth_rate=0.5227065659338088
---------------------------------------
yecP.3 yield=0.33473261261261267
yecP.3 growth_rate=0.5163385914874793
---------------------------------------
yecP.4 yield=0.3312457264957265
yecP.4 growth_rate=0.5053556740117031
---------------------------------------
yecP.5 yield=0.3

rsd.7 yield=0.4134214814814815
rsd.7 growth_rate=0.5595464686971139
---------------------------------------
uxaB yield=0.3948123280423281
uxaB growth_rate=0.6159525596775004
---------------------------------------
uxaB.1 yield=0.40042533333333347
uxaB.1 growth_rate=0.6098366247730198
---------------------------------------
uxaB.2 yield=0.4059697478991597
uxaB.2 growth_rate=0.5920897686706592
---------------------------------------
uxaB.3 yield=0.40719533333333335
uxaB.3 growth_rate=0.595680085614322
---------------------------------------
uxaB.4 yield=0.400034245014245
uxaB.4 growth_rate=0.5892825545043172
---------------------------------------
uxaB.5 yield=0.4019557357357358
uxaB.5 growth_rate=0.5822225525008367
---------------------------------------
uxaB.6 yield=0.41630416666666675
uxaB.6 growth_rate=0.5829509228460802
---------------------------------------
uxaB.7 yield=0.4076661728395062
uxaB.7 growth_rate=0.5834684174448204
---------------------------------------
WT yield=0.4136

yagH.2 yield=0.23205960784313726
yagH.2 growth_rate=0.4723169430292765
---------------------------------------
yagH.3 yield=0.22570108843537412
yagH.3 growth_rate=0.4701337892908436
---------------------------------------
yagH.4 yield=0.2279425333333333
yagH.4 growth_rate=0.4814571812726146
---------------------------------------
yagH.5 yield=0.23258541666666666
yagH.5 growth_rate=0.47883944557217006
---------------------------------------
yagH.6 yield=0.2327966013071895
yagH.6 growth_rate=0.46471994932110566
---------------------------------------
yagH.7 yield=0.22643908496732026
yagH.7 growth_rate=0.46840580929035563
---------------------------------------
yafX yield=0.2444077333333333
yafX growth_rate=0.2932659021631272
---------------------------------------
yafX.1 yield=0.2475185964912281
yafX.1 growth_rate=0.30477945683796226
---------------------------------------
yafX.2 yield=0.24892202898550722
yafX.2 growth_rate=0.30432106481637566
---------------------------------------
yafX

osmy.5 yield=0.3105116129032257
osmy.5 growth_rate=0.4711933931474711
---------------------------------------
osmy.6 yield=0.3137371717171717
osmy.6 growth_rate=0.4685273070965083
---------------------------------------
osmy.7 yield=0.30824183908045977
osmy.7 growth_rate=0.44381133669059114
---------------------------------------
Rph yield=0.309426274509804
Rph growth_rate=0.6479456692392739
---------------------------------------
Rph.1 yield=0.3049247311827957
Rph.1 growth_rate=0.6422596508893924
---------------------------------------
Rph.2 yield=0.3128862626262626
Rph.2 growth_rate=0.6434216269788188
---------------------------------------
Rph.3 yield=0.2982059047619048
Rph.3 growth_rate=0.63203653357447
---------------------------------------
Rph.4 yield=0.29841737373737376
Rph.4 growth_rate=0.6317663406389056
---------------------------------------
Rph.5 yield=0.29821904761904755
Rph.5 growth_rate=0.6273498937411354
---------------------------------------
Rph.6 yield=0.30554323232

glpX yield=0.3309274074074074
glpX growth_rate=0.545893572628746
---------------------------------------
glpX.1 yield=0.31513927927927926
glpX.1 growth_rate=0.552403643289282
---------------------------------------
glpX.2 yield=0.3254923529411765
glpX.2 growth_rate=0.5445721999439099
---------------------------------------
glpX.3 yield=0.3218211764705882
glpX.3 growth_rate=0.544208742337336
---------------------------------------
glpX.4 yield=0.3221458823529412
glpX.4 growth_rate=0.5399762501449993
---------------------------------------
glpX.5 yield=0.31710606060606067
glpX.5 growth_rate=0.560701649502294
---------------------------------------
glpX.6 yield=0.32622747474747477
glpX.6 growth_rate=0.5412904398768941
---------------------------------------
glpX.7 yield=0.34137714285714293
glpX.7 growth_rate=0.5456346134889728
---------------------------------------
fkpA yield=0.35896325925925926
fkpA growth_rate=0.41372286795653174
---------------------------------------
fkpA.1 yield=0.3

mprA.3 yield=0.24970202898550722
mprA.3 growth_rate=0.5713860800627862
---------------------------------------
mprA.4 yield=0.25276454545454546
mprA.4 growth_rate=0.5597246632624381
---------------------------------------
mprA.5 yield=0.2510455555555556
mprA.5 growth_rate=0.5654481664831787
---------------------------------------
mprA.6 yield=0.2496237908496732
mprA.6 growth_rate=0.5680430387483155
---------------------------------------
mprA.7 yield=0.25466000000000005
mprA.7 growth_rate=0.5575190669745319
---------------------------------------
narD yield=0.2738385777777777
narD growth_rate=0.5272843349682435
---------------------------------------
narD.1 yield=0.2692609950248757
narD.1 growth_rate=0.5091353768163697
---------------------------------------
narD.2 yield=0.2703616425120774
narD.2 growth_rate=0.5164772699733048
---------------------------------------
narD.3 yield=0.26340722222222224
narD.3 growth_rate=0.5028594810986713
---------------------------------------
narD.4 yie

ygjP.6 yield=0.26688428571428574
ygjP.6 growth_rate=0.6581614789178077
---------------------------------------
ygjP.7 yield=0.19565229629629632
ygjP.7 growth_rate=0.4460894128617693
---------------------------------------
yigZ yield=0.2643344827586207
yigZ growth_rate=0.5267828664354833
---------------------------------------
yigZ.1 yield=0.25189172413793104
yigZ.1 growth_rate=0.5223617209876122
---------------------------------------
yigZ.2 yield=0.2569994047619048
yigZ.2 growth_rate=0.47662638774765204
---------------------------------------
yigZ.3 yield=0.2602273684210527
yigZ.3 growth_rate=0.5124943904650203
---------------------------------------
yigZ.4 yield=0.2594886419753087
yigZ.4 growth_rate=0.5067668173624531
---------------------------------------
yigZ.5 yield=0.26356333333333337
yigZ.5 growth_rate=0.5071075750192275
---------------------------------------
yigZ.6 yield=0.26028266666666666
yigZ.6 growth_rate=0.5042758847412973
---------------------------------------
yigZ.7 y

treC.5 yield=0.3178107407407407
treC.5 growth_rate=0.46358022217064343
---------------------------------------
treC.6 yield=0.3342439285714286
treC.6 growth_rate=0.47311223963393445
---------------------------------------
treC.7 yield=0.31769022222222226
treC.7 growth_rate=0.4630774396078393
---------------------------------------
WT yield=0.315135918367347
WT growth_rate=0.5121376193299018
---------------------------------------
WT.1 yield=0.29395266666666675
WT.1 growth_rate=0.49649050885814017
---------------------------------------
WT.2 yield=0.2990885897435897
WT.2 growth_rate=0.48572731038735584
---------------------------------------
WT.3 yield=0.29804011695906435
WT.3 growth_rate=0.4863101079946573
---------------------------------------
WT.4 yield=0.30531157232704403
WT.4 growth_rate=0.47984074936161986
---------------------------------------
WT.5 yield=0.2976300628930818
WT.5 growth_rate=0.47519648072176285
---------------------------------------
WT.6 yield=0.3041925490196078

eptA yield=0.29822181818181814
eptA growth_rate=0.5161183289887813
---------------------------------------
eptA.1 yield=0.29475977777777784
eptA.1 growth_rate=0.5077380206382988
---------------------------------------
eptA.2 yield=0.2958197916666667
eptA.2 growth_rate=0.446989726382389
---------------------------------------
eptA.3 yield=0.2945563218390805
eptA.3 growth_rate=0.5089629448380466
---------------------------------------
eptA.4 yield=0.2894284444444445
eptA.4 growth_rate=0.5039565865668176
---------------------------------------
eptA.5 yield=0.28968288888888893
eptA.5 growth_rate=0.49404697434049977
---------------------------------------
eptA.6 yield=0.28380404040404034
eptA.6 growth_rate=0.5026633290316298
---------------------------------------
eptA.7 yield=0.2848583333333334
eptA.7 growth_rate=0.49436629920905906
---------------------------------------
eutQ yield=0.29203111111111113
eutQ growth_rate=0.5086014412563211
---------------------------------------
eutQ.1 yield

yajQ.4 yield=0.3049288288288288
yajQ.4 growth_rate=0.48182005379079773
---------------------------------------
yajQ.5 yield=0.29742407407407406
yajQ.5 growth_rate=0.4779180796407703
---------------------------------------
yajQ.6 yield=0.2987099099099099
yajQ.6 growth_rate=0.4753204306963222
---------------------------------------
yajQ.7 yield=0.2990899047619048
yajQ.7 growth_rate=0.492329263489287
---------------------------------------
ybit yield=0.34706740740740744
ybit growth_rate=0.5248941105704824
---------------------------------------
ybit.1 yield=0.34016839506172836
ybit.1 growth_rate=0.5071823216675152
---------------------------------------
ybit.2 yield=0.32828000000000007
ybit.2 growth_rate=0.500556882659548
---------------------------------------
ybit.3 yield=0.32335812500000005
ybit.3 growth_rate=0.48895968663799183
---------------------------------------
ybit.4 yield=0.3235996296296296
ybit.4 growth_rate=0.46242028027051096
---------------------------------------
ybit.5 y

yhbS.7 yield=0.2752139047619048
yhbS.7 growth_rate=0.48130361406244404
---------------------------------------
ssuE yield=0.30001365079365083
ssuE growth_rate=0.5314391556869159
---------------------------------------
ssuE.1 yield=0.2932992380952381
ssuE.1 growth_rate=0.5275921489757012
---------------------------------------
ssuE.2 yield=0.2928862015503876
ssuE.2 growth_rate=0.5203956039411145
---------------------------------------
ssuE.3 yield=0.28084954954954955
ssuE.3 growth_rate=0.4987584866765071
---------------------------------------
ssuE.4 yield=0.27920883720930234
ssuE.4 growth_rate=0.49799307755450783
---------------------------------------
ssuE.5 yield=0.2746452252252252
ssuE.5 growth_rate=0.4916298215017724
---------------------------------------
ssuE.6 yield=0.2806279365079365
ssuE.6 growth_rate=0.5013587885680423
---------------------------------------
ssuE.7 yield=0.28133192982456134
ssuE.7 growth_rate=0.4941151307655728
---------------------------------------
yhcM yie

cstA.2 yield=0.3039082352941177
cstA.2 growth_rate=0.40769126138700484
---------------------------------------
cstA.3 yield=0.29177761904761906
cstA.3 growth_rate=0.5043693951819371
---------------------------------------
cstA.4 yield=0.27768306306306306
cstA.4 growth_rate=0.4917639257663001
---------------------------------------
cstA.5 yield=0.27548817204301074
cstA.5 growth_rate=0.47238910049174
---------------------------------------
cstA.6 yield=0.28076
cstA.6 growth_rate=0.48274494115592587
---------------------------------------
cstA.7 yield=0.28287518518518523
cstA.7 growth_rate=0.47341060544962427
---------------------------------------
dcp yield=0.3292920833333334
dcp growth_rate=0.5479998233416713
---------------------------------------
dcp.1 yield=0.3351385185185186
dcp.1 growth_rate=0.5606847558724297
---------------------------------------
dcp.2 yield=0.33492482758620684
dcp.2 growth_rate=0.5490926313207998
---------------------------------------
dcp.3 yield=0.32960820512

yicM.6 yield=0.3181278160919541
yicM.6 growth_rate=0.5399183227300187
---------------------------------------
yicM.7 yield=0.3156772222222222
yicM.7 growth_rate=0.5216767758675694
---------------------------------------
./csv/MG1655 14082019 N
appY yield=0.3318521518987342
appY growth_rate=0.5334941938464306
---------------------------------------
appY.1 yield=0.36099486590038316
appY.1 growth_rate=0.5438167192172908
---------------------------------------
appY.2 yield=0.4913502380952381
appY.2 growth_rate=0.5371773157485458
---------------------------------------
appY.3 yield=0.3135820091324201
appY.3 growth_rate=0.5267719211691012
---------------------------------------
appY.4 yield=0.31876941176470597
appY.4 growth_rate=0.5125245824641775
---------------------------------------
appY.5 yield=0.31714666666666663
appY.5 growth_rate=0.5084045999012257
---------------------------------------
appY.6 yield=0.31929202020202024
appY.6 growth_rate=0.507883294042709
---------------------------

yjfL.1 yield=0.2574398198198198
yjfL.1 growth_rate=0.507724361494198
---------------------------------------
yjfL.2 yield=0.2615090140845071
yjfL.2 growth_rate=0.5061291605038144
---------------------------------------
yjfL.3 yield=0.2589381220657277
yjfL.3 growth_rate=0.4937880444711992
---------------------------------------
yjfL.4 yield=0.26619373134328356
yjfL.4 growth_rate=0.4933215209257095
---------------------------------------
yjfL.5 yield=0.26393420289855063
yjfL.5 growth_rate=0.48836807224092704
---------------------------------------
yjfL.6 yield=0.26368646153846154
yjfL.6 growth_rate=0.49125648525954896
---------------------------------------
yjfL.7 yield=0.2571115343915344
yjfL.7 growth_rate=0.4793716779874895
---------------------------------------
./csv/MG1655 24062019
caiB yield=0.2591917948717949
caiB growth_rate=0.5284843006476786
---------------------------------------
caiB.1 yield=0.25984
caiB.1 growth_rate=0.5126621169067546
---------------------------------------

yhcM.4 yield=0.28205219512195123
yhcM.4 growth_rate=0.5021845175550852
---------------------------------------
yhcM.5 yield=0.2827031666666667
yhcM.5 growth_rate=0.4893566653142614
---------------------------------------
yhcM.6 yield=0.27821783333333333
yhcM.6 growth_rate=0.48296340042069585
---------------------------------------
yhcM.7 yield=0.2793175
yhcM.7 growth_rate=0.48069525146301945
---------------------------------------
yhdX yield=0.30135000000000006
yhdX growth_rate=0.5123250590386449
---------------------------------------
yhdX.1 yield=0.2908234042553191
yhdX.1 growth_rate=0.49144476412501725
---------------------------------------
yhdX.2 yield=0.29866740740740744
yhdX.2 growth_rate=0.4954417068795806
---------------------------------------
yhdX.3 yield=0.2937252564102565
yhdX.3 growth_rate=0.4804086966400639
---------------------------------------
yhdX.4 yield=0.2985412244897959
yhdX.4 growth_rate=0.49077031383743186
---------------------------------------
yhdX.5 yield=0.

ykfl.7 yield=0.279698962962963
ykfl.7 growth_rate=0.48104748617287507
---------------------------------------
yncI yield=0.28134349206349213
yncI growth_rate=0.49993879775010236
---------------------------------------
yncI.1 yield=0.2741691666666667
yncI.1 growth_rate=0.49132908396441366
---------------------------------------
yncI.2 yield=0.3318731428571429
yncI.2 growth_rate=0.5721655669402586
---------------------------------------
yncI.3 yield=0.27111999999999997
yncI.3 growth_rate=0.47807643151234624
---------------------------------------
yncI.4 yield=0.2745782113821138
yncI.4 growth_rate=0.468781827133617
---------------------------------------
yncI.5 yield=0.27778
yncI.5 growth_rate=0.4296897173816499
---------------------------------------
yncI.6 yield=0.2710193650793651
yncI.6 growth_rate=0.4705280738433129
---------------------------------------
yncI.7 yield=0.2685154471544715
yncI.7 growth_rate=0.46262672582161707
---------------------------------------
yoaF yield=0.3262714

# Set sep symbol and split symbol correctly

In [50]:
import os
import pandas as pd
from itertools import groupby 

def getfiles(folderPath):
    """Collect the '-stats' summary csv files below ``folderPath``.

    The folder is walked recursively. A file is kept when its name ends with
    '.csv' and contains 'stats'.

    NOTE: this replaces the ``getfiles`` defined in the analysis cell, which
    applies the opposite 'stats' filter.

    Returns a list of paths in os.walk order.
    """
    found = []
    for root, _dirs, names in os.walk(folderPath):
        found.extend(
            os.path.join(root, entry)
            for entry in names
            if entry.endswith('.csv') and 'stats' in entry
        )
    return found

# Gather the per-strain statistics of every '-stats.csv' file and group the
# replicates of each strain into one record (keys 'R1', 'R2', ...).
csv_folder_path = './csv'
pths = getfiles(csv_folder_path)

extrac_gr = "Growth Rate"
extrac_yi = "yield"


def _group_key(strain_name):
    # Part of the strain name in front of split_symbol; replicates share it.
    return strain_name.split(split_symbol)[0]


yield_list = []
growth_rate_list = []
name_list = []
for path in pths:
    print(path)
    data = pd.read_csv(path)
    data2 = data.set_index(data.columns[0])
    index = data.iloc[:,0]

    # itertools.groupby only merges adjacent items, so the names must be
    # ordered by the grouping key itself. Sorting by the full name alone can
    # place another strain between two replicates. The full name is the
    # tie-breaker, so replicates keep their sorted order.
    test_list = sorted(index, key=lambda a: (_group_key(a), a))
    group_ind = [(key, list(members))
                 for key, members in groupby(test_list, _group_key)]

    for key, g in group_ind:
        yield_dict = dict()
        growth_dict = dict()
        for i, strain in enumerate(g, 1):
            init = 'R'+str(i)
            yield_dict[init] = data2.loc[strain,extrac_yi]
            growth_dict[init] = data2.loc[strain, extrac_gr]
        yield_list.append(yield_dict)
        growth_rate_list.append(growth_dict)
        # Name the record after the key the group was built with, rather
        # than re-splitting the first member on a hard-coded '_'.
        name_list.append(key)



./csv/MG1655 10082019 N-stats.csv
./csv/MG1655 25062019-stats.csv
./csv/MG1655 22062019-stats.csv
./csv/MG1655 14082019 N-stats.csv
./csv/MG1655 20082019 N-stats.csv
./csv/MG1655 23062019-stats.csv
./csv/MG1655 17082019 N-stats.csv
./csv/MG1655 13082019 N-stats.csv
./csv/MG1655 24062019-stats.csv
./csv/MG1655 19062019-stats.csv
./csv/MG1655 07072019-stats.csv
./csv/MG1655 29062019-stats.csv
./csv/MG1655 03082019-stats.csv
./csv/MG1655 14062019-stats.csv
./csv/MG1655 26062019-stats.csv
./csv/MG1655 08072019-stats.csv
./csv/MG1655 28062019-stats.csv
./csv/MG1655 02082019-stats.csv
./csv/MG1655 27062019-stats.csv
./csv/MG1655 05082019-stats.csv


In [51]:
# Write one table per statistic: rows are strains, columns are replicates.
fn = os.path.split(csv_folder_path)[1]

# DataFrame.to_csv does not create missing directories, so make sure the
# output folder exists before writing.
results_dir = './results'
if not os.path.isdir(results_dir):
    os.makedirs(results_dir)

df = pd.DataFrame(yield_list)
df = df.set_index([pd.Index(name_list)])
df.to_csv('./results/yield_all_strains_'+fn+'.csv')

df = pd.DataFrame(growth_rate_list)
df = df.set_index([pd.Index(name_list)])
df.to_csv('./results/growth_rate_all_strains_'+fn+'.csv')