# Proteomics data BioLector

## Import data

In [1]:
import pandas as pd

In [2]:
# load the tab-separated proteomics table into a DataFrame
df = pd.read_csv('data.txt', sep='\t')

In [3]:
# numeric part of the SCO identifier (still a string here); when an entry
# lists several ';'-separated identifiers only the first one is used
df['protein_num'] = (
    df['protein']
    .str.split(';')
    .str[0]
    .str.replace("SCO", "")
)

In [4]:
# cast every column except the two text columns to float
# (protein_num is still a string at this point)
cols = df.columns.drop(labels=['Fasta headers', 'protein'])
df[cols] = df.loc[:, cols].astype('float64')

In [5]:
# copy of the table in which missing values are empty strings instead of NaN
df_nan = df.fillna(value='')

In [6]:
# display the numeric table (NaN kept); long frames are shown truncated
df

Unnamed: 0,Fasta headers,protein,JAS/WT_31,JAS/WT_50,mat/WT_31,mat/WT_50,1152/WT_31,1152/WT_50,protein_num
0,SCO0022 IS element ATP-binding protein,SCO0022,,,,,,,22.0
1,SCO0047 transcriptional regulator,SCO0047,,,,,,,47.0
2,SCO0069 hypothetical protein,SCO0069,,,,,,,69.0
3,SCO0118 xylosidase/arabinosidase,SCO0118,0.878093,0.745832,1.634815,2.269035,0.412571,0.258051,118.0
4,SCO0127 beta keto-acyl synthase,SCO0127,,,,,,,127.0
...,...,...,...,...,...,...,...,...,...
2315,SCO7719 TetR family transcriptional regulator,SCO7719,,,,,,,7719.0
2316,SCO7721 hypothetical protein,SCO7721,,,,,,,7721.0
2317,SCO7730 hypothetical protein,SCO7730,0.852793,1.068718,1.025809,1.226304,0.808623,0.618508,7730.0
2318,SCO7766 hypothetical protein,SCO7766,,,,,,,7766.0


## Select genes from paper Urem et al. (2016)

In [7]:
def list_to_df(file_name, data=None):
    """Select the rows of the proteomics table whose protein is listed in a file.

    Parameters
    ----------
    file_name : str
        Path to a text file with one gene number per line (e.g. ``168`` or
        ``0168``). Surrounding whitespace and blank lines are ignored.
    data : pandas.DataFrame, optional
        Table to filter; it must have a ``'protein'`` column. Defaults to the
        notebook-level ``df``.

    Returns
    -------
    pandas.DataFrame
        The matching rows of ``data`` with NaN replaced by empty strings.
    """
    # fall back to the notebook-level table so existing calls keep working
    if data is None:
        data = df

    # read the gene numbers; blank lines are dropped (they would otherwise be
    # padded to the bogus identifier 'SCO0000') and stray whitespace is
    # stripped (it would otherwise silently prevent a match)
    with open(file_name) as handle:
        gene_numbers = [line.strip() for line in handle if line.strip()]

    # convert gene numbers to SCO identifiers: zero-pad to four characters
    # and add the 'SCO' prefix
    sco_ids = [f"SCO{number:0>4}" for number in gene_numbers]

    # keep only the listed proteins
    # NOTE(review): a 'protein' entry holding several ';'-separated ids is
    # compared as one whole string, so it never matches — confirm intended
    selected = data[data['protein'].isin(sco_ids)]

    return selected.fillna('')

In [8]:
# developmental genes: filter the table and export it tab-separated
df_dev = list_to_df(file_name='development.txt')
df_dev.to_csv(path_or_buf='df_dev.txt', sep='\t')

In [9]:
# osdRK regulon genes: filter the table and export it tab-separated
df_osd = list_to_df(file_name='osdRK_regulon.txt')
df_osd.to_csv(path_or_buf='df_osd.txt', sep='\t')

In [10]:
# stress genes: filter the table and export it tab-separated
df_stress = list_to_df(file_name='stress.txt')
df_stress.to_csv(path_or_buf='df_stress.txt', sep='\t')

In [11]:
# anaerobic-growth genes: filter the table and export it tab-separated
df_ana = list_to_df(file_name='anaerobic-growth.txt')
df_ana.to_csv(path_or_buf='df_ana.txt', sep='\t')

In [12]:
# display the anaerobic-growth selection (missing values shown as blanks)
df_ana

Unnamed: 0,Fasta headers,protein,JAS/WT_31,JAS/WT_50,mat/WT_31,mat/WT_50,1152/WT_31,1152/WT_50,protein_num
8,SCO0168 regulator protein,SCO0168,0.899778,0.115466,0.445195,1.06819,5.97352,16.7504,168.0
20,SCO0203 two-component sensor,SCO0203,0.667857,0.964529,1.30556,0.956523,1.56351,1.05061,203.0
26,SCO0216 nitrate reductase subunit alpha NarG2,SCO0216,1.53606,0.424182,0.340399,0.41538,5.89953,16.2341,216.0
27,SCO0217 nitrate reductase subunit beta NarH2,SCO0217,,,,,,,217.0
914,SCO3012 two-component system histidine kinase,SCO3012,,,,,,,3012.0
915,SCO3013 two-component system response regulator,SCO3013,1.43049,1.27029,1.21063,0.966005,0.524911,0.594753,3013.0
1216,SCO3945 cytochrome oxidase subunit I,SCO3945,0.975812,0.679652,1.05116,1.56845,0.981722,0.771775,3945.0


## Select genes by SCO number

In [13]:
# look up a single protein by its SCO identifier
df_nan.loc[df_nan['protein'].eq("SCO0203")]

Unnamed: 0,Fasta headers,protein,JAS/WT_31,JAS/WT_50,mat/WT_31,mat/WT_50,1152/WT_31,1152/WT_50,protein_num
20,SCO0203 two-component sensor,SCO0203,0.667857,0.964529,1.30556,0.956523,1.56351,1.05061,203.0


In [14]:
# actinorhodin: proteins numbered 5072 through 5092 (both ends included)
df_nan[df_nan['protein_num'].between(5072, 5092)]

Unnamed: 0,Fasta headers,protein,JAS/WT_31,JAS/WT_50,mat/WT_31,mat/WT_50,1152/WT_31,1152/WT_50,protein_num
1620,SCO5072 hydroxylacyl-CoA dehydrogenase,SCO5072,1.53911,1.31392,0.246044,0.483323,,0.0177494,5072.0
1621,SCO5073 oxidoreductase,SCO5073,2.09944,1.62009,0.28642,0.46229,,,5073.0
1622,SCO5074 dehydratase,SCO5074,2.19222,2.13239,0.190368,0.357603,0.0909009,,5074.0
1623,SCO5075 oxidoreductase,SCO5075,1.86897,1.70467,0.255306,0.381531,,,5075.0
1624,SCO5077 hypothetical protein,SCO5077,1.98109,1.53698,0.397692,0.549567,0.0467421,0.00436895,5077.0
1625,SCO5078 hypothetical protein,SCO5078,2.20405,2.88521,0.22235,0.22984,,0.0180237,5078.0
1626,SCO5079 hypothetical protein,SCO5079,2.0701,1.67666,0.365621,0.49083,,,5079.0
1627,SCO5080 hydrolase,SCO5080,2.22878,1.60232,0.277692,0.350691,,0.0124362,5080.0
1628,SCO5081 hypothetical protein,SCO5081,,,,,,,5081.0
1629,SCO5083 actinorhodin transporter,SCO5083,0.835583,,,,,,5083.0


In [15]:
# select a range for proteins -> undecylprodigiosin (5879 through 5898, inclusive)
# the mask is built from df_nan only, so the filter does not rely on df and
# df_nan sharing the same index
df_nan.loc[df_nan['protein_num'].between(5879, 5898)]

Unnamed: 0,Fasta headers,protein,JAS/WT_31,JAS/WT_50,mat/WT_31,mat/WT_50,1152/WT_31,1152/WT_50,protein_num
1957,SCO5879 acyl-coa dehydrogenase RedW,SCO5879,2.79646,2.12394,0.278107,0.539216,,,5879.0
1958,SCO5880 RedY protein,SCO5880,,,1.14213,,,,5880.0
1959,SCO5884 hypothetical protein,SCO5884,1.83668,,,,,,5884.0
1960,SCO5886 3-oxoacyl-ACP synthase,SCO5886,1.90924,,0.220873,,,,5886.0
1961,SCO5888 3-oxoacyl-ACP synthase,SCO5888,1.66744,,,,,,5888.0
1962,SCO5889 hypothetical protein,SCO5889,,,,,,,5889.0
1963,SCO5890 8-amino-7-oxononanoate synthase,SCO5890,,,,,,,5890.0
1964,SCO5892 polyketide synthase,SCO5892,1.87295,,0.512986,,,,5892.0
1965,SCO5893 oxidoreductase,SCO5893,1.24661,,,,,,5893.0
1966,SCO5895 methyltransferase,SCO5895,1.99262,3.64617,0.273694,0.222825,,,5895.0


In [16]:
# select a range for proteins -> dormancy (167 through 217, inclusive)
# the mask is built from df_nan only, so the filter does not rely on df and
# df_nan sharing the same index
df_nan.loc[df_nan['protein_num'].between(167, 217)]

Unnamed: 0,Fasta headers,protein,JAS/WT_31,JAS/WT_50,mat/WT_31,mat/WT_50,1152/WT_31,1152/WT_50,protein_num
7,SCO0167 hypothetical protein,SCO0167,0.569485,0.266676,,0.643491,,13.1393,167.0
8,SCO0168 regulator protein,SCO0168,0.899778,0.115466,0.445195,1.06819,5.97352,16.7504,168.0
9,SCO0169 hypothetical protein,SCO0169,,,,,,,169.0
10,SCO0170 hypothetical protein,SCO0170,,,,,,,170.0
11,SCO0171 nicotinate phosphoribosyltransferase,SCO0171,,,,,,,171.0
12,SCO0174 DNA-binding protein,SCO0174,,,,,,,174.0
13,SCO0179 zinc-containing dehydrogenase,SCO0179,1.40281,0.242025,0.322671,0.480149,16.1935,31.4843,179.0
14,SCO0180 hypothetical protein,SCO0180,,,,,,,180.0
15,SCO0197 hypothetical protein,SCO0197,,,,,,,197.0
16,SCO0198 hypothetical protein,SCO0198,1.08859,,,,,,198.0


## Select proteins UPregulated with jasmonic acid

In [17]:
# JAS/WT ratio of at least 2 at both 31 h and 50 h
# (rows with a missing ratio never match; blanks replace NaN for display)
df_sel = df[df['JAS/WT_31'].ge(2) & df['JAS/WT_50'].ge(2)].fillna('')
df_sel

Unnamed: 0,Fasta headers,protein,JAS/WT_31,JAS/WT_50,mat/WT_31,mat/WT_50,1152/WT_31,1152/WT_50,protein_num
1622,SCO5074 dehydratase,SCO5074,2.192222,2.132387,0.190368,0.357603,0.0909009,,5074.0
1625,SCO5078 hypothetical protein,SCO5078,2.204055,2.88521,0.22235,0.22984,,0.0180237,5078.0
1633,SCO5087 actinorhodin polyketide beta-ketoacyl ...,SCO5087,2.752695,3.796078,0.100807,0.111264,0.0234344,0.00506545,5087.0
1634,SCO5088 actinorhodin polyketide beta-ketoacyl ...,SCO5088,2.888316,3.392462,0.140336,0.168592,0.0965024,0.0458292,5088.0
1636,SCO5090 actinorhodin polyketide synthase bifun...,SCO5090,4.081331,2.598029,0.133971,0.231844,,0.171071,5090.0
1750,SCO5389 hypothetical protein,SCO5389,2.261527,2.284982,0.43861,0.690508,11.9363,3.06591,5389.0
1957,SCO5879 acyl-coa dehydrogenase RedW,SCO5879,2.796461,2.123944,0.278107,0.539216,,,5879.0


In [18]:
# JAS/WT ratio of at least 2 at 31 h but below 2 at 50 h
# (a protein without a 50 h value is excluded too: NaN comparisons are False)
df_sel = df[df['JAS/WT_31'].ge(2) & df['JAS/WT_50'].lt(2)].fillna('')
df_sel

Unnamed: 0,Fasta headers,protein,JAS/WT_31,JAS/WT_50,mat/WT_31,mat/WT_50,1152/WT_31,1152/WT_50,protein_num
21,SCO0204 LuxR family transcriptional regulator,SCO0204,2.10473,0.337371,0.495661,0.991513,1.88691,4.66572,204.0
215,SCO1174 aldehyde dehydrogenase,SCO1174,2.383201,1.290883,0.430405,0.580595,2.80841,3.78137,1174.0
219,SCO1192 hypothetical protein,SCO1192,2.033552,0.882772,0.592869,0.946271,1.52536,3.61621,1192.0
296,SCO1453 hypothetical protein,SCO1453,2.192253,1.993753,0.389393,0.490447,1.46761,2.56459,1453.0
368,SCO1577 acetylornithine aminotransferase,SCO1577,2.084604,0.965689,1.06329,0.963055,0.800932,1.64123,1577.0
465,SCO1814 enoyl-ACP reductase,SCO1814,3.010836,1.70176,0.614124,0.468621,0.837258,0.863978,1814.0
597,SCO2097 hypothetical protein,SCO2097,2.372943,1.572537,0.362324,0.523103,2.91491,2.70262,2097.0
648,SCO2194 lipoyl synthase,SCO2194,2.252479,0.61062,0.950739,1.17383,1.12557,1.07198,2194.0
655,SCO2210 glutamine synthetase,SCO2210,2.102717,1.674353,0.685537,0.50184,4.0284,1.97484,2210.0
774,SCO2577 hypothetical protein,SCO2577,3.404397,1.320672,1.04058,0.744292,0.691546,0.600128,2577.0


In [19]:
# JAS/WT ratio below 2 at 31 h but at least 2 at 50 h
# (a protein without a 31 h value is excluded too: NaN comparisons are False)
df_sel = df[df['JAS/WT_31'].lt(2) & df['JAS/WT_50'].ge(2)].fillna('')
df_sel

Unnamed: 0,Fasta headers,protein,JAS/WT_31,JAS/WT_50,mat/WT_31,mat/WT_50,1152/WT_31,1152/WT_50,protein_num
77,SCO0493 ABC-transporter transmembrane protein,SCO0493,0.878655,2.699919,0.555272,0.296538,1.64575,,493.0
80,SCO0498 peptide monooxygenase,SCO0498,1.742436,2.134993,0.57447,0.19824,1.22455,0.437776,498.0
89,SCO0563 hypothetical protein,SCO0563,0.983714,2.411361,1.28073,0.324595,1.98435,1.3081,563.0
104,SCO0659 DeoR family transcriptional regulator,SCO0659,1.494241,2.008064,0.470404,,0.920573,,659.0
211,SCO1162 methyltransferase,SCO1162,1.261264,2.057889,0.647293,0.429791,0.923854,1.33129,1162.0
217,SCO1181 hypothetical protein,SCO1181,1.111084,5.578871,1.22684,,0.920478,,1181.0
227,SCO1213 hypothetical protein,SCO1213,0.684605,3.085795,0.803225,0.283673,1.53901,2.34297,1213.0
264,SCO1361 hypothetical protein,SCO1361,1.606575,2.056974,0.754921,0.214961,1.70162,1.13336,1361.0
347,SCO1543 hypothetical protein,SCO1543,0.626518,2.074891,0.595128,0.336338,1.51659,1.35053,1543.0
360,SCO1563 acetyltransferase,SCO1563,0.664945,2.466117,1.33006,0.290066,1.75908,2.87421,1563.0


## Select proteins DOWNregulated with jasmonic acid

In [23]:
# JAS/WT ratio of at most 0.5 at both 31 h and 50 h
# (rows with a missing ratio never match; blanks replace NaN for display)
df_sel = df[df['JAS/WT_31'].le(0.5) & df['JAS/WT_50'].le(0.5)].fillna('')
df_sel

Unnamed: 0,Fasta headers,protein,JAS/WT_31,JAS/WT_50,mat/WT_31,mat/WT_50,1152/WT_31,1152/WT_50,protein_num
117,SCO0730 hypothetical protein,SCO0730,0.254695,0.466736,1.19989,2.620518,1.13498,0.838794,730.0
140,SCO0838 hypothetical protein,SCO0838,0.449209,0.469884,,2.242125,,0.804746,838.0
2076,SCO6279 diaminobutyrate-pyruvate aminotransferase,SCO6279,0.023206,0.398436,34.1638,0.363915,,0.0204926,6279.0
2079,SCO6283 hypothetical protein,SCO6283,0.058195,0.489202,9.06544,0.678198,,,6283.0


In [24]:
# JAS/WT ratio of at most 0.5 at 31 h but above 0.5 at 50 h
# (a protein without a 50 h value is excluded too: NaN comparisons are False)
df_sel = df[df['JAS/WT_31'].le(0.5) & df['JAS/WT_50'].gt(0.5)].fillna('')
df_sel

Unnamed: 0,Fasta headers,protein,JAS/WT_31,JAS/WT_50,mat/WT_31,mat/WT_50,1152/WT_31,1152/WT_50,protein_num
56,SCO0396 hypothetical protein,SCO0396,0.402525,1.457176,1.151374,0.578723,0.516631,0.579434,396.0
63,SCO0409 spore-associated protein,SCO0409,0.377109,0.666796,0.965517,2.006343,1.7372,0.427714,409.0
82,SCO0506 NAD synthetase,SCO0506,0.475341,1.432408,1.183308,0.872219,1.17824,0.833398,506.0
163,SCO0932 hypothetical protein,SCO0932,0.473546,0.82293,1.237884,1.917963,,0.132727,932.0
177,SCO1026 transcriptional regulator,SCO1026,0.499504,0.915395,0.940802,1.032162,1.24849,0.738932,1026.0
255,SCO1335 oxidoreductase,SCO1335,0.484396,1.02305,1.009222,0.936848,0.928625,0.950734,1335.0
260,SCO1346 3-oxoacyl-ACP reductase,SCO1346,0.475956,0.839156,1.09612,1.445908,0.91109,1.14554,1346.0
300,SCO1464 ribulose-phosphate 3-epimerase,SCO1464,0.450922,1.20184,0.939413,0.98849,0.578857,0.803792,1464.0
431,SCO1733 acetyltransferase,SCO1733,0.411219,0.522334,1.281945,1.599108,1.07031,2.56612,1733.0
440,SCO1767 DNA hydrolase,SCO1767,0.369059,1.727542,1.061111,0.564894,0.521519,0.80286,1767.0


In [25]:
# JAS/WT ratio above 0.5 at 31 h but at most 0.5 at 50 h
# (a protein without a 31 h value is excluded too: NaN comparisons are False)
df_sel = df[df['JAS/WT_31'].gt(0.5) & df['JAS/WT_50'].le(0.5)].fillna('')
df_sel

Unnamed: 0,Fasta headers,protein,JAS/WT_31,JAS/WT_50,mat/WT_31,mat/WT_50,1152/WT_31,1152/WT_50,protein_num
7,SCO0167 hypothetical protein,SCO0167,0.569485,0.266676,,0.643491,,13.13935,167.0
8,SCO0168 regulator protein,SCO0168,0.899778,0.115466,0.445195,1.068193,5.97352,16.750414,168.0
13,SCO0179 zinc-containing dehydrogenase,SCO0179,1.402807,0.242025,0.322671,0.480149,16.1935,31.484253,179.0
17,SCO0199 alcohol dehydrogenase,SCO0199,0.841332,0.299541,0.143308,0.361709,25.7812,25.432539,199.0
18,SCO0200 hypothetical protein,SCO0200,0.865738,0.249147,0.0895777,0.526447,87.8897,31.08009,200.0
21,SCO0204 LuxR family transcriptional regulator,SCO0204,2.10473,0.337371,0.495661,0.991513,1.88691,4.665721,204.0
26,SCO0216 nitrate reductase subunit alpha NarG2,SCO0216,1.536057,0.424182,0.340399,0.41538,5.89953,16.23413,216.0
137,SCO0803 RNA polymerase sigma factor,SCO0803,1.345076,0.47521,1.23435,2.418644,0.550096,0.603447,803.0
307,SCO1480 hypothetical protein,SCO1480,0.934167,0.473461,1.63393,3.824222,0.511345,0.565291,1480.0
334,SCO1513 GTP pyrophosphokinase,SCO1513,1.084849,0.478761,1.628,3.528541,0.380825,0.284545,1513.0
