### Testing: Data Cleaning and Quality Control for Pregnancy Deep Phenotyping Metabolomics Data
### Piekos Lab, Kayla Xu
### 01/26/2026

In [2]:
# environment
import pandas as pd 
import numpy as np 


In [2]:
# Metabolomics source workbooks:
#   050725_Sadovsky DP3 Placenta Polar Untargeted_ALL copy.xlsx
#   050725_Sadovsky DP3 Plasma Polar Untargeted_ALL copy.xlsx
# Only the "POS Compounds" sheet is loaded here; the first two rows are skipped
# so that the third row of each sheet is used as the header.
placentaPos_raw = pd.read_excel(
    '/Users/kaylaxu/Desktop/data/raw_data/050725_Sadovsky DP3 Placenta Polar Untargeted_ALL copy.xlsx',
    sheet_name="POS Compounds",
    skiprows=2,
)
plasmaPos_raw = pd.read_excel(
    '/Users/kaylaxu/Desktop/data/raw_data/050725_Sadovsky DP3 Plasma Polar Untargeted_ALL copy.xlsx',
    sheet_name="POS Compounds",
    skiprows=2,
)

In [6]:
# Extract the "Area" label of every sample column.
# Keep only columns whose header does not contain "Unnamed", take the first
# row, index it by the "Sample ID" column and flip it so samples become rows.
placentaPos = placentaPos_raw
not_unnamed = [not ("Unnamed" in s) for s in placentaPos.columns]
sample_area = (
    placentaPos.iloc[0:1, not_unnamed]
    .set_index("Sample ID")
    .transpose()
)
sample_area.columns = ["Area"]
sample_area.to_csv("/Users/kaylaxu/Desktop/data/clean_data/MTBL_placenta/pos_sample_area.csv")

In [7]:
# Extract compound metadata: every column that is not a sample
# (header contains neither "DP3" nor "Pooled").
not_sample = [not ("DP3" in s or "Pooled" in s) for s in placentaPos.columns]
placentaPos_comps = placentaPos.iloc[:, not_sample]
# The first row holds the real column names; promote it, then drop it.
placentaPos_comps = (
    placentaPos_comps
    .rename(columns=placentaPos_comps.iloc[0])
    .drop([0])
)
# The first two rows get hand-assigned identifiers.
placentaPos_comps.iloc[0, 0] = "p01"  # D3-alanine-ISTD
placentaPos_comps.iloc[1, 0] = "p02"  # 13C1-creatinine_ISTD
placentaPos_comps.to_csv("/Users/kaylaxu/Desktop/data/clean_data/MTBL_placenta/pos_compounds.csv")


In [9]:
# Extract expression data: the sample columns only
# (header contains "DP3" or "Pooled"), without the header row.
is_expression = [("DP3" in s) or ("Pooled" in s) for s in placentaPos.columns]
expression = placentaPos.iloc[:, is_expression].drop([0])
# Label rows with the compound "Export Order" values, then flip so that
# samples are rows and compounds are columns.
expression.index = placentaPos_comps["Export Order"]
expression = expression.transpose()
expression.to_csv("/Users/kaylaxu/Desktop/data/clean_data/MTBL_placenta/pos_expression.csv")

In [12]:
# Inspect the row labels (sample names) of the transposed expression table.
expression.index

Index(['Pooled Cntrl', 'Pooled Cntrl.1', 'Pooled Cntrl.2', 'Pooled Cntrl.3',
       'Pooled Cntrl.4', 'Pooled Cntrl.5', 'Pooled Cntrl.6', 'Pooled Cntrl.7',
       'Pooled Cntrl.8', 'Pooled Cntrl.9',
       ...
       'DP3-0387', 'DP3-0389', 'DP3-0398E', 'DP3-0399', 'DP3-0404', 'DP3-0409',
       'DP3-0416', 'DP3-0419', 'DP3-0420', 'DP3-0423E'],
      dtype='str', length=137)

In [None]:
def get_batch(df):
    """Return a one-column table giving the batch code of every sample.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose sample columns have non-missing labels (one of which must
        be "Sample ID") and whose first row holds strings such as
        "Area: 062323_Sadovsky_placenta_pospool1.raw (F65)".

    Returns
    -------
    pandas.DataFrame
        Index named "Sample_ID" (the labelled columns of ``df`` other than
        "Sample ID"), with a single column "batch" holding the text between
        ": " and the first "_" of each first-row entry (e.g. "062323").

    Raises
    ------
    IndexError
        If a first-row entry does not contain ": ".
    """
    def _batch_code(label):
        # "Area: 062323_Sadovsky_..." -> "062323_Sadovsky_..." -> "062323"
        after_prefix = label.split(": ")[1]
        return after_prefix.split("_")[0]

    labelled = df.columns.notna()
    first_row = df.iloc[0:1, labelled]
    batch = first_row.set_index("Sample ID").T
    batch.columns = ["batch"]
    batch["batch"] = [_batch_code(label) for label in batch["batch"]]
    batch.index = batch.index.rename("Sample_ID")
    return batch




In [3]:
# extract compound metadata
def get_compounds(df):
    """Return the compound-annotation part of a cleaned sheet.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame in which annotation columns have missing (NaN) labels and the
        first row holds the real annotation column names.

    Returns
    -------
    pandas.DataFrame
        The NaN-labelled columns of ``df``, renamed from the first row, with
        that first row removed (by its index label) and the index named
        "Export Order".
    """
    unlabelled = df.columns.isna()
    compounds = df.iloc[:, unlabelled]
    # Promote the first row to column names.
    compounds.columns = compounds.iloc[0]
    header_label = compounds.index[0]
    compounds = compounds.drop(header_label)
    compounds.index = compounds.index.rename("Export Order")
    return compounds


In [4]:

# extract expression data
def get_expression(df, ids):
    """Return the sample-by-compound expression table of a cleaned sheet.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose sample columns have non-missing labels (including a
        "Sample ID" column, which is discarded) and whose first row is a
        header row to be removed.
    ids : index-like
        Labels assigned to the remaining rows; must have one entry per row
        left after the first row is dropped.

    Returns
    -------
    pandas.DataFrame
        Transposed table: one row per sample (index named "Sample_ID") and
        one column per entry of ``ids``.
    """
    labelled = df.columns.notna()
    areas = df.iloc[:, labelled]
    areas = areas.drop(columns="Sample ID")
    # Remove the header row by its index label.
    areas = areas.drop(df.index[0])
    areas.index = ids
    areas = areas.T
    areas.index = areas.index.rename("Sample_ID")
    return areas



In [4]:
# call all CSV-generating functions
def generate_files(df, file_output, e):
    """Write the batch, compound and expression CSVs for one cleaned sheet.

    Parameters
    ----------
    df : pandas.DataFrame
        Cleaned sheet, passed unchanged to get_batch, get_compounds and
        get_expression.
    file_output : str
        Output directory; file names are appended after a "/".
    e : str
        File-name prefix (the notebook passes "pos" or "neg").

    Writes ``<file_output>/<e>_batch.csv``, ``<file_output>/<e>_compounds.csv``
    and ``<file_output>/<e>_expression.csv``, in that order.
    """
    prefix = file_output + "/" + e

    batch = get_batch(df)
    batch.to_csv(prefix + "_batch.csv")

    compounds = get_compounds(df)
    compounds.to_csv(prefix + "_compounds.csv")

    # Expression rows are labelled with the compound index so both files agree.
    expression_table = get_expression(df, compounds.index)
    expression_table.to_csv(prefix + "_expression.csv")

def clean_df(df):
    """Promote the header row and first column of a raw sheet to labels.

    The input is left untouched: all edits are made on a copy, so the caller's
    frame keeps its original columns and cell values.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw sheet (read with ``header=None``) whose first row holds the column
        labels and whose first column holds the row identifiers. It must have
        at least three rows below the header row.

    Returns
    -------
    pandas.DataFrame
        Frame with columns taken from the first row of ``df``, that row
        removed, the first column used as the index and then dropped. The
        first-column entries at positions 1 and 2 (below the header row) are
        overwritten with "01" and "02" before being used as index labels.

    Raises
    ------
    IndexError
        If fewer than three rows remain after removing the header row.
    """
    header = df.iloc[0, :]
    # Explicit copy: writing into a bare iloc slice could modify the caller's
    # data (or trigger SettingWithCopyWarning), depending on the pandas version.
    cleaned = df.iloc[1:, :].copy()
    cleaned.columns = header
    cleaned.iloc[1, 0] = "01"  # two molecules not in the export order
    cleaned.iloc[2, 0] = "02"  # slightly different for each file
    cleaned.index = cleaned.iloc[:, 0]
    cleaned = cleaned.iloc[:, 1:]
    return cleaned

# helper function
def extract_data(file_input, file_output):
    """Convert one metabolomics workbook into cleaned CSV files.

    Reads the "POS Compounds" and "NEG Compounds" sheets of ``file_input``,
    drops rows that are entirely empty, cleans each sheet with clean_df and
    writes the results with generate_files using the prefixes "pos" and "neg".

    Parameters
    ----------
    file_input : str
        Path of the Excel workbook.
    file_output : str
        Directory that receives the generated CSV files.
    """
    sheet_by_mode = {"pos": "POS Compounds", "neg": "NEG Compounds"}

    # Passing a list of sheet names parses the workbook once and returns a
    # dict of DataFrames keyed by sheet name.
    sheets = pd.read_excel(
        file_input,
        sheet_name=list(sheet_by_mode.values()),
        header=None,
    )

    # remove empty rows and set index/columns (both sheets are cleaned before
    # any file is written)
    cleaned = {
        mode: clean_df(sheets[sheet].dropna(how='all'))
        for mode, sheet in sheet_by_mode.items()
    }

    # generate files
    for mode, frame in cleaned.items():
        generate_files(frame, file_output, mode)



In [11]:
# Run the full extraction for the placenta workbook.
extract_data(
    '/Users/kaylaxu/Desktop/data/raw_data/050725_Sadovsky DP3 Placenta Polar Untargeted_ALL copy.xlsx',
    '/Users/kaylaxu/Desktop/data/clean_data/MTBL_placenta',
)

In [53]:

# Step-by-step version of the clean_df logic on the placenta POS sheet,
# kept for inspecting the intermediate result.
placentaPos_raw = pd.read_excel(
    '/Users/kaylaxu/Desktop/data/raw_data/050725_Sadovsky DP3 Placenta Polar Untargeted_ALL copy.xlsx',
    sheet_name="POS Compounds",
    header=None,
).dropna(how='all')
# First remaining row becomes the column labels, then is removed.
placentaPos_raw.columns = placentaPos_raw.iloc[0]
placentaPos_raw = placentaPos_raw.iloc[1:]
placentaPos_raw.iloc[1, 0] = "01"  # two molecules not in the export order
placentaPos_raw.iloc[2, 0] = "02"  # slightly different for each file
# First column becomes the row index, then is removed.
placentaPos_raw.index = placentaPos_raw.iloc[:, 0]
placentaPos_raw = placentaPos_raw.iloc[:, 1:]
placentaPos_raw

2,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,...,DP3-0398E,DP3-0399,DP3-0404,DP3-0409,DP3-0416,DP3-0419,DP3-0420,DP3-0423E,NaN,NaN
nan,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Export Order,Name,Formula,Annot. Source: Predicted Compositions,Annot. Source: mzCloud Search,Annot. Source: mzVault Search,Annot. Source: Metabolika Search,Annot. Source: ChemSpider Search,Annot. Source: MassList Search,Annot. DeltaMass [ppm],Calc. MW,...,Area: 032425_Sadovsky_Polar_Pos_45.raw (F115),Area: 032425_Sadovsky_Polar_Pos_46.raw (F116),Area: 032425_Sadovsky_Polar_Pos_47.raw (F117),Area: 032425_Sadovsky_Polar_Pos_48.raw (F118),Area: 032425_Sadovsky_Polar_Pos_49.raw (F119),Area: 032425_Sadovsky_Polar_Pos_50.raw (F120),Area: 032425_Sadovsky_Polar_Pos_51.raw (F121),Area: 032425_Sadovsky_Polar_Pos_52.raw (F122),Peak Rating (Max.),Peak Rating QC (Max.)
01,D3-alanine-ISTD,,,,,,,,,,...,401151.873605,373625.231405,364593.320492,399287.829141,435095.699504,272278.020734,436893.81994,354308.6104,,
02,13C1-creatinine_ISTD,,,,,,,,,,...,22865033.147103,23508151.282216,23660342.335612,18189295.140684,22990171.893712,18648420.254607,22036274.984124,19718638.033679,,
p1,,C4 H4 O7 P2,Full match,No results,No results,No results,No match,No match,4.39,225.94422,...,1299977851.71867,1319932558.19584,1415801523.75981,1354812550.03793,1246002605.39726,1280463349.87845,1365353348.293,1331074984.32775,9.2,5.5
p2,Phosphate,H3 O4 P,No results,No results,No results,Full match,Full match,Full match,-0.08,97.97689,...,284659338.91841,224398236.582277,260621107.991995,152190993.760215,144181458.718533,259967809.838309,118226509.757648,297492629.450178,8,7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
p3700,,C26 H38 N2 O11 S,Full match,No results,No results,No results,No match,No match,3.47,586.22167,...,38495.799017,59486.156988,43756.364913,30018.671239,23856.126696,50097.705991,50517.907992,37939.473894,5.9,5.8
p3701,"1,2,4-Trimethylbenzene",C9 H12,Full match,No results,No results,No results,Partial match,Full match,-0.79,120.09381,...,7232.991172,6344.848361,4051.1737,3779.421277,5112.78309,5257.716925,5248.152774,34064.236283,6.4,6.4
p3702,,C12 H5 N4 O P,Full match,No results,No results,No results,No results,No results,1.15,252.02039,...,1809.39338,28695.953183,6014.48284,112197.881939,1738.518821,7674.30238,1616.330299,37309.011028,5.2,3.2
p3703,,C25 H53 N2 O16 P3 S5,Full match,No results,No results,No results,No results,No results,0.25,890.12137,...,38854.578523,48452.878976,2228.747872,2141.515525,22113.724282,8809.518392,28628.893514,8574.670295,6.2,6.2


In [34]:
# Scratch check of the batch extraction (same steps as get_batch, without the
# final index rename).
df = placentaPos_raw
is_sample = df.columns.notna()
temp = df.iloc[0:1, is_sample].set_index("Sample ID").transpose()
temp.columns = ["batch"]
# "Area: <batch>_..." -> "<batch>"
temp["batch"] = [label.split(": ")[1].split("_")[0] for label in temp["batch"]]
temp
# test

Unnamed: 0_level_0,batch
nan,Unnamed: 1_level_1
Pooled Cntrl,062323
Pooled Cntrl,062323
Pooled Cntrl,062323
Pooled Cntrl,062323
Pooled Cntrl,062323
...,...
DP3-0409,032425
DP3-0416,032425
DP3-0419,032425
DP3-0420,032425


In [44]:
# Scratch check of the compound-metadata selection: keep the columns with
# missing labels and name them from the first row.
df = placentaPos_raw
not_sample = df.columns.isna()
temp = df.iloc[:, not_sample]
temp.columns = temp.iloc[0]
# Disabled while testing:
#   temp = temp.drop(temp.index[0])
#   temp.index[0] = "01"  # two molecules not in the export order
#   temp.index[1] = "02"  # slightly different for each file
temp

Export Order,Name,Formula,Annot. Source: Predicted Compositions,Annot. Source: mzCloud Search,Annot. Source: mzVault Search,Annot. Source: Metabolika Search,Annot. Source: ChemSpider Search,Annot. Source: MassList Search,Annot. DeltaMass [ppm],Calc. MW,...,# Usable QC,RSD QC Areas [%],RSD Corr. QC Areas [%],Mass List Match: HMDB All metabolites (v5. 217719 cpds),Mass List Match: PFAS_NIST,Mass List Match: Natural Products Atlas 2023_06,Mass List Match: Polar ISTD,MS2,Peak Rating (Max.),Peak Rating QC (Max.)
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Export Order,Name,Formula,Annot. Source: Predicted Compositions,Annot. Source: mzCloud Search,Annot. Source: mzVault Search,Annot. Source: Metabolika Search,Annot. Source: ChemSpider Search,Annot. Source: MassList Search,Annot. DeltaMass [ppm],Calc. MW,...,# Usable QC,RSD QC Areas [%],RSD Corr. QC Areas [%],Mass List Match: HMDB All metabolites (v5. 217...,Mass List Match: PFAS_NIST,Mass List Match: Natural Products Atlas 2023_06,Mass List Match: Polar ISTD,MS2,Peak Rating (Max.),Peak Rating QC (Max.)
,D3-alanine-ISTD,,,,,,,,,,...,,,,,,,,,,
,13C1-creatinine_ISTD,,,,,,,,,,...,,,,,,,,,,
p1,,C4 H4 O7 P2,Full match,No results,No results,No results,No match,No match,4.39,225.94422,...,13,5,1,Single match found,No matches found,No matches found,No matches found,DDA for preferred ion,9.2,5.5
p2,Phosphate,H3 O4 P,No results,No results,No results,Full match,Full match,Full match,-0.08,97.97689,...,13,22,7,Multiple matches found,No matches found,No matches found,No matches found,DDA for other ion,8,7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
p3700,,C26 H38 N2 O11 S,Full match,No results,No results,No results,No match,No match,3.47,586.22167,...,13,14,3,No matches found,No matches found,Single match found,No matches found,DDA for preferred ion,5.9,5.8
p3701,"1,2,4-Trimethylbenzene",C9 H12,Full match,No results,No results,No results,Partial match,Full match,-0.79,120.09381,...,8,21,7,Multiple matches found,No matches found,Single match found,No matches found,No MS2,6.4,6.4
p3702,,C12 H5 N4 O P,Full match,No results,No results,No results,No results,No results,1.15,252.02039,...,8,24,6,No matches found,No matches found,No matches found,No matches found,No MS2,5.2,3.2
p3703,,C25 H53 N2 O16 P3 S5,Full match,No results,No results,No results,No results,No results,0.25,890.12137,...,10,27,7,No matches found,No matches found,No matches found,No matches found,No MS2,6.2,6.2


In [5]:
# Reload the placenta POS sheet without a header row, drop fully empty rows,
# and clean it with clean_df.
placentaPos_raw = pd.read_excel(
    '/Users/kaylaxu/Desktop/data/raw_data/050725_Sadovsky DP3 Placenta Polar Untargeted_ALL copy.xlsx',
    sheet_name="POS Compounds",
    header=None,
).dropna(how='all')
df = clean_df(placentaPos_raw)
df

2,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,...,DP3-0398E,DP3-0399,DP3-0404,DP3-0409,DP3-0416,DP3-0419,DP3-0420,DP3-0423E,NaN,NaN
nan,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Export Order,Name,Formula,Annot. Source: Predicted Compositions,Annot. Source: mzCloud Search,Annot. Source: mzVault Search,Annot. Source: Metabolika Search,Annot. Source: ChemSpider Search,Annot. Source: MassList Search,Annot. DeltaMass [ppm],Calc. MW,...,Area: 032425_Sadovsky_Polar_Pos_45.raw (F115),Area: 032425_Sadovsky_Polar_Pos_46.raw (F116),Area: 032425_Sadovsky_Polar_Pos_47.raw (F117),Area: 032425_Sadovsky_Polar_Pos_48.raw (F118),Area: 032425_Sadovsky_Polar_Pos_49.raw (F119),Area: 032425_Sadovsky_Polar_Pos_50.raw (F120),Area: 032425_Sadovsky_Polar_Pos_51.raw (F121),Area: 032425_Sadovsky_Polar_Pos_52.raw (F122),Peak Rating (Max.),Peak Rating QC (Max.)
01,D3-alanine-ISTD,,,,,,,,,,...,401151.873605,373625.231405,364593.320492,399287.829141,435095.699504,272278.020734,436893.81994,354308.6104,,
02,13C1-creatinine_ISTD,,,,,,,,,,...,22865033.147103,23508151.282216,23660342.335612,18189295.140684,22990171.893712,18648420.254607,22036274.984124,19718638.033679,,
p1,,C4 H4 O7 P2,Full match,No results,No results,No results,No match,No match,4.39,225.94422,...,1299977851.71867,1319932558.19584,1415801523.75981,1354812550.03793,1246002605.39726,1280463349.87845,1365353348.293,1331074984.32775,9.2,5.5
p2,Phosphate,H3 O4 P,No results,No results,No results,Full match,Full match,Full match,-0.08,97.97689,...,284659338.91841,224398236.582277,260621107.991995,152190993.760215,144181458.718533,259967809.838309,118226509.757648,297492629.450178,8,7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
p3700,,C26 H38 N2 O11 S,Full match,No results,No results,No results,No match,No match,3.47,586.22167,...,38495.799017,59486.156988,43756.364913,30018.671239,23856.126696,50097.705991,50517.907992,37939.473894,5.9,5.8
p3701,"1,2,4-Trimethylbenzene",C9 H12,Full match,No results,No results,No results,Partial match,Full match,-0.79,120.09381,...,7232.991172,6344.848361,4051.1737,3779.421277,5112.78309,5257.716925,5248.152774,34064.236283,6.4,6.4
p3702,,C12 H5 N4 O P,Full match,No results,No results,No results,No results,No results,1.15,252.02039,...,1809.39338,28695.953183,6014.48284,112197.881939,1738.518821,7674.30238,1616.330299,37309.011028,5.2,3.2
p3703,,C25 H53 N2 O16 P3 S5,Full match,No results,No results,No results,No results,No results,0.25,890.12137,...,38854.578523,48452.878976,2228.747872,2141.515525,22113.724282,8809.518392,28628.893514,8574.670295,6.2,6.2


In [8]:
# Check get_batch on `df` and display the resulting batch table.
b =get_batch(df)
b

Unnamed: 0_level_0,batch
Sample_ID,Unnamed: 1_level_1
Pooled Cntrl,062323
Pooled Cntrl,062323
Pooled Cntrl,062323
Pooled Cntrl,062323
Pooled Cntrl,062323
...,...
DP3-0409,032425
DP3-0416,032425
DP3-0419,032425
DP3-0420,032425


In [9]:
# Check get_compounds on `df` and display the compound metadata table.
t = get_compounds(df)
t

Export Order,Name,Formula,Annot. Source: Predicted Compositions,Annot. Source: mzCloud Search,Annot. Source: mzVault Search,Annot. Source: Metabolika Search,Annot. Source: ChemSpider Search,Annot. Source: MassList Search,Annot. DeltaMass [ppm],Calc. MW,...,# Usable QC,RSD QC Areas [%],RSD Corr. QC Areas [%],Mass List Match: HMDB All metabolites (v5. 217719 cpds),Mass List Match: PFAS_NIST,Mass List Match: Natural Products Atlas 2023_06,Mass List Match: Polar ISTD,MS2,Peak Rating (Max.),Peak Rating QC (Max.)
Export Order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
01,D3-alanine-ISTD,,,,,,,,,,...,,,,,,,,,,
02,13C1-creatinine_ISTD,,,,,,,,,,...,,,,,,,,,,
p1,,C4 H4 O7 P2,Full match,No results,No results,No results,No match,No match,4.39,225.94422,...,13,5,1,Single match found,No matches found,No matches found,No matches found,DDA for preferred ion,9.2,5.5
p2,Phosphate,H3 O4 P,No results,No results,No results,Full match,Full match,Full match,-0.08,97.97689,...,13,22,7,Multiple matches found,No matches found,No matches found,No matches found,DDA for other ion,8,7
p3,Acetyl-L-carnitine,C9 H17 N O4,Full match,Full match,No results,No results,Full match,Full match,-0.18,203.11572,...,13,8,2,Multiple matches found,No matches found,No matches found,No matches found,DDA for preferred ion,8.6,6.6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
p3700,,C26 H38 N2 O11 S,Full match,No results,No results,No results,No match,No match,3.47,586.22167,...,13,14,3,No matches found,No matches found,Single match found,No matches found,DDA for preferred ion,5.9,5.8
p3701,"1,2,4-Trimethylbenzene",C9 H12,Full match,No results,No results,No results,Partial match,Full match,-0.79,120.09381,...,8,21,7,Multiple matches found,No matches found,Single match found,No matches found,No MS2,6.4,6.4
p3702,,C12 H5 N4 O P,Full match,No results,No results,No results,No results,No results,1.15,252.02039,...,8,24,6,No matches found,No matches found,No matches found,No matches found,No MS2,5.2,3.2
p3703,,C25 H53 N2 O16 P3 S5,Full match,No results,No results,No results,No results,No results,0.25,890.12137,...,10,27,7,No matches found,No matches found,No matches found,No matches found,No MS2,6.2,6.2


In [10]:
# Check get_expression on `df`, labelling compounds with the index of `t`.
x = get_expression(df, t.index)
x

Export Order,01,02,p1,p2,p3,p4,p5,p6,p7,p8,...,p3695,p3696,p3697,p3698,p3699,p3700,p3701,p3702,p3703,p3704
Sample_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Pooled Cntrl,,,1529697732.10807,960934888.044903,696936755.396744,627363570.926264,449668831.673356,162770297.257194,165682327.3918,272186578.146829,...,27488.284023,7203.65275,7677.504597,1342.107419,9610.930373,39079.646816,32186.048274,1601.922548,4823.045917,2013.510488
Pooled Cntrl,,,1368644592.09096,1017308011.36853,729792167.056774,621643667.284019,403261823.48879,165892722.199743,165892722.199743,275887926.754399,...,23529.385751,7018.815397,10839.720606,1334.575697,2107.552582,37595.24349,14915.13476,1483.575237,4975.115785,1822.346712
Pooled Cntrl,,,1529095145.48298,1024927927.19107,753789577.578159,652890742.20923,417611579.673277,169149197.147215,169113596.962671,279940341.598381,...,32523.477547,6763.429783,16598.414538,1365.399845,9541.553119,41865.668458,30473.225647,1565.337035,10850.491229,1234.04052
Pooled Cntrl,,,1594294947.22906,1061045171.20838,772700856.622067,661938012.249315,366175193.09328,167827940.733792,167827940.733792,291608557.228313,...,35486.556219,5281.358186,6002.694725,1960.021509,3823.302236,27640.514842,34969.834441,2063.441685,3151.114966,1291.215513
Pooled Cntrl,,,1558061941.6777,1075289478.05843,765537519.039136,662626010.589532,332441499.966989,163415873.214918,165649181.253878,293934261.957957,...,36362.8842,5176.081144,3629.736853,1781.069954,3374.391045,23745.862385,23107.849268,9204.205117,5361.216328,1412.393842
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
DP3-0409,399287.829141,18189295.140684,1354812550.03793,152190993.760215,532127285.031174,16176491.955754,484760406.100637,2317691.90016,2798594.164069,68270486.335193,...,2476.141617,1718.449783,47208.704015,21672.795377,13064.466449,30018.671239,3779.421277,112197.881939,2141.515525,4221.543097
DP3-0416,435095.699504,22990171.893712,1246002605.39726,144181458.718533,370908093.932855,18217452.812185,396853229.594914,2166746.231881,2166746.231881,116052676.064916,...,6645.010324,5135.848653,43469.674374,2321.027123,36859.753509,23856.126696,5112.78309,1738.518821,22113.724282,1038.665688
DP3-0419,272278.020734,18648420.254607,1280463349.87845,259967809.838309,759781534.707318,28312429.832977,692531193.914315,2621542.013437,2621542.013437,215965292.405955,...,12111.085549,13529.689066,21175.352211,12954.678928,51933.051727,50097.705991,5257.716925,7674.30238,8809.518392,1021.711583
DP3-0420,436893.81994,22036274.984124,1365353348.293,118226509.757648,434586591.750981,16210035.846246,479596289.054038,2465282.108742,2465282.108742,136756255.468873,...,10298.368202,6236.180483,48622.160435,6333.78612,48198.68169,50517.907992,5248.152774,1616.330299,28628.893514,12789.849155


In [8]:
# Scratch check of the first get_expression steps: keep the labelled columns
# and discard "Sample ID". The header row is still present here.
is_sample = df.columns.notna()
temp = df.iloc[:, is_sample]
temp = temp.drop(columns="Sample ID")
# Remaining steps left disabled while testing:
#   temp.index = ids
#   temp = temp.transpose()
#   temp.index = temp.index.rename("Sample_ID")
temp


2,Pooled Cntrl,Pooled Cntrl,Pooled Cntrl,Pooled Cntrl,Pooled Cntrl,Pooled Cntrl,Pooled Cntrl,Pooled Cntrl,Pooled Cntrl,Pooled Cntrl,...,DP3-0387,DP3-0389,DP3-0398E,DP3-0399,DP3-0404,DP3-0409,DP3-0416,DP3-0419,DP3-0420,DP3-0423E
nan,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Export Order,Area: 062323_Sadovsky_placenta_pospool1.raw (F65),Area: 062323_Sadovsky_placenta_pospool2.raw (F66),Area: 062323_Sadovsky_placenta_pospool3.raw (F67),Area: 062323_Sadovsky_placenta_pospool4.raw (F68),Area: 062323_Sadovsky_placenta_pospool5.raw (F69),Area: 062323_Sadovsky_placenta_pospool6.raw (F70),Area: 032425_Sadovsky_Polar_Pos_Pool1.raw (F134),Area: 032425_Sadovsky_Polar_Pos_Pool2.raw (F135),Area: 032425_Sadovsky_Polar_Pos_Pool3.raw (F136),Area: 032425_Sadovsky_Polar_Pos_Pool4.raw (F137),...,Area: 032425_Sadovsky_Polar_Pos_43.raw (F113),Area: 032425_Sadovsky_Polar_Pos_44.raw (F114),Area: 032425_Sadovsky_Polar_Pos_45.raw (F115),Area: 032425_Sadovsky_Polar_Pos_46.raw (F116),Area: 032425_Sadovsky_Polar_Pos_47.raw (F117),Area: 032425_Sadovsky_Polar_Pos_48.raw (F118),Area: 032425_Sadovsky_Polar_Pos_49.raw (F119),Area: 032425_Sadovsky_Polar_Pos_50.raw (F120),Area: 032425_Sadovsky_Polar_Pos_51.raw (F121),Area: 032425_Sadovsky_Polar_Pos_52.raw (F122)
01,,,,,,,,,,,...,454705.358703,436747.959268,401151.873605,373625.231405,364593.320492,399287.829141,435095.699504,272278.020734,436893.81994,354308.6104
02,,,,,,,,,,,...,19094360.053184,27696422.442596,22865033.147103,23508151.282216,23660342.335612,18189295.140684,22990171.893712,18648420.254607,22036274.984124,19718638.033679
p1,1529697732.10807,1368644592.09096,1529095145.48298,1594294947.22906,1558061941.6777,1310787636.26796,1328624426.10309,1344193102.16676,1364611344.9006,1286360927.71433,...,1400241997.33195,1415624952.17838,1299977851.71867,1319932558.19584,1415801523.75981,1354812550.03793,1246002605.39726,1280463349.87845,1365353348.293,1331074984.32775
p2,960934888.044903,1017308011.36853,1024927927.19107,1061045171.20838,1075289478.05843,1139913107.67262,124139614.511546,227930546.262139,88045281.776889,248049614.116663,...,155063633.967029,218510011.336255,284659338.91841,224398236.582277,260621107.991995,152190993.760215,144181458.718533,259967809.838309,118226509.757648,297492629.450178
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
p3700,39079.646816,37595.24349,41865.668458,27640.514842,23745.862385,28983.02243,49275.668837,45309.560097,42586.358396,48846.685818,...,58239.195285,51006.320238,38495.799017,59486.156988,43756.364913,30018.671239,23856.126696,50097.705991,50517.907992,37939.473894
p3701,32186.048274,14915.13476,30473.225647,34969.834441,23107.849268,37536.088718,7480.893067,3667.167423,3550.747425,3968.712862,...,9320.302387,12289.337498,7232.991172,6344.848361,4051.1737,3779.421277,5112.78309,5257.716925,5248.152774,34064.236283
p3702,1601.922548,1483.575237,1565.337035,2063.441685,9204.205117,1842.760244,4239.181509,4944.474686,3977.297126,12219.016851,...,2253.014273,1309.799457,1809.39338,28695.953183,6014.48284,112197.881939,1738.518821,7674.30238,1616.330299,37309.011028
p3703,4823.045917,4975.115785,10850.491229,3151.114966,5361.216328,6775.526502,24014.717468,16646.426722,26260.233556,15190.699689,...,30129.065414,42199.254867,38854.578523,48452.878976,2228.747872,2141.515525,22113.724282,8809.518392,28628.893514,8574.670295


In [7]:
# Display the current `df` for inspection.
df

2,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,...,DP3-0398E,DP3-0399,DP3-0404,DP3-0409,DP3-0416,DP3-0419,DP3-0420,DP3-0423E,NaN,NaN
nan,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Export Order,Name,Formula,Annot. Source: Predicted Compositions,Annot. Source: mzCloud Search,Annot. Source: mzVault Search,Annot. Source: Metabolika Search,Annot. Source: ChemSpider Search,Annot. Source: MassList Search,Annot. DeltaMass [ppm],Calc. MW,...,Area: 032425_Sadovsky_Polar_Pos_45.raw (F115),Area: 032425_Sadovsky_Polar_Pos_46.raw (F116),Area: 032425_Sadovsky_Polar_Pos_47.raw (F117),Area: 032425_Sadovsky_Polar_Pos_48.raw (F118),Area: 032425_Sadovsky_Polar_Pos_49.raw (F119),Area: 032425_Sadovsky_Polar_Pos_50.raw (F120),Area: 032425_Sadovsky_Polar_Pos_51.raw (F121),Area: 032425_Sadovsky_Polar_Pos_52.raw (F122),Peak Rating (Max.),Peak Rating QC (Max.)
01,D3-alanine-ISTD,,,,,,,,,,...,401151.873605,373625.231405,364593.320492,399287.829141,435095.699504,272278.020734,436893.81994,354308.6104,,
02,13C1-creatinine_ISTD,,,,,,,,,,...,22865033.147103,23508151.282216,23660342.335612,18189295.140684,22990171.893712,18648420.254607,22036274.984124,19718638.033679,,
p1,,C4 H4 O7 P2,Full match,No results,No results,No results,No match,No match,4.39,225.94422,...,1299977851.71867,1319932558.19584,1415801523.75981,1354812550.03793,1246002605.39726,1280463349.87845,1365353348.293,1331074984.32775,9.2,5.5
p2,Phosphate,H3 O4 P,No results,No results,No results,Full match,Full match,Full match,-0.08,97.97689,...,284659338.91841,224398236.582277,260621107.991995,152190993.760215,144181458.718533,259967809.838309,118226509.757648,297492629.450178,8,7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
p3700,,C26 H38 N2 O11 S,Full match,No results,No results,No results,No match,No match,3.47,586.22167,...,38495.799017,59486.156988,43756.364913,30018.671239,23856.126696,50097.705991,50517.907992,37939.473894,5.9,5.8
p3701,"1,2,4-Trimethylbenzene",C9 H12,Full match,No results,No results,No results,Partial match,Full match,-0.79,120.09381,...,7232.991172,6344.848361,4051.1737,3779.421277,5112.78309,5257.716925,5248.152774,34064.236283,6.4,6.4
p3702,,C12 H5 N4 O P,Full match,No results,No results,No results,No results,No results,1.15,252.02039,...,1809.39338,28695.953183,6014.48284,112197.881939,1738.518821,7674.30238,1616.330299,37309.011028,5.2,3.2
p3703,,C25 H53 N2 O16 P3 S5,Full match,No results,No results,No results,No results,No results,0.25,890.12137,...,38854.578523,48452.878976,2228.747872,2141.515525,22113.724282,8809.518392,28628.893514,8574.670295,6.2,6.2
